30002 lines
4.1 MiB
30002 lines
4.1 MiB
nohup: ignoring input
|
|
[Episode 10] reward=-72998586.2 actor_loss=0.3017 critic_loss=138080054272.0000 entropy=4.2500 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1009 front_blocked=0
|
|
[Episode 20] reward=-53663304.6 actor_loss=0.1649 critic_loss=125096317052.1212 entropy=4.2560 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 20] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-659751.9 mean_steps=11.1
|
|
[Episode 30] reward=-67596580.2 actor_loss=0.1236 critic_loss=135641300560.8421 entropy=4.2587 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 40] reward=-49633920.3 actor_loss=0.0966 critic_loss=129638047926.0444 entropy=4.2626 approx_kl=0.0079 kl_stop=0 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 40] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442243.9 mean_steps=14.2
|
|
[Episode 50] reward=-47055867.8 actor_loss=0.1068 critic_loss=123775357542.4000 entropy=4.2568 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 60] reward=-46113177.3 actor_loss=0.1068 critic_loss=121823939606.7556 entropy=4.2502 approx_kl=0.0054 kl_stop=0 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 60] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484432.8 mean_steps=13.9
|
|
[Episode 70] reward=-46698905.7 actor_loss=0.0653 critic_loss=124424508211.2000 entropy=4.2628 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 80] reward=-57590511.7 actor_loss=0.0833 critic_loss=128289309114.8108 entropy=4.2758 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 80] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-525250.4 mean_steps=14.2
|
|
[Episode 90] reward=-44219070.8 actor_loss=0.0416 critic_loss=122204111088.9412 entropy=4.2887 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 100] reward=-77302702.0 actor_loss=0.0807 critic_loss=142857841322.6667 entropy=4.2903 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1009 front_blocked=0
|
|
[Eval 100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-598639.7 mean_steps=12.8
|
|
[Episode 110] reward=-46422296.0 actor_loss=0.0606 critic_loss=122285320192.0000 entropy=4.2979 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 120] reward=-50052675.5 actor_loss=0.0932 critic_loss=122218433974.8571 entropy=4.3092 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-375009.1 mean_steps=14.9
|
|
[Episode 130] reward=-42606032.7 actor_loss=0.0601 critic_loss=119346319484.1212 entropy=4.3054 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Episode 140] reward=-53786573.3 actor_loss=0.0747 critic_loss=125960888891.5349 entropy=4.3121 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-636204.8 mean_steps=12.1
|
|
[Episode 150] reward=-62150816.4 actor_loss=0.0688 critic_loss=133417892475.5862 entropy=4.3150 approx_kl=0.0100 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 160] reward=-51206563.5 actor_loss=0.0895 critic_loss=127842947571.5122 entropy=4.3232 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-333812.4 mean_steps=15.2
|
|
[Episode 170] reward=-50081690.4 actor_loss=0.0519 critic_loss=126633011275.8519 entropy=4.3348 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 180] reward=-56362263.5 actor_loss=0.1066 critic_loss=130564375040.0000 entropy=4.3507 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400099.9 mean_steps=14.2
|
|
[Episode 190] reward=-60468271.4 actor_loss=0.1266 critic_loss=129338251166.4762 entropy=4.3570 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 200] reward=-59245290.5 actor_loss=0.0943 critic_loss=132056779811.3103 entropy=4.3576 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502954.8 mean_steps=13.4
|
|
[Episode 210] reward=-43706327.8 actor_loss=0.0994 critic_loss=121341031671.1724 entropy=4.3593 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 220] reward=-41955441.7 actor_loss=0.1056 critic_loss=120536499814.4000 entropy=4.3749 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526870.4 mean_steps=12.5
|
|
[Episode 230] reward=-56673476.9 actor_loss=0.0769 critic_loss=130400351810.7826 entropy=4.3650 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 240] reward=-46037807.9 actor_loss=0.0824 critic_loss=122714100447.1795 entropy=4.3862 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 240] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-333726.9 mean_steps=15.5
|
|
[Episode 250] reward=-44570915.8 actor_loss=0.0533 critic_loss=121985424203.2941 entropy=4.3874 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 260] reward=-67592261.0 actor_loss=0.1019 critic_loss=133293201817.6000 entropy=4.3925 approx_kl=0.0105 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Eval 260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383040.6 mean_steps=15.2
|
|
[Episode 270] reward=-51418332.7 actor_loss=0.0685 critic_loss=123503266560.0000 entropy=4.3916 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 280] reward=-43717392.2 actor_loss=0.0605 critic_loss=119053747159.0400 entropy=4.4039 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-464633.3 mean_steps=13.3
|
|
[Episode 290] reward=-53731592.6 actor_loss=0.0880 critic_loss=129133550055.6190 entropy=4.4206 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 300] reward=-49159773.9 actor_loss=0.0724 critic_loss=125320039992.8889 entropy=4.4136 approx_kl=0.0101 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378976.0 mean_steps=15.2
|
|
[Episode 310] reward=-48312066.2 actor_loss=0.0827 critic_loss=124921762182.0952 entropy=4.4168 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 320] reward=-53376161.7 actor_loss=0.0957 critic_loss=127305752932.1739 entropy=4.4267 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579570.8 mean_steps=12.1
|
|
[Episode 330] reward=-46968497.9 actor_loss=0.0820 critic_loss=120225463356.2353 entropy=4.4278 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 340] reward=-53077549.0 actor_loss=0.0736 critic_loss=125604483072.0000 entropy=4.4324 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-381021.5 mean_steps=15.4
|
|
[Episode 350] reward=-54568524.0 actor_loss=0.0921 critic_loss=128628627683.5556 entropy=4.4389 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 360] reward=-68228970.6 actor_loss=0.0849 critic_loss=133993314862.5455 entropy=4.4480 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421356.2 mean_steps=14.3
|
|
[Episode 370] reward=-45769179.1 actor_loss=0.0783 critic_loss=121957795157.3333 entropy=4.4490 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 380] reward=-53894899.9 actor_loss=0.0935 critic_loss=126139775337.4118 entropy=4.4635 approx_kl=0.0102 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 380] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-251191.5 mean_steps=15.8
|
|
[Episode 390] reward=-40347186.8 actor_loss=0.0946 critic_loss=113778008808.7273 entropy=4.4677 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 400] reward=-56006298.9 actor_loss=0.0960 critic_loss=126746072157.0909 entropy=4.4847 approx_kl=0.0105 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-385721.4 mean_steps=14.1
|
|
[Episode 410] reward=-52433080.8 actor_loss=0.1099 critic_loss=124657345957.6471 entropy=4.4996 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 420] reward=-47826936.5 actor_loss=0.0597 critic_loss=120730056824.4706 entropy=4.5029 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 420] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-699170.2 mean_steps=10.6
|
|
[Episode 430] reward=-41801159.8 actor_loss=0.0660 critic_loss=122075588697.0435 entropy=4.5102 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 440] reward=-51852845.5 actor_loss=0.0697 critic_loss=126575427584.0000 entropy=4.5110 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555929.7 mean_steps=12.9
|
|
[Episode 450] reward=-48562589.4 actor_loss=0.0538 critic_loss=118882009770.6667 entropy=4.5239 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 460] reward=-55234582.7 actor_loss=0.1129 critic_loss=126805633954.9091 entropy=4.5206 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-444017.8 mean_steps=15.4
|
|
[Episode 470] reward=-57283386.9 actor_loss=0.0813 critic_loss=127480594822.0952 entropy=4.5199 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 480] reward=-45270585.6 actor_loss=0.0728 critic_loss=121532391424.0000 entropy=4.5294 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-489264.6 mean_steps=12.4
|
|
[Episode 490] reward=-46300794.5 actor_loss=0.0619 critic_loss=123200349481.2903 entropy=4.5341 approx_kl=0.0100 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 500] reward=-43910019.8 actor_loss=0.0761 critic_loss=117153194831.4483 entropy=4.5294 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-604429.2 mean_steps=11.5
|
|
[Episode 510] reward=-57350371.1 actor_loss=0.0609 critic_loss=130192431706.3529 entropy=4.5404 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 520] reward=-53688062.4 actor_loss=0.0760 critic_loss=125707627315.2000 entropy=4.5543 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-320757.1 mean_steps=15.4
|
|
[Episode 530] reward=-56823920.7 actor_loss=0.0576 critic_loss=130341251229.5385 entropy=4.5522 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 540] reward=-48132661.0 actor_loss=0.0631 critic_loss=124058215936.0000 entropy=4.5613 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474459.6 mean_steps=13.2
|
|
[Episode 550] reward=-47221935.5 actor_loss=0.0753 critic_loss=124212877393.9200 entropy=4.5633 approx_kl=0.0097 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 560] reward=-56484280.6 actor_loss=0.0670 critic_loss=128806450972.4444 entropy=4.5733 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430183.1 mean_steps=14.4
|
|
[Episode 570] reward=-49995150.6 actor_loss=0.0813 critic_loss=120400907702.8571 entropy=4.5912 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 580] reward=-66161305.0 actor_loss=0.0508 critic_loss=134859467264.0000 entropy=4.5949 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 580] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-320377.0 mean_steps=15.4
|
|
[Episode 590] reward=-47910004.0 actor_loss=0.0624 critic_loss=123571607875.3684 entropy=4.6140 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 600] reward=-51850694.6 actor_loss=0.0520 critic_loss=124033825698.9091 entropy=4.6179 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 600] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-641574.6 mean_steps=11.1
|
|
[Episode 610] reward=-61692198.6 actor_loss=0.0718 critic_loss=129761947739.0222 entropy=4.6264 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 620] reward=-64394874.0 actor_loss=0.0828 critic_loss=135451265469.2174 entropy=4.6340 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436043.0 mean_steps=14.2
|
|
[Episode 630] reward=-63710481.2 actor_loss=0.0667 critic_loss=138342157880.8889 entropy=4.6394 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 640] reward=-49855998.0 actor_loss=0.0785 critic_loss=122099866009.6000 entropy=4.6499 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-338261.8 mean_steps=15.4
|
|
[Episode 650] reward=-61077476.6 actor_loss=0.0731 critic_loss=128685489902.9333 entropy=4.6632 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 660] reward=-66995918.8 actor_loss=0.0751 critic_loss=134115317880.4706 entropy=4.6737 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540761.2 mean_steps=12.4
|
|
[Episode 670] reward=-44843363.7 actor_loss=0.0564 critic_loss=122180784128.0000 entropy=4.6820 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 680] reward=-44124394.5 actor_loss=0.0982 critic_loss=121139932943.0588 entropy=4.6908 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480481.8 mean_steps=13.8
|
|
[Episode 690] reward=-50991151.3 actor_loss=0.0791 critic_loss=128689696452.9231 entropy=4.7021 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 700] reward=-57815228.2 actor_loss=0.0670 critic_loss=126640117917.5385 entropy=4.7081 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-608107.6 mean_steps=11.3
|
|
[Episode 710] reward=-57534415.2 actor_loss=0.0509 critic_loss=127584470016.0000 entropy=4.7070 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 720] reward=-53784085.8 actor_loss=0.0767 critic_loss=125846335728.9412 entropy=4.7197 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520095.2 mean_steps=13.3
|
|
[Episode 730] reward=-49154441.9 actor_loss=0.1085 critic_loss=121495267800.6154 entropy=4.7255 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 740] reward=-56145577.6 actor_loss=0.0856 critic_loss=125386126034.8235 entropy=4.7338 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-426957.1 mean_steps=13.8
|
|
[Episode 750] reward=-49157921.6 actor_loss=0.0754 critic_loss=122083749515.6364 entropy=4.7397 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 760] reward=-53667293.2 actor_loss=0.0760 critic_loss=123926680462.2222 entropy=4.7559 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-599224.7 mean_steps=12.9
|
|
[Episode 770] reward=-48659685.1 actor_loss=0.0455 critic_loss=122377802043.0769 entropy=4.7593 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 780] reward=-50780690.8 actor_loss=0.0666 critic_loss=124193601693.5385 entropy=4.7639 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453661.8 mean_steps=13.6
|
|
[Episode 790] reward=-64936869.5 actor_loss=0.0943 critic_loss=135974655317.3333 entropy=4.7687 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0977 front_blocked=0
|
|
[Episode 800] reward=-53383371.7 actor_loss=0.1071 critic_loss=123379683012.9231 entropy=4.7647 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-403145.7 mean_steps=14.9
|
|
[Episode 810] reward=-48200015.8 actor_loss=0.0662 critic_loss=122403719577.6000 entropy=4.7720 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 820] reward=-55990425.0 actor_loss=0.0658 critic_loss=128459384229.6471 entropy=4.7889 approx_kl=0.0097 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-416908.9 mean_steps=15.6
|
|
[Episode 830] reward=-45098262.0 actor_loss=0.0649 critic_loss=120583621409.3913 entropy=4.7931 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 840] reward=-52869078.0 actor_loss=0.0775 critic_loss=124161526232.6154 entropy=4.8025 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-631936.7 mean_steps=11.8
|
|
[Episode 850] reward=-55861209.1 actor_loss=0.0838 critic_loss=124002487864.8889 entropy=4.8060 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 860] reward=-60712468.0 actor_loss=0.0742 critic_loss=130732592090.0741 entropy=4.8154 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 860] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-268618.5 mean_steps=15.5
|
|
[Episode 870] reward=-44006541.8 actor_loss=0.0528 critic_loss=122740209956.5714 entropy=4.8227 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 880] reward=-54778212.9 actor_loss=0.0896 critic_loss=124946205549.7143 entropy=4.8347 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409136.3 mean_steps=14.9
|
|
[Episode 890] reward=-44683055.0 actor_loss=0.0692 critic_loss=123374749468.4444 entropy=4.8421 approx_kl=0.0098 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 900] reward=-56283325.0 actor_loss=0.0695 critic_loss=128001324373.3333 entropy=4.8477 approx_kl=0.0099 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467749.8 mean_steps=13.7
|
|
[Episode 910] reward=-42465155.6 actor_loss=0.0484 critic_loss=120555453480.9600 entropy=4.8575 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 920] reward=-44798283.5 actor_loss=0.0716 critic_loss=117414117376.0000 entropy=4.8627 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433326.1 mean_steps=14.3
|
|
[Episode 930] reward=-53948295.1 actor_loss=0.0632 critic_loss=126252388352.0000 entropy=4.8628 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 940] reward=-44635618.6 actor_loss=0.0807 critic_loss=118977285928.4211 entropy=4.8698 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540156.2 mean_steps=13.6
|
|
[Episode 950] reward=-46682761.8 actor_loss=0.0526 critic_loss=121637163248.9412 entropy=4.8708 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 960] reward=-40770140.0 actor_loss=0.0452 critic_loss=117916856320.0000 entropy=4.8739 approx_kl=0.0098 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 960] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-217438.0 mean_steps=16.1
|
|
[Episode 970] reward=-46426240.7 actor_loss=0.0914 critic_loss=124566997219.5556 entropy=4.8816 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 980] reward=-50487443.2 actor_loss=0.0640 critic_loss=122242542411.2941 entropy=4.8845 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508337.4 mean_steps=13.1
|
|
[Episode 990] reward=-52526183.1 actor_loss=0.0500 critic_loss=123104629853.0909 entropy=4.8911 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 1000] reward=-52166669.9 actor_loss=0.0536 critic_loss=127572502118.4000 entropy=4.8838 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 1000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-491245.6 mean_steps=12.1
|
|
[Episode 1010] reward=-50326575.4 actor_loss=0.0552 critic_loss=121800408502.8571 entropy=4.8869 approx_kl=0.0109 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 1020] reward=-56215026.5 actor_loss=0.0718 critic_loss=131672807316.2105 entropy=4.8975 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 1020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490199.6 mean_steps=13.9
|
|
[Episode 1030] reward=-48440898.7 actor_loss=0.0376 critic_loss=123637407451.4286 entropy=4.9175 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 1040] reward=-62251597.4 actor_loss=0.1034 critic_loss=134940279239.1111 entropy=4.9164 approx_kl=0.0104 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 1040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-404290.5 mean_steps=15.0
|
|
[Episode 1050] reward=-51213887.9 actor_loss=0.0970 critic_loss=124047354217.4118 entropy=4.9220 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 1060] reward=-48330299.9 actor_loss=0.0660 critic_loss=120712244428.8000 entropy=4.9286 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 1060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409924.1 mean_steps=14.2
|
|
[Episode 1070] reward=-49915024.8 actor_loss=0.0772 critic_loss=123853767436.1905 entropy=4.9424 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 1080] reward=-46696373.4 actor_loss=0.0532 critic_loss=121139769070.9333 entropy=4.9394 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 1080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-520272.2 mean_steps=11.8
|
|
[Episode 1090] reward=-56079126.5 actor_loss=0.0821 critic_loss=127425652872.5333 entropy=4.9511 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 1100] reward=-60148571.7 actor_loss=0.0546 critic_loss=132377950021.8182 entropy=4.9630 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 1100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-410335.3 mean_steps=13.3
|
|
[Episode 1110] reward=-55155575.2 actor_loss=0.0718 critic_loss=124707647247.0588 entropy=4.9709 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 1120] reward=-52932036.7 actor_loss=0.0595 critic_loss=128713842331.8261 entropy=4.9836 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 1120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425060.1 mean_steps=14.4
|
|
[Episode 1130] reward=-56095608.9 actor_loss=0.0730 critic_loss=127203219968.0000 entropy=4.9946 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 1140] reward=-57347510.2 actor_loss=0.1094 critic_loss=127794554880.0000 entropy=5.0051 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 1140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486891.6 mean_steps=13.1
|
|
[Episode 1150] reward=-49416311.9 actor_loss=0.0660 critic_loss=123019174518.1538 entropy=5.0136 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 1160] reward=-66028337.8 actor_loss=0.0543 critic_loss=131784529830.9565 entropy=5.0251 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 1160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-326710.3 mean_steps=15.4
|
|
[Episode 1170] reward=-53324782.0 actor_loss=0.0929 critic_loss=127227828224.0000 entropy=5.0236 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 1180] reward=-54982917.0 actor_loss=0.0686 critic_loss=127069949711.0588 entropy=5.0357 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 1180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538851.7 mean_steps=13.7
|
|
[Episode 1190] reward=-39580903.6 actor_loss=0.0661 critic_loss=112677232996.1739 entropy=5.0394 approx_kl=0.0099 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 1200] reward=-45700343.1 actor_loss=0.0529 critic_loss=124833938090.6667 entropy=5.0373 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 1200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-451248.7 mean_steps=13.1
|
|
[Episode 1210] reward=-57256411.4 actor_loss=0.1005 critic_loss=128235054019.7647 entropy=5.0403 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 1220] reward=-51509979.2 actor_loss=0.0717 critic_loss=124548089856.0000 entropy=5.0509 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 1220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448950.9 mean_steps=14.5
|
|
[Episode 1230] reward=-64815360.5 actor_loss=0.0911 critic_loss=134339378029.7143 entropy=5.0692 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 1240] reward=-49635609.3 actor_loss=0.0846 critic_loss=125225627921.0667 entropy=5.0722 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 1240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502372.5 mean_steps=12.8
|
|
[Episode 1250] reward=-44188636.0 actor_loss=0.0768 critic_loss=119677715251.2000 entropy=5.0791 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 1260] reward=-49508779.6 actor_loss=0.0583 critic_loss=122989072232.2963 entropy=5.0855 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 1260] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-640039.3 mean_steps=11.7
|
|
[Episode 1270] reward=-62964799.9 actor_loss=0.0928 critic_loss=131676798619.8261 entropy=5.0967 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 1280] reward=-60996379.5 actor_loss=0.0871 critic_loss=125743380187.4286 entropy=5.1112 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 1280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429937.4 mean_steps=14.2
|
|
[Episode 1290] reward=-50021769.2 actor_loss=0.0779 critic_loss=125852279125.3333 entropy=5.1176 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 1300] reward=-49289072.4 actor_loss=0.0761 critic_loss=123182530244.9231 entropy=5.1326 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 1300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-381000.0 mean_steps=14.1
|
|
[Episode 1310] reward=-49804141.6 actor_loss=0.0666 critic_loss=122503423772.4444 entropy=5.1344 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 1320] reward=-54338796.6 actor_loss=0.0701 critic_loss=127756262576.5517 entropy=5.1306 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 1320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458658.6 mean_steps=14.3
|
|
[Episode 1330] reward=-63912818.8 actor_loss=0.0952 critic_loss=131328620771.5556 entropy=5.1432 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Episode 1340] reward=-53073687.4 actor_loss=0.1006 critic_loss=126879496794.3529 entropy=5.1508 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 1340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551144.9 mean_steps=12.8
|
|
[Episode 1350] reward=-46861028.6 actor_loss=0.0559 critic_loss=122511516360.3478 entropy=5.1654 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 1360] reward=-44578089.0 actor_loss=0.0746 critic_loss=116022504106.6667 entropy=5.1689 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 1360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520783.4 mean_steps=13.2
|
|
[Episode 1370] reward=-56026278.5 actor_loss=0.0594 critic_loss=124577463713.1852 entropy=5.1868 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 1380] reward=-47447530.6 actor_loss=0.0668 critic_loss=121302119046.7368 entropy=5.1956 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 1380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-445316.9 mean_steps=12.8
|
|
[Episode 1390] reward=-52044807.5 actor_loss=0.0656 critic_loss=120879124480.0000 entropy=5.2029 approx_kl=0.0095 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 1400] reward=-45298107.2 actor_loss=0.0429 critic_loss=119380344504.3200 entropy=5.2079 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 1400] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-327415.5 mean_steps=14.9
|
|
[Episode 1410] reward=-60512899.8 actor_loss=0.0425 critic_loss=129239048548.1739 entropy=5.2113 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 1420] reward=-54732106.4 actor_loss=0.0673 critic_loss=128298629012.2105 entropy=5.2204 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 1420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431378.6 mean_steps=13.2
|
|
[Episode 1430] reward=-65731794.9 actor_loss=0.0639 critic_loss=136226406809.6000 entropy=5.2242 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 1440] reward=-54597841.8 actor_loss=0.1042 critic_loss=129503659874.4615 entropy=5.2385 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 1440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440524.5 mean_steps=13.5
|
|
[Episode 1450] reward=-53351059.1 actor_loss=0.0763 critic_loss=120099734621.0909 entropy=5.2444 approx_kl=0.0095 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 1460] reward=-59623620.7 actor_loss=0.0671 critic_loss=131624606573.7143 entropy=5.2547 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 1460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-656165.3 mean_steps=11.6
|
|
[Episode 1470] reward=-64022801.5 actor_loss=0.0777 critic_loss=132515345294.2222 entropy=5.2612 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 1480] reward=-59135802.5 actor_loss=0.1106 critic_loss=126075169611.2941 entropy=5.2651 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Eval 1480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-424140.7 mean_steps=14.8
|
|
[Episode 1490] reward=-46993889.7 actor_loss=0.0724 critic_loss=123577821476.5714 entropy=5.2663 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 1500] reward=-57804527.2 actor_loss=0.0779 critic_loss=130188445137.4545 entropy=5.2793 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 1500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498907.3 mean_steps=12.9
|
|
[Episode 1510] reward=-41109528.5 actor_loss=0.0414 critic_loss=117799376668.4444 entropy=5.2853 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 1520] reward=-41927614.8 actor_loss=0.0666 critic_loss=120115623526.4000 entropy=5.2900 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 1520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-410926.9 mean_steps=13.1
|
|
[Episode 1530] reward=-52949221.6 actor_loss=0.0570 critic_loss=124324159488.0000 entropy=5.2922 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 1540] reward=-58050963.8 actor_loss=0.0554 critic_loss=132056897290.2400 entropy=5.2874 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 1540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-617339.9 mean_steps=12.3
|
|
[Episode 1550] reward=-49178679.7 actor_loss=0.0629 critic_loss=124195752072.5333 entropy=5.2856 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 1560] reward=-41178467.6 actor_loss=0.0828 critic_loss=117855073962.6667 entropy=5.2891 approx_kl=0.0097 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 1560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624294.2 mean_steps=12.3
|
|
[Episode 1570] reward=-45620652.5 actor_loss=0.0723 critic_loss=116498778404.5714 entropy=5.2982 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 1580] reward=-47208027.9 actor_loss=0.0775 critic_loss=120627133644.8000 entropy=5.3023 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1580] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-366150.3 mean_steps=15.2
|
|
[Episode 1590] reward=-52395285.3 actor_loss=0.0719 critic_loss=124861188778.6667 entropy=5.3067 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 1600] reward=-63873217.5 actor_loss=0.0681 critic_loss=133297030680.3810 entropy=5.3096 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1600] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-576377.8 mean_steps=11.1
|
|
[Episode 1610] reward=-50191749.3 actor_loss=0.0757 critic_loss=122547012547.7647 entropy=5.3154 approx_kl=0.0109 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 1620] reward=-71984060.6 actor_loss=0.0813 critic_loss=137758454930.2857 entropy=5.3292 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Eval 1620] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-353674.1 mean_steps=15.9
|
|
[Episode 1630] reward=-63965774.3 actor_loss=0.0757 critic_loss=128789705386.6667 entropy=5.3270 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 1640] reward=-53106096.1 actor_loss=0.0545 critic_loss=127326730532.5714 entropy=5.3245 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 1640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-386520.0 mean_steps=14.0
|
|
[Episode 1650] reward=-56779038.5 actor_loss=0.0896 critic_loss=127133025219.7647 entropy=5.3231 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 1660] reward=-58765442.0 actor_loss=0.0808 critic_loss=125918771712.0000 entropy=5.3227 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 1660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-682718.0 mean_steps=11.9
|
|
[Episode 1670] reward=-62053449.1 actor_loss=0.0635 critic_loss=131275133168.9412 entropy=5.3233 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 1680] reward=-49726960.2 actor_loss=0.0628 critic_loss=125194938660.5714 entropy=5.3269 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 1680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526294.4 mean_steps=12.3
|
|
[Episode 1690] reward=-48097385.5 actor_loss=0.0859 critic_loss=117175851495.6190 entropy=5.3294 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 1700] reward=-47786809.0 actor_loss=0.0706 critic_loss=121594341052.6316 entropy=5.3363 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 1700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492691.6 mean_steps=13.1
|
|
[Episode 1710] reward=-48638459.4 actor_loss=0.0655 critic_loss=120386072791.5789 entropy=5.3349 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 1720] reward=-54549017.1 actor_loss=0.0998 critic_loss=120943239168.0000 entropy=5.3462 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 1720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543718.1 mean_steps=12.8
|
|
[Episode 1730] reward=-53075255.0 actor_loss=0.0671 critic_loss=126477641318.4000 entropy=5.3427 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 1740] reward=-57652317.4 actor_loss=0.0704 critic_loss=128345598882.9091 entropy=5.3445 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 1740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599723.9 mean_steps=12.3
|
|
[Episode 1750] reward=-58018890.7 actor_loss=0.0698 critic_loss=126060673347.3684 entropy=5.3630 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 1760] reward=-57919425.7 actor_loss=0.0487 critic_loss=126673139939.5556 entropy=5.3669 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 1760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432778.2 mean_steps=14.1
|
|
[Episode 1770] reward=-55265240.4 actor_loss=0.0556 critic_loss=123570158871.2727 entropy=5.3771 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 1780] reward=-47803506.0 actor_loss=0.0613 critic_loss=121406901816.8889 entropy=5.3835 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 1780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-636403.3 mean_steps=11.4
|
|
[Episode 1790] reward=-61144283.8 actor_loss=0.0757 critic_loss=131039108763.8261 entropy=5.3986 approx_kl=0.0100 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 1800] reward=-53394050.9 actor_loss=0.0675 critic_loss=124766785536.0000 entropy=5.3997 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 1800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451151.7 mean_steps=14.4
|
|
[Episode 1810] reward=-55595871.3 actor_loss=0.0588 critic_loss=129518106487.4667 entropy=5.3976 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 1820] reward=-48927408.8 actor_loss=0.0355 critic_loss=122573973690.1818 entropy=5.3943 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 1820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-522487.4 mean_steps=12.1
|
|
[Episode 1830] reward=-64994437.7 actor_loss=0.0672 critic_loss=133571438933.3333 entropy=5.4012 approx_kl=0.0099 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 1840] reward=-53725517.7 actor_loss=0.0576 critic_loss=123661685009.0667 entropy=5.4204 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 1840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-621991.6 mean_steps=11.6
|
|
[Episode 1850] reward=-51136283.9 actor_loss=0.0746 critic_loss=123396656878.9333 entropy=5.4370 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 1860] reward=-53111240.9 actor_loss=0.0679 critic_loss=120445360314.1818 entropy=5.4466 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 1860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442426.0 mean_steps=13.6
|
|
[Episode 1870] reward=-40262401.8 actor_loss=0.0463 critic_loss=118764973624.8889 entropy=5.4574 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 1880] reward=-44866848.9 actor_loss=0.0447 critic_loss=117750135739.7333 entropy=5.4596 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 1880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468068.9 mean_steps=12.7
|
|
[Episode 1890] reward=-60527872.4 actor_loss=0.0747 critic_loss=129819893564.9524 entropy=5.4649 approx_kl=0.0102 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 1900] reward=-53277331.1 actor_loss=0.0830 critic_loss=123930410188.8000 entropy=5.4668 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 1900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515893.8 mean_steps=12.3
|
|
[Episode 1910] reward=-47651528.4 actor_loss=0.0553 critic_loss=121196045251.7647 entropy=5.4698 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 1920] reward=-63778981.3 actor_loss=0.0847 critic_loss=135477977088.0000 entropy=5.4792 approx_kl=0.0100 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 1920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459036.4 mean_steps=14.0
|
|
[Episode 1930] reward=-46569590.2 actor_loss=0.0541 critic_loss=121250684245.3333 entropy=5.4925 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 1940] reward=-47666215.5 actor_loss=0.0460 critic_loss=121987912499.2000 entropy=5.4944 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 1940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-557729.3 mean_steps=13.3
|
|
[Episode 1950] reward=-71290411.3 actor_loss=0.0805 critic_loss=139036095938.5600 entropy=5.4970 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 1960] reward=-45824195.3 actor_loss=0.0582 critic_loss=119961156769.6842 entropy=5.4973 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 1960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-545310.8 mean_steps=11.9
|
|
[Episode 1970] reward=-62561995.8 actor_loss=0.0921 critic_loss=128791701865.4118 entropy=5.5091 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 1980] reward=-58051858.8 actor_loss=0.0787 critic_loss=126520387993.6000 entropy=5.5143 approx_kl=0.0102 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 1980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431468.1 mean_steps=13.2
|
|
[Episode 1990] reward=-65283626.6 actor_loss=0.0801 critic_loss=134127516330.6667 entropy=5.5151 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 2000] reward=-59229309.0 actor_loss=0.0681 critic_loss=127130049649.7778 entropy=5.5150 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 2000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524122.8 mean_steps=12.9
|
|
[Episode 2010] reward=-54596232.0 actor_loss=0.0764 critic_loss=120740965677.1765 entropy=5.5240 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 2020] reward=-58086157.4 actor_loss=0.0829 critic_loss=129013109760.0000 entropy=5.5275 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 2020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564077.5 mean_steps=12.5
|
|
[Episode 2030] reward=-54219025.9 actor_loss=0.0640 critic_loss=128521652410.1818 entropy=5.5349 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 2040] reward=-58614234.5 actor_loss=0.1075 critic_loss=128035455795.2000 entropy=5.5398 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 2040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484076.9 mean_steps=13.6
|
|
[Episode 2050] reward=-55922278.5 actor_loss=0.0664 critic_loss=128284885955.7647 entropy=5.5543 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 2060] reward=-55627108.2 actor_loss=0.1057 critic_loss=126244711992.8889 entropy=5.5565 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 2060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521897.2 mean_steps=13.0
|
|
[Episode 2070] reward=-59166313.8 actor_loss=0.0691 critic_loss=123912126464.0000 entropy=5.5664 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 2080] reward=-47058657.2 actor_loss=0.0472 critic_loss=120296501521.0667 entropy=5.5704 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 2080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-629397.2 mean_steps=11.3
|
|
[Episode 2090] reward=-53005608.5 actor_loss=0.0677 critic_loss=124603018519.2727 entropy=5.5808 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 2100] reward=-47790216.3 actor_loss=0.0630 critic_loss=120824129828.5714 entropy=5.5829 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 2100] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-278494.3 mean_steps=16.1
|
|
[Episode 2110] reward=-43842337.8 actor_loss=0.0559 critic_loss=115091411889.2308 entropy=5.5917 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 2120] reward=-43596699.8 actor_loss=0.0524 critic_loss=119212209018.4348 entropy=5.6008 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 2120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531036.3 mean_steps=13.1
|
|
[Episode 2130] reward=-57044803.0 actor_loss=0.0657 critic_loss=125174392711.5294 entropy=5.6090 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 2140] reward=-54491939.9 actor_loss=0.0661 critic_loss=121330810880.0000 entropy=5.6124 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 2140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-345087.8 mean_steps=14.3
|
|
[Episode 2150] reward=-49282883.8 actor_loss=0.0859 critic_loss=119696324765.5385 entropy=5.6165 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 2160] reward=-55079825.5 actor_loss=0.0533 critic_loss=125475479552.0000 entropy=5.6118 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 2160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-521655.4 mean_steps=12.1
|
|
[Episode 2170] reward=-51682578.3 actor_loss=0.0568 critic_loss=123632576804.5714 entropy=5.6120 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 2180] reward=-50020335.4 actor_loss=0.0578 critic_loss=118910512429.1765 entropy=5.6193 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 2180] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-331650.6 mean_steps=15.6
|
|
[Episode 2190] reward=-53706890.3 actor_loss=0.0842 critic_loss=122928294297.6000 entropy=5.6267 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 2200] reward=-47577051.3 actor_loss=0.0615 critic_loss=124569705403.7333 entropy=5.6298 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 2200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437280.8 mean_steps=13.8
|
|
[Episode 2210] reward=-56020259.5 actor_loss=0.0681 critic_loss=122930838771.8095 entropy=5.6392 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 2220] reward=-58043531.3 actor_loss=0.0794 critic_loss=124776720156.4444 entropy=5.6529 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 2220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399930.8 mean_steps=13.9
|
|
[Episode 2230] reward=-46026652.1 actor_loss=0.0386 critic_loss=117146306402.4615 entropy=5.6664 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 2240] reward=-43728121.1 actor_loss=0.0380 critic_loss=119278326897.7778 entropy=5.6842 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 2240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485097.6 mean_steps=13.4
|
|
[Episode 2250] reward=-46766134.2 actor_loss=0.0560 critic_loss=118027375838.6087 entropy=5.6791 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 2260] reward=-49259928.2 actor_loss=0.0653 critic_loss=122330059697.2308 entropy=5.6887 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 2260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-401076.8 mean_steps=13.9
|
|
[Episode 2270] reward=-51707862.9 actor_loss=0.0567 critic_loss=126868030532.2667 entropy=5.6916 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 2280] reward=-50917369.7 actor_loss=0.0687 critic_loss=121718516814.7692 entropy=5.7047 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 2280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493542.7 mean_steps=12.9
|
|
[Episode 2290] reward=-65662731.7 actor_loss=0.0478 critic_loss=136848515072.0000 entropy=5.7043 approx_kl=0.0100 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 2300] reward=-52866089.3 actor_loss=0.0648 critic_loss=125284155938.1333 entropy=5.7171 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 2300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563809.8 mean_steps=12.5
|
|
[Episode 2310] reward=-59584297.9 actor_loss=0.0742 critic_loss=124862720986.0741 entropy=5.7241 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 2320] reward=-45547280.5 actor_loss=0.0509 critic_loss=115496971342.7692 entropy=5.7354 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 2320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451483.2 mean_steps=13.7
|
|
[Episode 2330] reward=-54550217.7 actor_loss=0.0734 critic_loss=120158229299.2000 entropy=5.7417 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 2340] reward=-49445964.1 actor_loss=0.0929 critic_loss=120717082996.3636 entropy=5.7452 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 2340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397963.5 mean_steps=13.9
|
|
[Episode 2350] reward=-41822733.6 actor_loss=0.0589 critic_loss=115053989888.0000 entropy=5.7522 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 2360] reward=-59186246.8 actor_loss=0.0746 critic_loss=128392165785.6000 entropy=5.7539 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 2360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-505350.6 mean_steps=13.8
|
|
[Episode 2370] reward=-49223740.9 actor_loss=0.0336 critic_loss=123399961531.7333 entropy=5.7582 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 2380] reward=-48647864.6 actor_loss=0.0387 critic_loss=121061440625.7778 entropy=5.7571 approx_kl=0.0107 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 2380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-425649.6 mean_steps=13.6
|
|
[Episode 2390] reward=-54606274.0 actor_loss=0.0666 critic_loss=121937474901.3333 entropy=5.7624 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 2400] reward=-47723545.6 actor_loss=0.0604 critic_loss=117984891997.0909 entropy=5.7648 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 2400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491961.3 mean_steps=13.9
|
|
[Episode 2410] reward=-41029741.9 actor_loss=0.0717 critic_loss=116818416298.6667 entropy=5.7752 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 2420] reward=-46435014.6 actor_loss=0.0582 critic_loss=117160464699.0769 entropy=5.7799 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 2420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-593909.5 mean_steps=11.2
|
|
[Episode 2430] reward=-58267478.1 actor_loss=0.0286 critic_loss=127855797452.8000 entropy=5.7922 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 2440] reward=-50833007.1 actor_loss=0.0475 critic_loss=120766527780.5714 entropy=5.8017 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 2440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446555.9 mean_steps=13.5
|
|
[Episode 2450] reward=-46418684.3 actor_loss=0.0757 critic_loss=120126363461.8182 entropy=5.8074 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 2460] reward=-47890628.2 actor_loss=0.0634 critic_loss=117143723716.9231 entropy=5.8152 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 2460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-629484.8 mean_steps=11.2
|
|
[Episode 2470] reward=-58328320.1 actor_loss=0.0764 critic_loss=127853143740.6316 entropy=5.8176 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 2480] reward=-54254777.7 actor_loss=0.0913 critic_loss=122718309814.8571 entropy=5.8253 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Eval 2480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-636676.5 mean_steps=12.2
|
|
[Episode 2490] reward=-50251094.8 actor_loss=0.0635 critic_loss=121231186478.5455 entropy=5.8269 approx_kl=0.0102 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 2500] reward=-58822492.7 actor_loss=0.0881 critic_loss=127297187840.0000 entropy=5.8276 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 2500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467885.5 mean_steps=13.3
|
|
[Episode 2510] reward=-38812720.2 actor_loss=0.0565 critic_loss=111376931418.3529 entropy=5.8359 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 2520] reward=-57869941.1 actor_loss=0.0519 critic_loss=124164118528.0000 entropy=5.8415 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 2520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-464006.6 mean_steps=12.1
|
|
[Episode 2530] reward=-62460796.0 actor_loss=0.0894 critic_loss=129499908778.6667 entropy=5.8429 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 2540] reward=-54432812.8 actor_loss=0.0812 critic_loss=122320874797.1765 entropy=5.8556 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 2540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624755.0 mean_steps=12.2
|
|
[Episode 2550] reward=-49505362.1 actor_loss=0.0476 critic_loss=118994891124.3636 entropy=5.8700 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 2560] reward=-54998862.3 actor_loss=0.0463 critic_loss=122458572572.4444 entropy=5.8761 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 2560] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-367739.3 mean_steps=15.6
|
|
[Episode 2570] reward=-52582366.2 actor_loss=0.0723 critic_loss=117959608506.1818 entropy=5.8831 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 2580] reward=-54587067.4 actor_loss=0.0773 critic_loss=122779860992.0000 entropy=5.8940 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 2580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568072.9 mean_steps=12.8
|
|
[Episode 2590] reward=-41851441.2 actor_loss=0.0674 critic_loss=113538373252.7407 entropy=5.9008 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 2600] reward=-48176231.2 actor_loss=0.0473 critic_loss=119117508608.0000 entropy=5.9065 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 2600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-380847.9 mean_steps=14.8
|
|
[Episode 2610] reward=-58655732.5 actor_loss=0.0726 critic_loss=126780493368.8889 entropy=5.9108 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 2620] reward=-43004360.1 actor_loss=0.0494 critic_loss=114958352384.0000 entropy=5.9156 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 2620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417690.6 mean_steps=13.9
|
|
[Episode 2630] reward=-40354121.3 actor_loss=0.0533 critic_loss=113636621926.4000 entropy=5.9203 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 2640] reward=-37012942.8 actor_loss=0.0411 critic_loss=112323196928.0000 entropy=5.9232 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 2640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-320326.4 mean_steps=15.1
|
|
[Episode 2650] reward=-49660120.4 actor_loss=0.0565 critic_loss=122344435712.0000 entropy=5.9187 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 2660] reward=-42574484.2 actor_loss=0.0445 critic_loss=116789840749.7143 entropy=5.9230 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 2660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-458533.6 mean_steps=12.8
|
|
[Episode 2670] reward=-44059319.9 actor_loss=0.0409 critic_loss=114987164330.6667 entropy=5.9265 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 2680] reward=-46457820.0 actor_loss=0.0458 critic_loss=117283459794.8235 entropy=5.9318 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 2680] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-648032.7 mean_steps=11.2
|
|
[Episode 2690] reward=-57763153.0 actor_loss=0.0876 critic_loss=126983837923.5556 entropy=5.9414 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 2700] reward=-64716977.9 actor_loss=0.0848 critic_loss=131380658176.0000 entropy=5.9505 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 2700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438811.7 mean_steps=14.1
|
|
[Episode 2710] reward=-57503059.3 actor_loss=0.0458 critic_loss=124915508689.4545 entropy=5.9462 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 2720] reward=-57930580.3 actor_loss=0.0817 critic_loss=126038313642.6667 entropy=5.9472 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Eval 2720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460962.9 mean_steps=13.5
|
|
[Episode 2730] reward=-52961479.1 actor_loss=0.0675 critic_loss=122551389992.4211 entropy=5.9596 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 2740] reward=-47949619.3 actor_loss=0.0713 critic_loss=118312956648.7273 entropy=5.9625 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 2740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402932.6 mean_steps=13.8
|
|
[Episode 2750] reward=-51292788.4 actor_loss=0.0786 critic_loss=118480345208.4706 entropy=5.9637 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 2760] reward=-54492448.7 actor_loss=0.0870 critic_loss=119005895611.7333 entropy=5.9701 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 2760] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-379104.2 mean_steps=15.5
|
|
[Episode 2770] reward=-57085844.7 actor_loss=0.0643 critic_loss=122718410069.3333 entropy=5.9796 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 2780] reward=-48664984.5 actor_loss=0.0607 critic_loss=119746480007.5294 entropy=5.9883 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 2780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405511.7 mean_steps=13.8
|
|
[Episode 2790] reward=-48955669.3 actor_loss=0.0474 critic_loss=119445148392.7273 entropy=5.9926 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 2800] reward=-38961234.9 actor_loss=0.0374 critic_loss=113236870212.2667 entropy=5.9931 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 2800] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-364227.6 mean_steps=15.3
|
|
[Episode 2810] reward=-57940270.6 actor_loss=0.0733 critic_loss=126066181188.2667 entropy=5.9962 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 2820] reward=-50117173.3 actor_loss=0.0814 critic_loss=120104604852.7059 entropy=6.0007 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 2820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395979.6 mean_steps=14.6
|
|
[Episode 2830] reward=-60844438.6 actor_loss=0.0857 critic_loss=127108843520.0000 entropy=6.0246 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 2840] reward=-58283887.1 actor_loss=0.0590 critic_loss=124718099757.1765 entropy=6.0325 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 2840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545480.4 mean_steps=12.8
|
|
[Episode 2850] reward=-34084258.3 actor_loss=0.0320 critic_loss=106487198671.2381 entropy=6.0466 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0534 front_blocked=0
|
|
[Episode 2860] reward=-52362116.6 actor_loss=0.0806 critic_loss=119610406619.4286 entropy=6.0573 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 2860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-455460.6 mean_steps=12.2
|
|
[Episode 2870] reward=-56619582.5 actor_loss=0.0552 critic_loss=128466644992.0000 entropy=6.0678 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 2880] reward=-47290718.3 actor_loss=0.0546 critic_loss=115665207296.0000 entropy=6.0821 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 2880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538681.1 mean_steps=13.8
|
|
[Episode 2890] reward=-49629797.4 actor_loss=0.0624 critic_loss=117889839826.8235 entropy=6.0921 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 2900] reward=-32090594.6 actor_loss=0.0472 critic_loss=108572119686.7368 entropy=6.0909 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 2900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434039.0 mean_steps=14.2
|
|
[Episode 2910] reward=-43731605.6 actor_loss=0.0562 critic_loss=113495807317.3333 entropy=6.0977 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 2920] reward=-44449244.5 actor_loss=0.0339 critic_loss=117830321421.4737 entropy=6.0995 approx_kl=0.0104 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Eval 2920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-448672.7 mean_steps=12.9
|
|
[Episode 2930] reward=-49590450.9 actor_loss=0.0659 critic_loss=119248500736.0000 entropy=6.1080 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 2940] reward=-56025702.1 actor_loss=0.0606 critic_loss=125335429120.0000 entropy=6.1080 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 2940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-615387.3 mean_steps=12.3
|
|
[Episode 2950] reward=-35760750.0 actor_loss=0.0230 critic_loss=109678961664.0000 entropy=6.1181 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Episode 2960] reward=-42521271.9 actor_loss=0.0794 critic_loss=113062947659.2941 entropy=6.1227 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 2960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-374048.7 mean_steps=14.4
|
|
[Episode 2970] reward=-47638547.6 actor_loss=0.0500 critic_loss=117355471394.1333 entropy=6.1267 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 2980] reward=-35247320.5 actor_loss=0.0481 critic_loss=110444299729.4545 entropy=6.1248 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0618 front_blocked=0
|
|
[Eval 2980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416666.0 mean_steps=13.9
|
|
[Episode 2990] reward=-61202340.5 actor_loss=0.1199 critic_loss=124686272354.4615 entropy=6.1256 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 3000] reward=-59426021.1 actor_loss=0.0794 critic_loss=129161697280.0000 entropy=6.1394 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 3000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473269.0 mean_steps=12.8
|
|
[Episode 3010] reward=-42624893.7 actor_loss=0.0538 critic_loss=113759629824.0000 entropy=6.1427 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 3020] reward=-56448058.4 actor_loss=0.0887 critic_loss=124091035062.8571 entropy=6.1395 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 3020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430669.2 mean_steps=13.9
|
|
[Episode 3030] reward=-63240780.5 actor_loss=0.0711 critic_loss=127401769518.5455 entropy=6.1420 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 3040] reward=-50397864.1 actor_loss=0.0419 critic_loss=120702951424.0000 entropy=6.1541 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 3040] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-548512.8 mean_steps=11.1
|
|
[Episode 3050] reward=-57245870.9 actor_loss=0.0663 critic_loss=122719312749.7143 entropy=6.1613 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 3060] reward=-53818835.4 actor_loss=0.0586 critic_loss=126066274759.1111 entropy=6.1628 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 3060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550324.5 mean_steps=13.2
|
|
[Episode 3070] reward=-47414869.8 actor_loss=0.0616 critic_loss=117330148010.6667 entropy=6.1739 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 3080] reward=-53149088.5 actor_loss=0.0370 critic_loss=124360134851.0476 entropy=6.1847 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 3080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500500.9 mean_steps=12.7
|
|
[Episode 3090] reward=-44432909.8 actor_loss=0.0465 critic_loss=112806987217.4545 entropy=6.2002 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 3100] reward=-49955319.0 actor_loss=0.0352 critic_loss=119212648448.0000 entropy=6.2153 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 3100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451779.6 mean_steps=13.5
|
|
[Episode 3110] reward=-53687266.1 actor_loss=0.0745 critic_loss=119840503125.3333 entropy=6.2194 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 3120] reward=-47239463.0 actor_loss=0.0408 critic_loss=115461510940.4444 entropy=6.2199 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 3120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-706909.9 mean_steps=12.0
|
|
[Episode 3130] reward=-45340952.7 actor_loss=0.0817 critic_loss=111965580083.2000 entropy=6.2208 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 3140] reward=-54171853.7 actor_loss=0.0488 critic_loss=119635074108.2353 entropy=6.2306 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 3140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422297.6 mean_steps=13.8
|
|
[Episode 3150] reward=-52882577.4 actor_loss=0.0506 critic_loss=122958419606.5882 entropy=6.2386 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 3160] reward=-58595472.1 actor_loss=0.0750 critic_loss=124771749888.0000 entropy=6.2433 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 3160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566857.6 mean_steps=11.8
|
|
[Episode 3170] reward=-58520014.6 actor_loss=0.0732 critic_loss=125803616814.5455 entropy=6.2419 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 3180] reward=-63182665.3 actor_loss=0.0844 critic_loss=126668632436.3636 entropy=6.2501 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 3180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-583110.8 mean_steps=11.9
|
|
[Episode 3190] reward=-47503797.4 actor_loss=0.0655 critic_loss=118081571328.0000 entropy=6.2628 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 3200] reward=-39780198.1 actor_loss=0.0483 critic_loss=111429755426.1333 entropy=6.2696 approx_kl=0.0100 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 3200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536027.0 mean_steps=12.2
|
|
[Episode 3210] reward=-44803312.8 actor_loss=0.0614 critic_loss=113266548105.8462 entropy=6.2777 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 3220] reward=-53544714.6 actor_loss=0.0689 critic_loss=115138734762.6667 entropy=6.2810 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 3220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490675.0 mean_steps=13.3
|
|
[Episode 3230] reward=-57258081.4 actor_loss=0.0803 critic_loss=129585683660.8000 entropy=6.2891 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 3240] reward=-50050833.8 actor_loss=0.0577 critic_loss=116924976332.8000 entropy=6.2924 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 3240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-565876.1 mean_steps=11.1
|
|
[Episode 3250] reward=-39653842.2 actor_loss=0.0321 critic_loss=108300176952.8889 entropy=6.2907 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0618 front_blocked=0
|
|
[Episode 3260] reward=-46425590.7 actor_loss=0.0359 critic_loss=117261505194.6667 entropy=6.2959 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 3260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492071.2 mean_steps=12.8
|
|
[Episode 3270] reward=-39691587.7 actor_loss=0.0431 critic_loss=114108441746.2857 entropy=6.3051 approx_kl=0.0123 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Episode 3280] reward=-45019735.9 actor_loss=0.0382 critic_loss=112090240887.4667 entropy=6.3032 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Eval 3280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555408.2 mean_steps=11.7
|
|
[Episode 3290] reward=-44265856.6 actor_loss=0.0458 critic_loss=112551222385.7778 entropy=6.3139 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 3300] reward=-52319315.0 actor_loss=0.0614 critic_loss=118050071893.3333 entropy=6.3250 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 3300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430229.9 mean_steps=13.9
|
|
[Episode 3310] reward=-53636155.0 actor_loss=0.0872 critic_loss=121581920938.6667 entropy=6.3350 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 3320] reward=-56658939.0 actor_loss=0.0614 critic_loss=118791911424.0000 entropy=6.3427 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 3320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397291.8 mean_steps=13.8
|
|
[Episode 3330] reward=-56068302.6 actor_loss=0.0804 critic_loss=121975876096.0000 entropy=6.3473 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 3340] reward=-60021763.4 actor_loss=0.0606 critic_loss=125208241421.4737 entropy=6.3540 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 3340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504942.4 mean_steps=12.9
|
|
[Episode 3350] reward=-30047687.8 actor_loss=0.0218 critic_loss=106673061456.8421 entropy=6.3643 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0521 front_blocked=0
|
|
[Episode 3360] reward=-45272032.6 actor_loss=0.0566 critic_loss=116658438963.2000 entropy=6.3669 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 3360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-405442.1 mean_steps=13.1
|
|
[Episode 3370] reward=-50814864.8 actor_loss=0.0592 critic_loss=118146717988.5714 entropy=6.3724 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 3380] reward=-53324090.8 actor_loss=0.0492 critic_loss=125187967051.8519 entropy=6.3828 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 3380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489677.2 mean_steps=13.5
|
|
[Episode 3390] reward=-52388861.9 actor_loss=0.0487 critic_loss=122621918916.9231 entropy=6.3787 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 3400] reward=-55278852.5 actor_loss=0.0650 critic_loss=125393751691.6364 entropy=6.3820 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 3400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455186.6 mean_steps=13.6
|
|
[Episode 3410] reward=-45391066.6 actor_loss=0.0543 critic_loss=111116019712.0000 entropy=6.3895 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 3420] reward=-36785577.9 actor_loss=0.0509 critic_loss=107463867703.6522 entropy=6.3920 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0658 front_blocked=0
|
|
[Eval 3420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-560799.0 mean_steps=10.7
|
|
[Episode 3430] reward=-47722724.3 actor_loss=0.0478 critic_loss=118618392712.5333 entropy=6.3953 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 3440] reward=-46892003.7 actor_loss=0.0544 critic_loss=115012109458.2857 entropy=6.4062 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 3440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463152.8 mean_steps=12.6
|
|
[Episode 3450] reward=-47043768.3 actor_loss=0.0613 critic_loss=110451526860.8000 entropy=6.4166 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 3460] reward=-51579551.2 actor_loss=0.0721 critic_loss=119119307532.1905 entropy=6.4131 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 3460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-407085.5 mean_steps=13.8
|
|
[Episode 3470] reward=-55085790.1 actor_loss=0.0775 critic_loss=118211030317.1765 entropy=6.4172 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 3480] reward=-55076678.0 actor_loss=0.0946 critic_loss=118782485740.3077 entropy=6.4256 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 3480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441059.5 mean_steps=13.9
|
|
[Episode 3490] reward=-54489017.5 actor_loss=0.0597 critic_loss=119989294421.3333 entropy=6.4266 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 3500] reward=-53531791.4 actor_loss=0.0669 critic_loss=117888044165.5652 entropy=6.4358 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 3500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-406010.9 mean_steps=14.7
|
|
[Episode 3510] reward=-48491302.9 actor_loss=0.0456 critic_loss=117640113590.8571 entropy=6.4471 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 3520] reward=-44532695.1 actor_loss=0.0735 critic_loss=116609380625.0667 entropy=6.4537 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 3520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439807.3 mean_steps=13.1
|
|
[Episode 3530] reward=-38767625.8 actor_loss=0.0591 critic_loss=109951504624.9412 entropy=6.4574 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Episode 3540] reward=-43278006.9 actor_loss=0.0515 critic_loss=110960192625.7778 entropy=6.4630 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 3540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-621937.4 mean_steps=11.3
|
|
[Episode 3550] reward=-47163371.4 actor_loss=0.0446 critic_loss=118743236280.3200 entropy=6.4696 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 3560] reward=-40910715.5 actor_loss=0.0485 critic_loss=106206169533.2174 entropy=6.4702 approx_kl=0.0104 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 3560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-374930.8 mean_steps=12.9
|
|
[Episode 3570] reward=-37570919.2 actor_loss=0.0487 critic_loss=108285955276.8000 entropy=6.4721 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Episode 3580] reward=-51085378.0 actor_loss=0.0629 critic_loss=121155525563.7333 entropy=6.4832 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 3580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559277.2 mean_steps=12.7
|
|
[Episode 3590] reward=-52502513.6 actor_loss=0.0469 critic_loss=116523186333.5385 entropy=6.4854 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 3600] reward=-55826441.5 actor_loss=0.0758 critic_loss=122674518425.6000 entropy=6.4893 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 3600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449022.8 mean_steps=13.9
|
|
[Episode 3610] reward=-38125856.7 actor_loss=0.0506 critic_loss=106722656687.1579 entropy=6.4850 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Episode 3620] reward=-38073844.7 actor_loss=0.0349 critic_loss=108530830713.2632 entropy=6.4888 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 3620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-472334.7 mean_steps=11.7
|
|
[Episode 3630] reward=-42618015.0 actor_loss=0.0347 critic_loss=112175652408.8889 entropy=6.4963 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Episode 3640] reward=-43191742.8 actor_loss=0.0651 critic_loss=109830565888.0000 entropy=6.5064 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Eval 3640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-495632.5 mean_steps=11.8
|
|
[Episode 3650] reward=-47383466.5 actor_loss=0.0705 critic_loss=117081673081.2632 entropy=6.5045 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 3660] reward=-39649207.6 actor_loss=0.0675 critic_loss=107620273005.7143 entropy=6.5074 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 3660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-648225.4 mean_steps=12.1
|
|
[Episode 3670] reward=-49043173.7 actor_loss=0.0529 critic_loss=118168740352.0000 entropy=6.5059 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 3680] reward=-52227968.8 actor_loss=0.0729 critic_loss=116476282321.4545 entropy=6.5165 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 3680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-503613.1 mean_steps=12.1
|
|
[Episode 3690] reward=-40531573.9 actor_loss=0.0611 critic_loss=107519662957.7143 entropy=6.5299 approx_kl=0.0099 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 3700] reward=-50688328.8 actor_loss=0.0538 critic_loss=119736137591.4667 entropy=6.5331 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 3700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-468087.2 mean_steps=11.8
|
|
[Episode 3710] reward=-47653677.3 actor_loss=0.0651 critic_loss=110664052053.3333 entropy=6.5458 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 3720] reward=-43424391.0 actor_loss=0.0483 critic_loss=113457469597.5385 entropy=6.5473 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Eval 3720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408694.7 mean_steps=13.6
|
|
[Episode 3730] reward=-44518119.2 actor_loss=0.0495 critic_loss=114496756035.3684 entropy=6.5445 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 3740] reward=-48647381.8 actor_loss=0.0351 critic_loss=115209555057.7778 entropy=6.5419 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 3740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512574.1 mean_steps=12.7
|
|
[Episode 3750] reward=-48731138.5 actor_loss=0.0747 critic_loss=113443507501.1765 entropy=6.5431 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 3760] reward=-40349808.6 actor_loss=0.0458 critic_loss=108165383899.4286 entropy=6.5486 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 3760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-551995.5 mean_steps=13.0
|
|
[Episode 3770] reward=-42026972.9 actor_loss=0.0578 critic_loss=109682387051.7895 entropy=6.5550 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Episode 3780] reward=-35283051.7 actor_loss=0.0580 critic_loss=103627273011.2000 entropy=6.5579 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 3780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468555.3 mean_steps=13.4
|
|
[Episode 3790] reward=-46669945.1 actor_loss=0.0668 critic_loss=113860705757.8667 entropy=6.5597 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 3800] reward=-55080020.4 actor_loss=0.0844 critic_loss=120753011097.6000 entropy=6.5545 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 3800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-396893.5 mean_steps=13.7
|
|
[Episode 3810] reward=-46921455.6 actor_loss=0.0366 critic_loss=116603726116.5714 entropy=6.5586 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 3820] reward=-39506151.9 actor_loss=0.0559 critic_loss=108769107968.0000 entropy=6.5607 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 3820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419811.4 mean_steps=13.7
|
|
[Episode 3830] reward=-43703574.6 actor_loss=0.0620 critic_loss=107028484681.1429 entropy=6.5628 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 3840] reward=-57932342.5 actor_loss=0.0724 critic_loss=119913337514.6667 entropy=6.5718 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 3840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534117.1 mean_steps=12.9
|
|
[Episode 3850] reward=-33807444.5 actor_loss=0.0621 critic_loss=103986128896.0000 entropy=6.5731 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 3860] reward=-46378847.9 actor_loss=0.0565 critic_loss=113717480789.3333 entropy=6.5741 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 3860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575936.5 mean_steps=11.6
|
|
[Episode 3870] reward=-39406949.0 actor_loss=0.0325 critic_loss=105990636001.8824 entropy=6.5842 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 3880] reward=-43760499.1 actor_loss=0.0607 critic_loss=108670870089.1429 entropy=6.5825 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 3880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507901.2 mean_steps=12.8
|
|
[Episode 3890] reward=-50642926.0 actor_loss=0.0478 critic_loss=117613506560.0000 entropy=6.5799 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 3900] reward=-55541769.0 actor_loss=0.0865 critic_loss=119852528338.8235 entropy=6.5951 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 3900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468441.7 mean_steps=12.7
|
|
[Episode 3910] reward=-37368241.8 actor_loss=0.0392 critic_loss=104683205973.3333 entropy=6.6019 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Episode 3920] reward=-47189126.6 actor_loss=0.0519 critic_loss=114136229010.2857 entropy=6.6041 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 3920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523102.6 mean_steps=12.8
|
|
[Episode 3930] reward=-47220996.9 actor_loss=0.0669 critic_loss=112632700928.0000 entropy=6.6123 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 3940] reward=-51541338.3 actor_loss=0.0625 critic_loss=117208715264.0000 entropy=6.6158 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 3940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-476620.4 mean_steps=13.9
|
|
[Episode 3950] reward=-52922633.7 actor_loss=0.0516 critic_loss=120994297856.0000 entropy=6.6179 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 3960] reward=-37885440.4 actor_loss=0.0872 critic_loss=106278359203.8400 entropy=6.6232 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 3960] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-650357.5 mean_steps=11.6
|
|
[Episode 3970] reward=-43844985.5 actor_loss=0.0604 critic_loss=109098337894.4000 entropy=6.6413 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 3980] reward=-46751363.7 actor_loss=0.0478 critic_loss=113037412352.0000 entropy=6.6475 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 3980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512770.0 mean_steps=12.9
|
|
[Episode 3990] reward=-49959194.8 actor_loss=0.0685 critic_loss=112287791396.5714 entropy=6.6488 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 4000] reward=-53709250.6 actor_loss=0.0757 critic_loss=121838348846.5455 entropy=6.6645 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 4000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540568.7 mean_steps=12.1
|
|
[Episode 4010] reward=-44387063.7 actor_loss=0.0407 critic_loss=108351199555.3684 entropy=6.6752 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 4020] reward=-45058785.7 actor_loss=0.0514 critic_loss=112349548859.0769 entropy=6.6885 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 4020] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-616723.2 mean_steps=10.6
|
|
[Episode 4030] reward=-42213487.0 actor_loss=0.0498 critic_loss=109363184579.7647 entropy=6.6983 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 4040] reward=-38333612.3 actor_loss=0.0523 critic_loss=106957138602.6667 entropy=6.7041 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 4040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503556.1 mean_steps=13.8
|
|
[Episode 4050] reward=-49221765.7 actor_loss=0.0580 critic_loss=115933027328.0000 entropy=6.7146 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 4060] reward=-53088471.7 actor_loss=0.0599 critic_loss=118829641081.2632 entropy=6.7159 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 4060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-472886.7 mean_steps=12.6
|
|
[Episode 4070] reward=-42815371.0 actor_loss=0.0413 critic_loss=110857685869.7143 entropy=6.7205 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Episode 4080] reward=-49537513.4 actor_loss=0.0526 critic_loss=113138430658.2069 entropy=6.7161 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 4080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523805.1 mean_steps=12.9
|
|
[Episode 4090] reward=-56210640.0 actor_loss=0.0556 critic_loss=121238352457.1429 entropy=6.7187 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 4100] reward=-41383426.5 actor_loss=0.0539 critic_loss=109131494022.7368 entropy=6.7228 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 4100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551151.8 mean_steps=11.7
|
|
[Episode 4110] reward=-42339553.0 actor_loss=0.0777 critic_loss=110352281873.0667 entropy=6.7284 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 4120] reward=-50658983.6 actor_loss=0.0791 critic_loss=119236039601.2308 entropy=6.7328 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 4120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-483880.5 mean_steps=14.6
|
|
[Episode 4130] reward=-45197143.6 actor_loss=0.0620 critic_loss=113585163972.9231 entropy=6.7345 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Episode 4140] reward=-47814893.2 actor_loss=0.0641 critic_loss=110936360082.2857 entropy=6.7339 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 4140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484248.1 mean_steps=13.4
|
|
[Episode 4150] reward=-50353608.5 actor_loss=0.0674 critic_loss=117073419195.7333 entropy=6.7302 approx_kl=0.0098 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 4160] reward=-42011724.6 actor_loss=0.0648 critic_loss=108140103559.5294 entropy=6.7335 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 4160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-647441.8 mean_steps=11.4
|
|
[Episode 4170] reward=-48113379.2 actor_loss=0.0623 critic_loss=115764756480.0000 entropy=6.7416 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 4180] reward=-35535685.7 actor_loss=0.0550 critic_loss=104389282762.1053 entropy=6.7566 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0658 front_blocked=0
|
|
[Eval 4180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-419661.6 mean_steps=14.8
|
|
[Episode 4190] reward=-47115924.1 actor_loss=0.0623 critic_loss=108382135235.7647 entropy=6.7659 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 4200] reward=-43627240.6 actor_loss=0.0714 critic_loss=111737792512.0000 entropy=6.7686 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Eval 4200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-343381.2 mean_steps=14.2
|
|
[Episode 4210] reward=-35767871.8 actor_loss=0.0659 critic_loss=107860114195.6923 entropy=6.7880 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 4220] reward=-35426610.6 actor_loss=0.0493 critic_loss=105116961698.9091 entropy=6.8000 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Eval 4220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466883.9 mean_steps=13.4
|
|
[Episode 4230] reward=-45468607.9 actor_loss=0.0305 critic_loss=109137656490.6667 entropy=6.7981 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 4240] reward=-40616501.5 actor_loss=0.0598 critic_loss=111290064289.1852 entropy=6.8071 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 4240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567643.6 mean_steps=12.8
|
|
[Episode 4250] reward=-36700406.1 actor_loss=0.0456 critic_loss=105115013802.6667 entropy=6.8108 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Episode 4260] reward=-52950249.1 actor_loss=0.0955 critic_loss=119238440870.9565 entropy=6.8195 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 4260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487673.0 mean_steps=13.8
|
|
[Episode 4270] reward=-54353672.5 actor_loss=0.0660 critic_loss=119922094614.2609 entropy=6.8245 approx_kl=0.0100 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 4280] reward=-62926198.2 actor_loss=0.0933 critic_loss=119597833808.8421 entropy=6.8344 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 4280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447973.1 mean_steps=13.2
|
|
[Episode 4290] reward=-53266139.3 actor_loss=0.0577 critic_loss=124060435742.7200 entropy=6.8346 approx_kl=0.0097 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 4300] reward=-49594766.3 actor_loss=0.0632 critic_loss=116236689920.0000 entropy=6.8468 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 4300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-602988.0 mean_steps=12.9
|
|
[Episode 4310] reward=-48151549.2 actor_loss=0.0574 critic_loss=111418559692.8000 entropy=6.8665 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 4320] reward=-34261136.2 actor_loss=0.0521 critic_loss=102432366110.1176 entropy=6.8711 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Eval 4320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422134.7 mean_steps=14.2
|
|
[Episode 4330] reward=-39933161.9 actor_loss=0.0379 critic_loss=106697819204.2667 entropy=6.8771 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 4340] reward=-45743488.8 actor_loss=0.0630 critic_loss=107685738359.4667 entropy=6.8891 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 4340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-369061.3 mean_steps=15.3
|
|
[Episode 4350] reward=-39905560.6 actor_loss=0.0397 critic_loss=109834388626.2857 entropy=6.8931 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 4360] reward=-51576122.4 actor_loss=0.0527 critic_loss=114772284757.3333 entropy=6.9035 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 4360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-617072.3 mean_steps=11.2
|
|
[Episode 4370] reward=-43625276.5 actor_loss=0.0502 critic_loss=104789771150.2222 entropy=6.9032 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 4380] reward=-51230753.4 actor_loss=0.0716 critic_loss=112499210397.5385 entropy=6.9083 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 4380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555223.4 mean_steps=12.8
|
|
[Episode 4390] reward=-33107260.2 actor_loss=0.0361 critic_loss=94135362280.7273 entropy=6.9241 approx_kl=0.0116 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Episode 4400] reward=-40362964.9 actor_loss=0.0417 critic_loss=108512310452.7059 entropy=6.9357 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Eval 4400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-498924.3 mean_steps=12.3
|
|
[Episode 4410] reward=-48610307.2 actor_loss=0.0708 critic_loss=108013010944.0000 entropy=6.9371 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 4420] reward=-49190934.2 actor_loss=0.0584 critic_loss=111537297817.6000 entropy=6.9402 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 4420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490586.7 mean_steps=12.7
|
|
[Episode 4430] reward=-47968519.7 actor_loss=0.0619 critic_loss=110763967692.8000 entropy=6.9350 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 4440] reward=-51020309.0 actor_loss=0.0653 critic_loss=114742361721.9048 entropy=6.9239 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 4440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-598786.1 mean_steps=12.9
|
|
[Episode 4450] reward=-45077886.7 actor_loss=0.0724 critic_loss=105649903686.6207 entropy=6.9238 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 4460] reward=-48743223.3 actor_loss=0.0814 critic_loss=109355436243.8621 entropy=6.9189 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 4460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-500738.7 mean_steps=12.3
|
|
[Episode 4470] reward=-42409111.4 actor_loss=0.0745 critic_loss=104589674788.5714 entropy=6.9248 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 4480] reward=-45604314.0 actor_loss=0.0467 critic_loss=108510286714.4348 entropy=6.9329 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 4480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-365616.9 mean_steps=14.1
|
|
[Episode 4490] reward=-32242113.1 actor_loss=0.0378 critic_loss=97669653065.1429 entropy=6.9468 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Episode 4500] reward=-44378022.1 actor_loss=0.0474 critic_loss=104245972992.0000 entropy=6.9445 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Eval 4500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-369279.7 mean_steps=14.2
|
|
[Episode 4510] reward=-42995482.2 actor_loss=0.0624 critic_loss=107261352345.6000 entropy=6.9569 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 4520] reward=-47714317.8 actor_loss=0.0587 critic_loss=112488953054.6087 entropy=6.9494 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 4520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-301562.8 mean_steps=14.5
|
|
[Episode 4530] reward=-44500955.4 actor_loss=0.0506 critic_loss=110204878848.0000 entropy=6.9411 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 4540] reward=-39777639.7 actor_loss=0.0354 critic_loss=103692861440.0000 entropy=6.9513 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 4540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-436631.4 mean_steps=13.4
|
|
[Episode 4550] reward=-38551408.7 actor_loss=0.0536 critic_loss=103484999387.4286 entropy=6.9612 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 4560] reward=-42673608.0 actor_loss=0.0574 critic_loss=101070948059.4286 entropy=6.9569 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 4560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-405237.9 mean_steps=13.4
|
|
[Episode 4570] reward=-40216837.6 actor_loss=0.0217 critic_loss=103328939212.8000 entropy=6.9604 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Episode 4580] reward=-31179940.0 actor_loss=0.0474 critic_loss=96695013691.0769 entropy=6.9711 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 4580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486856.2 mean_steps=12.8
|
|
[Episode 4590] reward=-37336908.9 actor_loss=0.0598 critic_loss=103335691878.4000 entropy=6.9670 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Episode 4600] reward=-41324927.2 actor_loss=0.0620 critic_loss=98337588292.2667 entropy=6.9684 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 4600] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-632924.4 mean_steps=11.6
|
|
[Episode 4610] reward=-43231772.3 actor_loss=0.0506 critic_loss=106433925939.2000 entropy=6.9725 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 4620] reward=-42871138.9 actor_loss=0.0337 critic_loss=102757924233.8462 entropy=6.9776 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Eval 4620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-570131.4 mean_steps=11.7
|
|
[Episode 4630] reward=-50863596.8 actor_loss=0.0634 critic_loss=111727428015.1579 entropy=6.9854 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 4640] reward=-50876695.4 actor_loss=0.0533 critic_loss=111613432048.9412 entropy=6.9904 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 4640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481505.8 mean_steps=13.3
|
|
[Episode 4650] reward=-48798128.6 actor_loss=0.0866 critic_loss=114037655931.2593 entropy=7.0016 approx_kl=0.0099 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 4660] reward=-47721023.8 actor_loss=0.0457 critic_loss=111362887224.8889 entropy=7.0154 approx_kl=0.0108 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 4660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562780.7 mean_steps=12.2
|
|
[Episode 4670] reward=-39301069.0 actor_loss=0.0411 critic_loss=101095947059.2000 entropy=7.0268 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Episode 4680] reward=-41532639.7 actor_loss=0.0460 critic_loss=103081277440.0000 entropy=7.0232 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0618 front_blocked=0
|
|
[Eval 4680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-541507.0 mean_steps=10.8
|
|
[Episode 4690] reward=-40666992.2 actor_loss=0.0219 critic_loss=106022557816.4706 entropy=7.0342 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 4700] reward=-46154559.2 actor_loss=0.0590 critic_loss=105462778958.7692 entropy=7.0361 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Eval 4700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562639.2 mean_steps=12.6
|
|
[Episode 4710] reward=-53369454.1 actor_loss=0.0715 critic_loss=112623501668.1739 entropy=7.0376 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 4720] reward=-41053036.6 actor_loss=0.0346 critic_loss=104024635904.0000 entropy=7.0572 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Eval 4720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455268.6 mean_steps=14.1
|
|
[Episode 4730] reward=-49608822.2 actor_loss=0.0391 critic_loss=107120768236.3077 entropy=7.0613 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 4740] reward=-47438341.4 actor_loss=0.0463 critic_loss=109408336359.6190 entropy=7.0616 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Eval 4740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478428.8 mean_steps=12.6
|
|
[Episode 4750] reward=-52289509.1 actor_loss=0.0551 critic_loss=116986576440.8889 entropy=7.0673 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 4760] reward=-48853762.2 actor_loss=0.0841 critic_loss=106706575990.1538 entropy=7.0606 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 4760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531155.4 mean_steps=12.9
|
|
[Episode 4770] reward=-52065369.5 actor_loss=0.0864 critic_loss=108553281863.6800 entropy=7.0607 approx_kl=0.0108 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 4780] reward=-31800009.4 actor_loss=0.0089 critic_loss=94175110333.6296 entropy=7.0724 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0482 front_blocked=0
|
|
[Eval 4780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-603480.0 mean_steps=11.0
|
|
[Episode 4790] reward=-41037827.2 actor_loss=0.0610 critic_loss=108488524322.1333 entropy=7.0742 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Episode 4800] reward=-38625805.1 actor_loss=0.0532 critic_loss=103368153380.5714 entropy=7.0800 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 4800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509118.8 mean_steps=12.9
|
|
[Episode 4810] reward=-42656875.0 actor_loss=0.0528 critic_loss=102170587136.0000 entropy=7.0790 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 4820] reward=-54028578.6 actor_loss=0.0525 critic_loss=110746498295.1724 entropy=7.0872 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 4820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535382.0 mean_steps=12.3
|
|
[Episode 4830] reward=-42485338.6 actor_loss=0.0570 critic_loss=105365489436.4444 entropy=7.0889 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Episode 4840] reward=-43357588.3 actor_loss=0.0582 critic_loss=109607101379.7647 entropy=7.0975 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 4840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437278.3 mean_steps=13.2
|
|
[Episode 4850] reward=-27094335.5 actor_loss=0.0279 critic_loss=90305624064.0000 entropy=7.0977 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 4860] reward=-37297504.3 actor_loss=0.0487 critic_loss=103284900750.2222 entropy=7.1076 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 4860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562930.0 mean_steps=12.4
|
|
[Episode 4870] reward=-33578474.4 actor_loss=0.0364 critic_loss=98578617230.2222 entropy=7.1075 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 4880] reward=-37074771.2 actor_loss=0.0417 critic_loss=105269533809.7778 entropy=7.1054 approx_kl=0.0095 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Eval 4880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427917.1 mean_steps=13.9
|
|
[Episode 4890] reward=-47733825.2 actor_loss=0.0484 critic_loss=110619021627.0769 entropy=7.1071 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 4900] reward=-32527618.8 actor_loss=0.0424 critic_loss=90206701056.0000 entropy=7.1100 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Eval 4900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-647884.5 mean_steps=11.3
|
|
[Episode 4910] reward=-36115541.6 actor_loss=0.0158 critic_loss=92927711524.5714 entropy=7.1135 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Episode 4920] reward=-33663295.9 actor_loss=0.0341 critic_loss=97092248462.2222 entropy=7.1149 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Eval 4920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424140.0 mean_steps=13.9
|
|
[Episode 4930] reward=-50912181.6 actor_loss=0.0485 critic_loss=110691682986.6667 entropy=7.1291 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 4940] reward=-53761568.0 actor_loss=0.0493 critic_loss=116669887186.8235 entropy=7.1319 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 4940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-521809.4 mean_steps=11.2
|
|
[Episode 4950] reward=-42104579.1 actor_loss=0.0406 critic_loss=103033010176.0000 entropy=7.1359 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Episode 4960] reward=-46975734.3 actor_loss=0.0506 critic_loss=107996433066.6667 entropy=7.1396 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0658 front_blocked=0
|
|
[Eval 4960] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-581174.9 mean_steps=11.1
|
|
[Episode 4970] reward=-37881497.1 actor_loss=0.0745 critic_loss=103854886696.4211 entropy=7.1397 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 4980] reward=-41649697.1 actor_loss=0.0709 critic_loss=100195890062.2222 entropy=7.1441 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Eval 4980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-438632.8 mean_steps=14.4
|
|
[Episode 4990] reward=-49616597.7 actor_loss=0.0633 critic_loss=110235391317.3333 entropy=7.1417 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Episode 5000] reward=-24195112.7 actor_loss=0.0098 critic_loss=82937879665.7778 entropy=7.1445 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Eval 5000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-353932.1 mean_steps=14.3
|
|
[Episode 5010] reward=-52470136.7 actor_loss=0.0580 critic_loss=113400730322.8235 entropy=7.1471 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 5020] reward=-53669501.3 actor_loss=0.0599 critic_loss=113673422060.3077 entropy=7.1567 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 5020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613530.4 mean_steps=12.0
|
|
[Episode 5030] reward=-43084497.7 actor_loss=0.0399 critic_loss=111012861440.0000 entropy=7.1634 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 5040] reward=-46336948.0 actor_loss=0.0699 critic_loss=104315275450.1818 entropy=7.1583 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Eval 5040] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-403494.8 mean_steps=15.3
|
|
[Episode 5050] reward=-45390790.9 actor_loss=0.0538 critic_loss=106508459212.8000 entropy=7.1635 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 5060] reward=-47981525.6 actor_loss=0.0513 critic_loss=113698722767.2381 entropy=7.1656 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 5060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530997.7 mean_steps=12.3
|
|
[Episode 5070] reward=-34881704.7 actor_loss=0.0655 critic_loss=96843842981.6471 entropy=7.1613 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 5080] reward=-45048800.2 actor_loss=0.0219 critic_loss=102706065723.0769 entropy=7.1673 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Eval 5080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-351909.1 mean_steps=14.1
|
|
[Episode 5090] reward=-41422994.0 actor_loss=0.0371 critic_loss=103941085184.0000 entropy=7.1694 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Episode 5100] reward=-28400700.1 actor_loss=-0.0119 critic_loss=93723149365.8947 entropy=7.1713 approx_kl=0.0109 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Eval 5100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-374246.6 mean_steps=14.6
|
|
[Episode 5110] reward=-44442230.4 actor_loss=0.0375 critic_loss=111703702291.6923 entropy=7.1761 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Episode 5120] reward=-37664208.1 actor_loss=0.0540 critic_loss=92623304021.3333 entropy=7.1830 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 5120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395472.1 mean_steps=14.3
|
|
[Episode 5130] reward=-51521284.7 actor_loss=0.0493 critic_loss=109921604754.2857 entropy=7.1964 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Episode 5140] reward=-39754838.6 actor_loss=0.0734 critic_loss=100080946959.0588 entropy=7.2096 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 5140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560486.0 mean_steps=11.8
|
|
[Episode 5150] reward=-55133575.7 actor_loss=0.0827 critic_loss=114629162449.4545 entropy=7.2191 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 5160] reward=-44471966.7 actor_loss=0.0461 critic_loss=106819597863.3846 entropy=7.2202 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 5160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581084.7 mean_steps=11.9
|
|
[Episode 5170] reward=-40314543.3 actor_loss=0.0715 critic_loss=94880406459.7333 entropy=7.2217 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Episode 5180] reward=-47431207.7 actor_loss=0.0561 critic_loss=105966719622.7368 entropy=7.2412 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 5180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572714.9 mean_steps=11.7
|
|
[Episode 5190] reward=-38703383.6 actor_loss=0.0212 critic_loss=106933025996.8000 entropy=7.2478 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Episode 5200] reward=-36425886.1 actor_loss=0.0386 critic_loss=100874571190.8571 entropy=7.2405 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0534 front_blocked=0
|
|
[Eval 5200] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-654236.7 mean_steps=11.2
|
|
[Episode 5210] reward=-34651024.1 actor_loss=0.0434 critic_loss=93728343381.3333 entropy=7.2421 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Episode 5220] reward=-39970723.8 actor_loss=0.0482 critic_loss=96602999229.2174 entropy=7.2496 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 5220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496123.3 mean_steps=13.1
|
|
[Episode 5230] reward=-28550601.6 actor_loss=0.0202 critic_loss=86262567634.8235 entropy=7.2572 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0488 front_blocked=0
|
|
[Episode 5240] reward=-42240233.3 actor_loss=0.0557 critic_loss=102852902502.4000 entropy=7.2587 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 5240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-418599.3 mean_steps=14.9
|
|
[Episode 5250] reward=-28995135.6 actor_loss=0.0248 critic_loss=93214982690.1333 entropy=7.2603 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0534 front_blocked=0
|
|
[Episode 5260] reward=-32412425.5 actor_loss=0.0392 critic_loss=87690437518.2222 entropy=7.2580 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Eval 5260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501106.3 mean_steps=13.7
|
|
[Episode 5270] reward=-48930207.3 actor_loss=0.0588 critic_loss=109211966259.2000 entropy=7.2569 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 5280] reward=-36582920.0 actor_loss=0.0313 critic_loss=93824345156.2667 entropy=7.2714 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Eval 5280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-431952.3 mean_steps=11.8
|
|
[Episode 5290] reward=-38619424.4 actor_loss=0.0345 critic_loss=100358931549.0909 entropy=7.2677 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0534 front_blocked=0
|
|
[Episode 5300] reward=-47428037.1 actor_loss=0.0474 critic_loss=110657114646.2609 entropy=7.2621 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 5300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441754.1 mean_steps=13.8
|
|
[Episode 5310] reward=-40905313.7 actor_loss=0.0536 critic_loss=101354352399.0588 entropy=7.2585 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0658 front_blocked=0
|
|
[Episode 5320] reward=-34589554.4 actor_loss=0.0310 critic_loss=92346605999.1579 entropy=7.2715 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 5320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546852.4 mean_steps=12.3
|
|
[Episode 5330] reward=-38994966.3 actor_loss=0.0452 critic_loss=99748695153.7778 entropy=7.2729 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 5340] reward=-45762154.2 actor_loss=0.0490 critic_loss=102257220371.6923 entropy=7.2812 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Eval 5340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447130.5 mean_steps=13.8
|
|
[Episode 5350] reward=-34969189.2 actor_loss=0.0489 critic_loss=94539388928.0000 entropy=7.2858 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Episode 5360] reward=-35713794.3 actor_loss=0.0432 critic_loss=92964915882.6667 entropy=7.2949 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 5360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492427.5 mean_steps=12.9
|
|
[Episode 5370] reward=-38208026.5 actor_loss=0.0429 critic_loss=97194360832.0000 entropy=7.2993 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Episode 5380] reward=-35542700.7 actor_loss=0.0018 critic_loss=94836883456.0000 entropy=7.2922 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Eval 5380] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-256965.3 mean_steps=14.9
|
|
[Episode 5390] reward=-43106893.3 actor_loss=0.0661 critic_loss=102018261736.7273 entropy=7.2923 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 5400] reward=-46870739.4 actor_loss=0.0595 critic_loss=107973328310.8571 entropy=7.2943 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 5400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-412554.4 mean_steps=13.8
|
|
[Episode 5410] reward=-28962234.5 actor_loss=0.0065 critic_loss=87302653831.5294 entropy=7.2953 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Episode 5420] reward=-41106797.2 actor_loss=0.0246 critic_loss=103138975744.0000 entropy=7.3094 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 5420] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-303470.5 mean_steps=14.3
|
|
[Episode 5430] reward=-41501450.6 actor_loss=0.0296 critic_loss=99599814009.2632 entropy=7.3096 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 5440] reward=-40926784.0 actor_loss=0.0588 critic_loss=92734442373.1200 entropy=7.3132 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 5440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-452337.7 mean_steps=12.7
|
|
[Episode 5450] reward=-36032444.0 actor_loss=0.0284 critic_loss=96226223445.3333 entropy=7.3155 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 5460] reward=-42149313.6 actor_loss=0.0545 critic_loss=97507800726.5882 entropy=7.3313 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Eval 5460] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-265802.5 mean_steps=15.7
|
|
[Episode 5470] reward=-46138133.0 actor_loss=0.0570 critic_loss=97107415040.0000 entropy=7.3401 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 5480] reward=-46484353.1 actor_loss=0.0410 critic_loss=104735621510.0952 entropy=7.3457 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Eval 5480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508937.3 mean_steps=12.8
|
|
[Episode 5490] reward=-42529870.9 actor_loss=0.0646 critic_loss=101725604717.7143 entropy=7.3451 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 5500] reward=-35906228.8 actor_loss=0.0314 critic_loss=96048997888.0000 entropy=7.3632 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Eval 5500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491678.6 mean_steps=12.8
|
|
[Episode 5510] reward=-32750329.4 actor_loss=0.0188 critic_loss=93875954145.8824 entropy=7.3566 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Episode 5520] reward=-44693498.6 actor_loss=0.0918 critic_loss=100517579044.5714 entropy=7.3549 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 5520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-465890.8 mean_steps=12.7
|
|
[Episode 5530] reward=-41157960.6 actor_loss=0.0159 critic_loss=100909000996.5714 entropy=7.3615 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 5540] reward=-35588704.6 actor_loss=0.0455 critic_loss=93520530733.1765 entropy=7.3685 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Eval 5540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472536.8 mean_steps=13.2
|
|
[Episode 5550] reward=-31635240.3 actor_loss=0.0373 critic_loss=86412585642.6667 entropy=7.3647 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Episode 5560] reward=-40463029.2 actor_loss=0.0470 critic_loss=101927394304.0000 entropy=7.3605 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0658 front_blocked=0
|
|
[Eval 5560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566278.0 mean_steps=12.0
|
|
[Episode 5570] reward=-29221900.6 actor_loss=0.0189 critic_loss=86750496475.4286 entropy=7.3668 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Episode 5580] reward=-37788733.0 actor_loss=0.0427 critic_loss=97602881024.0000 entropy=7.3707 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Eval 5580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476192.1 mean_steps=12.9
|
|
[Episode 5590] reward=-43827086.8 actor_loss=0.0490 critic_loss=100631571456.0000 entropy=7.3770 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Episode 5600] reward=-44929289.8 actor_loss=0.0244 critic_loss=96467804790.1538 entropy=7.3864 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Eval 5600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542701.7 mean_steps=13.1
|
|
[Episode 5610] reward=-23071356.5 actor_loss=0.0214 critic_loss=87266569898.6667 entropy=7.3946 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 5620] reward=-41671791.7 actor_loss=0.0643 critic_loss=99930396672.0000 entropy=7.3878 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Eval 5620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481594.4 mean_steps=13.9
|
|
[Episode 5630] reward=-39201721.1 actor_loss=0.0496 critic_loss=101285902677.3333 entropy=7.3894 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 5640] reward=-30542045.9 actor_loss=0.0612 critic_loss=89066162283.7895 entropy=7.3966 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Eval 5640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479792.2 mean_steps=12.6
|
|
[Episode 5650] reward=-40680090.6 actor_loss=0.0454 critic_loss=102314895496.5333 entropy=7.4085 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Episode 5660] reward=-29940841.3 actor_loss=-0.0083 critic_loss=87772470923.6364 entropy=7.4108 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0430 front_blocked=0
|
|
[Eval 5660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577277.9 mean_steps=12.0
|
|
[Episode 5670] reward=-29189498.1 actor_loss=0.0218 critic_loss=87419688773.8182 entropy=7.4206 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0508 front_blocked=0
|
|
[Episode 5680] reward=-48495525.0 actor_loss=0.0633 critic_loss=101038013952.0000 entropy=7.4137 approx_kl=0.0101 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 5680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-428554.6 mean_steps=14.4
|
|
[Episode 5690] reward=-33050995.0 actor_loss=0.0167 critic_loss=90700034340.5714 entropy=7.4202 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 5700] reward=-43504460.5 actor_loss=0.0289 critic_loss=101699960832.0000 entropy=7.4175 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 5700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-454918.7 mean_steps=11.7
|
|
[Episode 5710] reward=-24058153.7 actor_loss=0.0128 critic_loss=81062787364.5714 entropy=7.4123 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0436 front_blocked=0
|
|
[Episode 5720] reward=-37624253.7 actor_loss=0.0266 critic_loss=98682486784.0000 entropy=7.4235 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Eval 5720] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-656796.3 mean_steps=11.0
|
|
[Episode 5730] reward=-40626505.9 actor_loss=0.0371 critic_loss=100862202675.2000 entropy=7.4278 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 5740] reward=-43676307.5 actor_loss=0.0401 critic_loss=104052700842.6667 entropy=7.4196 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Eval 5740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529569.2 mean_steps=12.4
|
|
[Episode 5750] reward=-22257509.6 actor_loss=0.0237 critic_loss=82180213097.4118 entropy=7.4297 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0436 front_blocked=0
|
|
[Episode 5760] reward=-42520446.3 actor_loss=0.0455 critic_loss=95426211840.0000 entropy=7.4339 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Eval 5760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463234.2 mean_steps=12.9
|
|
[Episode 5770] reward=-27623684.5 actor_loss=0.0268 critic_loss=83660585041.9200 entropy=7.4275 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Episode 5780] reward=-26838559.7 actor_loss=0.0195 critic_loss=89186610614.8571 entropy=7.4263 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Eval 5780] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-272050.8 mean_steps=15.7
|
|
[Episode 5790] reward=-39263298.7 actor_loss=0.0208 critic_loss=92085694737.0667 entropy=7.4264 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0521 front_blocked=0
|
|
[Episode 5800] reward=-32908783.4 actor_loss=0.0418 critic_loss=93112639146.6667 entropy=7.4201 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Eval 5800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-515702.5 mean_steps=14.0
|
|
[Episode 5810] reward=-44022234.2 actor_loss=0.0678 critic_loss=102152501475.5556 entropy=7.4286 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 5820] reward=-39187038.7 actor_loss=0.0374 critic_loss=89497604336.9412 entropy=7.4214 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 5820] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-702688.8 mean_steps=11.3
|
|
[Episode 5830] reward=-25937544.5 actor_loss=0.0068 critic_loss=78401654374.4000 entropy=7.4328 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0469 front_blocked=0
|
|
[Episode 5840] reward=-38067303.0 actor_loss=0.0277 critic_loss=92032795587.7647 entropy=7.4282 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Eval 5840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413139.3 mean_steps=14.1
|
|
[Episode 5850] reward=-39559137.7 actor_loss=0.0476 critic_loss=96149309819.2593 entropy=7.4283 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Episode 5860] reward=-33638540.1 actor_loss=0.0394 critic_loss=88226691859.6923 entropy=7.4221 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Eval 5860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610623.2 mean_steps=12.2
|
|
[Episode 5870] reward=-34340728.0 actor_loss=0.0351 critic_loss=91244279808.0000 entropy=7.4233 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 5880] reward=-43927652.6 actor_loss=0.0360 critic_loss=105792512409.6000 entropy=7.4196 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 5880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504188.1 mean_steps=13.0
|
|
[Episode 5890] reward=-27397026.2 actor_loss=0.0065 critic_loss=86152180297.1429 entropy=7.4238 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Episode 5900] reward=-43145557.5 actor_loss=0.0218 critic_loss=98676453376.0000 entropy=7.4169 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 5900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398205.8 mean_steps=14.3
|
|
[Episode 5910] reward=-41705079.6 actor_loss=0.0437 critic_loss=97397452924.1212 entropy=7.4141 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Episode 5920] reward=-36472394.2 actor_loss=0.0572 critic_loss=96608418059.1304 entropy=7.4129 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 5920] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-647986.2 mean_steps=10.8
|
|
[Episode 5930] reward=-28516298.0 actor_loss=0.0143 critic_loss=88874126637.1765 entropy=7.4144 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Episode 5940] reward=-39861460.3 actor_loss=0.0207 critic_loss=99833944119.3513 entropy=7.4113 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Eval 5940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408703.0 mean_steps=14.0
|
|
[Episode 5950] reward=-31207970.9 actor_loss=0.0257 critic_loss=93505651671.0400 entropy=7.4184 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0521 front_blocked=0
|
|
[Episode 5960] reward=-42780751.1 actor_loss=0.0655 critic_loss=96598957116.2353 entropy=7.4310 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 5960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422318.8 mean_steps=13.7
|
|
[Episode 5970] reward=-32852108.2 actor_loss=0.0112 critic_loss=89243323050.6667 entropy=7.4351 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Episode 5980] reward=-38692093.6 actor_loss=0.0363 critic_loss=97156612778.6667 entropy=7.4410 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Eval 5980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-389571.0 mean_steps=12.9
|
|
[Episode 5990] reward=-30933986.9 actor_loss=0.0314 critic_loss=86752343162.8800 entropy=7.4438 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Episode 6000] reward=-38883457.4 actor_loss=0.0386 critic_loss=100450832042.6667 entropy=7.4410 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0658 front_blocked=0
|
|
[Eval 6000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-602914.5 mean_steps=11.2
|
|
[Episode 6010] reward=-31395397.9 actor_loss=0.0162 critic_loss=86976566916.7407 entropy=7.4485 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0534 front_blocked=0
|
|
[Episode 6020] reward=-34390303.4 actor_loss=0.0075 critic_loss=93052884546.7826 entropy=7.4632 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 6020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485482.2 mean_steps=13.0
|
|
[Episode 6030] reward=-33312965.0 actor_loss=0.0062 critic_loss=91590429033.4118 entropy=7.4687 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0462 front_blocked=0
|
|
[Episode 6040] reward=-27987285.9 actor_loss=0.0286 critic_loss=81204585103.3600 entropy=7.4721 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Eval 6040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555843.4 mean_steps=12.0
|
|
[Episode 6050] reward=-34201237.6 actor_loss=0.0093 critic_loss=89176473600.0000 entropy=7.4781 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Episode 6060] reward=-21131427.2 actor_loss=0.0124 critic_loss=67590545889.8824 entropy=7.4971 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0417 front_blocked=0
|
|
[Eval 6060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-310951.7 mean_steps=14.1
|
|
[Episode 6070] reward=-36455413.2 actor_loss=0.0279 critic_loss=91605019852.8000 entropy=7.4923 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Episode 6080] reward=-35524526.8 actor_loss=0.0064 critic_loss=90058084592.9412 entropy=7.4977 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 6080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-403873.1 mean_steps=14.0
|
|
[Episode 6090] reward=-43831668.6 actor_loss=0.0180 critic_loss=99781606400.0000 entropy=7.4948 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Episode 6100] reward=-40191030.2 actor_loss=0.0250 critic_loss=93619679016.4211 entropy=7.4904 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Eval 6100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-386231.4 mean_steps=14.2
|
|
[Episode 6110] reward=-34820002.5 actor_loss=0.0307 critic_loss=89070279923.8095 entropy=7.4854 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Episode 6120] reward=-37885968.7 actor_loss=0.0284 critic_loss=92328701952.0000 entropy=7.4837 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Eval 6120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477997.1 mean_steps=13.1
|
|
[Episode 6130] reward=-42592250.1 actor_loss=0.0625 critic_loss=98753878835.2000 entropy=7.4896 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 6140] reward=-29803410.5 actor_loss=0.0165 critic_loss=81335436247.0400 entropy=7.4963 approx_kl=0.0097 kl_stop=1 intervention_rate=0.0488 front_blocked=0
|
|
[Eval 6140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445861.4 mean_steps=13.6
|
|
[Episode 6150] reward=-42084181.4 actor_loss=0.0333 critic_loss=94698126969.9048 entropy=7.5024 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Episode 6160] reward=-40914363.6 actor_loss=0.0373 critic_loss=100642040035.5556 entropy=7.5029 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 6160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-482123.9 mean_steps=12.2
|
|
[Episode 6170] reward=-36887030.4 actor_loss=0.0416 critic_loss=98286625398.1538 entropy=7.5069 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Episode 6180] reward=-32397531.1 actor_loss=0.0383 critic_loss=88938318701.7143 entropy=7.5043 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 6180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447717.3 mean_steps=13.6
|
|
[Episode 6190] reward=-36129926.2 actor_loss=0.0564 critic_loss=91127755434.6667 entropy=7.5052 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 6200] reward=-34865136.2 actor_loss=0.0342 critic_loss=86987157699.0476 entropy=7.5041 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Eval 6200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462975.9 mean_steps=13.5
|
|
[Episode 6210] reward=-33435097.6 actor_loss=0.0365 critic_loss=91976889548.8000 entropy=7.5031 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Episode 6220] reward=-47796081.6 actor_loss=0.0401 critic_loss=102629333779.6923 entropy=7.5162 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 6220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472799.3 mean_steps=13.7
|
|
[Episode 6230] reward=-43083457.2 actor_loss=0.0371 critic_loss=98651801276.6316 entropy=7.5206 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Episode 6240] reward=-50307394.6 actor_loss=0.0347 critic_loss=104896790528.0000 entropy=7.5115 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Eval 6240] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-365293.4 mean_steps=15.1
|
|
[Episode 6250] reward=-33603797.6 actor_loss=0.0070 critic_loss=85381675235.5556 entropy=7.5130 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0469 front_blocked=0
|
|
[Episode 6260] reward=-45132411.9 actor_loss=0.0340 critic_loss=98626843852.8000 entropy=7.5152 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 6260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-420019.8 mean_steps=14.8
|
|
[Episode 6270] reward=-33321509.2 actor_loss=0.0406 critic_loss=96147103416.3200 entropy=7.5167 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Episode 6280] reward=-24931790.0 actor_loss=0.0183 critic_loss=78846705254.4000 entropy=7.5294 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0449 front_blocked=0
|
|
[Eval 6280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475669.4 mean_steps=12.5
|
|
[Episode 6290] reward=-30089700.6 actor_loss=-0.0132 critic_loss=78896669789.0909 entropy=7.5276 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0410 front_blocked=0
|
|
[Episode 6300] reward=-37592214.8 actor_loss=0.0094 critic_loss=92171995015.5294 entropy=7.5306 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Eval 6300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533278.4 mean_steps=12.2
|
|
[Episode 6310] reward=-34556832.7 actor_loss=0.0306 critic_loss=88861101494.8571 entropy=7.5328 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Episode 6320] reward=-43314658.0 actor_loss=0.0240 critic_loss=96674429715.6923 entropy=7.5427 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0618 front_blocked=0
|
|
[Eval 6320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585794.5 mean_steps=11.8
|
|
[Episode 6330] reward=-39512885.6 actor_loss=0.0701 critic_loss=97767525338.0741 entropy=7.5514 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 6340] reward=-53031005.0 actor_loss=0.0496 critic_loss=111076375405.7143 entropy=7.5528 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 6340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498307.3 mean_steps=12.8
|
|
[Episode 6350] reward=-34003868.4 actor_loss=0.0051 critic_loss=92869079463.7241 entropy=7.5536 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Episode 6360] reward=-44761972.9 actor_loss=0.0544 critic_loss=102066769197.1765 entropy=7.5556 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 6360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471641.0 mean_steps=12.4
|
|
[Episode 6370] reward=-37917060.9 actor_loss=0.0399 critic_loss=97210897203.2000 entropy=7.5614 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 6380] reward=-37070053.7 actor_loss=0.0221 critic_loss=89787563758.9333 entropy=7.5607 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Eval 6380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571139.8 mean_steps=12.2
|
|
[Episode 6390] reward=-35719550.4 actor_loss=0.0190 critic_loss=89849304726.5882 entropy=7.5653 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Episode 6400] reward=-33968859.4 actor_loss=0.0231 critic_loss=97000644608.0000 entropy=7.5727 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Eval 6400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427855.9 mean_steps=14.1
|
|
[Episode 6410] reward=-30171854.4 actor_loss=0.0231 critic_loss=82267458839.2727 entropy=7.5790 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 6420] reward=-40794013.1 actor_loss=0.0300 critic_loss=99655835930.4828 entropy=7.5815 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 6420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505570.1 mean_steps=13.1
|
|
[Episode 6430] reward=-49583889.0 actor_loss=0.0385 critic_loss=106517111239.1111 entropy=7.5805 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 6440] reward=-35218459.3 actor_loss=0.0486 critic_loss=87562439101.2174 entropy=7.5840 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Eval 6440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471970.0 mean_steps=13.8
|
|
[Episode 6450] reward=-38046867.8 actor_loss=0.0316 critic_loss=89233105964.5217 entropy=7.5976 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 6460] reward=-35822017.6 actor_loss=0.0141 critic_loss=88286208409.6000 entropy=7.5984 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 6460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540966.0 mean_steps=12.2
|
|
[Episode 6470] reward=-27942398.0 actor_loss=0.0198 critic_loss=76669804134.4000 entropy=7.6017 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Episode 6480] reward=-40370688.6 actor_loss=0.0389 critic_loss=93571851776.0000 entropy=7.5888 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Eval 6480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549281.5 mean_steps=13.2
|
|
[Episode 6490] reward=-32424125.5 actor_loss=-0.0023 critic_loss=84397062931.6923 entropy=7.5914 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 6500] reward=-46562107.7 actor_loss=0.0445 critic_loss=100972991283.2000 entropy=7.5893 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Eval 6500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504741.9 mean_steps=13.0
|
|
[Episode 6510] reward=-38939113.9 actor_loss=0.0250 critic_loss=86227629056.0000 entropy=7.5985 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 6520] reward=-28430834.9 actor_loss=0.0100 critic_loss=76029920768.0000 entropy=7.5937 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0404 front_blocked=0
|
|
[Eval 6520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391919.6 mean_steps=14.4
|
|
[Episode 6530] reward=-32582742.1 actor_loss=0.0179 critic_loss=89385265152.0000 entropy=7.6012 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 6540] reward=-32283528.9 actor_loss=0.0135 critic_loss=86996181772.1905 entropy=7.6094 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Eval 6540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539352.2 mean_steps=12.6
|
|
[Episode 6550] reward=-34734542.7 actor_loss=0.0399 critic_loss=92496174731.6364 entropy=7.6111 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Episode 6560] reward=-35836799.2 actor_loss=0.0272 critic_loss=86741547300.5714 entropy=7.6157 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Eval 6560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-485022.8 mean_steps=12.1
|
|
[Episode 6570] reward=-27853951.5 actor_loss=0.0270 critic_loss=80878201969.7778 entropy=7.6143 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Episode 6580] reward=-34652984.6 actor_loss=0.0046 critic_loss=87258732098.7826 entropy=7.6107 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0488 front_blocked=0
|
|
[Eval 6580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-419611.4 mean_steps=13.0
|
|
[Episode 6590] reward=-31847154.7 actor_loss=0.0007 critic_loss=90766744780.8000 entropy=7.6211 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 6600] reward=-42182114.7 actor_loss=0.0672 critic_loss=96606093312.0000 entropy=7.6340 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Eval 6600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477270.7 mean_steps=12.8
|
|
[Episode 6610] reward=-42692364.6 actor_loss=0.0282 critic_loss=94279702648.4706 entropy=7.6344 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Episode 6620] reward=-23910786.8 actor_loss=-0.0050 critic_loss=69051456418.9091 entropy=7.6390 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0436 front_blocked=0
|
|
[Eval 6620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435772.0 mean_steps=13.1
|
|
[Episode 6630] reward=-33957646.3 actor_loss=0.0388 critic_loss=86557226871.4667 entropy=7.6478 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0508 front_blocked=0
|
|
[Episode 6640] reward=-43184256.9 actor_loss=-0.0004 critic_loss=91235792357.0526 entropy=7.6464 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0508 front_blocked=0
|
|
[Eval 6640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465726.6 mean_steps=13.5
|
|
[Episode 6650] reward=-32116698.3 actor_loss=-0.0037 critic_loss=86499865466.4348 entropy=7.6467 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Episode 6660] reward=-26032258.3 actor_loss=-0.0123 critic_loss=74538138785.6842 entropy=7.6580 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0358 front_blocked=0
|
|
[Eval 6660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465624.1 mean_steps=13.3
|
|
[Episode 6670] reward=-22483863.9 actor_loss=-0.0014 critic_loss=77097679075.5556 entropy=7.6538 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0371 front_blocked=0
|
|
[Episode 6680] reward=-42818551.9 actor_loss=0.0287 critic_loss=99404718080.0000 entropy=7.6544 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Eval 6680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-470548.0 mean_steps=14.2
|
|
[Episode 6690] reward=-34383187.0 actor_loss=0.0240 critic_loss=89864077793.8824 entropy=7.6562 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Episode 6700] reward=-36366137.1 actor_loss=0.0285 critic_loss=91750849194.6667 entropy=7.6615 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Eval 6700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-296239.6 mean_steps=14.8
|
|
[Episode 6710] reward=-23049374.6 actor_loss=0.0016 critic_loss=75869785702.4000 entropy=7.6598 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Episode 6720] reward=-34853263.9 actor_loss=0.0140 critic_loss=100103531724.8000 entropy=7.6681 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0488 front_blocked=0
|
|
[Eval 6720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-466741.3 mean_steps=14.2
|
|
[Episode 6730] reward=-44979186.2 actor_loss=0.0360 critic_loss=100461418905.6000 entropy=7.6672 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 6740] reward=-42626800.2 actor_loss=0.0281 critic_loss=95858683740.1600 entropy=7.6683 approx_kl=0.0095 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 6740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-291597.7 mean_steps=14.8
|
|
[Episode 6750] reward=-30951494.3 actor_loss=0.0030 critic_loss=83732540235.2941 entropy=7.6737 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0488 front_blocked=0
|
|
[Episode 6760] reward=-32855076.8 actor_loss=0.0053 critic_loss=87654625495.5789 entropy=7.6901 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0488 front_blocked=0
|
|
[Eval 6760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429792.7 mean_steps=14.1
|
|
[Episode 6770] reward=-21473278.9 actor_loss=0.0096 critic_loss=78859726356.4800 entropy=7.6988 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0430 front_blocked=0
|
|
[Episode 6780] reward=-32068778.8 actor_loss=0.0247 critic_loss=81555050736.9412 entropy=7.6919 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Eval 6780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447918.2 mean_steps=13.1
|
|
[Episode 6790] reward=-27223340.7 actor_loss=-0.0058 critic_loss=68696231772.1600 entropy=7.6967 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0384 front_blocked=0
|
|
[Episode 6800] reward=-26129540.1 actor_loss=0.0020 critic_loss=70173865149.6296 entropy=7.7051 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Eval 6800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-634977.0 mean_steps=12.3
|
|
[Episode 6810] reward=-33538953.4 actor_loss=0.0485 critic_loss=92016050532.1739 entropy=7.7052 approx_kl=0.0097 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Episode 6820] reward=-34728021.5 actor_loss=0.0249 critic_loss=88824585808.8421 entropy=7.7069 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Eval 6820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-639661.3 mean_steps=11.4
|
|
[Episode 6830] reward=-23584222.2 actor_loss=0.0102 critic_loss=73784012214.8571 entropy=7.7160 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0384 front_blocked=0
|
|
[Episode 6840] reward=-25801958.5 actor_loss=0.0084 critic_loss=80522181451.2941 entropy=7.7185 approx_kl=0.0104 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Eval 6840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-393613.1 mean_steps=14.6
|
|
[Episode 6850] reward=-36934741.2 actor_loss=0.0487 critic_loss=96719067648.0000 entropy=7.7100 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Episode 6860] reward=-32125133.7 actor_loss=0.0265 critic_loss=93245298551.4667 entropy=7.7146 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0488 front_blocked=0
|
|
[Eval 6860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465973.3 mean_steps=13.7
|
|
[Episode 6870] reward=-31668543.7 actor_loss=0.0152 critic_loss=88388560896.0000 entropy=7.7153 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0430 front_blocked=0
|
|
[Episode 6880] reward=-25858525.5 actor_loss=-0.0032 critic_loss=83079695701.3333 entropy=7.7192 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0482 front_blocked=0
|
|
[Eval 6880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541208.8 mean_steps=12.3
|
|
[Episode 6890] reward=-36583950.6 actor_loss=0.0270 critic_loss=89419276615.6800 entropy=7.7267 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 6900] reward=-37905296.2 actor_loss=0.0416 critic_loss=90384571671.2727 entropy=7.7328 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 6900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472827.9 mean_steps=13.9
|
|
[Episode 6910] reward=-37814967.5 actor_loss=0.0535 critic_loss=95748764765.0909 entropy=7.7389 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Episode 6920] reward=-45827141.0 actor_loss=0.0339 critic_loss=98994472401.4545 entropy=7.7396 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Eval 6920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524308.8 mean_steps=12.6
|
|
[Episode 6930] reward=-35430306.5 actor_loss=0.0366 critic_loss=88443952206.7692 entropy=7.7502 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0521 front_blocked=0
|
|
[Episode 6940] reward=-37894227.7 actor_loss=0.0420 critic_loss=99956472410.3529 entropy=7.7516 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Eval 6940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518119.9 mean_steps=13.2
|
|
[Episode 6950] reward=-46303192.8 actor_loss=0.0373 critic_loss=96031996024.4706 entropy=7.7577 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Episode 6960] reward=-38110678.7 actor_loss=0.0151 critic_loss=91068672107.7895 entropy=7.7476 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Eval 6960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536186.3 mean_steps=12.2
|
|
[Episode 6970] reward=-29135706.8 actor_loss=0.0084 critic_loss=87902975426.5600 entropy=7.7454 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 6980] reward=-35541730.8 actor_loss=0.0183 critic_loss=91743053336.3810 entropy=7.7494 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 6980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491555.3 mean_steps=12.8
|
|
[Episode 6990] reward=-35795922.0 actor_loss=0.0285 critic_loss=82392222768.7619 entropy=7.7609 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Episode 7000] reward=-24803324.2 actor_loss=0.0042 critic_loss=81296575977.7391 entropy=7.7768 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0462 front_blocked=0
|
|
[Eval 7000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463964.6 mean_steps=13.4
|
|
[Episode 7010] reward=-34262749.1 actor_loss=0.0126 critic_loss=84738374041.6000 entropy=7.7801 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0508 front_blocked=0
|
|
[Episode 7020] reward=-34100910.2 actor_loss=-0.0003 critic_loss=82810973449.4815 entropy=7.7910 approx_kl=0.0097 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Eval 7020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-519994.9 mean_steps=12.3
|
|
[Episode 7030] reward=-39029040.4 actor_loss=0.0253 critic_loss=88743752570.4348 entropy=7.7989 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Episode 7040] reward=-38586030.8 actor_loss=0.0345 critic_loss=93919814451.2000 entropy=7.7953 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Eval 7040] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-290157.4 mean_steps=16.2
|
|
[Episode 7050] reward=-35880546.0 actor_loss=-0.0148 critic_loss=83604412825.6000 entropy=7.8003 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0508 front_blocked=0
|
|
[Episode 7060] reward=-41591517.9 actor_loss=0.0385 critic_loss=93439030613.3333 entropy=7.8164 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0618 front_blocked=0
|
|
[Eval 7060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-364637.4 mean_steps=14.2
|
|
[Episode 7070] reward=-51257471.4 actor_loss=0.0201 critic_loss=105336268390.4000 entropy=7.8146 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Episode 7080] reward=-40793769.4 actor_loss=0.0252 critic_loss=90418901967.2381 entropy=7.8137 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Eval 7080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508182.6 mean_steps=13.3
|
|
[Episode 7090] reward=-37299419.0 actor_loss=0.0323 critic_loss=90671783936.0000 entropy=7.8172 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Episode 7100] reward=-45468378.4 actor_loss=0.0454 critic_loss=93158548626.2857 entropy=7.8062 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 7100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-603172.5 mean_steps=12.8
|
|
[Episode 7110] reward=-26478284.5 actor_loss=0.0318 critic_loss=79627262439.6190 entropy=7.8142 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0462 front_blocked=0
|
|
[Episode 7120] reward=-48042824.8 actor_loss=0.0636 critic_loss=107624745369.6000 entropy=7.8102 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Eval 7120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537460.3 mean_steps=12.2
|
|
[Episode 7130] reward=-40047239.1 actor_loss=0.0410 critic_loss=94216031280.7619 entropy=7.8079 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Episode 7140] reward=-35118720.0 actor_loss=0.0200 critic_loss=81131884953.6000 entropy=7.8129 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0521 front_blocked=0
|
|
[Eval 7140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368149.2 mean_steps=14.1
|
|
[Episode 7150] reward=-30764618.6 actor_loss=0.0122 critic_loss=80436865706.6667 entropy=7.8167 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0469 front_blocked=0
|
|
[Episode 7160] reward=-23360059.2 actor_loss=-0.0067 critic_loss=67307012808.3478 entropy=7.8246 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0299 front_blocked=0
|
|
[Eval 7160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-670434.5 mean_steps=11.7
|
|
[Episode 7170] reward=-35821038.3 actor_loss=0.0212 critic_loss=82935180434.2857 entropy=7.8284 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Episode 7180] reward=-42231440.0 actor_loss=0.0217 critic_loss=91292667904.0000 entropy=7.8327 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Eval 7180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515066.8 mean_steps=12.9
|
|
[Episode 7190] reward=-39224634.1 actor_loss=0.0183 critic_loss=89473742740.2105 entropy=7.8325 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 7200] reward=-32149110.1 actor_loss=0.0307 critic_loss=81157589178.1818 entropy=7.8420 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Eval 7200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541473.8 mean_steps=12.6
|
|
[Episode 7210] reward=-33618716.5 actor_loss=0.0185 critic_loss=84997126826.6667 entropy=7.8510 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Episode 7220] reward=-12776641.2 actor_loss=-0.0093 critic_loss=56276044961.6842 entropy=7.8516 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0293 front_blocked=0
|
|
[Eval 7220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492937.4 mean_steps=12.8
|
|
[Episode 7230] reward=-24933700.4 actor_loss=0.0230 critic_loss=75704752865.2800 entropy=7.8630 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Episode 7240] reward=-44067564.7 actor_loss=0.0227 critic_loss=91190693205.3333 entropy=7.8739 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Eval 7240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465054.1 mean_steps=14.0
|
|
[Episode 7250] reward=-36956579.5 actor_loss=0.0157 critic_loss=84791832380.9524 entropy=7.8912 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Episode 7260] reward=-37743618.3 actor_loss=0.0399 critic_loss=88285828892.4444 entropy=7.8997 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0508 front_blocked=0
|
|
[Eval 7260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440572.5 mean_steps=13.9
|
|
[Episode 7270] reward=-47043608.5 actor_loss=0.0432 critic_loss=100680653619.2000 entropy=7.9053 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 7280] reward=-29953851.3 actor_loss=0.0144 critic_loss=72763897344.0000 entropy=7.9020 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0449 front_blocked=0
|
|
[Eval 7280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-448029.0 mean_steps=12.3
|
|
[Episode 7290] reward=-38683843.7 actor_loss=0.0176 critic_loss=83468118395.2593 entropy=7.9026 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Episode 7300] reward=-19835703.5 actor_loss=-0.0136 critic_loss=65346449080.3200 entropy=7.9105 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0339 front_blocked=0
|
|
[Eval 7300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-454893.9 mean_steps=12.8
|
|
[Episode 7310] reward=-26462469.1 actor_loss=-0.0003 critic_loss=74093649547.6364 entropy=7.9357 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0417 front_blocked=0
|
|
[Episode 7320] reward=-39876317.4 actor_loss=0.0490 critic_loss=86747374861.4737 entropy=7.9507 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 7320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505470.1 mean_steps=13.2
|
|
[Episode 7330] reward=-35012463.5 actor_loss=0.0153 critic_loss=81839628769.8824 entropy=7.9527 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0534 front_blocked=0
|
|
[Episode 7340] reward=-33918205.6 actor_loss=0.0168 critic_loss=81550231738.1818 entropy=7.9547 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 7340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-642262.5 mean_steps=12.4
|
|
[Episode 7350] reward=-28717230.2 actor_loss=-0.0162 critic_loss=73171586885.8182 entropy=7.9599 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0397 front_blocked=0
|
|
[Episode 7360] reward=-32083744.6 actor_loss=0.0187 critic_loss=84348309012.4800 entropy=7.9686 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Eval 7360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-343862.7 mean_steps=13.5
|
|
[Episode 7370] reward=-28046336.6 actor_loss=0.0222 critic_loss=75782035712.0000 entropy=7.9670 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Episode 7380] reward=-29909341.1 actor_loss=0.0086 critic_loss=78374969002.6667 entropy=7.9695 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0423 front_blocked=0
|
|
[Eval 7380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498937.2 mean_steps=12.8
|
|
[Episode 7390] reward=-32421860.8 actor_loss=0.0063 critic_loss=78671541114.4348 entropy=7.9662 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 7400] reward=-25560439.3 actor_loss=0.0102 critic_loss=72178852608.0000 entropy=7.9573 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0404 front_blocked=0
|
|
[Eval 7400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-387895.7 mean_steps=13.7
|
|
[Episode 7410] reward=-28309100.6 actor_loss=0.0167 critic_loss=72611507833.9048 entropy=7.9530 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0482 front_blocked=0
|
|
[Episode 7420] reward=-25046132.2 actor_loss=0.0129 critic_loss=69913501013.3333 entropy=7.9540 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0449 front_blocked=0
|
|
[Eval 7420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468373.8 mean_steps=13.6
|
|
[Episode 7430] reward=-26104329.3 actor_loss=0.0178 critic_loss=79890094762.6667 entropy=7.9496 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 7440] reward=-33844953.1 actor_loss=0.0230 critic_loss=83546844293.5652 entropy=7.9480 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Eval 7440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-508146.7 mean_steps=12.3
|
|
[Episode 7450] reward=-25031720.7 actor_loss=-0.0132 critic_loss=71955815201.3913 entropy=7.9593 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0365 front_blocked=0
|
|
[Episode 7460] reward=-27150630.6 actor_loss=-0.0189 critic_loss=65730727936.0000 entropy=7.9615 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0391 front_blocked=0
|
|
[Eval 7460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575938.0 mean_steps=12.2
|
|
[Episode 7470] reward=-40261537.6 actor_loss=0.0467 critic_loss=88170492723.2000 entropy=7.9793 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0618 front_blocked=0
|
|
[Episode 7480] reward=-27060374.0 actor_loss=0.0052 critic_loss=76146750557.0909 entropy=7.9896 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0417 front_blocked=0
|
|
[Eval 7480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525444.6 mean_steps=12.6
|
|
[Episode 7490] reward=-27567698.6 actor_loss=0.0088 critic_loss=72090302374.9565 entropy=7.9852 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0404 front_blocked=0
|
|
[Episode 7500] reward=-22545330.7 actor_loss=-0.0190 critic_loss=66091407415.3513 entropy=7.9987 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0326 front_blocked=0
|
|
[Eval 7500] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-295258.5 mean_steps=16.1
|
|
[Episode 7510] reward=-41634400.3 actor_loss=0.0382 critic_loss=90521949915.4286 entropy=8.0136 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Episode 7520] reward=-29510532.8 actor_loss=0.0014 critic_loss=81991168178.0870 entropy=8.0213 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0404 front_blocked=0
|
|
[Eval 7520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592432.4 mean_steps=12.5
|
|
[Episode 7530] reward=-26347656.1 actor_loss=-0.0000 critic_loss=67573191580.9032 entropy=8.0217 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0443 front_blocked=0
|
|
[Episode 7540] reward=-19122036.3 actor_loss=-0.0289 critic_loss=59744544768.0000 entropy=8.0297 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0345 front_blocked=0
|
|
[Eval 7540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524288.9 mean_steps=13.2
|
|
[Episode 7550] reward=-24113566.9 actor_loss=-0.0205 critic_loss=71961015773.8667 entropy=8.0417 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0358 front_blocked=0
|
|
[Episode 7560] reward=-22283175.1 actor_loss=-0.0109 critic_loss=68584081162.2400 entropy=8.0618 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0352 front_blocked=0
|
|
[Eval 7560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511301.4 mean_steps=13.3
|
|
[Episode 7570] reward=-22800790.5 actor_loss=-0.0275 critic_loss=67526619487.0857 entropy=8.0784 approx_kl=0.0099 kl_stop=1 intervention_rate=0.0339 front_blocked=0
|
|
[Episode 7580] reward=-18624578.3 actor_loss=-0.0140 critic_loss=60587091037.0909 entropy=8.0707 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0286 front_blocked=0
|
|
[Eval 7580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623222.8 mean_steps=11.5
|
|
[Episode 7590] reward=-30398635.7 actor_loss=0.0022 critic_loss=78923676876.8000 entropy=8.0605 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0436 front_blocked=0
|
|
[Episode 7600] reward=-34950426.6 actor_loss=0.0361 critic_loss=85991258574.4516 entropy=8.0656 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 7600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574884.5 mean_steps=12.8
|
|
[Episode 7610] reward=-29536317.0 actor_loss=-0.0010 critic_loss=77492425950.6087 entropy=8.0769 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0417 front_blocked=0
|
|
[Episode 7620] reward=-37529258.2 actor_loss=0.0162 critic_loss=86075223900.1600 entropy=8.0792 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Eval 7620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442138.6 mean_steps=13.4
|
|
[Episode 7630] reward=-21303922.4 actor_loss=-0.0105 critic_loss=60732761216.0000 entropy=8.0835 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0397 front_blocked=0
|
|
[Episode 7640] reward=-17626763.4 actor_loss=-0.0183 critic_loss=53997550884.5714 entropy=8.0896 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0280 front_blocked=0
|
|
[Eval 7640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464109.5 mean_steps=13.4
|
|
[Episode 7650] reward=-33646433.6 actor_loss=0.0495 critic_loss=79238730020.5714 entropy=8.0952 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Episode 7660] reward=-33943887.0 actor_loss=0.0418 critic_loss=82928769536.0000 entropy=8.0883 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Eval 7660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500222.1 mean_steps=12.9
|
|
[Episode 7670] reward=-22583286.6 actor_loss=-0.0131 critic_loss=69673700966.4000 entropy=8.0871 approx_kl=0.0103 kl_stop=1 intervention_rate=0.0345 front_blocked=0
|
|
[Episode 7680] reward=-53802968.9 actor_loss=0.0532 critic_loss=103285227847.6800 entropy=8.0979 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 7680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562413.6 mean_steps=12.8
|
|
[Episode 7690] reward=-23081250.8 actor_loss=-0.0056 critic_loss=65962829960.5333 entropy=8.1057 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0391 front_blocked=0
|
|
[Episode 7700] reward=-29244044.0 actor_loss=0.0141 critic_loss=80772514107.0769 entropy=8.1189 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Eval 7700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585207.1 mean_steps=12.2
|
|
[Episode 7710] reward=-39917080.2 actor_loss=0.0110 critic_loss=81995062905.9048 entropy=8.1232 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Episode 7720] reward=-38594248.7 actor_loss=-0.0075 critic_loss=88281879605.8947 entropy=8.1339 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Eval 7720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534554.6 mean_steps=13.2
|
|
[Episode 7730] reward=-32961712.7 actor_loss=0.0311 critic_loss=85284874825.1429 entropy=8.1397 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 7740] reward=-33718691.6 actor_loss=0.0080 critic_loss=76255669381.5652 entropy=8.1564 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Eval 7740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-389970.6 mean_steps=14.1
|
|
[Episode 7750] reward=-26665162.3 actor_loss=0.0111 critic_loss=71132286976.0000 entropy=8.1624 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0443 front_blocked=0
|
|
[Episode 7760] reward=-19953789.3 actor_loss=-0.0497 critic_loss=44866333062.0952 entropy=8.1818 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0247 front_blocked=0
|
|
[Eval 7760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-479770.3 mean_steps=11.2
|
|
[Episode 7770] reward=-23023341.9 actor_loss=-0.0291 critic_loss=67598920735.0303 entropy=8.1954 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0339 front_blocked=0
|
|
[Episode 7780] reward=-25239755.8 actor_loss=-0.0332 critic_loss=64302215331.8400 entropy=8.2079 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0345 front_blocked=0
|
|
[Eval 7780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482090.4 mean_steps=13.8
|
|
[Episode 7790] reward=-27880961.3 actor_loss=0.0149 critic_loss=73964929024.0000 entropy=8.2248 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0436 front_blocked=0
|
|
[Episode 7800] reward=-32466538.0 actor_loss=0.0295 critic_loss=78094627761.2308 entropy=8.2330 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0534 front_blocked=0
|
|
[Eval 7800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454650.6 mean_steps=13.9
|
|
[Episode 7810] reward=-29583733.1 actor_loss=0.0112 critic_loss=67715700628.2105 entropy=8.2475 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0462 front_blocked=0
|
|
[Episode 7820] reward=-41469700.1 actor_loss=0.0295 critic_loss=85712469032.9600 entropy=8.2413 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Eval 7820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423996.6 mean_steps=13.9
|
|
[Episode 7830] reward=-26043723.7 actor_loss=0.0029 critic_loss=64650591717.0526 entropy=8.2434 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0391 front_blocked=0
|
|
[Episode 7840] reward=-18664254.3 actor_loss=-0.0291 critic_loss=55241740288.0000 entropy=8.2408 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0312 front_blocked=0
|
|
[Eval 7840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-392560.7 mean_steps=13.8
|
|
[Episode 7850] reward=-25978932.0 actor_loss=0.0139 critic_loss=65860530043.8710 entropy=8.2416 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Episode 7860] reward=-29046121.0 actor_loss=0.0348 critic_loss=71651179297.3913 entropy=8.2629 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0482 front_blocked=0
|
|
[Eval 7860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561554.2 mean_steps=12.6
|
|
[Episode 7870] reward=-27826612.3 actor_loss=0.0235 critic_loss=65945781248.0000 entropy=8.2735 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Episode 7880] reward=-21536375.1 actor_loss=-0.0080 critic_loss=53047293269.3333 entropy=8.2822 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0365 front_blocked=0
|
|
[Eval 7880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567903.8 mean_steps=11.9
|
|
[Episode 7890] reward=-29419736.1 actor_loss=0.0318 critic_loss=73572013093.9259 entropy=8.3041 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Episode 7900] reward=-31056284.2 actor_loss=0.0164 critic_loss=78791384268.8000 entropy=8.3146 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Eval 7900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-471932.7 mean_steps=14.5
|
|
[Episode 7910] reward=-26302997.0 actor_loss=0.0137 critic_loss=65013919744.0000 entropy=8.3124 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0391 front_blocked=0
|
|
[Episode 7920] reward=-14480361.1 actor_loss=-0.0555 critic_loss=36171513675.2941 entropy=8.3229 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0202 front_blocked=0
|
|
[Eval 7920] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-342221.9 mean_steps=15.7
|
|
[Episode 7930] reward=-31180898.8 actor_loss=0.0265 critic_loss=69631028155.7333 entropy=8.3316 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Episode 7940] reward=-44793807.2 actor_loss=0.0241 critic_loss=88034101930.6667 entropy=8.3401 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Eval 7940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482165.2 mean_steps=13.1
|
|
[Episode 7950] reward=-29165012.3 actor_loss=-0.0030 critic_loss=65390284957.5385 entropy=8.3500 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0417 front_blocked=0
|
|
[Episode 7960] reward=-39219194.5 actor_loss=0.0231 critic_loss=77452496896.0000 entropy=8.3432 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Eval 7960] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-593985.4 mean_steps=11.4
|
|
[Episode 7970] reward=-16424661.7 actor_loss=-0.0371 critic_loss=58485897216.0000 entropy=8.3433 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0260 front_blocked=0
|
|
[Episode 7980] reward=-16309258.6 actor_loss=0.0197 critic_loss=62335942656.0000 entropy=8.3500 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0332 front_blocked=0
|
|
[Eval 7980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505899.2 mean_steps=12.7
|
|
[Episode 7990] reward=-20185445.3 actor_loss=-0.0317 critic_loss=58218552173.7143 entropy=8.3372 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0273 front_blocked=0
|
|
[Episode 8000] reward=-16921059.3 actor_loss=-0.0430 critic_loss=42536458093.7143 entropy=8.3399 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0267 front_blocked=0
|
|
[Eval 8000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450909.0 mean_steps=14.1
|
|
[Episode 8010] reward=-18205794.9 actor_loss=-0.0317 critic_loss=52087806244.5714 entropy=8.3616 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0273 front_blocked=0
|
|
[Episode 8020] reward=-25655271.1 actor_loss=-0.0050 critic_loss=63238401365.3333 entropy=8.3635 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0378 front_blocked=0
|
|
[Eval 8020] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-274924.0 mean_steps=16.3
|
|
[Episode 8030] reward=-25981034.7 actor_loss=-0.0228 critic_loss=63359873272.2424 entropy=8.3782 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0365 front_blocked=0
|
|
[Episode 8040] reward=-18563212.0 actor_loss=-0.0329 critic_loss=58495291245.7143 entropy=8.3727 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0306 front_blocked=0
|
|
[Eval 8040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535442.2 mean_steps=12.7
|
|
[Episode 8050] reward=-18267928.5 actor_loss=-0.0468 critic_loss=50708337095.1111 entropy=8.3927 approx_kl=0.0104 kl_stop=1 intervention_rate=0.0267 front_blocked=0
|
|
[Episode 8060] reward=-34122205.1 actor_loss=0.0115 critic_loss=74310698507.3778 entropy=8.3996 approx_kl=0.0080 kl_stop=0 intervention_rate=0.0482 front_blocked=0
|
|
[Eval 8060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-581912.3 mean_steps=11.3
|
|
[Episode 8070] reward=-8322793.1 actor_loss=-0.0632 critic_loss=30975172969.4118 entropy=8.4030 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0143 front_blocked=0
|
|
[Episode 8080] reward=-35719643.7 actor_loss=-0.0017 critic_loss=80926218649.6000 entropy=8.4189 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Eval 8080] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-336419.5 mean_steps=16.4
|
|
[Episode 8090] reward=-28594598.6 actor_loss=-0.0187 critic_loss=67785433460.3636 entropy=8.4223 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0404 front_blocked=0
|
|
[Episode 8100] reward=-34773255.6 actor_loss=-0.0121 critic_loss=71260649851.2593 entropy=8.4149 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0482 front_blocked=0
|
|
[Eval 8100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-420619.0 mean_steps=13.7
|
|
[Episode 8110] reward=-22022915.0 actor_loss=-0.0211 critic_loss=62997026570.2400 entropy=8.4264 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0365 front_blocked=0
|
|
[Episode 8120] reward=-27888935.2 actor_loss=0.0094 critic_loss=64403449540.9231 entropy=8.4314 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0443 front_blocked=0
|
|
[Eval 8120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-635283.9 mean_steps=12.8
|
|
[Episode 8130] reward=-19538824.9 actor_loss=-0.0045 critic_loss=61933969221.8182 entropy=8.4314 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0339 front_blocked=0
|
|
[Episode 8140] reward=-17342524.6 actor_loss=-0.0543 critic_loss=44963881216.0000 entropy=8.4348 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0215 front_blocked=0
|
|
[Eval 8140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429090.5 mean_steps=15.0
|
|
[Episode 8150] reward=-31237419.7 actor_loss=0.0261 critic_loss=73458438144.0000 entropy=8.4325 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0443 front_blocked=0
|
|
[Episode 8160] reward=-27939327.1 actor_loss=-0.0058 critic_loss=63420657859.0476 entropy=8.4483 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0397 front_blocked=0
|
|
[Eval 8160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553749.6 mean_steps=14.2
|
|
[Episode 8170] reward=-14960170.2 actor_loss=-0.0088 critic_loss=46374673302.0690 entropy=8.4547 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0286 front_blocked=0
|
|
[Episode 8180] reward=-20986423.2 actor_loss=-0.0111 critic_loss=55230341283.8400 entropy=8.4501 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0352 front_blocked=0
|
|
[Eval 8180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534700.7 mean_steps=12.6
|
|
[Episode 8190] reward=-26440111.5 actor_loss=-0.0155 critic_loss=60424018033.7778 entropy=8.4518 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0417 front_blocked=0
|
|
[Episode 8200] reward=-21722363.9 actor_loss=-0.0012 critic_loss=59233341591.7037 entropy=8.4644 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0352 front_blocked=0
|
|
[Eval 8200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435323.1 mean_steps=13.3
|
|
[Episode 8210] reward=-29184775.3 actor_loss=0.0216 critic_loss=70030824130.2069 entropy=8.4636 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0436 front_blocked=0
|
|
[Episode 8220] reward=-28533637.0 actor_loss=-0.0054 critic_loss=69324010782.7200 entropy=8.4636 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0443 front_blocked=0
|
|
[Eval 8220] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-417823.7 mean_steps=16.4
|
|
[Episode 8230] reward=-41258439.4 actor_loss=0.0131 critic_loss=80448637466.9474 entropy=8.4713 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0618 front_blocked=0
|
|
[Episode 8240] reward=-31175078.2 actor_loss=-0.0026 critic_loss=73977853952.0000 entropy=8.4905 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0404 front_blocked=0
|
|
[Eval 8240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534028.5 mean_steps=13.4
|
|
[Episode 8250] reward=-40495730.4 actor_loss=0.0340 critic_loss=83510653486.5455 entropy=8.4998 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 8260] reward=-20760714.1 actor_loss=-0.0178 critic_loss=55077426135.0400 entropy=8.5079 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0312 front_blocked=0
|
|
[Eval 8260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463094.6 mean_steps=13.9
|
|
[Episode 8270] reward=-29410082.0 actor_loss=0.0067 critic_loss=64075541248.0000 entropy=8.5228 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0404 front_blocked=0
|
|
[Episode 8280] reward=-25578914.0 actor_loss=-0.0103 critic_loss=66306817267.8095 entropy=8.5331 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0371 front_blocked=0
|
|
[Eval 8280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-360783.3 mean_steps=15.5
|
|
[Episode 8290] reward=-23499586.9 actor_loss=-0.0137 critic_loss=56929071816.3478 entropy=8.5486 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0345 front_blocked=0
|
|
[Episode 8300] reward=-25926711.3 actor_loss=-0.0117 critic_loss=63475360699.7333 entropy=8.5878 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0358 front_blocked=0
|
|
[Eval 8300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579496.7 mean_steps=12.3
|
|
[Episode 8310] reward=-19320631.4 actor_loss=-0.0284 critic_loss=52519879262.8148 entropy=8.5960 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0345 front_blocked=0
|
|
[Episode 8320] reward=-21523214.7 actor_loss=-0.0286 critic_loss=64643083410.2857 entropy=8.6073 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0306 front_blocked=0
|
|
[Eval 8320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548379.4 mean_steps=13.2
|
|
[Episode 8330] reward=-23834223.0 actor_loss=0.0098 critic_loss=56965057050.9474 entropy=8.6192 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0358 front_blocked=0
|
|
[Episode 8340] reward=-23542083.1 actor_loss=-0.0073 critic_loss=62788907648.0000 entropy=8.6240 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0384 front_blocked=0
|
|
[Eval 8340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-506798.8 mean_steps=12.6
|
|
[Episode 8350] reward=-22071095.9 actor_loss=-0.0070 critic_loss=55712935454.1176 entropy=8.6397 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0345 front_blocked=0
|
|
[Episode 8360] reward=-25874592.8 actor_loss=0.0371 critic_loss=67797997410.4615 entropy=8.6377 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Eval 8360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535068.0 mean_steps=12.8
|
|
[Episode 8370] reward=-12599252.0 actor_loss=-0.0491 critic_loss=44719575176.5333 entropy=8.6504 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0221 front_blocked=0
|
|
[Episode 8380] reward=-26250946.9 actor_loss=0.0059 critic_loss=55911788228.9231 entropy=8.6628 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0391 front_blocked=0
|
|
[Eval 8380] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-278071.0 mean_steps=16.5
|
|
[Episode 8390] reward=-21760907.0 actor_loss=-0.0260 critic_loss=50577546333.0909 entropy=8.6787 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0280 front_blocked=0
|
|
[Episode 8400] reward=-9957410.5 actor_loss=-0.0408 critic_loss=37559105588.5128 entropy=8.6836 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0228 front_blocked=0
|
|
[Eval 8400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-457385.5 mean_steps=13.1
|
|
[Episode 8410] reward=-24864099.6 actor_loss=-0.0162 critic_loss=58983809536.0000 entropy=8.6896 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0378 front_blocked=0
|
|
[Episode 8420] reward=-26173783.4 actor_loss=0.0055 critic_loss=68154297250.9091 entropy=8.7035 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0469 front_blocked=0
|
|
[Eval 8420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504548.3 mean_steps=13.7
|
|
[Episode 8430] reward=-20181998.7 actor_loss=-0.0125 critic_loss=51481653604.1739 entropy=8.7196 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0345 front_blocked=0
|
|
[Episode 8440] reward=-12386749.7 actor_loss=-0.0394 critic_loss=44471333914.2564 entropy=8.7331 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0247 front_blocked=0
|
|
[Eval 8440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-598256.8 mean_steps=12.2
|
|
[Episode 8450] reward=-22090853.4 actor_loss=-0.0135 critic_loss=57380124779.7895 entropy=8.7506 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0312 front_blocked=0
|
|
[Episode 8460] reward=-21409841.7 actor_loss=-0.0373 critic_loss=54124379008.0000 entropy=8.7636 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0286 front_blocked=0
|
|
[Eval 8460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430811.7 mean_steps=14.8
|
|
[Episode 8470] reward=-21461208.8 actor_loss=0.0046 critic_loss=65589036646.4000 entropy=8.7921 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0391 front_blocked=0
|
|
[Episode 8480] reward=-18034110.2 actor_loss=-0.0303 critic_loss=39851097115.6757 entropy=8.8067 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0260 front_blocked=0
|
|
[Eval 8480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510743.7 mean_steps=13.6
|
|
[Episode 8490] reward=-23247141.6 actor_loss=-0.0343 critic_loss=51358159394.1333 entropy=8.8107 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0339 front_blocked=0
|
|
[Episode 8500] reward=-14260034.7 actor_loss=-0.0457 critic_loss=34882207467.2432 entropy=8.8218 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0241 front_blocked=0
|
|
[Eval 8500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559660.5 mean_steps=12.2
|
|
[Episode 8510] reward=-29223672.7 actor_loss=-0.0135 critic_loss=61328727276.3077 entropy=8.8368 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0384 front_blocked=0
|
|
[Episode 8520] reward=-9394460.9 actor_loss=-0.0536 critic_loss=34146214348.8000 entropy=8.8420 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0202 front_blocked=0
|
|
[Eval 8520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-491902.4 mean_steps=12.7
|
|
[Episode 8530] reward=-26185256.4 actor_loss=0.0064 critic_loss=62632846525.6296 entropy=8.8503 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0410 front_blocked=0
|
|
[Episode 8540] reward=-14255619.3 actor_loss=-0.0566 critic_loss=40321226524.4444 entropy=8.8600 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0189 front_blocked=0
|
|
[Eval 8540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-413990.9 mean_steps=13.8
|
|
[Episode 8550] reward=-26369421.2 actor_loss=-0.0204 critic_loss=63893770098.7586 entropy=8.8637 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0365 front_blocked=0
|
|
[Episode 8560] reward=-27839248.3 actor_loss=0.0310 critic_loss=58218341242.4348 entropy=8.8776 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0417 front_blocked=0
|
|
[Eval 8560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-502767.6 mean_steps=12.8
|
|
[Episode 8570] reward=-14617984.0 actor_loss=-0.0114 critic_loss=42352775450.4828 entropy=8.8907 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0332 front_blocked=0
|
|
[Episode 8580] reward=-15016245.4 actor_loss=-0.0305 critic_loss=41652868313.2121 entropy=8.8937 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0247 front_blocked=0
|
|
[Eval 8580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452749.8 mean_steps=14.2
|
|
[Episode 8590] reward=-22512699.8 actor_loss=-0.0154 critic_loss=52731660363.8519 entropy=8.9150 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0312 front_blocked=0
|
|
[Episode 8600] reward=-15131396.5 actor_loss=-0.0507 critic_loss=43879938366.5778 entropy=8.9104 approx_kl=0.0089 kl_stop=0 intervention_rate=0.0247 front_blocked=0
|
|
[Eval 8600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567516.4 mean_steps=13.2
|
|
[Episode 8610] reward=-12118990.1 actor_loss=-0.0408 critic_loss=47189263883.3778 entropy=8.9276 approx_kl=0.0095 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Episode 8620] reward=-11032194.9 actor_loss=-0.0515 critic_loss=37929962968.6154 entropy=8.9501 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0208 front_blocked=0
|
|
[Eval 8620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493966.2 mean_steps=13.4
|
|
[Episode 8630] reward=-14879650.7 actor_loss=-0.0272 critic_loss=42602328064.0000 entropy=8.9679 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0247 front_blocked=0
|
|
[Episode 8640] reward=-11577973.7 actor_loss=-0.0556 critic_loss=32047282113.9394 entropy=8.9734 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0182 front_blocked=0
|
|
[Eval 8640] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402560.7 mean_steps=15.7
|
|
[Episode 8650] reward=-19620241.5 actor_loss=-0.0154 critic_loss=59616276206.9333 entropy=8.9950 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0358 front_blocked=0
|
|
[Episode 8660] reward=-21438609.7 actor_loss=-0.0068 critic_loss=52490819447.4667 entropy=9.0212 approx_kl=0.0085 kl_stop=0 intervention_rate=0.0352 front_blocked=0
|
|
[Eval 8660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-448110.0 mean_steps=13.2
|
|
[Episode 8670] reward=-11524398.0 actor_loss=-0.0569 critic_loss=34261749942.0444 entropy=9.0413 approx_kl=0.0072 kl_stop=0 intervention_rate=0.0182 front_blocked=0
|
|
[Episode 8680] reward=-11969960.4 actor_loss=-0.0320 critic_loss=38155318303.0303 entropy=9.0619 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0241 front_blocked=0
|
|
[Eval 8680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502015.7 mean_steps=13.4
|
|
[Episode 8690] reward=-12625665.0 actor_loss=-0.0375 critic_loss=33854818021.5172 entropy=9.0814 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0202 front_blocked=0
|
|
[Episode 8700] reward=-23301402.2 actor_loss=-0.0174 critic_loss=57275302066.0870 entropy=9.1063 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0384 front_blocked=0
|
|
[Eval 8700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-693262.2 mean_steps=12.1
|
|
[Episode 8710] reward=-18482143.1 actor_loss=-0.0423 critic_loss=45094650148.5714 entropy=9.1127 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0273 front_blocked=0
|
|
[Episode 8720] reward=-11824038.0 actor_loss=-0.0410 critic_loss=34462866184.8276 entropy=9.1188 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0208 front_blocked=0
|
|
[Eval 8720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-490699.4 mean_steps=15.2
|
|
[Episode 8730] reward=-15606291.9 actor_loss=-0.0480 critic_loss=43571983701.3333 entropy=9.1321 approx_kl=0.0072 kl_stop=0 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 8740] reward=-6954807.2 actor_loss=-0.0756 critic_loss=23815827774.5778 entropy=9.1426 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0150 front_blocked=0
|
|
[Eval 8740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-556096.7 mean_steps=14.1
|
|
[Episode 8750] reward=-10612683.4 actor_loss=-0.0499 critic_loss=28977631232.0000 entropy=9.1379 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0202 front_blocked=0
|
|
[Episode 8760] reward=-19411229.9 actor_loss=-0.0329 critic_loss=35604831963.4286 entropy=9.1427 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0254 front_blocked=0
|
|
[Eval 8760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459630.2 mean_steps=14.2
|
|
[Episode 8770] reward=-26847001.1 actor_loss=0.0016 critic_loss=53098443016.2581 entropy=9.1647 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0365 front_blocked=0
|
|
[Episode 8780] reward=-14843984.4 actor_loss=-0.0259 critic_loss=45930515456.0000 entropy=9.1920 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0306 front_blocked=0
|
|
[Eval 8780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-593857.1 mean_steps=12.7
|
|
[Episode 8790] reward=-14251603.0 actor_loss=-0.0371 critic_loss=28777267264.0000 entropy=9.2094 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 8800] reward=-7310834.1 actor_loss=-0.0758 critic_loss=27188171277.1282 entropy=9.2397 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0130 front_blocked=0
|
|
[Eval 8800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540745.3 mean_steps=13.9
|
|
[Episode 8810] reward=-18027693.8 actor_loss=-0.0400 critic_loss=55951645857.6842 entropy=9.2503 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0267 front_blocked=0
|
|
[Episode 8820] reward=-17478768.5 actor_loss=-0.0406 critic_loss=38335532646.4000 entropy=9.2611 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0228 front_blocked=0
|
|
[Eval 8820] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-338159.0 mean_steps=17.4
|
|
[Episode 8830] reward=-11729754.9 actor_loss=-0.0630 critic_loss=26686399647.2889 entropy=9.2857 approx_kl=0.0056 kl_stop=0 intervention_rate=0.0137 front_blocked=0
|
|
[Episode 8840] reward=-13456656.0 actor_loss=-0.0700 critic_loss=34926255217.7778 entropy=9.3095 approx_kl=0.0085 kl_stop=0 intervention_rate=0.0189 front_blocked=0
|
|
[Eval 8840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-365063.5 mean_steps=14.7
|
|
[Episode 8850] reward=-12567271.4 actor_loss=-0.0665 critic_loss=29652870576.3556 entropy=9.3201 approx_kl=0.0080 kl_stop=0 intervention_rate=0.0169 front_blocked=0
|
|
[Episode 8860] reward=-18373778.2 actor_loss=-0.0331 critic_loss=49841886759.3846 entropy=9.3453 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0280 front_blocked=0
|
|
[Eval 8860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554373.5 mean_steps=12.1
|
|
[Episode 8870] reward=-20874940.7 actor_loss=-0.0268 critic_loss=41867639661.7143 entropy=9.3821 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0280 front_blocked=0
|
|
[Episode 8880] reward=-12720524.6 actor_loss=-0.0691 critic_loss=27823035547.1515 entropy=9.3900 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0163 front_blocked=0
|
|
[Eval 8880] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-655994.0 mean_steps=11.0
|
|
[Episode 8890] reward=-4257519.7 actor_loss=-0.0898 critic_loss=21328898867.2000 entropy=9.4049 approx_kl=0.0056 kl_stop=0 intervention_rate=0.0104 front_blocked=0
|
|
[Episode 8900] reward=-18597932.0 actor_loss=-0.0513 critic_loss=48726358639.3043 entropy=9.4323 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0273 front_blocked=0
|
|
[Eval 8900] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-269411.2 mean_steps=17.9
|
|
[Episode 8910] reward=-12270353.1 actor_loss=-0.0533 critic_loss=26752574272.0000 entropy=9.4539 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0176 front_blocked=0
|
|
[Episode 8920] reward=-21055690.1 actor_loss=-0.0355 critic_loss=47414385732.2667 entropy=9.4626 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0299 front_blocked=0
|
|
[Eval 8920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-369511.9 mean_steps=15.9
|
|
[Episode 8930] reward=-12243808.1 actor_loss=-0.0386 critic_loss=24366876535.4667 entropy=9.4899 approx_kl=0.0082 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Episode 8940] reward=-18804359.2 actor_loss=-0.0407 critic_loss=42669841448.9600 entropy=9.5109 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0221 front_blocked=0
|
|
[Eval 8940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-368910.4 mean_steps=14.4
|
|
[Episode 8950] reward=-25344684.9 actor_loss=0.0126 critic_loss=52692128399.3600 entropy=9.5213 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0378 front_blocked=0
|
|
[Episode 8960] reward=-6208843.2 actor_loss=-0.0769 critic_loss=16903710219.3778 entropy=9.5319 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0117 front_blocked=0
|
|
[Eval 8960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-511677.1 mean_steps=13.7
|
|
[Episode 8970] reward=-13988433.1 actor_loss=-0.0306 critic_loss=32767520819.2000 entropy=9.5521 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 8980] reward=-11219497.8 actor_loss=-0.0662 critic_loss=28066671820.8000 entropy=9.5747 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0163 front_blocked=0
|
|
[Eval 8980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442605.3 mean_steps=14.2
|
|
[Episode 8990] reward=-3220304.3 actor_loss=-0.0988 critic_loss=11058927126.7556 entropy=9.6019 approx_kl=0.0057 kl_stop=0 intervention_rate=0.0065 front_blocked=0
|
|
[Episode 9000] reward=-9202161.2 actor_loss=-0.0731 critic_loss=23011955598.2222 entropy=9.6228 approx_kl=0.0066 kl_stop=0 intervention_rate=0.0143 front_blocked=0
|
|
[Eval 9000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-627115.3 mean_steps=11.6
|
|
[Episode 9010] reward=-13834439.5 actor_loss=-0.0559 critic_loss=34136759955.9111 entropy=9.6427 approx_kl=0.0080 kl_stop=0 intervention_rate=0.0221 front_blocked=0
|
|
[Episode 9020] reward=-5101875.8 actor_loss=-0.0945 critic_loss=10405705375.2889 entropy=9.6645 approx_kl=0.0054 kl_stop=0 intervention_rate=0.0072 front_blocked=0
|
|
[Eval 9020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489516.0 mean_steps=17.5
|
|
[Episode 9030] reward=-17030768.6 actor_loss=-0.0385 critic_loss=37514124083.2000 entropy=9.6893 approx_kl=0.0069 kl_stop=0 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 9040] reward=-21451102.6 actor_loss=-0.0284 critic_loss=41732206405.8182 entropy=9.7093 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0326 front_blocked=0
|
|
[Eval 9040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549353.5 mean_steps=13.3
|
|
[Episode 9050] reward=-12913082.2 actor_loss=-0.0472 critic_loss=35044054396.3429 entropy=9.7226 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0202 front_blocked=0
|
|
[Episode 9060] reward=-17216130.9 actor_loss=-0.0386 critic_loss=39195011657.1429 entropy=9.7376 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0326 front_blocked=0
|
|
[Eval 9060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480371.3 mean_steps=15.4
|
|
[Episode 9070] reward=-8764676.7 actor_loss=-0.0737 critic_loss=20577113472.0000 entropy=9.7628 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0130 front_blocked=0
|
|
[Episode 9080] reward=-18041743.2 actor_loss=0.0063 critic_loss=38241170537.9310 entropy=9.7756 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0306 front_blocked=0
|
|
[Eval 9080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555020.7 mean_steps=14.1
|
|
[Episode 9090] reward=-12294126.8 actor_loss=-0.0369 critic_loss=18923963707.0769 entropy=9.7890 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0163 front_blocked=0
|
|
[Episode 9100] reward=-9116760.7 actor_loss=-0.0613 critic_loss=26010355939.5556 entropy=9.7907 approx_kl=0.0050 kl_stop=0 intervention_rate=0.0176 front_blocked=0
|
|
[Eval 9100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552794.8 mean_steps=13.2
|
|
[Episode 9110] reward=-8860324.2 actor_loss=-0.0710 critic_loss=17200675066.3111 entropy=9.8259 approx_kl=0.0079 kl_stop=0 intervention_rate=0.0124 front_blocked=0
|
|
[Episode 9120] reward=-16868459.7 actor_loss=-0.0633 critic_loss=33744714043.0769 entropy=9.8631 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0228 front_blocked=0
|
|
[Eval 9120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430449.8 mean_steps=16.1
|
|
[Episode 9130] reward=-25229244.5 actor_loss=-0.0253 critic_loss=44180564278.3030 entropy=9.8919 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0319 front_blocked=0
|
|
[Episode 9140] reward=-13092042.7 actor_loss=-0.0415 critic_loss=27227214912.0000 entropy=9.9086 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0221 front_blocked=0
|
|
[Eval 9140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433912.7 mean_steps=14.3
|
|
[Episode 9150] reward=-19515144.0 actor_loss=-0.0357 critic_loss=40040000034.1333 entropy=9.9344 approx_kl=0.0077 kl_stop=0 intervention_rate=0.0254 front_blocked=0
|
|
[Episode 9160] reward=-17075668.0 actor_loss=-0.0152 critic_loss=34863312031.2889 entropy=9.9408 approx_kl=0.0062 kl_stop=0 intervention_rate=0.0326 front_blocked=0
|
|
[Eval 9160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-371753.7 mean_steps=16.1
|
|
[Episode 9170] reward=-9420028.9 actor_loss=-0.0585 critic_loss=25294158540.8000 entropy=9.9518 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0169 front_blocked=0
|
|
[Episode 9180] reward=-6525570.4 actor_loss=-0.0710 critic_loss=18783851588.2667 entropy=9.9680 approx_kl=0.0052 kl_stop=0 intervention_rate=0.0137 front_blocked=0
|
|
[Eval 9180] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-349268.4 mean_steps=20.6
|
|
[Episode 9190] reward=-20145685.5 actor_loss=-0.0327 critic_loss=40528429524.1143 entropy=9.9982 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0293 front_blocked=0
|
|
[Episode 9200] reward=-7218078.8 actor_loss=-0.0830 critic_loss=19209191378.4889 entropy=10.0253 approx_kl=0.0060 kl_stop=0 intervention_rate=0.0111 front_blocked=0
|
|
[Eval 9200] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-647434.0 mean_steps=13.5
|
|
[Episode 9210] reward=-7952088.9 actor_loss=-0.0763 critic_loss=14270663680.0000 entropy=10.0430 approx_kl=0.0059 kl_stop=0 intervention_rate=0.0130 front_blocked=0
|
|
[Episode 9220] reward=-24809995.6 actor_loss=-0.0246 critic_loss=49785401958.4000 entropy=10.0883 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0332 front_blocked=0
|
|
[Eval 9220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526118.9 mean_steps=13.8
|
|
[Episode 9230] reward=-11166820.2 actor_loss=-0.0792 critic_loss=30521606052.9778 entropy=10.1154 approx_kl=0.0076 kl_stop=0 intervention_rate=0.0195 front_blocked=0
|
|
[Episode 9240] reward=-21259980.4 actor_loss=0.0143 critic_loss=39753994240.0000 entropy=10.1338 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0326 front_blocked=0
|
|
[Eval 9240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500843.2 mean_steps=14.8
|
|
[Episode 9250] reward=-11662398.9 actor_loss=-0.0496 critic_loss=27620340667.7333 entropy=10.1355 approx_kl=0.0047 kl_stop=0 intervention_rate=0.0208 front_blocked=0
|
|
[Episode 9260] reward=-13855024.9 actor_loss=-0.0280 critic_loss=27583258487.4667 entropy=10.1702 approx_kl=0.0063 kl_stop=0 intervention_rate=0.0215 front_blocked=0
|
|
[Eval 9260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459436.3 mean_steps=22.1
|
|
[Episode 9270] reward=-16748117.5 actor_loss=-0.0573 critic_loss=36121866649.6000 entropy=10.1998 approx_kl=0.0076 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Episode 9280] reward=-4862311.0 actor_loss=-0.1046 critic_loss=9681954281.2444 entropy=10.2283 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0052 front_blocked=0
|
|
[Eval 9280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-375761.4 mean_steps=29.5
|
|
[Episode 9290] reward=-2947860.5 actor_loss=-0.0903 critic_loss=6588617147.7333 entropy=10.2600 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0052 front_blocked=0
|
|
[Episode 9300] reward=-15214907.8 actor_loss=-0.0535 critic_loss=31625892886.7556 entropy=10.2992 approx_kl=0.0058 kl_stop=0 intervention_rate=0.0254 front_blocked=0
|
|
[Eval 9300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-414823.8 mean_steps=49.7
|
|
[Episode 9310] reward=-14090712.2 actor_loss=-0.0453 critic_loss=23971216270.2222 entropy=10.3177 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0208 front_blocked=0
|
|
[Episode 9320] reward=-8516324.4 actor_loss=-0.0856 critic_loss=14152738377.1429 entropy=10.3543 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0104 front_blocked=0
|
|
[Eval 9320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-404553.8 mean_steps=118.2
|
|
[Episode 9330] reward=-4124219.9 actor_loss=-0.0940 critic_loss=12571637623.4667 entropy=10.3704 approx_kl=0.0033 kl_stop=0 intervention_rate=0.0078 front_blocked=0
|
|
[Episode 9340] reward=-5994446.2 actor_loss=-0.0751 critic_loss=14993278088.5333 entropy=10.3964 approx_kl=0.0056 kl_stop=0 intervention_rate=0.0130 front_blocked=0
|
|
[Eval 9340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447202.7 mean_steps=40.5
|
|
[Episode 9350] reward=-9114591.2 actor_loss=-0.0516 critic_loss=24180552362.6667 entropy=10.4226 approx_kl=0.0065 kl_stop=0 intervention_rate=0.0169 front_blocked=0
|
|
[Episode 9360] reward=-17200579.7 actor_loss=-0.0400 critic_loss=31842428017.7778 entropy=10.4540 approx_kl=0.0066 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Eval 9360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512064.4 mean_steps=90.2
|
|
[Episode 9370] reward=-9424737.4 actor_loss=-0.0668 critic_loss=19813134882.1333 entropy=10.4893 approx_kl=0.0061 kl_stop=0 intervention_rate=0.0156 front_blocked=0
|
|
[Episode 9380] reward=-10868818.0 actor_loss=-0.0412 critic_loss=20251600418.1333 entropy=10.5213 approx_kl=0.0074 kl_stop=0 intervention_rate=0.0182 front_blocked=0
|
|
[Eval 9380] success_rate=0.000 qp_infeasible_rate=0.650 mean_return=-708058.5 mean_steps=1126.2
|
|
[Episode 9390] reward=-14070247.7 actor_loss=-0.0581 critic_loss=28538053745.7778 entropy=10.5350 approx_kl=0.0065 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Episode 9400] reward=-4558196.2 actor_loss=-0.0752 critic_loss=8917567772.4444 entropy=10.5608 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0098 front_blocked=0
|
|
[Eval 9400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426116.5 mean_steps=157.3
|
|
[Episode 9410] reward=-3057403.9 actor_loss=-0.0940 critic_loss=10882036997.6889 entropy=10.5801 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0065 front_blocked=0
|
|
[Episode 9420] reward=-13532925.0 actor_loss=-0.0548 critic_loss=23268035925.3333 entropy=10.6016 approx_kl=0.0066 kl_stop=0 intervention_rate=0.0176 front_blocked=0
|
|
[Eval 9420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-625429.3 mean_steps=243.1
|
|
[Episode 9430] reward=-14641966.8 actor_loss=-0.0611 critic_loss=23674228794.5143 entropy=10.6203 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0195 front_blocked=0
|
|
[Episode 9440] reward=-14631008.6 actor_loss=-0.0407 critic_loss=23664119716.9778 entropy=10.6396 approx_kl=0.0062 kl_stop=0 intervention_rate=0.0195 front_blocked=0
|
|
[Eval 9440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470214.6 mean_steps=193.1
|
|
[Episode 9450] reward=-10123254.6 actor_loss=-0.0620 critic_loss=17279657096.5333 entropy=10.6493 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0150 front_blocked=0
|
|
[Episode 9460] reward=-10225590.5 actor_loss=-0.0568 critic_loss=14035101832.5333 entropy=10.6925 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0130 front_blocked=0
|
|
[Eval 9460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518713.2 mean_steps=352.9
|
|
[Episode 9470] reward=-7201681.0 actor_loss=-0.0834 critic_loss=9773371505.7778 entropy=10.7220 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0117 front_blocked=0
|
|
[Episode 9480] reward=-8618974.8 actor_loss=-0.0776 critic_loss=16807405954.8444 entropy=10.7259 approx_kl=0.0039 kl_stop=0 intervention_rate=0.0130 front_blocked=0
|
|
[Eval 9480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-555315.4 mean_steps=25.9
|
|
[Episode 9490] reward=-10203041.3 actor_loss=-0.0750 critic_loss=19287177124.9778 entropy=10.7430 approx_kl=0.0052 kl_stop=0 intervention_rate=0.0163 front_blocked=0
|
|
[Episode 9500] reward=-9403997.2 actor_loss=-0.0615 critic_loss=16045164726.0444 entropy=10.7461 approx_kl=0.0043 kl_stop=0 intervention_rate=0.0182 front_blocked=0
|
|
[Eval 9500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552593.9 mean_steps=32.6
|
|
[Episode 9510] reward=-3065352.5 actor_loss=-0.0936 critic_loss=5400488009.9556 entropy=10.7636 approx_kl=0.0040 kl_stop=0 intervention_rate=0.0065 front_blocked=0
|
|
[Episode 9520] reward=-10384743.4 actor_loss=0.0011 critic_loss=17018642727.8222 entropy=10.7892 approx_kl=0.0057 kl_stop=0 intervention_rate=0.0208 front_blocked=0
|
|
[Eval 9520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448782.2 mean_steps=62.8
|
|
[Episode 9530] reward=-12167632.0 actor_loss=-0.0824 critic_loss=20407998577.7778 entropy=10.7948 approx_kl=0.0035 kl_stop=0 intervention_rate=0.0137 front_blocked=0
|
|
[Episode 9540] reward=-5052597.5 actor_loss=-0.0909 critic_loss=7678188123.0222 entropy=10.8255 approx_kl=0.0042 kl_stop=0 intervention_rate=0.0091 front_blocked=0
|
|
[Eval 9540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446088.7 mean_steps=84.7
|
|
[Episode 9550] reward=-7990569.0 actor_loss=-0.0765 critic_loss=11528731295.2889 entropy=10.8545 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0111 front_blocked=0
|
|
[Episode 9560] reward=-9259972.9 actor_loss=-0.0141 critic_loss=14070046424.1778 entropy=10.8752 approx_kl=0.0047 kl_stop=0 intervention_rate=0.0208 front_blocked=0
|
|
[Eval 9560] success_rate=0.250 qp_infeasible_rate=0.700 mean_return=-571651.9 mean_steps=404.4
|
|
[Episode 9570] reward=-21396328.5 actor_loss=0.0171 critic_loss=36048388369.0667 entropy=10.8866 approx_kl=0.0067 kl_stop=0 intervention_rate=0.0345 front_blocked=0
|
|
[Episode 9580] reward=-16083641.9 actor_loss=-0.0318 critic_loss=34384477115.7333 entropy=10.9125 approx_kl=0.0066 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Eval 9580] success_rate=0.000 qp_infeasible_rate=0.550 mean_return=-644006.6 mean_steps=1444.9
|
|
[Episode 9590] reward=-8576655.5 actor_loss=-0.0634 critic_loss=16133964800.0000 entropy=10.9318 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0130 front_blocked=0
|
|
[Episode 9600] reward=-6458933.4 actor_loss=-0.0525 critic_loss=12477533980.4444 entropy=10.9618 approx_kl=0.0036 kl_stop=0 intervention_rate=0.0130 front_blocked=0
|
|
[Eval 9600] success_rate=0.200 qp_infeasible_rate=0.700 mean_return=-580161.8 mean_steps=330.6
|
|
[Episode 9610] reward=-3027189.1 actor_loss=-0.0845 critic_loss=3040180588.0889 entropy=10.9802 approx_kl=0.0008 kl_stop=0 intervention_rate=0.0059 front_blocked=0
|
|
[Episode 9620] reward=-3807207.6 actor_loss=-0.0833 critic_loss=9781683086.2222 entropy=11.0167 approx_kl=0.0035 kl_stop=0 intervention_rate=0.0091 front_blocked=0
|
|
[Eval 9620] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-654269.1 mean_steps=11.2
|
|
[Episode 9630] reward=-5713267.2 actor_loss=-0.0512 critic_loss=10029797649.0667 entropy=11.0336 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0137 front_blocked=0
|
|
[Episode 9640] reward=-12735108.9 actor_loss=-0.0708 critic_loss=23216791392.7111 entropy=11.0627 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0163 front_blocked=0
|
|
[Eval 9640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-516191.0 mean_steps=16.1
|
|
[Episode 9650] reward=-11081373.2 actor_loss=-0.0553 critic_loss=18549743069.8667 entropy=11.0704 approx_kl=0.0061 kl_stop=0 intervention_rate=0.0202 front_blocked=0
|
|
[Episode 9660] reward=-4483900.3 actor_loss=-0.0727 critic_loss=6222012404.6222 entropy=11.0781 approx_kl=0.0019 kl_stop=0 intervention_rate=0.0078 front_blocked=0
|
|
[Eval 9660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-656389.0 mean_steps=12.4
|
|
[Episode 9670] reward=-5949991.3 actor_loss=-0.0899 critic_loss=8892813391.6444 entropy=11.1002 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0085 front_blocked=0
|
|
[Episode 9680] reward=-7029322.5 actor_loss=-0.0695 critic_loss=16941720962.8444 entropy=11.1204 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0143 front_blocked=0
|
|
[Eval 9680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428277.3 mean_steps=18.1
|
|
[Episode 9690] reward=-13965862.3 actor_loss=-0.0111 critic_loss=24764818682.3111 entropy=11.1595 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 9700] reward=-6560737.9 actor_loss=-0.0664 critic_loss=12339190647.4667 entropy=11.1687 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0117 front_blocked=0
|
|
[Eval 9700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-452139.5 mean_steps=67.5
|
|
[Episode 9710] reward=-12262746.8 actor_loss=-0.0518 critic_loss=20907951195.0222 entropy=11.1798 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0169 front_blocked=0
|
|
[Episode 9720] reward=-7046621.6 actor_loss=-0.0467 critic_loss=7776711441.0667 entropy=11.1809 approx_kl=0.0018 kl_stop=0 intervention_rate=0.0163 front_blocked=0
|
|
[Eval 9720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-622421.9 mean_steps=16.8
|
|
[Episode 9730] reward=-8826777.9 actor_loss=-0.0626 critic_loss=11324309321.9556 entropy=11.1759 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0124 front_blocked=0
|
|
[Episode 9740] reward=-7799513.3 actor_loss=-0.0762 critic_loss=11389222525.1556 entropy=11.2221 approx_kl=0.0031 kl_stop=0 intervention_rate=0.0130 front_blocked=0
|
|
[Eval 9740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515030.6 mean_steps=13.4
|
|
[Episode 9750] reward=-10255734.6 actor_loss=-0.0615 critic_loss=14687999590.4000 entropy=11.2407 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0163 front_blocked=0
|
|
[Episode 9760] reward=-2450005.3 actor_loss=-0.0999 critic_loss=2280660216.8889 entropy=11.2639 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0065 front_blocked=0
|
|
[Eval 9760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-593309.3 mean_steps=14.1
|
|
[Episode 9770] reward=-6449185.2 actor_loss=-0.0696 critic_loss=11053556462.9333 entropy=11.2847 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0117 front_blocked=0
|
|
[Episode 9780] reward=-14516772.4 actor_loss=-0.0238 critic_loss=20748653385.9556 entropy=11.2993 approx_kl=0.0033 kl_stop=0 intervention_rate=0.0260 front_blocked=0
|
|
[Eval 9780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-515456.5 mean_steps=15.3
|
|
[Episode 9790] reward=-6232775.9 actor_loss=-0.0672 critic_loss=7160044572.4444 entropy=11.2891 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0104 front_blocked=0
|
|
[Episode 9800] reward=-7642475.7 actor_loss=-0.0638 critic_loss=11487444115.9111 entropy=11.2985 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0150 front_blocked=0
|
|
[Eval 9800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466854.0 mean_steps=16.2
|
|
[Episode 9810] reward=-9943955.0 actor_loss=-0.0510 critic_loss=12918935916.0889 entropy=11.3218 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0189 front_blocked=0
|
|
[Episode 9820] reward=-11169303.4 actor_loss=-0.0509 critic_loss=13050735092.6222 entropy=11.3513 approx_kl=0.0019 kl_stop=0 intervention_rate=0.0182 front_blocked=0
|
|
[Eval 9820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-545847.3 mean_steps=13.3
|
|
[Episode 9830] reward=-18185694.1 actor_loss=-0.0372 critic_loss=29291620260.9778 entropy=11.3542 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 9840] reward=-5638817.3 actor_loss=-0.0965 critic_loss=8055379103.2889 entropy=11.3745 approx_kl=0.0042 kl_stop=0 intervention_rate=0.0085 front_blocked=0
|
|
[Eval 9840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-421670.7 mean_steps=14.8
|
|
[Episode 9850] reward=-10006042.7 actor_loss=-0.0788 critic_loss=12366566331.7333 entropy=11.3954 approx_kl=0.0008 kl_stop=0 intervention_rate=0.0137 front_blocked=0
|
|
[Episode 9860] reward=-6936083.1 actor_loss=-0.0366 critic_loss=6832546451.9111 entropy=11.4313 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0137 front_blocked=0
|
|
[Eval 9860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462971.2 mean_steps=21.9
|
|
[Episode 9870] reward=-3623479.7 actor_loss=-0.0751 critic_loss=3643757240.8889 entropy=11.4505 approx_kl=0.0028 kl_stop=0 intervention_rate=0.0111 front_blocked=0
|
|
[Episode 9880] reward=-9056105.8 actor_loss=-0.0603 critic_loss=13271539325.1556 entropy=11.4557 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0143 front_blocked=0
|
|
[Eval 9880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507260.4 mean_steps=38.2
|
|
[Episode 9890] reward=-14394009.0 actor_loss=-0.0476 critic_loss=19628237255.1111 entropy=11.4764 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0189 front_blocked=0
|
|
[Episode 9900] reward=-10807519.7 actor_loss=-0.0369 critic_loss=13051558866.4889 entropy=11.5010 approx_kl=0.0033 kl_stop=0 intervention_rate=0.0195 front_blocked=0
|
|
[Eval 9900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603324.2 mean_steps=42.9
|
|
[Episode 9910] reward=-3743114.9 actor_loss=-0.0795 critic_loss=3750280072.5333 entropy=11.5310 approx_kl=0.0017 kl_stop=0 intervention_rate=0.0104 front_blocked=0
|
|
[Episode 9920] reward=-15035487.8 actor_loss=-0.0551 critic_loss=19759064723.9111 entropy=11.5543 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0189 front_blocked=0
|
|
[Eval 9920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-624779.7 mean_steps=14.7
|
|
[Episode 9930] reward=-7480354.6 actor_loss=-0.0572 critic_loss=9340095283.2000 entropy=11.5727 approx_kl=0.0019 kl_stop=0 intervention_rate=0.0143 front_blocked=0
|
|
[Episode 9940] reward=-5058430.6 actor_loss=-0.0583 critic_loss=6654403959.4667 entropy=11.6034 approx_kl=0.0012 kl_stop=0 intervention_rate=0.0124 front_blocked=0
|
|
[Eval 9940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-364167.0 mean_steps=17.2
|
|
[Episode 9950] reward=-6460847.3 actor_loss=-0.0876 critic_loss=6735664998.4000 entropy=11.6222 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0091 front_blocked=0
|
|
[Episode 9960] reward=-9405917.1 actor_loss=-0.0528 critic_loss=10792844367.6444 entropy=11.6546 approx_kl=0.0014 kl_stop=0 intervention_rate=0.0189 front_blocked=0
|
|
[Eval 9960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430750.9 mean_steps=16.2
|
|
[Episode 9970] reward=-7483212.7 actor_loss=-0.0496 critic_loss=12080957508.2667 entropy=11.6782 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0163 front_blocked=0
|
|
[Episode 9980] reward=-4865494.1 actor_loss=-0.0868 critic_loss=4162202174.5778 entropy=11.6889 approx_kl=0.0010 kl_stop=0 intervention_rate=0.0085 front_blocked=0
|
|
[Eval 9980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-493597.1 mean_steps=19.0
|
|
[Episode 9990] reward=-16278226.5 actor_loss=-0.0521 critic_loss=20361809783.4667 entropy=11.6957 approx_kl=0.0050 kl_stop=0 intervention_rate=0.0215 front_blocked=0
|
|
[Episode 10000] reward=-12705915.9 actor_loss=-0.0326 critic_loss=16800844049.0667 entropy=11.7149 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0228 front_blocked=0
|
|
[Eval 10000] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-652970.3 mean_steps=11.7
|
|
[Episode 10010] reward=-2740566.7 actor_loss=-0.0836 critic_loss=2487279948.8000 entropy=11.7319 approx_kl=0.0010 kl_stop=0 intervention_rate=0.0078 front_blocked=0
|
|
[Episode 10020] reward=-10026994.6 actor_loss=-0.0741 critic_loss=11989841032.5333 entropy=11.7499 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0137 front_blocked=0
|
|
[Eval 10020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592516.1 mean_steps=13.8
|
|
[Episode 10030] reward=-12244190.1 actor_loss=-0.0794 critic_loss=17867325485.5111 entropy=11.7518 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0150 front_blocked=0
|
|
[Episode 10040] reward=-14560159.0 actor_loss=-0.0830 critic_loss=20052901546.6667 entropy=11.7676 approx_kl=0.0036 kl_stop=0 intervention_rate=0.0176 front_blocked=0
|
|
[Eval 10040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559552.1 mean_steps=12.6
|
|
[Episode 10050] reward=-12394864.2 actor_loss=-0.0077 critic_loss=13774158279.1111 entropy=11.7865 approx_kl=0.0041 kl_stop=0 intervention_rate=0.0247 front_blocked=0
|
|
[Episode 10060] reward=-32385203.9 actor_loss=0.0239 critic_loss=40639391197.8667 entropy=11.8135 approx_kl=0.0052 kl_stop=0 intervention_rate=0.0410 front_blocked=0
|
|
[Eval 10060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479724.2 mean_steps=37.5
|
|
[Episode 10070] reward=-21871405.1 actor_loss=-0.0297 critic_loss=25893899468.8000 entropy=11.8382 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0267 front_blocked=0
|
|
[Episode 10080] reward=-6030711.9 actor_loss=-0.0558 critic_loss=6643466467.5556 entropy=11.8541 approx_kl=0.0016 kl_stop=0 intervention_rate=0.0156 front_blocked=0
|
|
[Eval 10080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-465095.2 mean_steps=13.8
|
|
[Episode 10090] reward=-11515011.9 actor_loss=-0.0360 critic_loss=15471367532.0889 entropy=11.8987 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0169 front_blocked=0
|
|
[Episode 10100] reward=-10631088.7 actor_loss=-0.0134 critic_loss=14036777688.1778 entropy=11.9144 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Eval 10100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563337.3 mean_steps=12.2
|
|
[Episode 10110] reward=-12700415.6 actor_loss=-0.0246 critic_loss=15613321921.4222 entropy=11.9384 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0247 front_blocked=0
|
|
[Episode 10120] reward=-3759325.9 actor_loss=-0.0865 critic_loss=3787425371.0222 entropy=11.9412 approx_kl=0.0010 kl_stop=0 intervention_rate=0.0072 front_blocked=0
|
|
[Eval 10120] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-720098.0 mean_steps=9.9
|
|
[Episode 10130] reward=-24622461.3 actor_loss=0.0387 critic_loss=29787055035.7333 entropy=11.9685 approx_kl=0.0035 kl_stop=0 intervention_rate=0.0410 front_blocked=0
|
|
[Episode 10140] reward=-2248124.9 actor_loss=-0.0931 critic_loss=1875161347.5556 entropy=11.9892 approx_kl=0.0005 kl_stop=0 intervention_rate=0.0085 front_blocked=0
|
|
[Eval 10140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402311.9 mean_steps=16.6
|
|
[Episode 10150] reward=-4499198.6 actor_loss=-0.0551 critic_loss=6233913543.1111 entropy=12.0267 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0117 front_blocked=0
|
|
[Episode 10160] reward=-5422561.7 actor_loss=-0.0646 critic_loss=5481725866.6667 entropy=12.0543 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0143 front_blocked=0
|
|
[Eval 10160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-519252.2 mean_steps=15.4
|
|
[Episode 10170] reward=-10478896.2 actor_loss=0.0279 critic_loss=11678490988.0889 entropy=12.0781 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0273 front_blocked=0
|
|
[Episode 10180] reward=-17862713.4 actor_loss=0.0448 critic_loss=19925555336.5333 entropy=12.0822 approx_kl=0.0044 kl_stop=0 intervention_rate=0.0293 front_blocked=0
|
|
[Eval 10180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461744.7 mean_steps=13.8
|
|
[Episode 10190] reward=-6117465.1 actor_loss=-0.0709 critic_loss=6222370821.6889 entropy=12.0783 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0124 front_blocked=0
|
|
[Episode 10200] reward=-9724898.8 actor_loss=-0.0425 critic_loss=10795798107.0222 entropy=12.0894 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0176 front_blocked=0
|
|
[Eval 10200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473615.6 mean_steps=14.9
|
|
[Episode 10210] reward=-23964619.2 actor_loss=0.0313 critic_loss=34230248288.7111 entropy=12.1178 approx_kl=0.0044 kl_stop=0 intervention_rate=0.0365 front_blocked=0
|
|
[Episode 10220] reward=-29376207.6 actor_loss=0.0340 critic_loss=38375180014.9333 entropy=12.1499 approx_kl=0.0053 kl_stop=0 intervention_rate=0.0391 front_blocked=0
|
|
[Eval 10220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532973.5 mean_steps=14.8
|
|
[Episode 10230] reward=-12968879.9 actor_loss=-0.0210 critic_loss=14127733782.7556 entropy=12.1633 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 10240] reward=-19933348.0 actor_loss=-0.0200 critic_loss=23295330030.9333 entropy=12.1705 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0260 front_blocked=0
|
|
[Eval 10240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-583385.8 mean_steps=14.2
|
|
[Episode 10250] reward=-17513224.0 actor_loss=0.0140 critic_loss=20872193888.7111 entropy=12.1869 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0312 front_blocked=0
|
|
[Episode 10260] reward=-19407390.1 actor_loss=-0.0115 critic_loss=23567793675.3778 entropy=12.2015 approx_kl=0.0044 kl_stop=0 intervention_rate=0.0280 front_blocked=0
|
|
[Eval 10260] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-632256.0 mean_steps=11.3
|
|
[Episode 10270] reward=-17048922.5 actor_loss=-0.0002 critic_loss=20487100893.8667 entropy=12.1965 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0286 front_blocked=0
|
|
[Episode 10280] reward=-21048809.6 actor_loss=0.0561 critic_loss=30012458780.4444 entropy=12.2134 approx_kl=0.0039 kl_stop=0 intervention_rate=0.0365 front_blocked=0
|
|
[Eval 10280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-578091.1 mean_steps=12.1
|
|
[Episode 10290] reward=-4128855.4 actor_loss=-0.0830 critic_loss=3988595828.6222 entropy=12.2476 approx_kl=0.0007 kl_stop=0 intervention_rate=0.0117 front_blocked=0
|
|
[Episode 10300] reward=-10443359.0 actor_loss=-0.0375 critic_loss=11035567763.9111 entropy=12.2749 approx_kl=0.0015 kl_stop=0 intervention_rate=0.0189 front_blocked=0
|
|
[Eval 10300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588914.4 mean_steps=13.6
|
|
[Episode 10310] reward=-12043318.3 actor_loss=-0.0319 critic_loss=14201692979.2000 entropy=12.2866 approx_kl=0.0018 kl_stop=0 intervention_rate=0.0202 front_blocked=0
|
|
[Episode 10320] reward=-11244842.8 actor_loss=-0.0366 critic_loss=13112955483.0222 entropy=12.2944 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0208 front_blocked=0
|
|
[Eval 10320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-437227.6 mean_steps=13.5
|
|
[Episode 10330] reward=-36840562.1 actor_loss=0.0778 critic_loss=46182670427.0222 entropy=12.3118 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 10340] reward=-10084293.9 actor_loss=-0.0090 critic_loss=11652830845.1556 entropy=12.3269 approx_kl=0.0028 kl_stop=0 intervention_rate=0.0202 front_blocked=0
|
|
[Eval 10340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-452811.6 mean_steps=13.9
|
|
[Episode 10350] reward=-17833381.0 actor_loss=0.0337 critic_loss=20142782646.0444 entropy=12.3346 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0326 front_blocked=0
|
|
[Episode 10360] reward=-28397599.4 actor_loss=0.0523 critic_loss=34516406044.4444 entropy=12.3650 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0449 front_blocked=0
|
|
[Eval 10360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551334.9 mean_steps=12.4
|
|
[Episode 10370] reward=-20428126.2 actor_loss=0.0007 critic_loss=24060212383.2889 entropy=12.3959 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0326 front_blocked=0
|
|
[Episode 10380] reward=-23395575.5 actor_loss=0.1015 critic_loss=28281383412.6222 entropy=12.4171 approx_kl=0.0019 kl_stop=0 intervention_rate=0.0436 front_blocked=0
|
|
[Eval 10380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-437212.7 mean_steps=13.7
|
|
[Episode 10390] reward=-11133758.8 actor_loss=-0.0589 critic_loss=12398859036.4444 entropy=12.4174 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0169 front_blocked=0
|
|
[Episode 10400] reward=-15830540.8 actor_loss=-0.0264 critic_loss=17936894270.5778 entropy=12.4182 approx_kl=0.0043 kl_stop=0 intervention_rate=0.0241 front_blocked=0
|
|
[Eval 10400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477987.4 mean_steps=14.8
|
|
[Episode 10410] reward=-14958032.8 actor_loss=-0.0401 critic_loss=17092854624.7111 entropy=12.4325 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 10420] reward=-13903150.8 actor_loss=0.0142 critic_loss=15816999367.1111 entropy=12.4376 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0299 front_blocked=0
|
|
[Eval 10420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518932.0 mean_steps=13.8
|
|
[Episode 10430] reward=-11820151.2 actor_loss=-0.0509 critic_loss=13941241969.7778 entropy=12.4397 approx_kl=0.0023 kl_stop=0 intervention_rate=0.0189 front_blocked=0
|
|
[Episode 10440] reward=-28469288.9 actor_loss=0.0606 critic_loss=34335300767.2889 entropy=12.4511 approx_kl=0.0049 kl_stop=0 intervention_rate=0.0430 front_blocked=0
|
|
[Eval 10440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488123.5 mean_steps=14.4
|
|
[Episode 10450] reward=-6606798.8 actor_loss=-0.0201 critic_loss=6822875784.5333 entropy=12.4679 approx_kl=0.0009 kl_stop=0 intervention_rate=0.0195 front_blocked=0
|
|
[Episode 10460] reward=-18495219.4 actor_loss=0.0189 critic_loss=23407323477.3333 entropy=12.4603 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0352 front_blocked=0
|
|
[Eval 10460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462445.5 mean_steps=14.6
|
|
[Episode 10470] reward=-31617634.0 actor_loss=0.0658 critic_loss=38419574692.9778 entropy=12.4813 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0482 front_blocked=0
|
|
[Episode 10480] reward=-22398000.2 actor_loss=0.0745 critic_loss=25732927032.8889 entropy=12.4846 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0436 front_blocked=0
|
|
[Eval 10480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-423536.0 mean_steps=16.5
|
|
[Episode 10490] reward=-18516257.2 actor_loss=0.0072 critic_loss=20582939374.9333 entropy=12.5004 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0326 front_blocked=0
|
|
[Episode 10500] reward=-26699397.4 actor_loss=0.0366 critic_loss=31934520797.8667 entropy=12.5146 approx_kl=0.0049 kl_stop=0 intervention_rate=0.0417 front_blocked=0
|
|
[Eval 10500] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-584773.9 mean_steps=10.8
|
|
[Episode 10510] reward=-15604787.5 actor_loss=-0.0107 critic_loss=17178262414.2222 entropy=12.5443 approx_kl=0.0023 kl_stop=0 intervention_rate=0.0319 front_blocked=0
|
|
[Episode 10520] reward=-17725330.5 actor_loss=0.0342 critic_loss=20537742995.9111 entropy=12.5806 approx_kl=0.0012 kl_stop=0 intervention_rate=0.0312 front_blocked=0
|
|
[Eval 10520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520389.8 mean_steps=14.6
|
|
[Episode 10530] reward=-25903162.2 actor_loss=0.0239 critic_loss=31888258980.9778 entropy=12.5854 approx_kl=0.0028 kl_stop=0 intervention_rate=0.0371 front_blocked=0
|
|
[Episode 10540] reward=-10969202.9 actor_loss=-0.0153 critic_loss=11534863974.4000 entropy=12.5953 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Eval 10540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-614574.2 mean_steps=11.8
|
|
[Episode 10550] reward=-43034868.7 actor_loss=0.1208 critic_loss=52111952827.7333 entropy=12.6185 approx_kl=0.0062 kl_stop=0 intervention_rate=0.0618 front_blocked=0
|
|
[Episode 10560] reward=-22180131.3 actor_loss=0.0363 critic_loss=25222014793.9556 entropy=12.6075 approx_kl=0.0050 kl_stop=0 intervention_rate=0.0391 front_blocked=0
|
|
[Eval 10560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566509.6 mean_steps=12.6
|
|
[Episode 10570] reward=-18888756.5 actor_loss=-0.0102 critic_loss=21705800863.2889 entropy=12.6426 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0293 front_blocked=0
|
|
[Episode 10580] reward=-31843610.9 actor_loss=0.0725 critic_loss=37947417759.2889 entropy=12.6480 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0469 front_blocked=0
|
|
[Eval 10580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533752.5 mean_steps=13.8
|
|
[Episode 10590] reward=-25013710.2 actor_loss=0.0350 critic_loss=29663643283.9111 entropy=12.6555 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0404 front_blocked=0
|
|
[Episode 10600] reward=-24838757.6 actor_loss=0.0669 critic_loss=28121504745.2444 entropy=12.6891 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0430 front_blocked=0
|
|
[Eval 10600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534860.5 mean_steps=13.9
|
|
[Episode 10610] reward=-28307095.2 actor_loss=0.0996 critic_loss=31230635576.8889 entropy=12.7131 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 10620] reward=-27717554.9 actor_loss=0.0467 critic_loss=34382708872.5333 entropy=12.7049 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0456 front_blocked=0
|
|
[Eval 10620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-522269.6 mean_steps=12.9
|
|
[Episode 10630] reward=-39073291.6 actor_loss=0.1732 critic_loss=48085296560.3556 entropy=12.7271 approx_kl=0.0035 kl_stop=0 intervention_rate=0.0651 front_blocked=0
|
|
[Episode 10640] reward=-27306118.0 actor_loss=0.1008 critic_loss=31458785280.0000 entropy=12.7644 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 10640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619819.7 mean_steps=12.5
|
|
[Episode 10650] reward=-46543245.3 actor_loss=0.1859 critic_loss=58429959463.8222 entropy=12.7799 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 10660] reward=-22203513.3 actor_loss=0.0518 critic_loss=24778574961.7778 entropy=12.7992 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0397 front_blocked=0
|
|
[Eval 10660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-410114.0 mean_steps=16.6
|
|
[Episode 10670] reward=-27911568.3 actor_loss=0.1061 critic_loss=31730460034.8444 entropy=12.7987 approx_kl=0.0011 kl_stop=0 intervention_rate=0.0488 front_blocked=0
|
|
[Episode 10680] reward=-23551252.6 actor_loss=0.0708 critic_loss=28976239957.3333 entropy=12.8007 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0430 front_blocked=0
|
|
[Eval 10680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547787.4 mean_steps=13.8
|
|
[Episode 10690] reward=-23219016.2 actor_loss=0.0726 critic_loss=27962995689.2444 entropy=12.8096 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0417 front_blocked=0
|
|
[Episode 10700] reward=-44830107.5 actor_loss=0.1407 critic_loss=56649894843.7333 entropy=12.8367 approx_kl=0.0036 kl_stop=0 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 10700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435497.4 mean_steps=15.1
|
|
[Episode 10710] reward=-28494619.0 actor_loss=0.0627 critic_loss=32929686368.7111 entropy=12.8500 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0456 front_blocked=0
|
|
[Episode 10720] reward=-32168309.1 actor_loss=0.1049 critic_loss=39495118392.8889 entropy=12.8316 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0540 front_blocked=0
|
|
[Eval 10720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590972.8 mean_steps=12.6
|
|
[Episode 10730] reward=-35199510.7 actor_loss=0.1213 critic_loss=41795254681.6000 entropy=12.8519 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0579 front_blocked=0
|
|
[Episode 10740] reward=-32788499.9 actor_loss=0.1232 critic_loss=38423379740.4444 entropy=12.8421 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0547 front_blocked=0
|
|
[Eval 10740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-550240.6 mean_steps=12.4
|
|
[Episode 10750] reward=-27549322.2 actor_loss=0.0729 critic_loss=32263193349.6889 entropy=12.8587 approx_kl=0.0013 kl_stop=0 intervention_rate=0.0469 front_blocked=0
|
|
[Episode 10760] reward=-39681726.7 actor_loss=0.1595 critic_loss=49877282907.0222 entropy=12.8733 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 10760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-654929.7 mean_steps=12.3
|
|
[Episode 10770] reward=-35446139.6 actor_loss=0.1775 critic_loss=43420585210.3111 entropy=12.8683 approx_kl=0.0015 kl_stop=0 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 10780] reward=-35303391.3 actor_loss=0.1307 critic_loss=44100536456.5333 entropy=12.8556 approx_kl=0.0011 kl_stop=0 intervention_rate=0.0612 front_blocked=0
|
|
[Eval 10780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569139.5 mean_steps=13.2
|
|
[Episode 10790] reward=-24840113.5 actor_loss=0.1100 critic_loss=28195491248.3556 entropy=12.8505 approx_kl=0.0015 kl_stop=0 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 10800] reward=-25691690.7 actor_loss=0.0731 critic_loss=31119315490.1333 entropy=12.8545 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0462 front_blocked=0
|
|
[Eval 10800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-382755.9 mean_steps=15.6
|
|
[Episode 10810] reward=-23249538.7 actor_loss=0.0835 critic_loss=27643992246.0444 entropy=12.8484 approx_kl=0.0014 kl_stop=0 intervention_rate=0.0430 front_blocked=0
|
|
[Episode 10820] reward=-29857269.0 actor_loss=0.1288 critic_loss=35889409410.8444 entropy=12.8555 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 10820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-658655.8 mean_steps=12.4
|
|
[Episode 10830] reward=-31193826.2 actor_loss=0.0993 critic_loss=36122352116.6222 entropy=12.8697 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0508 front_blocked=0
|
|
[Episode 10840] reward=-30355746.4 actor_loss=0.0696 critic_loss=37444509513.9556 entropy=12.8688 approx_kl=0.0017 kl_stop=0 intervention_rate=0.0462 front_blocked=0
|
|
[Eval 10840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479477.2 mean_steps=14.1
|
|
[Episode 10850] reward=-51054910.8 actor_loss=0.2413 critic_loss=64768796717.5111 entropy=12.8977 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 10860] reward=-35200658.3 actor_loss=0.1216 critic_loss=40399574539.3778 entropy=12.9102 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 10860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464110.9 mean_steps=14.8
|
|
[Episode 10870] reward=-27392116.1 actor_loss=0.1200 critic_loss=31240643834.3111 entropy=12.9141 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0547 front_blocked=0
|
|
[Episode 10880] reward=-20575027.7 actor_loss=0.0662 critic_loss=24283358367.2889 entropy=12.9042 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0404 front_blocked=0
|
|
[Eval 10880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-456148.3 mean_steps=13.6
|
|
[Episode 10890] reward=-14696978.5 actor_loss=0.0775 critic_loss=17023893458.4889 entropy=12.8992 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0365 front_blocked=0
|
|
[Episode 10900] reward=-33869188.0 actor_loss=0.0810 critic_loss=41269595886.9333 entropy=12.9065 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0521 front_blocked=0
|
|
[Eval 10900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-496291.0 mean_steps=14.8
|
|
[Episode 10910] reward=-16041355.6 actor_loss=0.1733 critic_loss=17750515029.3333 entropy=12.9147 approx_kl=0.0014 kl_stop=0 intervention_rate=0.0475 front_blocked=0
|
|
[Episode 10920] reward=-30038164.3 actor_loss=0.0977 critic_loss=35917448032.7111 entropy=12.9214 approx_kl=0.0017 kl_stop=0 intervention_rate=0.0508 front_blocked=0
|
|
[Eval 10920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-589718.9 mean_steps=13.7
|
|
[Episode 10930] reward=-44948772.4 actor_loss=0.1513 critic_loss=53874062677.3333 entropy=12.9045 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 10940] reward=-20571164.9 actor_loss=0.0651 critic_loss=24086974372.9778 entropy=12.8922 approx_kl=0.0014 kl_stop=0 intervention_rate=0.0417 front_blocked=0
|
|
[Eval 10940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446846.5 mean_steps=14.4
|
|
[Episode 10950] reward=-47588126.1 actor_loss=0.1785 critic_loss=57115691235.5556 entropy=12.9084 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 10960] reward=-25501366.3 actor_loss=0.1200 critic_loss=31558801908.6222 entropy=12.9175 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0521 front_blocked=0
|
|
[Eval 10960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-594680.2 mean_steps=12.7
|
|
[Episode 10970] reward=-38392635.9 actor_loss=0.1128 critic_loss=47215243264.0000 entropy=12.9190 approx_kl=0.0035 kl_stop=0 intervention_rate=0.0592 front_blocked=0
|
|
[Episode 10980] reward=-39667463.7 actor_loss=0.1675 critic_loss=47544914557.1556 entropy=12.9387 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0645 front_blocked=0
|
|
[Eval 10980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493251.8 mean_steps=13.7
|
|
[Episode 10990] reward=-39381742.0 actor_loss=0.1455 critic_loss=47951089299.9111 entropy=12.9387 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0625 front_blocked=0
|
|
[Episode 11000] reward=-44839691.7 actor_loss=0.1777 critic_loss=54860111325.8667 entropy=12.9142 approx_kl=0.0033 kl_stop=0 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 11000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-617945.0 mean_steps=13.1
|
|
[Episode 11010] reward=-43071491.1 actor_loss=0.1930 critic_loss=54113964123.0222 entropy=12.9276 approx_kl=0.0011 kl_stop=0 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 11020] reward=-25993299.5 actor_loss=0.0725 critic_loss=30215061549.5111 entropy=12.9617 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0443 front_blocked=0
|
|
[Eval 11020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440896.6 mean_steps=15.4
|
|
[Episode 11030] reward=-29878791.5 actor_loss=0.0858 critic_loss=35075091478.7556 entropy=12.9944 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0469 front_blocked=0
|
|
[Episode 11040] reward=-27283410.8 actor_loss=0.1263 critic_loss=32971684477.1556 entropy=12.9808 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0508 front_blocked=0
|
|
[Eval 11040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541202.4 mean_steps=12.9
|
|
[Episode 11050] reward=-53361968.3 actor_loss=0.1325 critic_loss=66197457578.6667 entropy=12.9745 approx_kl=0.0050 kl_stop=0 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 11060] reward=-30999965.6 actor_loss=0.0759 critic_loss=37460002042.3111 entropy=12.9817 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0475 front_blocked=0
|
|
[Eval 11060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401546.7 mean_steps=15.6
|
|
[Episode 11070] reward=-44672210.4 actor_loss=0.2506 critic_loss=52350882065.0667 entropy=12.9951 approx_kl=0.0006 kl_stop=0 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 11080] reward=-33393411.6 actor_loss=0.1702 critic_loss=38703569851.7333 entropy=13.0014 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 11080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-420494.5 mean_steps=15.8
|
|
[Episode 11090] reward=-35033180.5 actor_loss=0.1640 critic_loss=39745028733.1556 entropy=13.0248 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 11100] reward=-36742964.5 actor_loss=0.1476 critic_loss=43296902530.8444 entropy=13.0337 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 11100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530907.2 mean_steps=14.2
|
|
[Episode 11110] reward=-26179297.1 actor_loss=0.1257 critic_loss=29993233430.7556 entropy=13.0354 approx_kl=0.0039 kl_stop=0 intervention_rate=0.0534 front_blocked=0
|
|
[Episode 11120] reward=-30289808.5 actor_loss=0.0918 critic_loss=36191405260.8000 entropy=13.0375 approx_kl=0.0041 kl_stop=0 intervention_rate=0.0521 front_blocked=0
|
|
[Eval 11120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614150.1 mean_steps=13.1
|
|
[Episode 11130] reward=-58645425.0 actor_loss=0.1901 critic_loss=73558812808.5333 entropy=13.0506 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 11140] reward=-52822428.2 actor_loss=0.2605 critic_loss=64518213267.9111 entropy=13.0521 approx_kl=0.0039 kl_stop=0 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 11140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-381734.3 mean_steps=15.8
|
|
[Episode 11150] reward=-39849754.9 actor_loss=0.1733 critic_loss=47122858348.0889 entropy=13.0614 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 11160] reward=-53895374.1 actor_loss=0.2023 critic_loss=65001134489.6000 entropy=13.0826 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 11160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440653.6 mean_steps=14.8
|
|
[Episode 11170] reward=-46777970.4 actor_loss=0.1934 critic_loss=56952240264.5333 entropy=13.0669 approx_kl=0.0053 kl_stop=0 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 11180] reward=-53748581.8 actor_loss=0.1643 critic_loss=64516469373.1556 entropy=13.0855 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 11180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-429693.7 mean_steps=13.7
|
|
[Episode 11190] reward=-40408345.4 actor_loss=0.2525 critic_loss=46925352504.8889 entropy=13.0794 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 11200] reward=-62019131.3 actor_loss=0.2263 critic_loss=77809589907.9111 entropy=13.0911 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 11200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580275.4 mean_steps=13.0
|
|
[Episode 11210] reward=-52077420.2 actor_loss=0.2175 critic_loss=63100104612.9778 entropy=13.0970 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 11220] reward=-42139564.0 actor_loss=0.2250 critic_loss=51221467044.9778 entropy=13.0938 approx_kl=0.0042 kl_stop=0 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 11220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503221.2 mean_steps=14.5
|
|
[Episode 11230] reward=-48221471.5 actor_loss=0.1328 critic_loss=60123086392.8889 entropy=13.0999 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 11240] reward=-35296971.2 actor_loss=0.0685 critic_loss=41731443370.6667 entropy=13.1012 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 11240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418349.9 mean_steps=15.9
|
|
[Episode 11250] reward=-54677825.8 actor_loss=0.1659 critic_loss=66216399576.1778 entropy=13.1158 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 11260] reward=-35515236.1 actor_loss=0.1379 critic_loss=41854235898.3111 entropy=13.1448 approx_kl=0.0043 kl_stop=0 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 11260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-558900.3 mean_steps=14.9
|
|
[Episode 11270] reward=-28239547.7 actor_loss=0.0873 critic_loss=31461045179.7333 entropy=13.1584 approx_kl=0.0050 kl_stop=0 intervention_rate=0.0534 front_blocked=0
|
|
[Episode 11280] reward=-43605944.4 actor_loss=0.1322 critic_loss=53454746874.3111 entropy=13.1704 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 11280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-379048.6 mean_steps=16.1
|
|
[Episode 11290] reward=-49415713.4 actor_loss=0.2603 critic_loss=60301070609.0667 entropy=13.1720 approx_kl=0.0031 kl_stop=0 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 11300] reward=-41754673.9 actor_loss=0.1865 critic_loss=50448774849.4222 entropy=13.1769 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 11300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555119.9 mean_steps=13.2
|
|
[Episode 11310] reward=-43809064.4 actor_loss=0.1452 critic_loss=52450805987.5556 entropy=13.1681 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 11320] reward=-29489508.3 actor_loss=0.1154 critic_loss=35036794971.0222 entropy=13.1787 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0534 front_blocked=0
|
|
[Eval 11320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410124.3 mean_steps=15.7
|
|
[Episode 11330] reward=-54107997.4 actor_loss=0.2771 critic_loss=64998782202.3111 entropy=13.1837 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 11340] reward=-58225886.4 actor_loss=0.3018 critic_loss=69828118209.4222 entropy=13.2090 approx_kl=0.0035 kl_stop=0 intervention_rate=0.0970 front_blocked=0
|
|
[Eval 11340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-358168.3 mean_steps=15.4
|
|
[Episode 11350] reward=-48960303.6 actor_loss=0.2307 critic_loss=59495070196.6222 entropy=13.2220 approx_kl=0.0041 kl_stop=0 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 11360] reward=-49891584.2 actor_loss=0.2514 critic_loss=58945626476.0889 entropy=13.2061 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 11360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563403.5 mean_steps=13.3
|
|
[Episode 11370] reward=-54117116.3 actor_loss=0.2598 critic_loss=66150018798.9333 entropy=13.2030 approx_kl=0.0028 kl_stop=0 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 11380] reward=-57748903.3 actor_loss=0.2166 critic_loss=71658355552.7111 entropy=13.2009 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 11380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-597226.6 mean_steps=13.8
|
|
[Episode 11390] reward=-37315797.8 actor_loss=0.2087 critic_loss=44940951916.0889 entropy=13.2070 approx_kl=0.0028 kl_stop=0 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 11400] reward=-64677515.5 actor_loss=0.1778 critic_loss=79319477998.9333 entropy=13.1979 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 11400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-400581.5 mean_steps=14.6
|
|
[Episode 11410] reward=-62523776.8 actor_loss=0.1626 critic_loss=79111332568.1778 entropy=13.2029 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 11420] reward=-41651028.7 actor_loss=0.2368 critic_loss=48713200799.2889 entropy=13.1985 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 11420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522618.0 mean_steps=14.0
|
|
[Episode 11430] reward=-47833106.2 actor_loss=0.2562 critic_loss=55838016944.3556 entropy=13.2032 approx_kl=0.0042 kl_stop=0 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 11440] reward=-50160496.6 actor_loss=0.1856 critic_loss=62851599200.7111 entropy=13.2006 approx_kl=0.0039 kl_stop=0 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 11440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536267.9 mean_steps=13.2
|
|
[Episode 11450] reward=-69502736.9 actor_loss=0.2772 critic_loss=88633734303.2889 entropy=13.2019 approx_kl=0.0039 kl_stop=0 intervention_rate=0.0983 front_blocked=0
|
|
[Episode 11460] reward=-49978352.7 actor_loss=0.1555 critic_loss=60797677932.0889 entropy=13.2321 approx_kl=0.0031 kl_stop=0 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 11460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508876.5 mean_steps=14.8
|
|
[Episode 11470] reward=-44835482.1 actor_loss=0.1797 critic_loss=54398083345.0667 entropy=13.2068 approx_kl=0.0023 kl_stop=0 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 11480] reward=-30707047.5 actor_loss=0.1495 critic_loss=37226436653.5111 entropy=13.2231 approx_kl=0.0018 kl_stop=0 intervention_rate=0.0579 front_blocked=0
|
|
[Eval 11480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566014.4 mean_steps=13.0
|
|
[Episode 11490] reward=-50735264.7 actor_loss=0.2218 critic_loss=60795185470.5778 entropy=13.2201 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 11500] reward=-48424085.4 actor_loss=0.2574 critic_loss=59929841755.0222 entropy=13.2462 approx_kl=0.0033 kl_stop=0 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 11500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421061.2 mean_steps=15.4
|
|
[Episode 11510] reward=-52733466.3 actor_loss=0.1734 critic_loss=62919542101.3333 entropy=13.2372 approx_kl=0.0033 kl_stop=0 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 11520] reward=-54159365.9 actor_loss=0.2320 critic_loss=66130308027.7333 entropy=13.2236 approx_kl=0.0049 kl_stop=0 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 11520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434325.9 mean_steps=15.9
|
|
[Episode 11530] reward=-50970692.4 actor_loss=0.2112 critic_loss=61011652608.0000 entropy=13.2567 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 11540] reward=-67279005.1 actor_loss=0.2620 critic_loss=82167685484.0889 entropy=13.2769 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1055 front_blocked=0
|
|
[Eval 11540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506513.5 mean_steps=14.1
|
|
[Episode 11550] reward=-55627756.0 actor_loss=0.2202 critic_loss=67522012501.3333 entropy=13.2794 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 11560] reward=-67415520.6 actor_loss=0.2168 critic_loss=83009469189.6889 entropy=13.2820 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 11560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467972.7 mean_steps=15.3
|
|
[Episode 11570] reward=-59568498.2 actor_loss=0.2156 critic_loss=73232592531.9111 entropy=13.2983 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 11580] reward=-59447997.1 actor_loss=0.2831 critic_loss=70696282612.6222 entropy=13.3094 approx_kl=0.0054 kl_stop=0 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 11580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-647138.8 mean_steps=12.7
|
|
[Episode 11590] reward=-45053482.9 actor_loss=0.1914 critic_loss=51590944267.3778 entropy=13.2990 approx_kl=0.0041 kl_stop=0 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 11600] reward=-57889358.4 actor_loss=0.2965 critic_loss=70570918252.0889 entropy=13.3071 approx_kl=0.0052 kl_stop=0 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 11600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383657.6 mean_steps=16.7
|
|
[Episode 11610] reward=-39649194.1 actor_loss=0.2414 critic_loss=46934551665.7778 entropy=13.3198 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 11620] reward=-44465558.9 actor_loss=0.2126 critic_loss=54767289685.3333 entropy=13.3050 approx_kl=0.0040 kl_stop=0 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 11620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474613.2 mean_steps=15.7
|
|
[Episode 11630] reward=-54135038.8 actor_loss=0.2252 critic_loss=69204190913.4222 entropy=13.2935 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 11640] reward=-71415408.9 actor_loss=0.2965 critic_loss=86473045697.4222 entropy=13.3163 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 11640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530996.5 mean_steps=14.8
|
|
[Episode 11650] reward=-57022444.7 actor_loss=0.2726 critic_loss=70155979889.7778 entropy=13.3115 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 11660] reward=-43181429.5 actor_loss=0.2400 critic_loss=51946742670.2222 entropy=13.2999 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 11660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400432.2 mean_steps=17.1
|
|
[Episode 11670] reward=-34806649.6 actor_loss=0.2075 critic_loss=40220441304.1778 entropy=13.3041 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 11680] reward=-39123634.0 actor_loss=0.2642 critic_loss=44419977944.1778 entropy=13.3085 approx_kl=0.0049 kl_stop=0 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 11680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-612003.7 mean_steps=13.2
|
|
[Episode 11690] reward=-48086150.3 actor_loss=0.2038 critic_loss=57670499168.7111 entropy=13.3200 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 11700] reward=-74220794.4 actor_loss=0.3412 critic_loss=90454708497.0667 entropy=13.3389 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 11700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547205.2 mean_steps=13.9
|
|
[Episode 11710] reward=-52841438.3 actor_loss=0.1446 critic_loss=64641944234.6667 entropy=13.3593 approx_kl=0.0066 kl_stop=0 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 11720] reward=-58101430.4 actor_loss=0.2887 critic_loss=70563321446.4000 entropy=13.3858 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0983 front_blocked=0
|
|
[Eval 11720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510280.1 mean_steps=13.6
|
|
[Episode 11730] reward=-59323382.8 actor_loss=0.2760 critic_loss=73138701107.2000 entropy=13.3857 approx_kl=0.0051 kl_stop=0 intervention_rate=0.0996 front_blocked=0
|
|
[Episode 11740] reward=-70177032.9 actor_loss=0.2987 critic_loss=87547790222.2222 entropy=13.4102 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1068 front_blocked=0
|
|
[Eval 11740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579044.9 mean_steps=13.4
|
|
[Episode 11750] reward=-54775789.7 actor_loss=0.3456 critic_loss=65437422478.2222 entropy=13.4116 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1003 front_blocked=0
|
|
[Episode 11760] reward=-65801138.8 actor_loss=0.3541 critic_loss=82414797528.1778 entropy=13.4245 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1087 front_blocked=0
|
|
[Eval 11760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-503797.7 mean_steps=13.6
|
|
[Episode 11770] reward=-66477215.5 actor_loss=0.2819 critic_loss=82022158791.1111 entropy=13.4496 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1003 front_blocked=0
|
|
[Episode 11780] reward=-58386219.0 actor_loss=0.2115 critic_loss=71266534013.1555 entropy=13.4468 approx_kl=0.0065 kl_stop=0 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 11780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452408.4 mean_steps=15.8
|
|
[Episode 11790] reward=-63258637.3 actor_loss=0.2722 critic_loss=75769820319.2889 entropy=13.4394 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1016 front_blocked=0
|
|
[Episode 11800] reward=-81307047.7 actor_loss=0.2528 critic_loss=101900033683.9111 entropy=13.4754 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 11800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563621.6 mean_steps=14.1
|
|
[Episode 11810] reward=-59085266.3 actor_loss=0.2561 critic_loss=71990128184.8889 entropy=13.5158 approx_kl=0.0047 kl_stop=0 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 11820] reward=-68240885.9 actor_loss=0.2704 critic_loss=84500153594.3111 entropy=13.5227 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1016 front_blocked=0
|
|
[Eval 11820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-369749.5 mean_steps=17.5
|
|
[Episode 11830] reward=-64990345.0 actor_loss=0.2908 critic_loss=80272383453.8667 entropy=13.5203 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1048 front_blocked=0
|
|
[Episode 11840] reward=-58409248.9 actor_loss=0.2907 critic_loss=71227950694.4000 entropy=13.5432 approx_kl=0.0031 kl_stop=0 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 11840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506726.8 mean_steps=14.4
|
|
[Episode 11850] reward=-67929575.8 actor_loss=0.3119 critic_loss=81310334065.7778 entropy=13.5244 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Episode 11860] reward=-57043750.0 actor_loss=0.2794 critic_loss=68636607465.2444 entropy=13.5248 approx_kl=0.0051 kl_stop=0 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 11860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-416640.1 mean_steps=15.0
|
|
[Episode 11870] reward=-71223452.6 actor_loss=0.3048 critic_loss=86392137136.3556 entropy=13.5352 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Episode 11880] reward=-53014824.1 actor_loss=0.2276 critic_loss=66548975206.4000 entropy=13.5546 approx_kl=0.0047 kl_stop=0 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 11880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532247.2 mean_steps=14.8
|
|
[Episode 11890] reward=-73750157.3 actor_loss=0.2944 critic_loss=89073129335.4667 entropy=13.5635 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Episode 11900] reward=-57205055.2 actor_loss=0.2374 critic_loss=69867294173.8667 entropy=13.5881 approx_kl=0.0040 kl_stop=0 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 11900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413622.7 mean_steps=15.8
|
|
[Episode 11910] reward=-63611559.3 actor_loss=0.3765 critic_loss=77056150641.7778 entropy=13.6100 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1146 front_blocked=0
|
|
[Episode 11920] reward=-66234122.6 actor_loss=0.2447 critic_loss=80604409036.8000 entropy=13.6326 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1022 front_blocked=0
|
|
[Eval 11920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-410642.6 mean_steps=16.5
|
|
[Episode 11930] reward=-75233540.2 actor_loss=0.3256 critic_loss=92157542035.9111 entropy=13.6401 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Episode 11940] reward=-62225685.5 actor_loss=0.2295 critic_loss=78589057251.5556 entropy=13.6637 approx_kl=0.0041 kl_stop=0 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 11940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521293.8 mean_steps=14.3
|
|
[Episode 11950] reward=-60447496.2 actor_loss=0.1985 critic_loss=74265932868.2667 entropy=13.7113 approx_kl=0.0064 kl_stop=0 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 11960] reward=-67280279.9 actor_loss=0.3538 critic_loss=85781351628.8000 entropy=13.7270 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Eval 11960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-474487.1 mean_steps=13.3
|
|
[Episode 11970] reward=-67533635.4 actor_loss=0.3628 critic_loss=81836960426.6667 entropy=13.7573 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 11980] reward=-63929211.5 actor_loss=0.3145 critic_loss=77939104608.7111 entropy=13.7593 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 11980] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-289919.2 mean_steps=18.6
|
|
[Episode 11990] reward=-64925674.5 actor_loss=0.2777 critic_loss=77671705440.7111 entropy=13.7811 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1022 front_blocked=0
|
|
[Episode 12000] reward=-76944912.0 actor_loss=0.2713 critic_loss=97418758644.6222 entropy=13.7958 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 12000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445327.2 mean_steps=14.7
|
|
[Episode 12010] reward=-87043124.3 actor_loss=0.3329 critic_loss=107587174035.9111 entropy=13.8271 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 12020] reward=-82513496.6 actor_loss=0.3721 critic_loss=102448498460.4444 entropy=13.8278 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 12020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-594732.6 mean_steps=13.9
|
|
[Episode 12030] reward=-76725963.9 actor_loss=0.2790 critic_loss=95819218215.8222 entropy=13.8451 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 12040] reward=-59162739.6 actor_loss=0.2946 critic_loss=73943596418.8445 entropy=13.8420 approx_kl=0.0036 kl_stop=0 intervention_rate=0.0964 front_blocked=0
|
|
[Eval 12040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540258.3 mean_steps=13.4
|
|
[Episode 12050] reward=-75157373.0 actor_loss=0.3132 critic_loss=94310187372.0889 entropy=13.8717 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 12060] reward=-66246585.2 actor_loss=0.3596 critic_loss=83616349661.8667 entropy=13.8680 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 12060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514117.8 mean_steps=14.1
|
|
[Episode 12070] reward=-66503217.0 actor_loss=0.3360 critic_loss=83271357053.1555 entropy=13.8912 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1061 front_blocked=0
|
|
[Episode 12080] reward=-50233109.4 actor_loss=0.2509 critic_loss=61517016450.8444 entropy=13.9188 approx_kl=0.0047 kl_stop=0 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 12080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-539928.0 mean_steps=12.6
|
|
[Episode 12090] reward=-62271602.6 actor_loss=0.3196 critic_loss=76678211174.4000 entropy=13.9116 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1074 front_blocked=0
|
|
[Episode 12100] reward=-59336712.4 actor_loss=0.2314 critic_loss=72625773499.7333 entropy=13.9208 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 12100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-403529.0 mean_steps=15.3
|
|
[Episode 12110] reward=-73399393.5 actor_loss=0.2890 critic_loss=92070532073.2444 entropy=13.9640 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1087 front_blocked=0
|
|
[Episode 12120] reward=-79046688.5 actor_loss=0.3183 critic_loss=98725880172.0889 entropy=13.9518 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 12120] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-328462.9 mean_steps=17.6
|
|
[Episode 12130] reward=-75411634.0 actor_loss=0.3092 critic_loss=91966871415.4667 entropy=13.9817 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Episode 12140] reward=-77552494.0 actor_loss=0.2426 critic_loss=96202292701.8667 entropy=14.0171 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Eval 12140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511361.9 mean_steps=14.1
|
|
[Episode 12150] reward=-87606200.6 actor_loss=0.2896 critic_loss=107866735684.2667 entropy=14.0358 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 12160] reward=-75587461.5 actor_loss=0.3199 critic_loss=91667551027.2000 entropy=14.0485 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 12160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474364.8 mean_steps=13.7
|
|
[Episode 12170] reward=-72705808.2 actor_loss=0.2997 critic_loss=88575569100.8000 entropy=14.0656 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1094 front_blocked=0
|
|
[Episode 12180] reward=-78872975.3 actor_loss=0.2487 critic_loss=96054617702.4000 entropy=14.0624 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Eval 12180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526979.9 mean_steps=13.2
|
|
[Episode 12190] reward=-64848902.4 actor_loss=0.3102 critic_loss=79663691821.5111 entropy=14.0689 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1042 front_blocked=0
|
|
[Episode 12200] reward=-71521500.2 actor_loss=0.3116 critic_loss=86973853422.9333 entropy=14.0833 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 12200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445744.2 mean_steps=14.3
|
|
[Episode 12210] reward=-72104622.0 actor_loss=0.2765 critic_loss=87678582237.8667 entropy=14.0759 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1094 front_blocked=0
|
|
[Episode 12220] reward=-79636712.0 actor_loss=0.3380 critic_loss=97960679469.5111 entropy=14.0927 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 12220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-461652.3 mean_steps=15.2
|
|
[Episode 12230] reward=-78584466.9 actor_loss=0.3419 critic_loss=95661287378.4889 entropy=14.0999 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 12240] reward=-77330144.3 actor_loss=0.3497 critic_loss=98847813176.8889 entropy=14.1299 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1139 front_blocked=0
|
|
[Eval 12240] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-398468.1 mean_steps=16.8
|
|
[Episode 12250] reward=-70673099.2 actor_loss=0.3223 critic_loss=86289723300.9778 entropy=14.1411 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Episode 12260] reward=-81925640.7 actor_loss=0.2523 critic_loss=106913038336.0000 entropy=14.1484 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 12260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-441741.5 mean_steps=13.6
|
|
[Episode 12270] reward=-76608918.7 actor_loss=0.3288 critic_loss=96488790789.6889 entropy=14.1739 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1146 front_blocked=0
|
|
[Episode 12280] reward=-68916732.2 actor_loss=0.3382 critic_loss=84947082717.8667 entropy=14.1839 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1081 front_blocked=0
|
|
[Eval 12280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407923.9 mean_steps=15.8
|
|
[Episode 12290] reward=-66027148.4 actor_loss=0.3055 critic_loss=85126903307.3778 entropy=14.2163 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1055 front_blocked=0
|
|
[Episode 12300] reward=-78601141.1 actor_loss=0.3216 critic_loss=101047126880.7111 entropy=14.2196 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Eval 12300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-386944.0 mean_steps=13.8
|
|
[Episode 12310] reward=-83336937.6 actor_loss=0.2944 critic_loss=104989524332.0889 entropy=14.2278 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 12320] reward=-77248256.8 actor_loss=0.2707 critic_loss=97828343899.0222 entropy=14.2412 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1081 front_blocked=0
|
|
[Eval 12320] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-322534.3 mean_steps=15.9
|
|
[Episode 12330] reward=-79719202.0 actor_loss=0.2697 critic_loss=101988736659.9111 entropy=14.2534 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 12340] reward=-81246320.6 actor_loss=0.3612 critic_loss=104483420387.5556 entropy=14.2887 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 12340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-662944.6 mean_steps=12.4
|
|
[Episode 12350] reward=-72591562.1 actor_loss=0.3035 critic_loss=91894395153.0667 entropy=14.2782 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1081 front_blocked=0
|
|
[Episode 12360] reward=-91533960.6 actor_loss=0.3541 critic_loss=114191250773.3333 entropy=14.2901 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 12360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441840.9 mean_steps=14.3
|
|
[Episode 12370] reward=-70160105.8 actor_loss=0.3496 critic_loss=85120445826.8445 entropy=14.2912 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1081 front_blocked=0
|
|
[Episode 12380] reward=-78253644.1 actor_loss=0.3267 critic_loss=96516470465.4222 entropy=14.2951 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 12380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575582.2 mean_steps=13.3
|
|
[Episode 12390] reward=-89828443.5 actor_loss=0.2873 critic_loss=113358545078.0444 entropy=14.3148 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Episode 12400] reward=-79679918.7 actor_loss=0.3144 critic_loss=99800065274.3111 entropy=14.3244 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Eval 12400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496044.8 mean_steps=13.8
|
|
[Episode 12410] reward=-59029555.7 actor_loss=0.3008 critic_loss=72691463873.4222 entropy=14.3452 approx_kl=0.0019 kl_stop=0 intervention_rate=0.0983 front_blocked=0
|
|
[Episode 12420] reward=-78053361.1 actor_loss=0.4315 critic_loss=96981931167.2889 entropy=14.3484 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 12420] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-720581.7 mean_steps=10.8
|
|
[Episode 12430] reward=-79295224.1 actor_loss=0.4066 critic_loss=100570692903.8222 entropy=14.3743 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 12440] reward=-81641400.8 actor_loss=0.3240 critic_loss=103253241947.0222 entropy=14.3778 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Eval 12440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-341130.5 mean_steps=15.2
|
|
[Episode 12450] reward=-76221187.1 actor_loss=0.4196 critic_loss=95989949144.1778 entropy=14.3897 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 12460] reward=-83618184.2 actor_loss=0.2785 critic_loss=108548949606.4000 entropy=14.4081 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 12460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401944.6 mean_steps=16.1
|
|
[Episode 12470] reward=-70323151.2 actor_loss=0.2947 critic_loss=87585469599.2889 entropy=14.4176 approx_kl=0.0019 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 12480] reward=-81074961.1 actor_loss=0.3317 critic_loss=104357709596.4444 entropy=14.4245 approx_kl=0.0009 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 12480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540381.1 mean_steps=14.4
|
|
[Episode 12490] reward=-73245637.9 actor_loss=0.2774 critic_loss=92919571342.2222 entropy=14.4191 approx_kl=0.0019 kl_stop=0 intervention_rate=0.1068 front_blocked=0
|
|
[Episode 12500] reward=-76933402.5 actor_loss=0.3195 critic_loss=96797177901.5111 entropy=14.4261 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 12500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542601.3 mean_steps=14.2
|
|
[Episode 12510] reward=-64139445.2 actor_loss=0.3336 critic_loss=79550896173.5111 entropy=14.4390 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1022 front_blocked=0
|
|
[Episode 12520] reward=-80760137.0 actor_loss=0.4062 critic_loss=102188808965.6889 entropy=14.4639 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 12520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516556.8 mean_steps=13.8
|
|
[Episode 12530] reward=-84809072.5 actor_loss=0.3727 critic_loss=108474371094.7556 entropy=14.4621 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 12540] reward=-83625597.8 actor_loss=0.2768 critic_loss=105911374825.2444 entropy=14.4703 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 12540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502095.6 mean_steps=14.1
|
|
[Episode 12550] reward=-84590309.0 actor_loss=0.3437 critic_loss=108033719409.7778 entropy=14.4914 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 12560] reward=-92026862.9 actor_loss=0.3550 critic_loss=117774176347.0222 entropy=14.5075 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 12560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432135.8 mean_steps=14.8
|
|
[Episode 12570] reward=-78286616.6 actor_loss=0.3213 critic_loss=99345365765.6889 entropy=14.5027 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1139 front_blocked=0
|
|
[Episode 12580] reward=-82691263.1 actor_loss=0.2404 critic_loss=104750963097.6000 entropy=14.5307 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Eval 12580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-598598.2 mean_steps=11.8
|
|
[Episode 12590] reward=-92474352.8 actor_loss=0.2875 critic_loss=120751027541.3333 entropy=14.5521 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 12600] reward=-72027903.1 actor_loss=0.3443 critic_loss=91270724175.6444 entropy=14.5471 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 12600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-535137.8 mean_steps=12.7
|
|
[Episode 12610] reward=-85850264.7 actor_loss=0.3846 critic_loss=109682425856.0000 entropy=14.5622 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 12620] reward=-80724765.9 actor_loss=0.2873 critic_loss=101105460656.3556 entropy=14.5856 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 12620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-357765.3 mean_steps=15.1
|
|
[Episode 12630] reward=-80735810.4 actor_loss=0.3699 critic_loss=103343220326.4000 entropy=14.5960 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 12640] reward=-79321518.2 actor_loss=0.2886 critic_loss=102033154412.0889 entropy=14.6023 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Eval 12640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-511796.0 mean_steps=12.3
|
|
[Episode 12650] reward=-77877571.3 actor_loss=0.3553 critic_loss=100077278367.2889 entropy=14.6225 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 12660] reward=-72120722.9 actor_loss=0.2976 critic_loss=92939431662.9333 entropy=14.6138 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1068 front_blocked=0
|
|
[Eval 12660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600267.4 mean_steps=13.1
|
|
[Episode 12670] reward=-88002025.6 actor_loss=0.3006 critic_loss=111559387272.5333 entropy=14.6404 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 12680] reward=-86302028.8 actor_loss=0.3313 critic_loss=110748310095.6444 entropy=14.6463 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 12680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516320.8 mean_steps=14.4
|
|
[Episode 12690] reward=-86760524.4 actor_loss=0.2569 critic_loss=111786356644.9778 entropy=14.6658 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 12700] reward=-74283138.6 actor_loss=0.3038 critic_loss=94385584355.5556 entropy=14.6919 approx_kl=0.0016 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Eval 12700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-300743.1 mean_steps=16.4
|
|
[Episode 12710] reward=-86195122.3 actor_loss=0.3348 critic_loss=110883056298.6667 entropy=14.6986 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 12720] reward=-79386139.3 actor_loss=0.3553 critic_loss=101987578311.1111 entropy=14.7170 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 12720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-619775.7 mean_steps=12.2
|
|
[Episode 12730] reward=-82427405.8 actor_loss=0.3890 critic_loss=104536820576.7111 entropy=14.7312 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 12740] reward=-91978050.0 actor_loss=0.2468 critic_loss=123830400887.4667 entropy=14.7520 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Eval 12740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-308033.4 mean_steps=16.4
|
|
[Episode 12750] reward=-84073865.4 actor_loss=0.3744 critic_loss=107338486215.1111 entropy=14.7790 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 12760] reward=-92512854.3 actor_loss=0.3919 critic_loss=119174891656.5333 entropy=14.7940 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 12760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464288.5 mean_steps=14.7
|
|
[Episode 12770] reward=-74912064.8 actor_loss=0.3651 critic_loss=96538602882.8445 entropy=14.8059 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 12780] reward=-88190069.9 actor_loss=0.2290 critic_loss=113941242220.0889 entropy=14.8060 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Eval 12780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527951.2 mean_steps=14.3
|
|
[Episode 12790] reward=-80688759.2 actor_loss=0.3385 critic_loss=103920380950.7556 entropy=14.8174 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 12800] reward=-80871319.6 actor_loss=0.3741 critic_loss=104749033608.5333 entropy=14.8226 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 12800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481315.8 mean_steps=14.9
|
|
[Episode 12810] reward=-86271383.9 actor_loss=0.3484 critic_loss=111663951234.8445 entropy=14.8237 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 12820] reward=-95996377.3 actor_loss=0.3042 critic_loss=127223392028.4444 entropy=14.8193 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 12820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-513790.9 mean_steps=15.1
|
|
[Episode 12830] reward=-72203639.2 actor_loss=0.3429 critic_loss=92044833587.2000 entropy=14.8129 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Episode 12840] reward=-81999814.1 actor_loss=0.3610 critic_loss=106456702065.7778 entropy=14.8179 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 12840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533816.5 mean_steps=14.0
|
|
[Episode 12850] reward=-76453531.8 actor_loss=0.3555 critic_loss=99021803428.9778 entropy=14.8430 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Episode 12860] reward=-89885871.6 actor_loss=0.2709 critic_loss=115175635717.6889 entropy=14.8521 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 12860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368946.0 mean_steps=15.9
|
|
[Episode 12870] reward=-81636507.6 actor_loss=0.4321 critic_loss=110427438011.7333 entropy=14.8479 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 12880] reward=-81453859.4 actor_loss=0.3426 critic_loss=105842708935.1111 entropy=14.8619 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 12880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437702.8 mean_steps=14.3
|
|
[Episode 12890] reward=-89246033.3 actor_loss=0.2741 critic_loss=119240205744.3556 entropy=14.8763 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 12900] reward=-84208235.3 actor_loss=0.3235 critic_loss=108601400797.8667 entropy=14.8718 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Eval 12900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517553.9 mean_steps=15.0
|
|
[Episode 12910] reward=-88227298.1 actor_loss=0.3726 critic_loss=114458957505.4222 entropy=14.8652 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 12920] reward=-82897628.5 actor_loss=0.2320 critic_loss=108066511439.6444 entropy=14.8490 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Eval 12920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474696.5 mean_steps=13.8
|
|
[Episode 12930] reward=-78014643.8 actor_loss=0.3458 critic_loss=98115807550.5778 entropy=14.8587 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 12940] reward=-86021284.6 actor_loss=0.3667 critic_loss=111913365959.1111 entropy=14.8585 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 12940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467320.7 mean_steps=14.1
|
|
[Episode 12950] reward=-81277929.7 actor_loss=0.2483 critic_loss=105335873900.0889 entropy=14.8613 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1094 front_blocked=0
|
|
[Episode 12960] reward=-81574318.2 actor_loss=0.3042 critic_loss=105596499103.2889 entropy=14.8793 approx_kl=0.0012 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 12960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-363600.6 mean_steps=15.6
|
|
[Episode 12970] reward=-80120013.8 actor_loss=0.3362 critic_loss=100573036726.0444 entropy=14.8780 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 12980] reward=-86340184.4 actor_loss=0.2560 critic_loss=110127003147.3778 entropy=14.8537 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 12980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-635486.7 mean_steps=11.8
|
|
[Episode 12990] reward=-93683999.8 actor_loss=0.3468 critic_loss=121437863571.9111 entropy=14.8580 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 13000] reward=-69419330.2 actor_loss=0.4004 critic_loss=86870483308.0889 entropy=14.8627 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 13000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449634.8 mean_steps=14.4
|
|
[Episode 13010] reward=-82996032.1 actor_loss=0.3305 critic_loss=105651975691.3778 entropy=14.8806 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 13020] reward=-94560640.3 actor_loss=0.3640 critic_loss=120661416891.7333 entropy=14.8838 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 13020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439457.0 mean_steps=15.6
|
|
[Episode 13030] reward=-87491185.6 actor_loss=0.3039 critic_loss=113226284873.9556 entropy=14.8851 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 13040] reward=-94582853.9 actor_loss=0.3632 critic_loss=124082637846.7556 entropy=14.9153 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 13040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486152.1 mean_steps=13.8
|
|
[Episode 13050] reward=-81124404.3 actor_loss=0.1931 critic_loss=103909503431.1111 entropy=14.9258 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1035 front_blocked=0
|
|
[Episode 13060] reward=-83047247.3 actor_loss=0.4168 critic_loss=105297627272.5333 entropy=14.9256 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 13060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-362418.9 mean_steps=15.7
|
|
[Episode 13070] reward=-85584014.3 actor_loss=0.3570 critic_loss=111087848652.8000 entropy=14.9174 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 13080] reward=-84521430.5 actor_loss=0.3281 critic_loss=111498438064.3556 entropy=14.9376 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 13080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-666416.5 mean_steps=12.0
|
|
[Episode 13090] reward=-74998647.6 actor_loss=0.4680 critic_loss=95013035485.8667 entropy=14.9638 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 13100] reward=-84126801.7 actor_loss=0.2596 critic_loss=109844272651.3778 entropy=14.9708 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1094 front_blocked=0
|
|
[Eval 13100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564274.4 mean_steps=13.5
|
|
[Episode 13110] reward=-85907927.9 actor_loss=0.3349 critic_loss=110464811736.1778 entropy=14.9716 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 13120] reward=-72213020.6 actor_loss=0.4076 critic_loss=91396487941.6889 entropy=14.9693 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 13120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568962.4 mean_steps=13.4
|
|
[Episode 13130] reward=-78328389.1 actor_loss=0.3201 critic_loss=98848193467.7333 entropy=14.9773 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 13140] reward=-84862631.2 actor_loss=0.3308 critic_loss=109104868010.6667 entropy=15.0007 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Eval 13140] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-642823.4 mean_steps=11.8
|
|
[Episode 13150] reward=-88116744.9 actor_loss=0.3256 critic_loss=115640660969.2444 entropy=15.0141 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 13160] reward=-88962413.8 actor_loss=0.4367 critic_loss=113869219066.3111 entropy=15.0062 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 13160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451577.2 mean_steps=15.7
|
|
[Episode 13170] reward=-84710198.7 actor_loss=0.2496 critic_loss=111815333387.3778 entropy=15.0303 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 13180] reward=-95364196.7 actor_loss=0.3019 critic_loss=124296569924.2667 entropy=15.0103 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 13180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465731.0 mean_steps=14.7
|
|
[Episode 13190] reward=-96123435.5 actor_loss=0.3044 critic_loss=117418585838.9333 entropy=14.9937 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 13200] reward=-83622139.7 actor_loss=0.4636 critic_loss=110855484211.2000 entropy=14.9865 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 13200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483628.5 mean_steps=14.8
|
|
[Episode 13210] reward=-91585869.0 actor_loss=0.3784 critic_loss=120511270638.9333 entropy=15.0087 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 13220] reward=-92457123.8 actor_loss=0.3585 critic_loss=120909177924.2667 entropy=15.0185 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 13220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453328.1 mean_steps=14.6
|
|
[Episode 13230] reward=-92700024.4 actor_loss=0.3105 critic_loss=120585477233.7778 entropy=15.0348 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 13240] reward=-90824433.2 actor_loss=0.4387 critic_loss=118221533457.0667 entropy=15.0501 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 13240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521897.0 mean_steps=14.2
|
|
[Episode 13250] reward=-72320939.3 actor_loss=0.3524 critic_loss=93166251030.7556 entropy=15.0597 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Episode 13260] reward=-94139835.0 actor_loss=0.3891 critic_loss=121512692212.6222 entropy=15.0640 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 13260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485450.6 mean_steps=14.3
|
|
[Episode 13270] reward=-98894601.9 actor_loss=0.4256 critic_loss=130672457227.3778 entropy=15.0802 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 13280] reward=-84803481.9 actor_loss=0.3822 critic_loss=109328937961.2444 entropy=15.0983 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 13280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-604258.1 mean_steps=13.0
|
|
[Episode 13290] reward=-86932173.9 actor_loss=0.3049 critic_loss=112224262553.6000 entropy=15.1087 approx_kl=0.0019 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 13300] reward=-95275052.4 actor_loss=0.2988 critic_loss=125391005832.5333 entropy=15.1167 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 13300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400240.9 mean_steps=15.2
|
|
[Episode 13310] reward=-87677581.2 actor_loss=0.4167 critic_loss=116182433063.8222 entropy=15.1173 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 13320] reward=-89899978.6 actor_loss=0.3498 critic_loss=118809775308.8000 entropy=15.1309 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 13320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371822.0 mean_steps=16.2
|
|
[Episode 13330] reward=-87990854.7 actor_loss=0.2672 critic_loss=116413228100.2667 entropy=15.1404 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Episode 13340] reward=-79185024.7 actor_loss=0.3610 critic_loss=100374644326.4000 entropy=15.1645 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 13340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-353311.4 mean_steps=16.8
|
|
[Episode 13350] reward=-85680662.8 actor_loss=0.3546 critic_loss=113848939861.3333 entropy=15.1666 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 13360] reward=-86972408.1 actor_loss=0.4065 critic_loss=112293948984.8889 entropy=15.1738 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 13360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437203.7 mean_steps=14.9
|
|
[Episode 13370] reward=-88747130.7 actor_loss=0.4648 critic_loss=116243072068.2667 entropy=15.1892 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 13380] reward=-84536496.0 actor_loss=0.3929 critic_loss=106744185924.2667 entropy=15.2167 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 13380] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-620113.3 mean_steps=11.8
|
|
[Episode 13390] reward=-90411566.3 actor_loss=0.2605 critic_loss=117925646244.9778 entropy=15.2109 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Episode 13400] reward=-90830013.1 actor_loss=0.3308 critic_loss=118013754299.7333 entropy=15.1955 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 13400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525595.5 mean_steps=13.6
|
|
[Episode 13410] reward=-87595493.7 actor_loss=0.3603 critic_loss=111185844451.5556 entropy=15.2007 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 13420] reward=-80132663.7 actor_loss=0.5092 critic_loss=103977012974.9333 entropy=15.2044 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 13420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-516552.2 mean_steps=13.2
|
|
[Episode 13430] reward=-84569540.3 actor_loss=0.4505 critic_loss=110117955902.5778 entropy=15.2152 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 13440] reward=-80106201.4 actor_loss=0.3753 critic_loss=102752660684.8000 entropy=15.2055 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 13440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553670.3 mean_steps=13.8
|
|
[Episode 13450] reward=-89444000.7 actor_loss=0.2486 critic_loss=116611829304.8889 entropy=15.2156 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 13460] reward=-97747397.8 actor_loss=0.2497 critic_loss=125289899986.4889 entropy=15.2272 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 13460] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-355381.2 mean_steps=17.1
|
|
[Episode 13470] reward=-93611198.0 actor_loss=0.2702 critic_loss=124539081523.2000 entropy=15.2381 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 13480] reward=-93610441.3 actor_loss=0.3163 critic_loss=121399288718.2222 entropy=15.2486 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 13480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534256.5 mean_steps=13.6
|
|
[Episode 13490] reward=-95800283.4 actor_loss=0.2829 critic_loss=127286318239.2889 entropy=15.2620 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 13500] reward=-88050131.9 actor_loss=0.4310 critic_loss=117188138689.4222 entropy=15.2805 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 13500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551719.1 mean_steps=12.6
|
|
[Episode 13510] reward=-82465105.2 actor_loss=0.3843 critic_loss=108291978581.3333 entropy=15.3056 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 13520] reward=-94736836.5 actor_loss=0.3766 critic_loss=123999234184.5333 entropy=15.2855 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 13520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478442.7 mean_steps=15.2
|
|
[Episode 13530] reward=-93605273.2 actor_loss=0.2861 critic_loss=122114989169.7778 entropy=15.2987 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 13540] reward=-98852738.8 actor_loss=0.3893 critic_loss=131015455721.2444 entropy=15.3144 approx_kl=0.0019 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 13540] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-583098.5 mean_steps=10.8
|
|
[Episode 13550] reward=-97273938.8 actor_loss=0.2434 critic_loss=130962594383.6444 entropy=15.3121 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 13560] reward=-78611923.2 actor_loss=0.4806 critic_loss=105181267740.4444 entropy=15.3326 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 13560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541770.0 mean_steps=13.6
|
|
[Episode 13570] reward=-101373420.7 actor_loss=0.3750 critic_loss=136344269801.2444 entropy=15.3303 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 13580] reward=-85463490.7 actor_loss=0.3530 critic_loss=110064928722.4889 entropy=15.3444 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 13580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502859.2 mean_steps=14.4
|
|
[Episode 13590] reward=-97852114.3 actor_loss=0.3114 critic_loss=131508905756.4444 entropy=15.3551 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 13600] reward=-87990413.4 actor_loss=0.3905 critic_loss=114938188959.2889 entropy=15.3713 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 13600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509320.2 mean_steps=13.9
|
|
[Episode 13610] reward=-100865427.8 actor_loss=0.3327 critic_loss=129356809102.2222 entropy=15.3590 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 13620] reward=-90764127.5 actor_loss=0.3128 critic_loss=117597854651.7333 entropy=15.3548 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 13620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463256.7 mean_steps=14.8
|
|
[Episode 13630] reward=-90113151.7 actor_loss=0.3577 critic_loss=116618887350.0444 entropy=15.3934 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 13640] reward=-92986290.9 actor_loss=0.2961 critic_loss=122318694354.4889 entropy=15.4150 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 13640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578881.2 mean_steps=12.9
|
|
[Episode 13650] reward=-89679872.6 actor_loss=0.3179 critic_loss=120008666134.7556 entropy=15.4253 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Episode 13660] reward=-95418203.6 actor_loss=0.4039 critic_loss=124292574230.7556 entropy=15.4534 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 13660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417353.2 mean_steps=15.7
|
|
[Episode 13670] reward=-89030302.1 actor_loss=0.3358 critic_loss=115939013791.2889 entropy=15.4444 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 13680] reward=-91002273.3 actor_loss=0.3848 critic_loss=118510871256.1778 entropy=15.4462 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 13680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442565.6 mean_steps=14.6
|
|
[Episode 13690] reward=-90418551.7 actor_loss=0.3499 critic_loss=119290470035.9111 entropy=15.4521 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 13700] reward=-104374137.6 actor_loss=0.3231 critic_loss=139617212097.4222 entropy=15.4678 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 13700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451528.1 mean_steps=14.3
|
|
[Episode 13710] reward=-100704749.9 actor_loss=0.2615 critic_loss=132621353142.0444 entropy=15.4945 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 13720] reward=-102226778.5 actor_loss=0.3572 critic_loss=134076204646.4000 entropy=15.5028 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 13720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517517.8 mean_steps=13.2
|
|
[Episode 13730] reward=-100661408.8 actor_loss=0.3075 critic_loss=133938034369.4222 entropy=15.5161 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 13740] reward=-100636155.9 actor_loss=0.3184 critic_loss=130935372003.5556 entropy=15.5295 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 13740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495208.5 mean_steps=14.6
|
|
[Episode 13750] reward=-98169074.3 actor_loss=0.3177 critic_loss=131508902297.6000 entropy=15.5211 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 13760] reward=-86185295.9 actor_loss=0.3213 critic_loss=113397616731.0222 entropy=15.5266 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 13760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409455.7 mean_steps=14.9
|
|
[Episode 13770] reward=-101569645.8 actor_loss=0.3407 critic_loss=132484288785.0667 entropy=15.4923 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 13780] reward=-104493684.2 actor_loss=0.3085 critic_loss=138740874717.8667 entropy=15.4978 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 13780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490067.1 mean_steps=13.8
|
|
[Episode 13790] reward=-99602378.5 actor_loss=0.3496 critic_loss=131285507185.7778 entropy=15.5025 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 13800] reward=-101835516.3 actor_loss=0.2755 critic_loss=129382769550.2222 entropy=15.5004 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 13800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487162.1 mean_steps=13.9
|
|
[Episode 13810] reward=-98539186.9 actor_loss=0.4422 critic_loss=131774088260.2667 entropy=15.5188 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 13820] reward=-96574967.9 actor_loss=0.4039 critic_loss=128131743561.9556 entropy=15.5116 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 13820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-483865.5 mean_steps=12.1
|
|
[Episode 13830] reward=-91734479.1 actor_loss=0.3986 critic_loss=117059158016.0000 entropy=15.4984 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 13840] reward=-86486779.3 actor_loss=0.4166 critic_loss=113449264560.3556 entropy=15.5180 approx_kl=0.0010 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 13840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547193.3 mean_steps=13.1
|
|
[Episode 13850] reward=-94871105.6 actor_loss=0.3136 critic_loss=125178974481.0667 entropy=15.5307 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 13860] reward=-94262472.3 actor_loss=0.3837 critic_loss=122210189858.1333 entropy=15.5445 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 13860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509086.4 mean_steps=13.9
|
|
[Episode 13870] reward=-104304642.0 actor_loss=0.4439 critic_loss=138331521570.1333 entropy=15.5602 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 13880] reward=-98700971.6 actor_loss=0.3258 critic_loss=130127212999.1111 entropy=15.5721 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 13880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449299.5 mean_steps=15.6
|
|
[Episode 13890] reward=-89016467.3 actor_loss=0.3500 critic_loss=117450670262.0444 entropy=15.5686 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 13900] reward=-93457581.0 actor_loss=0.3750 critic_loss=124847032820.6222 entropy=15.5694 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 13900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-603600.4 mean_steps=12.2
|
|
[Episode 13910] reward=-98099957.4 actor_loss=0.3059 critic_loss=128595331936.7111 entropy=15.5773 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 13920] reward=-94227390.9 actor_loss=0.3059 critic_loss=125037819949.5111 entropy=15.5932 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 13920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-417085.8 mean_steps=14.7
|
|
[Episode 13930] reward=-95683160.2 actor_loss=0.3526 critic_loss=128075026340.9778 entropy=15.5948 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 13940] reward=-102613740.4 actor_loss=0.2273 critic_loss=141445618619.7333 entropy=15.5909 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Eval 13940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-536103.3 mean_steps=12.4
|
|
[Episode 13950] reward=-98952398.0 actor_loss=0.4051 critic_loss=131612907747.5556 entropy=15.5867 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 13960] reward=-96841267.8 actor_loss=0.3344 critic_loss=132228686916.2667 entropy=15.5809 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 13960] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-734237.3 mean_steps=11.8
|
|
[Episode 13970] reward=-95271379.7 actor_loss=0.4004 critic_loss=124151672740.9778 entropy=15.5470 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 13980] reward=-97599753.7 actor_loss=0.3532 critic_loss=128923456489.2444 entropy=15.5285 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 13980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-372711.7 mean_steps=16.1
|
|
[Episode 13990] reward=-95843438.1 actor_loss=0.3365 critic_loss=129252604131.5556 entropy=15.5478 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 14000] reward=-94118116.0 actor_loss=0.3404 critic_loss=122904119432.5333 entropy=15.5423 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 14000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563656.3 mean_steps=13.6
|
|
[Episode 14010] reward=-105573275.6 actor_loss=0.3053 critic_loss=138160658477.5111 entropy=15.5480 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 14020] reward=-95674000.6 actor_loss=0.3696 critic_loss=125770792777.9556 entropy=15.5632 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 14020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-545579.2 mean_steps=12.8
|
|
[Episode 14030] reward=-92035770.5 actor_loss=0.3828 critic_loss=118182182001.7778 entropy=15.5511 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 14040] reward=-98759547.8 actor_loss=0.3362 critic_loss=128403887809.4222 entropy=15.5540 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 14040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530118.0 mean_steps=13.3
|
|
[Episode 14050] reward=-96885175.1 actor_loss=0.2694 critic_loss=127358732970.6667 entropy=15.5524 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 14060] reward=-93515919.4 actor_loss=0.4623 critic_loss=126427290009.6000 entropy=15.5726 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 14060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591033.1 mean_steps=13.8
|
|
[Episode 14070] reward=-96561578.3 actor_loss=0.3075 critic_loss=127051035261.1555 entropy=15.5785 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 14080] reward=-96185360.5 actor_loss=0.3645 critic_loss=122457858776.1778 entropy=15.5958 approx_kl=0.0016 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 14080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578830.8 mean_steps=13.8
|
|
[Episode 14090] reward=-91655545.6 actor_loss=0.4825 critic_loss=120896658181.6889 entropy=15.6093 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 14100] reward=-89549539.0 actor_loss=0.3190 critic_loss=116453429156.9778 entropy=15.6388 approx_kl=0.0011 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 14100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-553926.9 mean_steps=13.0
|
|
[Episode 14110] reward=-97973169.9 actor_loss=0.3541 critic_loss=132028332623.6444 entropy=15.6479 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 14120] reward=-101849298.1 actor_loss=0.2862 critic_loss=133208022857.9556 entropy=15.6563 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 14120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438952.1 mean_steps=14.8
|
|
[Episode 14130] reward=-98193397.8 actor_loss=0.2992 critic_loss=128509938346.6667 entropy=15.6574 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 14140] reward=-96107627.6 actor_loss=0.3848 critic_loss=124493354507.3778 entropy=15.6731 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 14140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-483995.7 mean_steps=16.1
|
|
[Episode 14150] reward=-86083627.8 actor_loss=0.3452 critic_loss=114407122898.4889 entropy=15.6663 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 14160] reward=-88726166.0 actor_loss=0.3318 critic_loss=115681663021.5111 entropy=15.6522 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 14160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-447012.7 mean_steps=16.4
|
|
[Episode 14170] reward=-100959246.0 actor_loss=0.2609 critic_loss=132510486619.0222 entropy=15.6585 approx_kl=0.0019 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 14180] reward=-91701795.4 actor_loss=0.4053 critic_loss=121126515507.2000 entropy=15.6748 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 14180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-648677.9 mean_steps=13.2
|
|
[Episode 14190] reward=-96711830.4 actor_loss=0.3283 critic_loss=130959960746.6667 entropy=15.6779 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 14200] reward=-97805121.0 actor_loss=0.3982 critic_loss=126544550661.6889 entropy=15.6859 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 14200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467572.7 mean_steps=15.1
|
|
[Episode 14210] reward=-94454846.0 actor_loss=0.3109 critic_loss=122596158486.7556 entropy=15.6974 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 14220] reward=-102577501.1 actor_loss=0.3656 critic_loss=134856974153.9556 entropy=15.6898 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 14220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431953.7 mean_steps=14.9
|
|
[Episode 14230] reward=-106693887.6 actor_loss=0.3946 critic_loss=141388643259.7333 entropy=15.6896 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 14240] reward=-100568036.6 actor_loss=0.4466 critic_loss=137249303756.8000 entropy=15.6989 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 14240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490276.9 mean_steps=14.3
|
|
[Episode 14250] reward=-101853142.8 actor_loss=0.4064 critic_loss=131479998737.0667 entropy=15.6941 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 14260] reward=-88967355.0 actor_loss=0.3137 critic_loss=115149377262.9333 entropy=15.6888 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 14260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614039.2 mean_steps=13.3
|
|
[Episode 14270] reward=-99810109.2 actor_loss=0.3501 critic_loss=132308813960.5333 entropy=15.7054 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 14280] reward=-100037750.3 actor_loss=0.2971 critic_loss=134757867155.9111 entropy=15.6887 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 14280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531801.9 mean_steps=14.3
|
|
[Episode 14290] reward=-106961122.1 actor_loss=0.3778 critic_loss=143953482365.1555 entropy=15.7070 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 14300] reward=-91124631.5 actor_loss=0.4961 critic_loss=117855009541.6889 entropy=15.7177 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 14300] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-278540.7 mean_steps=17.6
|
|
[Episode 14310] reward=-96339091.0 actor_loss=0.3550 critic_loss=123918011232.7111 entropy=15.7071 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 14320] reward=-104822323.8 actor_loss=0.3084 critic_loss=140127500879.6444 entropy=15.6916 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 14320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553354.5 mean_steps=13.6
|
|
[Episode 14330] reward=-97738593.4 actor_loss=0.3801 critic_loss=124736314299.7333 entropy=15.7140 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 14340] reward=-99365366.5 actor_loss=0.3332 critic_loss=132034199369.9556 entropy=15.7029 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 14340] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-240696.0 mean_steps=18.6
|
|
[Episode 14350] reward=-104356363.8 actor_loss=0.3719 critic_loss=137521981758.5778 entropy=15.6948 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 14360] reward=-95141968.7 actor_loss=0.3889 critic_loss=126323095415.4667 entropy=15.7208 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 14360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504787.1 mean_steps=14.2
|
|
[Episode 14370] reward=-100465835.6 actor_loss=0.3363 critic_loss=132125010238.5778 entropy=15.7135 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 14380] reward=-98919482.0 actor_loss=0.4056 critic_loss=132182857773.5111 entropy=15.7268 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 14380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-538014.9 mean_steps=12.7
|
|
[Episode 14390] reward=-98811968.7 actor_loss=0.4396 critic_loss=131011089385.2444 entropy=15.7134 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 14400] reward=-106732431.9 actor_loss=0.2351 critic_loss=142446750378.6667 entropy=15.7199 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 14400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559098.9 mean_steps=12.7
|
|
[Episode 14410] reward=-99437140.4 actor_loss=0.4552 critic_loss=130427298156.0889 entropy=15.7313 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 14420] reward=-100454359.1 actor_loss=0.3217 critic_loss=131870729466.3111 entropy=15.7432 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 14420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579351.4 mean_steps=12.8
|
|
[Episode 14430] reward=-100189999.9 actor_loss=0.3835 critic_loss=134339069178.3111 entropy=15.7467 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 14440] reward=-102951728.2 actor_loss=0.3598 critic_loss=140701625184.7111 entropy=15.7508 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 14440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408053.4 mean_steps=15.5
|
|
[Episode 14450] reward=-89188276.9 actor_loss=0.2954 critic_loss=117832191180.8000 entropy=15.7764 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 14460] reward=-99795577.6 actor_loss=0.4178 critic_loss=135119040785.0667 entropy=15.7788 approx_kl=0.0011 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 14460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499402.4 mean_steps=14.2
|
|
[Episode 14470] reward=-103981502.3 actor_loss=0.2686 critic_loss=139626840974.2222 entropy=15.7758 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 14480] reward=-103954631.7 actor_loss=0.3830 critic_loss=142529097636.9778 entropy=15.7794 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 14480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495308.2 mean_steps=14.1
|
|
[Episode 14490] reward=-100218887.1 actor_loss=0.4050 critic_loss=133192999458.1333 entropy=15.7646 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 14500] reward=-95441564.3 actor_loss=0.4186 critic_loss=124291932160.0000 entropy=15.7460 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 14500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486908.0 mean_steps=14.3
|
|
[Episode 14510] reward=-107786827.3 actor_loss=0.3145 critic_loss=145292291458.8445 entropy=15.7422 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 14520] reward=-103024154.5 actor_loss=0.3404 critic_loss=136673243591.1111 entropy=15.7378 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 14520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431059.1 mean_steps=14.4
|
|
[Episode 14530] reward=-97293478.5 actor_loss=0.4017 critic_loss=128379759456.7111 entropy=15.7360 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 14540] reward=-94491667.1 actor_loss=0.3323 critic_loss=122281858389.3333 entropy=15.7247 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 14540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452491.7 mean_steps=15.9
|
|
[Episode 14550] reward=-101601044.9 actor_loss=0.2789 critic_loss=134949210976.7111 entropy=15.7290 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 14560] reward=-99078630.7 actor_loss=0.4209 critic_loss=132525143017.2444 entropy=15.7247 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 14560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513791.7 mean_steps=14.4
|
|
[Episode 14570] reward=-96944564.2 actor_loss=0.3842 critic_loss=126619911964.4444 entropy=15.7250 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 14580] reward=-97115868.0 actor_loss=0.2986 critic_loss=129174843281.2973 entropy=15.7251 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 14580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-419583.5 mean_steps=14.7
|
|
[Episode 14590] reward=-93802643.1 actor_loss=0.3560 critic_loss=121994783766.7556 entropy=15.7277 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 14600] reward=-92782745.1 actor_loss=0.3151 critic_loss=127192673302.7556 entropy=15.7117 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 14600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438711.0 mean_steps=14.8
|
|
[Episode 14610] reward=-101962997.5 actor_loss=0.3056 critic_loss=135067727371.3778 entropy=15.6990 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 14620] reward=-100057203.6 actor_loss=0.3715 critic_loss=134316808419.5556 entropy=15.7081 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 14620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488670.8 mean_steps=14.3
|
|
[Episode 14630] reward=-95940043.2 actor_loss=0.3845 critic_loss=125946926239.2889 entropy=15.7037 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 14640] reward=-93682319.1 actor_loss=0.2732 critic_loss=122441661371.7333 entropy=15.7045 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 14640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-469630.7 mean_steps=13.2
|
|
[Episode 14650] reward=-92169366.6 actor_loss=0.4308 critic_loss=117735882934.0444 entropy=15.7260 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 14660] reward=-94811412.5 actor_loss=0.3183 critic_loss=127968195925.3333 entropy=15.7349 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 14660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424851.6 mean_steps=15.9
|
|
[Episode 14670] reward=-97594150.9 actor_loss=0.4559 critic_loss=127835357366.0444 entropy=15.7254 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 14680] reward=-102055713.9 actor_loss=0.4521 critic_loss=132426362060.8000 entropy=15.7048 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 14680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-569617.4 mean_steps=12.8
|
|
[Episode 14690] reward=-105747532.3 actor_loss=0.3193 critic_loss=138098401462.0444 entropy=15.7110 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 14700] reward=-101369577.9 actor_loss=0.3428 critic_loss=129209288476.4444 entropy=15.7074 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 14700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592147.5 mean_steps=12.8
|
|
[Episode 14710] reward=-96175009.6 actor_loss=0.3085 critic_loss=124626322318.2222 entropy=15.6863 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 14720] reward=-103880217.4 actor_loss=0.3288 critic_loss=136094328422.4000 entropy=15.7020 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 14720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-430406.6 mean_steps=17.1
|
|
[Episode 14730] reward=-100344325.4 actor_loss=0.3594 critic_loss=134424815934.5778 entropy=15.7040 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 14740] reward=-96110510.0 actor_loss=0.4521 critic_loss=129239481093.6889 entropy=15.6917 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 14740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500873.1 mean_steps=14.4
|
|
[Episode 14750] reward=-91985358.7 actor_loss=0.3922 critic_loss=122603611204.2667 entropy=15.7064 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 14760] reward=-98746315.9 actor_loss=0.3913 critic_loss=136871930174.5778 entropy=15.6992 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 14760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-573306.7 mean_steps=13.0
|
|
[Episode 14770] reward=-100590212.6 actor_loss=0.2819 critic_loss=128942218171.7333 entropy=15.7058 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 14780] reward=-102323703.1 actor_loss=0.2894 critic_loss=130374199796.6222 entropy=15.7131 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 14780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554717.8 mean_steps=12.8
|
|
[Episode 14790] reward=-94903478.6 actor_loss=0.2913 critic_loss=125707714924.0889 entropy=15.7061 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 14800] reward=-104697962.5 actor_loss=0.4338 critic_loss=137590998994.4889 entropy=15.7093 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 14800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559605.6 mean_steps=13.7
|
|
[Episode 14810] reward=-105285269.6 actor_loss=0.2871 critic_loss=136435362656.7111 entropy=15.7119 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 14820] reward=-97448178.4 actor_loss=0.3860 critic_loss=131554706136.1778 entropy=15.7248 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 14820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-413026.9 mean_steps=14.8
|
|
[Episode 14830] reward=-102507232.1 actor_loss=0.3229 critic_loss=135914664027.0222 entropy=15.7136 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 14840] reward=-104168292.7 actor_loss=0.3671 critic_loss=138882878850.8445 entropy=15.7024 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 14840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453134.8 mean_steps=16.4
|
|
[Episode 14850] reward=-96038274.8 actor_loss=0.4143 critic_loss=125710458880.0000 entropy=15.7178 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 14860] reward=-102141804.9 actor_loss=0.3717 critic_loss=134540955921.0667 entropy=15.7415 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 14860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547486.8 mean_steps=14.7
|
|
[Episode 14870] reward=-103066828.0 actor_loss=0.3526 critic_loss=138565671867.7333 entropy=15.7491 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 14880] reward=-94303931.8 actor_loss=0.3743 critic_loss=122907148652.0889 entropy=15.7441 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 14880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402718.8 mean_steps=16.2
|
|
[Episode 14890] reward=-98213548.7 actor_loss=0.3132 critic_loss=127973817275.7333 entropy=15.7627 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 14900] reward=-87911170.3 actor_loss=0.3853 critic_loss=112568568672.7111 entropy=15.7512 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 14900] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-351438.8 mean_steps=17.1
|
|
[Episode 14910] reward=-91385970.7 actor_loss=0.3829 critic_loss=120148641200.3556 entropy=15.7656 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 14920] reward=-102101227.7 actor_loss=0.3592 critic_loss=130461473723.7333 entropy=15.7520 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 14920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577181.9 mean_steps=12.9
|
|
[Episode 14930] reward=-103140259.5 actor_loss=0.3951 critic_loss=136069739861.3333 entropy=15.7894 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 14940] reward=-104619961.5 actor_loss=0.3297 critic_loss=142778280072.5333 entropy=15.8191 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 14940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477029.3 mean_steps=15.0
|
|
[Episode 14950] reward=-97570922.7 actor_loss=0.3584 critic_loss=126996683707.7333 entropy=15.8203 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 14960] reward=-98175958.4 actor_loss=0.3107 critic_loss=130697795447.4667 entropy=15.8231 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 14960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430580.6 mean_steps=15.0
|
|
[Episode 14970] reward=-111621435.7 actor_loss=0.3036 critic_loss=149692724383.2889 entropy=15.8311 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 14980] reward=-89578430.6 actor_loss=0.4404 critic_loss=118847384052.6222 entropy=15.8429 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 14980] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-645565.4 mean_steps=11.3
|
|
[Episode 14990] reward=-103249940.2 actor_loss=0.2667 critic_loss=138168125030.4000 entropy=15.8369 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 15000] reward=-96238702.6 actor_loss=0.3771 critic_loss=126427887843.5556 entropy=15.8447 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 15000] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-375532.7 mean_steps=17.2
|
|
[Episode 15010] reward=-99643252.0 actor_loss=0.3662 critic_loss=134503050262.7556 entropy=15.8399 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 15020] reward=-97806208.0 actor_loss=0.3331 critic_loss=124899794761.9556 entropy=15.8511 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 15020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-348447.6 mean_steps=16.2
|
|
[Episode 15030] reward=-97792727.6 actor_loss=0.4026 critic_loss=131931838145.4222 entropy=15.8769 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 15040] reward=-104141551.9 actor_loss=0.3333 critic_loss=138811134771.2000 entropy=15.8889 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 15040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424455.5 mean_steps=15.7
|
|
[Episode 15050] reward=-98840325.2 actor_loss=0.4300 critic_loss=133178213990.4000 entropy=15.9173 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 15060] reward=-96167916.8 actor_loss=0.3152 critic_loss=123246243748.9778 entropy=15.9249 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 15060] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-383435.5 mean_steps=18.2
|
|
[Episode 15070] reward=-100868618.7 actor_loss=0.3224 critic_loss=136663903072.7111 entropy=15.9143 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 15080] reward=-105591262.8 actor_loss=0.1977 critic_loss=139266452502.7556 entropy=15.9131 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 15080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-557687.8 mean_steps=12.4
|
|
[Episode 15090] reward=-102022421.7 actor_loss=0.2487 critic_loss=139240146716.4445 entropy=15.9053 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 15100] reward=-101038255.9 actor_loss=0.3809 critic_loss=131002015561.9556 entropy=15.9039 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 15100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-397365.3 mean_steps=16.6
|
|
[Episode 15110] reward=-100123129.6 actor_loss=0.3472 critic_loss=132383195500.0889 entropy=15.9181 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 15120] reward=-97911237.0 actor_loss=0.3234 critic_loss=129472343608.8889 entropy=15.9420 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 15120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-557264.3 mean_steps=13.3
|
|
[Episode 15130] reward=-95497308.5 actor_loss=0.3101 critic_loss=127429332172.8000 entropy=15.9416 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 15140] reward=-99732205.0 actor_loss=0.3906 critic_loss=130665653498.3111 entropy=15.9688 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 15140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-594496.6 mean_steps=14.9
|
|
[Episode 15150] reward=-92217681.0 actor_loss=0.4052 critic_loss=119215694552.1778 entropy=15.9568 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 15160] reward=-100439123.8 actor_loss=0.3345 critic_loss=131007512758.0444 entropy=15.9372 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 15160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534432.5 mean_steps=14.8
|
|
[Episode 15170] reward=-104271820.0 actor_loss=0.3807 critic_loss=139463661977.6000 entropy=15.9387 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 15180] reward=-98107086.7 actor_loss=0.3007 critic_loss=128167359101.1555 entropy=15.9307 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 15180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431992.3 mean_steps=16.1
|
|
[Episode 15190] reward=-99546346.1 actor_loss=0.3078 critic_loss=130508808009.9556 entropy=15.9329 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 15200] reward=-96047437.6 actor_loss=0.3519 critic_loss=129249752951.4667 entropy=15.9230 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 15200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541483.5 mean_steps=13.7
|
|
[Episode 15210] reward=-102403019.3 actor_loss=0.4074 critic_loss=138795595457.4222 entropy=15.9235 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 15220] reward=-100161679.9 actor_loss=0.3719 critic_loss=134965072509.1555 entropy=15.9329 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 15220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-367865.7 mean_steps=16.4
|
|
[Episode 15230] reward=-99910685.9 actor_loss=0.2372 critic_loss=129416172703.2889 entropy=15.9217 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 15240] reward=-105736719.3 actor_loss=0.3249 critic_loss=139982875124.6222 entropy=15.9114 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 15240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442644.1 mean_steps=15.9
|
|
[Episode 15250] reward=-106998514.4 actor_loss=0.3621 critic_loss=137824420932.2667 entropy=15.8793 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 15260] reward=-102178004.7 actor_loss=0.3601 critic_loss=137627613957.6889 entropy=15.8916 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 15260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-498219.2 mean_steps=13.3
|
|
[Episode 15270] reward=-106434662.3 actor_loss=0.3668 critic_loss=141740530619.7333 entropy=15.9044 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 15280] reward=-102373585.1 actor_loss=0.3479 critic_loss=131594072155.0222 entropy=15.9191 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 15280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-673935.2 mean_steps=12.4
|
|
[Episode 15290] reward=-98767624.5 actor_loss=0.4605 critic_loss=130590348629.3333 entropy=15.9287 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 15300] reward=-97540895.5 actor_loss=0.2958 critic_loss=127697983533.5111 entropy=15.9322 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 15300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-556264.1 mean_steps=13.1
|
|
[Episode 15310] reward=-97029936.3 actor_loss=0.4643 critic_loss=128071820174.2222 entropy=15.9325 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 15320] reward=-100997540.9 actor_loss=0.2835 critic_loss=135377196555.3778 entropy=15.9100 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 15320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517240.9 mean_steps=13.6
|
|
[Episode 15330] reward=-105359118.0 actor_loss=0.3828 critic_loss=136984810837.3333 entropy=15.9018 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 15340] reward=-88792149.4 actor_loss=0.3305 critic_loss=116061084421.6889 entropy=15.8861 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 15340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-379023.3 mean_steps=14.9
|
|
[Episode 15350] reward=-100591219.8 actor_loss=0.3672 critic_loss=134489355969.4222 entropy=15.9063 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 15360] reward=-106156171.6 actor_loss=0.2973 critic_loss=140991476531.2000 entropy=15.9074 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 15360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553331.1 mean_steps=14.6
|
|
[Episode 15370] reward=-93889039.2 actor_loss=0.3594 critic_loss=123429660444.4444 entropy=15.9066 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 15380] reward=-98811942.9 actor_loss=0.3061 critic_loss=133559505351.1111 entropy=15.9111 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 15380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476302.5 mean_steps=15.3
|
|
[Episode 15390] reward=-103739164.2 actor_loss=0.3455 critic_loss=139995352268.8000 entropy=15.9136 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 15400] reward=-109829634.0 actor_loss=0.3801 critic_loss=145848657510.4000 entropy=15.9314 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 15400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-405060.5 mean_steps=16.5
|
|
[Episode 15410] reward=-102720561.8 actor_loss=0.4152 critic_loss=136068787768.8889 entropy=15.9399 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 15420] reward=-96236050.6 actor_loss=0.3130 critic_loss=130870814674.4889 entropy=15.9512 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 15420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-464900.1 mean_steps=13.9
|
|
[Episode 15430] reward=-96667974.8 actor_loss=0.3512 critic_loss=126248869705.9556 entropy=15.9594 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 15440] reward=-107232642.2 actor_loss=0.3086 critic_loss=139419882473.2444 entropy=15.9825 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 15440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616861.7 mean_steps=13.4
|
|
[Episode 15450] reward=-106021917.4 actor_loss=0.2521 critic_loss=140696505184.7111 entropy=15.9773 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 15460] reward=-103855535.4 actor_loss=0.3713 critic_loss=139265666252.8000 entropy=15.9989 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 15460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448876.8 mean_steps=15.8
|
|
[Episode 15470] reward=-104126790.1 actor_loss=0.3033 critic_loss=138431058011.0222 entropy=15.9903 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 15480] reward=-95889193.6 actor_loss=0.4014 critic_loss=128788575027.2000 entropy=15.9699 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 15480] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-314604.9 mean_steps=16.9
|
|
[Episode 15490] reward=-101096291.5 actor_loss=0.3201 critic_loss=132937878732.8000 entropy=15.9939 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 15500] reward=-103731685.2 actor_loss=0.3211 critic_loss=136831671409.7778 entropy=15.9706 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 15500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-506735.5 mean_steps=13.2
|
|
[Episode 15510] reward=-109399091.3 actor_loss=0.3611 critic_loss=147480620418.8445 entropy=15.9591 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 15520] reward=-103141859.6 actor_loss=0.3742 critic_loss=135392176992.7111 entropy=15.9736 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 15520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509978.5 mean_steps=14.9
|
|
[Episode 15530] reward=-104988214.7 actor_loss=0.3751 critic_loss=139187678776.8889 entropy=15.9952 approx_kl=0.0016 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 15540] reward=-102627958.7 actor_loss=0.3187 critic_loss=141853923555.5555 entropy=15.9950 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 15540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-411080.8 mean_steps=15.6
|
|
[Episode 15550] reward=-95796165.2 actor_loss=0.4447 critic_loss=128894993476.2667 entropy=15.9923 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 15560] reward=-95570861.6 actor_loss=0.4863 critic_loss=139939903533.5111 entropy=15.9921 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 15560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540709.8 mean_steps=13.4
|
|
[Episode 15570] reward=-93926642.8 actor_loss=0.2950 critic_loss=125401108388.9778 entropy=16.0031 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 15580] reward=-97562738.3 actor_loss=0.4877 critic_loss=126033299592.5333 entropy=15.9934 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 15580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-545106.2 mean_steps=14.8
|
|
[Episode 15590] reward=-98558267.7 actor_loss=0.4244 critic_loss=130547603137.4222 entropy=16.0036 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 15600] reward=-101754339.6 actor_loss=0.3265 critic_loss=133220370750.5778 entropy=16.0030 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 15600] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-268641.1 mean_steps=17.8
|
|
[Episode 15610] reward=-100954776.7 actor_loss=0.3622 critic_loss=134762648371.2000 entropy=16.0291 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 15620] reward=-103196868.5 actor_loss=0.3910 critic_loss=138293306072.1778 entropy=16.0179 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 15620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423481.1 mean_steps=15.6
|
|
[Episode 15630] reward=-101575584.6 actor_loss=0.3763 critic_loss=134926445590.7556 entropy=16.0334 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 15640] reward=-95725510.5 actor_loss=0.4196 critic_loss=132065626248.5333 entropy=16.0404 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 15640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574836.4 mean_steps=14.1
|
|
[Episode 15650] reward=-106345689.2 actor_loss=0.3018 critic_loss=137572456493.5111 entropy=16.0425 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 15660] reward=-106605934.2 actor_loss=0.4244 critic_loss=139048508529.7778 entropy=16.0457 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 15660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470392.2 mean_steps=14.2
|
|
[Episode 15670] reward=-101143291.8 actor_loss=0.3487 critic_loss=130645247590.4000 entropy=16.0316 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 15680] reward=-99201541.8 actor_loss=0.2658 critic_loss=132898532192.7111 entropy=16.0179 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 15680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443626.6 mean_steps=15.7
|
|
[Episode 15690] reward=-103015869.0 actor_loss=0.3320 critic_loss=137823415500.8000 entropy=16.0317 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 15700] reward=-97776098.2 actor_loss=0.3404 critic_loss=126372794276.9778 entropy=16.0502 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 15700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368643.8 mean_steps=16.6
|
|
[Episode 15710] reward=-99307943.2 actor_loss=0.3248 critic_loss=130569580452.9778 entropy=16.0501 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 15720] reward=-105327002.4 actor_loss=0.3576 critic_loss=140902227603.9111 entropy=16.0727 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 15720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455396.7 mean_steps=15.9
|
|
[Episode 15730] reward=-97132260.1 actor_loss=0.4257 critic_loss=128523535064.1778 entropy=16.0813 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 15740] reward=-97791740.9 actor_loss=0.3793 critic_loss=130475835574.0444 entropy=16.1068 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 15740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575405.2 mean_steps=13.8
|
|
[Episode 15750] reward=-105750108.1 actor_loss=0.3121 critic_loss=144586151981.5111 entropy=16.0976 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 15760] reward=-101099020.1 actor_loss=0.3130 critic_loss=133753932640.7111 entropy=16.0830 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 15760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400853.0 mean_steps=15.5
|
|
[Episode 15770] reward=-101522855.9 actor_loss=0.3346 critic_loss=132790222483.9111 entropy=16.0748 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 15780] reward=-102201897.5 actor_loss=0.2998 critic_loss=132562946912.7111 entropy=16.0670 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 15780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537031.5 mean_steps=13.7
|
|
[Episode 15790] reward=-104670034.2 actor_loss=0.3441 critic_loss=137380162400.7111 entropy=16.0575 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 15800] reward=-97782764.9 actor_loss=0.4459 critic_loss=134853157228.0889 entropy=16.0483 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 15800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413943.9 mean_steps=16.1
|
|
[Episode 15810] reward=-100406187.2 actor_loss=0.3324 critic_loss=133687425979.7333 entropy=16.0384 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 15820] reward=-105438212.8 actor_loss=0.3549 critic_loss=143009907962.3111 entropy=16.0480 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 15820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-569917.9 mean_steps=12.9
|
|
[Episode 15830] reward=-100077067.8 actor_loss=0.3588 critic_loss=137114167068.4444 entropy=16.0449 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 15840] reward=-99100298.6 actor_loss=0.4485 critic_loss=132605539123.2000 entropy=16.0497 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 15840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519665.5 mean_steps=14.4
|
|
[Episode 15850] reward=-97098028.5 actor_loss=0.4478 critic_loss=128082807830.7556 entropy=16.0667 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 15860] reward=-102108483.0 actor_loss=0.4088 critic_loss=133667095074.1333 entropy=16.0827 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 15860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518304.4 mean_steps=14.1
|
|
[Episode 15870] reward=-107790071.6 actor_loss=0.3165 critic_loss=143892985719.4667 entropy=16.0910 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 15880] reward=-107287524.8 actor_loss=0.2588 critic_loss=141668660383.2889 entropy=16.1029 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 15880] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-325690.1 mean_steps=18.6
|
|
[Episode 15890] reward=-100765889.7 actor_loss=0.3617 critic_loss=136843924122.7907 entropy=16.1097 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 15900] reward=-106370588.3 actor_loss=0.2041 critic_loss=144053255281.7778 entropy=16.1269 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Eval 15900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522778.6 mean_steps=14.2
|
|
[Episode 15910] reward=-106774672.3 actor_loss=0.2740 critic_loss=140272357284.9778 entropy=16.1349 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 15920] reward=-108554725.0 actor_loss=0.2826 critic_loss=145206655385.6000 entropy=16.1332 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 15920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561322.7 mean_steps=14.1
|
|
[Episode 15930] reward=-106807204.6 actor_loss=0.3455 critic_loss=142028265153.4222 entropy=16.1360 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 15940] reward=-105882467.4 actor_loss=0.3297 critic_loss=144253582995.9111 entropy=16.1130 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 15940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564509.8 mean_steps=13.8
|
|
[Episode 15950] reward=-103244108.4 actor_loss=0.2351 critic_loss=138259917118.5778 entropy=16.1043 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 15960] reward=-102329973.0 actor_loss=0.3229 critic_loss=134878582465.4222 entropy=16.1310 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 15960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552035.8 mean_steps=13.7
|
|
[Episode 15970] reward=-111099068.7 actor_loss=0.3927 critic_loss=152444586120.5333 entropy=16.1308 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 15980] reward=-101049512.8 actor_loss=0.3116 critic_loss=132574349084.4444 entropy=16.1291 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 15980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-396334.7 mean_steps=16.6
|
|
[Episode 15990] reward=-107525763.9 actor_loss=0.3425 critic_loss=140074896588.8000 entropy=16.1378 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 16000] reward=-108720015.4 actor_loss=0.3867 critic_loss=145150142782.5778 entropy=16.1676 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 16000] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-667997.4 mean_steps=11.6
|
|
[Episode 16010] reward=-102187228.8 actor_loss=0.3683 critic_loss=136423576917.3333 entropy=16.1761 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 16020] reward=-109460599.4 actor_loss=0.3020 critic_loss=146458762535.8222 entropy=16.1757 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 16020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-328092.6 mean_steps=17.1
|
|
[Episode 16030] reward=-110397662.2 actor_loss=0.2581 critic_loss=147897247698.4889 entropy=16.1929 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 16040] reward=-95206583.6 actor_loss=0.5018 critic_loss=130365716343.4667 entropy=16.2013 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 16040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490510.8 mean_steps=14.5
|
|
[Episode 16050] reward=-103797973.2 actor_loss=0.2870 critic_loss=136011194185.9556 entropy=16.2065 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 16060] reward=-108550544.5 actor_loss=0.2495 critic_loss=148401041863.1111 entropy=16.2215 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 16060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-377981.1 mean_steps=16.8
|
|
[Episode 16070] reward=-104479601.9 actor_loss=0.3033 critic_loss=137681423200.7111 entropy=16.2156 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 16080] reward=-104602535.0 actor_loss=0.4231 critic_loss=141183436208.3556 entropy=16.2334 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 16080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528506.5 mean_steps=14.4
|
|
[Episode 16090] reward=-106733296.2 actor_loss=0.3236 critic_loss=143062717235.2000 entropy=16.2480 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 16100] reward=-106008622.7 actor_loss=0.3497 critic_loss=146195204778.6667 entropy=16.2592 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 16100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430750.4 mean_steps=15.0
|
|
[Episode 16110] reward=-103078391.7 actor_loss=0.3444 critic_loss=141047007732.6222 entropy=16.2577 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 16120] reward=-106198703.3 actor_loss=0.3829 critic_loss=142679926192.3556 entropy=16.2606 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 16120] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-705270.4 mean_steps=12.2
|
|
[Episode 16130] reward=-100442195.3 actor_loss=0.3081 critic_loss=132338670341.6889 entropy=16.2408 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 16140] reward=-111256509.8 actor_loss=0.4184 critic_loss=150108013636.2667 entropy=16.2408 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 16140] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-348924.5 mean_steps=17.4
|
|
[Episode 16150] reward=-117568647.4 actor_loss=0.3173 critic_loss=154471399059.9111 entropy=16.2498 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 16160] reward=-118045879.0 actor_loss=0.2295 critic_loss=160670317499.7333 entropy=16.2474 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 16160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-366061.3 mean_steps=17.1
|
|
[Episode 16170] reward=-112511841.0 actor_loss=0.3450 critic_loss=155962143539.2000 entropy=16.2467 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 16180] reward=-103425610.3 actor_loss=0.3321 critic_loss=140010683505.7778 entropy=16.2301 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 16180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511274.9 mean_steps=14.0
|
|
[Episode 16190] reward=-104993314.4 actor_loss=0.4007 critic_loss=155287421656.1778 entropy=16.2358 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 16200] reward=-106571483.4 actor_loss=0.4112 critic_loss=145767887303.1111 entropy=16.2392 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 16200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485370.2 mean_steps=15.1
|
|
[Episode 16210] reward=-107439442.3 actor_loss=0.3441 critic_loss=145814773760.0000 entropy=16.2274 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 16220] reward=-103880179.2 actor_loss=0.3870 critic_loss=136063233956.9778 entropy=16.2003 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 16220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603609.1 mean_steps=12.8
|
|
[Episode 16230] reward=-118379943.5 actor_loss=0.2338 critic_loss=162650826706.4889 entropy=16.1966 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 16240] reward=-112285168.9 actor_loss=0.2778 critic_loss=150221876246.7556 entropy=16.1832 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 16240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550796.8 mean_steps=13.8
|
|
[Episode 16250] reward=-103591917.8 actor_loss=0.4308 critic_loss=138138734136.8889 entropy=16.2253 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 16260] reward=-111588033.4 actor_loss=0.3483 critic_loss=152829979306.6667 entropy=16.2180 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 16260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529800.6 mean_steps=13.4
|
|
[Episode 16270] reward=-116091928.3 actor_loss=0.3385 critic_loss=155227366286.2222 entropy=16.2312 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 16280] reward=-111253930.5 actor_loss=0.3208 critic_loss=150216364305.0667 entropy=16.2319 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 16280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-340782.7 mean_steps=16.1
|
|
[Episode 16290] reward=-100183927.9 actor_loss=0.3890 critic_loss=132608560696.8889 entropy=16.2138 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 16300] reward=-109737875.6 actor_loss=0.3993 critic_loss=148086959490.8445 entropy=16.2220 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 16300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-387039.3 mean_steps=16.2
|
|
[Episode 16310] reward=-100744616.9 actor_loss=0.3014 critic_loss=139700175576.1778 entropy=16.2325 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 16320] reward=-118995479.5 actor_loss=0.4268 critic_loss=161789603566.9333 entropy=16.2474 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 16320] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-379957.9 mean_steps=17.4
|
|
[Episode 16330] reward=-102730616.1 actor_loss=0.3189 critic_loss=143311660100.2667 entropy=16.2440 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 16340] reward=-99131449.8 actor_loss=0.3628 critic_loss=135823562433.4222 entropy=16.2433 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 16340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515658.9 mean_steps=13.5
|
|
[Episode 16350] reward=-107937722.5 actor_loss=0.3973 critic_loss=144766047391.2889 entropy=16.2469 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 16360] reward=-111805057.8 actor_loss=0.3449 critic_loss=145685138636.8000 entropy=16.2584 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 16360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473401.9 mean_steps=15.1
|
|
[Episode 16370] reward=-105031374.1 actor_loss=0.3737 critic_loss=143595459197.1555 entropy=16.2475 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 16380] reward=-105135231.5 actor_loss=0.3479 critic_loss=139014633335.4667 entropy=16.2378 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 16380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409531.0 mean_steps=15.7
|
|
[Episode 16390] reward=-110518203.8 actor_loss=0.3263 critic_loss=145730989442.8445 entropy=16.2368 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 16400] reward=-107390072.5 actor_loss=0.3575 critic_loss=140360680334.2222 entropy=16.2210 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 16400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379982.8 mean_steps=16.7
|
|
[Episode 16410] reward=-105098120.6 actor_loss=0.3696 critic_loss=141243698016.7111 entropy=16.2293 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 16420] reward=-109279037.0 actor_loss=0.3175 critic_loss=149156047348.6222 entropy=16.2400 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 16420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499327.3 mean_steps=14.2
|
|
[Episode 16430] reward=-101177360.4 actor_loss=0.3667 critic_loss=136552918948.9778 entropy=16.2707 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 16440] reward=-102288217.2 actor_loss=0.4093 critic_loss=141142229174.0444 entropy=16.2869 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 16440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438007.6 mean_steps=15.8
|
|
[Episode 16450] reward=-114959247.0 actor_loss=0.3540 critic_loss=153524503256.1778 entropy=16.2821 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 16460] reward=-118657086.7 actor_loss=0.3414 critic_loss=164931653905.0667 entropy=16.2965 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 16460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526690.5 mean_steps=14.6
|
|
[Episode 16470] reward=-107797186.6 actor_loss=0.2364 critic_loss=146848402272.7111 entropy=16.3083 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 16480] reward=-104084074.6 actor_loss=0.3417 critic_loss=139781595318.0444 entropy=16.3046 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 16480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400513.6 mean_steps=16.4
|
|
[Episode 16490] reward=-106618492.6 actor_loss=0.3891 critic_loss=147662707097.6000 entropy=16.2990 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 16500] reward=-105451625.3 actor_loss=0.3816 critic_loss=143578682163.2000 entropy=16.2918 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 16500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-452800.3 mean_steps=17.1
|
|
[Episode 16510] reward=-108053571.1 actor_loss=0.4021 critic_loss=145204827659.3778 entropy=16.2721 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 16520] reward=-104485776.7 actor_loss=0.2519 critic_loss=139654887287.4667 entropy=16.2921 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 16520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450917.7 mean_steps=16.2
|
|
[Episode 16530] reward=-107164329.1 actor_loss=0.3798 critic_loss=144047258191.6444 entropy=16.2860 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 16540] reward=-107084198.4 actor_loss=0.3384 critic_loss=141587313823.2889 entropy=16.3004 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 16540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417802.4 mean_steps=15.7
|
|
[Episode 16550] reward=-106364382.2 actor_loss=0.2980 critic_loss=143909889092.2667 entropy=16.2849 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 16560] reward=-110404897.1 actor_loss=0.3358 critic_loss=149533438407.1111 entropy=16.2947 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 16560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431972.5 mean_steps=15.6
|
|
[Episode 16570] reward=-115627477.3 actor_loss=0.3931 critic_loss=154695163357.8667 entropy=16.2975 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 16580] reward=-107907527.2 actor_loss=0.3466 critic_loss=142719592948.6222 entropy=16.2798 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 16580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469134.7 mean_steps=14.3
|
|
[Episode 16590] reward=-112286015.3 actor_loss=0.3564 critic_loss=151716320233.2444 entropy=16.2712 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 16600] reward=-98578480.6 actor_loss=0.3882 critic_loss=133491110889.2444 entropy=16.2820 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 16600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495177.6 mean_steps=14.9
|
|
[Episode 16610] reward=-104138248.8 actor_loss=0.3457 critic_loss=136669566475.3778 entropy=16.2848 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 16620] reward=-110177621.9 actor_loss=0.3548 critic_loss=148676108105.9556 entropy=16.3042 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 16620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493527.5 mean_steps=14.8
|
|
[Episode 16630] reward=-107023499.1 actor_loss=0.2971 critic_loss=142181983300.2667 entropy=16.3285 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 16640] reward=-108636634.4 actor_loss=0.3473 critic_loss=142159327323.0222 entropy=16.3179 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 16640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509052.2 mean_steps=14.4
|
|
[Episode 16650] reward=-110004284.3 actor_loss=0.3222 critic_loss=142989436882.4889 entropy=16.3297 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 16660] reward=-112314414.8 actor_loss=0.2415 critic_loss=150192442936.8889 entropy=16.3383 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 16660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465975.6 mean_steps=15.2
|
|
[Episode 16670] reward=-99487854.5 actor_loss=0.2870 critic_loss=127149401338.3111 entropy=16.3631 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 16680] reward=-105290636.0 actor_loss=0.4104 critic_loss=146846818668.0889 entropy=16.3590 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 16680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501770.9 mean_steps=14.2
|
|
[Episode 16690] reward=-111831200.3 actor_loss=0.3638 critic_loss=150967314568.5333 entropy=16.3898 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 16700] reward=-103301450.4 actor_loss=0.3270 critic_loss=138504015003.1515 entropy=16.3923 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 16700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418473.5 mean_steps=15.7
|
|
[Episode 16710] reward=-109542009.4 actor_loss=0.2328 critic_loss=148700447994.3111 entropy=16.4070 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 16720] reward=-114054450.6 actor_loss=0.3358 critic_loss=153907415540.6222 entropy=16.4129 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 16720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-479609.1 mean_steps=15.6
|
|
[Episode 16730] reward=-113546040.3 actor_loss=0.3276 critic_loss=152834390607.6444 entropy=16.4194 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 16740] reward=-101481154.2 actor_loss=0.3120 critic_loss=133957599232.0000 entropy=16.4433 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 16740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471328.0 mean_steps=15.2
|
|
[Episode 16750] reward=-112381651.6 actor_loss=0.3566 critic_loss=146484570066.4889 entropy=16.4350 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 16760] reward=-109468617.6 actor_loss=0.3140 critic_loss=145702465991.1111 entropy=16.4495 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 16760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-627800.9 mean_steps=12.2
|
|
[Episode 16770] reward=-107961851.5 actor_loss=0.3068 critic_loss=140310066153.2444 entropy=16.4529 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 16780] reward=-108804598.5 actor_loss=0.4202 critic_loss=142693968918.7556 entropy=16.4625 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 16780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507666.7 mean_steps=14.2
|
|
[Episode 16790] reward=-114005971.5 actor_loss=0.3188 critic_loss=155507322242.8445 entropy=16.4686 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 16800] reward=-109484859.5 actor_loss=0.3800 critic_loss=145625171376.3556 entropy=16.4663 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 16800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-646678.0 mean_steps=12.4
|
|
[Episode 16810] reward=-107668604.0 actor_loss=0.2788 critic_loss=144457585095.1111 entropy=16.4663 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 16820] reward=-104043198.1 actor_loss=0.4634 critic_loss=138607413930.6667 entropy=16.4817 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 16820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559475.1 mean_steps=13.6
|
|
[Episode 16830] reward=-108786984.3 actor_loss=0.4066 critic_loss=141781795726.2222 entropy=16.4816 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 16840] reward=-113240561.6 actor_loss=0.2984 critic_loss=155688884269.5111 entropy=16.4863 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 16840] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-304958.3 mean_steps=16.7
|
|
[Episode 16850] reward=-105284495.4 actor_loss=0.2881 critic_loss=138556897689.6000 entropy=16.5144 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 16860] reward=-108416716.7 actor_loss=0.2906 critic_loss=146791944283.0222 entropy=16.5297 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 16860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409004.4 mean_steps=15.3
|
|
[Episode 16870] reward=-113881753.4 actor_loss=0.2836 critic_loss=152167386316.8000 entropy=16.5098 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 16880] reward=-114875665.9 actor_loss=0.3113 critic_loss=154398855805.1555 entropy=16.5127 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 16880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429026.8 mean_steps=15.7
|
|
[Episode 16890] reward=-110652894.6 actor_loss=0.3977 critic_loss=153455561204.6222 entropy=16.5220 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 16900] reward=-112809287.6 actor_loss=0.3313 critic_loss=156590152817.7778 entropy=16.5190 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 16900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603066.3 mean_steps=12.7
|
|
[Episode 16910] reward=-110973963.6 actor_loss=0.2004 critic_loss=151081386348.0889 entropy=16.5150 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 16920] reward=-108919802.1 actor_loss=0.2997 critic_loss=147073540278.0444 entropy=16.5226 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 16920] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-643285.6 mean_steps=11.3
|
|
[Episode 16930] reward=-108325711.2 actor_loss=0.4458 critic_loss=143869665644.0889 entropy=16.5129 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 16940] reward=-105988197.9 actor_loss=0.3703 critic_loss=142239350784.0000 entropy=16.5188 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 16940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513200.4 mean_steps=13.6
|
|
[Episode 16950] reward=-101239845.2 actor_loss=0.4814 critic_loss=132235663223.4667 entropy=16.5049 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 16960] reward=-104794269.2 actor_loss=0.4486 critic_loss=138320848486.4000 entropy=16.5011 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 16960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592908.4 mean_steps=13.1
|
|
[Episode 16970] reward=-108779957.9 actor_loss=0.3081 critic_loss=145527335958.7556 entropy=16.5049 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 16980] reward=-109315560.0 actor_loss=0.3419 critic_loss=148591646401.4222 entropy=16.5127 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 16980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559467.6 mean_steps=13.7
|
|
[Episode 16990] reward=-111492661.9 actor_loss=0.4287 critic_loss=149169915312.3556 entropy=16.5294 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 17000] reward=-107830324.7 actor_loss=0.4383 critic_loss=151177994057.9556 entropy=16.5396 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 17000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453925.7 mean_steps=14.8
|
|
[Episode 17010] reward=-116937322.7 actor_loss=0.2146 critic_loss=163570340841.2444 entropy=16.5227 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 17020] reward=-110383595.4 actor_loss=0.3997 critic_loss=147382905878.7556 entropy=16.5292 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 17020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-634153.1 mean_steps=12.6
|
|
[Episode 17030] reward=-103926941.7 actor_loss=0.3984 critic_loss=135458863877.6889 entropy=16.5248 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 17040] reward=-103870763.6 actor_loss=0.3639 critic_loss=139197849600.0000 entropy=16.5335 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 17040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540087.3 mean_steps=13.7
|
|
[Episode 17050] reward=-107627150.1 actor_loss=0.2640 critic_loss=143439215729.7778 entropy=16.5425 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 17060] reward=-106502606.0 actor_loss=0.2916 critic_loss=140471699046.4000 entropy=16.5555 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 17060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-366324.4 mean_steps=16.5
|
|
[Episode 17070] reward=-116267810.2 actor_loss=0.3394 critic_loss=157636987380.6222 entropy=16.5614 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 17080] reward=-111974652.5 actor_loss=0.3289 critic_loss=148462186587.0222 entropy=16.5432 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 17080] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-655510.5 mean_steps=10.2
|
|
[Episode 17090] reward=-109951523.9 actor_loss=0.3093 critic_loss=148805181804.0889 entropy=16.5325 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 17100] reward=-106132651.1 actor_loss=0.3057 critic_loss=137209013680.3556 entropy=16.5181 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 17100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544119.0 mean_steps=14.6
|
|
[Episode 17110] reward=-114747840.7 actor_loss=0.3293 critic_loss=154784755803.0222 entropy=16.5304 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 17120] reward=-107202073.8 actor_loss=0.2878 critic_loss=140158501774.2222 entropy=16.5398 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 17120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-419942.7 mean_steps=13.5
|
|
[Episode 17130] reward=-114844427.5 actor_loss=0.2664 critic_loss=153105258541.5111 entropy=16.5415 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 17140] reward=-111573472.4 actor_loss=0.3238 critic_loss=150098860805.6889 entropy=16.5307 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 17140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-571343.6 mean_steps=14.8
|
|
[Episode 17150] reward=-107743937.1 actor_loss=0.2967 critic_loss=145573551763.9111 entropy=16.5380 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 17160] reward=-111874620.7 actor_loss=0.4285 critic_loss=155569381922.1333 entropy=16.5422 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 17160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-509500.3 mean_steps=13.0
|
|
[Episode 17170] reward=-109484151.5 actor_loss=0.2839 critic_loss=148964152843.3778 entropy=16.5361 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 17180] reward=-105912046.6 actor_loss=0.2428 critic_loss=138905084450.1333 entropy=16.5180 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 17180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523354.7 mean_steps=14.4
|
|
[Episode 17190] reward=-105607361.1 actor_loss=0.3447 critic_loss=142702553875.6923 entropy=16.5014 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 17200] reward=-114301764.8 actor_loss=0.3787 critic_loss=148261650067.9111 entropy=16.5092 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 17200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495790.5 mean_steps=14.2
|
|
[Episode 17210] reward=-109186892.5 actor_loss=0.3720 critic_loss=147535251228.4445 entropy=16.5099 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 17220] reward=-105404692.1 actor_loss=0.3325 critic_loss=143452378089.2444 entropy=16.4896 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 17220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-384940.3 mean_steps=16.3
|
|
[Episode 17230] reward=-104941721.9 actor_loss=0.3232 critic_loss=136630221937.7778 entropy=16.5065 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 17240] reward=-103985855.7 actor_loss=0.3792 critic_loss=140379625517.5111 entropy=16.5055 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 17240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451834.8 mean_steps=15.9
|
|
[Episode 17250] reward=-109172259.9 actor_loss=0.2797 critic_loss=143838060726.0444 entropy=16.4992 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 17260] reward=-113037242.4 actor_loss=0.3649 critic_loss=149800405128.5333 entropy=16.4929 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 17260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-406663.2 mean_steps=14.5
|
|
[Episode 17270] reward=-109888594.4 actor_loss=0.4240 critic_loss=142479026858.6667 entropy=16.5234 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 17280] reward=-110144921.4 actor_loss=0.2097 critic_loss=148068949833.9556 entropy=16.5277 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 17280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-365975.5 mean_steps=17.2
|
|
[Episode 17290] reward=-115689212.6 actor_loss=0.3632 critic_loss=157317387969.4222 entropy=16.5246 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 17300] reward=-106875886.8 actor_loss=0.2595 critic_loss=142336137443.5555 entropy=16.5198 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 17300] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-613269.2 mean_steps=12.2
|
|
[Episode 17310] reward=-110658295.7 actor_loss=0.3437 critic_loss=152631345880.1778 entropy=16.5083 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 17320] reward=-109342223.3 actor_loss=0.3198 critic_loss=143008065126.4000 entropy=16.5516 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 17320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463721.6 mean_steps=14.2
|
|
[Episode 17330] reward=-113899943.7 actor_loss=0.2668 critic_loss=154840643811.5555 entropy=16.5427 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 17340] reward=-113062364.6 actor_loss=0.3930 critic_loss=148210097993.9556 entropy=16.5786 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 17340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532097.1 mean_steps=13.3
|
|
[Episode 17350] reward=-109510080.9 actor_loss=0.2833 critic_loss=145448337408.0000 entropy=16.5799 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 17360] reward=-107831265.1 actor_loss=0.4732 critic_loss=143678613640.5333 entropy=16.5755 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 17360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402906.6 mean_steps=15.8
|
|
[Episode 17370] reward=-113512548.2 actor_loss=0.2299 critic_loss=148175336607.2889 entropy=16.5888 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 17380] reward=-110733225.8 actor_loss=0.2968 critic_loss=147523728849.4546 entropy=16.6042 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 17380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578370.2 mean_steps=13.4
|
|
[Episode 17390] reward=-113668035.6 actor_loss=0.2989 critic_loss=149322588394.0571 entropy=16.5986 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 17400] reward=-111415476.6 actor_loss=0.3303 critic_loss=151245294796.8000 entropy=16.6044 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 17400] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-339914.7 mean_steps=18.2
|
|
[Episode 17410] reward=-118363123.0 actor_loss=0.2561 critic_loss=159050646764.3077 entropy=16.6045 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 17420] reward=-114296453.6 actor_loss=0.3718 critic_loss=152640759398.4000 entropy=16.6057 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 17420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-410952.9 mean_steps=17.4
|
|
[Episode 17430] reward=-115546047.7 actor_loss=0.3257 critic_loss=158153614950.4000 entropy=16.6161 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 17440] reward=-107831903.0 actor_loss=0.2914 critic_loss=154162322909.8667 entropy=16.6003 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 17440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-508676.6 mean_steps=15.2
|
|
[Episode 17450] reward=-116787956.8 actor_loss=0.2662 critic_loss=156014927127.2727 entropy=16.6218 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 17460] reward=-112048630.1 actor_loss=0.3443 critic_loss=147439968477.4054 entropy=16.6171 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 17460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427502.2 mean_steps=15.4
|
|
[Episode 17470] reward=-111586944.3 actor_loss=0.3042 critic_loss=153040483487.2889 entropy=16.6235 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 17480] reward=-107498234.0 actor_loss=0.2763 critic_loss=144091190067.2000 entropy=16.6174 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 17480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402068.2 mean_steps=15.6
|
|
[Episode 17490] reward=-110648967.9 actor_loss=0.2708 critic_loss=145242703462.4000 entropy=16.6165 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 17500] reward=-110901796.7 actor_loss=0.3370 critic_loss=145752615230.5778 entropy=16.6034 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 17500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-507145.3 mean_steps=12.0
|
|
[Episode 17510] reward=-113546109.3 actor_loss=0.2792 critic_loss=150857360998.4000 entropy=16.6121 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 17520] reward=-108125502.9 actor_loss=0.4005 critic_loss=142338524457.2903 entropy=16.6318 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 17520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-542649.9 mean_steps=12.8
|
|
[Episode 17530] reward=-115859667.8 actor_loss=0.3030 critic_loss=157650335243.3778 entropy=16.6108 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 17540] reward=-106898951.7 actor_loss=0.3367 critic_loss=145681963235.5555 entropy=16.6058 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 17540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-461931.3 mean_steps=14.2
|
|
[Episode 17550] reward=-106177988.8 actor_loss=0.3867 critic_loss=145674798148.2667 entropy=16.6025 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 17560] reward=-103847102.5 actor_loss=0.3480 critic_loss=136501454074.3111 entropy=16.5822 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 17560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-604030.2 mean_steps=12.8
|
|
[Episode 17570] reward=-110858815.3 actor_loss=0.3186 critic_loss=151501343948.8000 entropy=16.5996 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 17580] reward=-112159431.3 actor_loss=0.3149 critic_loss=148901535379.9111 entropy=16.5706 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 17580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-527571.7 mean_steps=12.8
|
|
[Episode 17590] reward=-113015467.6 actor_loss=0.3326 critic_loss=152403976972.1905 entropy=16.5552 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 17600] reward=-111172882.5 actor_loss=0.2747 critic_loss=152534133418.6667 entropy=16.5497 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 17600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562852.0 mean_steps=12.8
|
|
[Episode 17610] reward=-107792479.6 actor_loss=0.2385 critic_loss=142617239552.0000 entropy=16.5372 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 17620] reward=-112888188.2 actor_loss=0.3067 critic_loss=151186574358.7556 entropy=16.5455 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 17620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573676.4 mean_steps=13.7
|
|
[Episode 17630] reward=-113531436.5 actor_loss=0.3288 critic_loss=149570426925.5111 entropy=16.5406 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 17640] reward=-102249461.2 actor_loss=0.3933 critic_loss=129364538709.3333 entropy=16.5287 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 17640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521381.0 mean_steps=14.1
|
|
[Episode 17650] reward=-108921353.1 actor_loss=0.3903 critic_loss=143260294257.7778 entropy=16.5530 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 17660] reward=-109569277.8 actor_loss=0.4810 critic_loss=146351393450.6667 entropy=16.5739 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 17660] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-693582.1 mean_steps=10.4
|
|
[Episode 17670] reward=-104277852.3 actor_loss=0.4647 critic_loss=136787349595.0222 entropy=16.5612 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 17680] reward=-106223239.6 actor_loss=0.4141 critic_loss=140799346460.4445 entropy=16.5820 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 17680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572631.6 mean_steps=13.0
|
|
[Episode 17690] reward=-105627526.3 actor_loss=0.1769 critic_loss=137527818831.6444 entropy=16.5809 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 17700] reward=-111601413.8 actor_loss=0.3794 critic_loss=148703197775.6444 entropy=16.5928 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 17700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-630889.5 mean_steps=12.7
|
|
[Episode 17710] reward=-104149720.6 actor_loss=0.3932 critic_loss=144380889411.3684 entropy=16.6039 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 17720] reward=-106566802.3 actor_loss=0.3078 critic_loss=139873021314.8445 entropy=16.5964 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 17720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458846.3 mean_steps=14.8
|
|
[Episode 17730] reward=-107020052.5 actor_loss=0.3365 critic_loss=142880861817.9048 entropy=16.5970 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 17740] reward=-112253941.3 actor_loss=0.3330 critic_loss=151007559680.0000 entropy=16.5960 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 17740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-466032.4 mean_steps=15.8
|
|
[Episode 17750] reward=-113804054.7 actor_loss=0.2869 critic_loss=150253117622.0444 entropy=16.5984 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 17760] reward=-118890129.5 actor_loss=0.3037 critic_loss=157521383697.0667 entropy=16.6104 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 17760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-460616.7 mean_steps=16.2
|
|
[Episode 17770] reward=-115472911.9 actor_loss=0.3181 critic_loss=155112524334.5454 entropy=16.6440 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 17780] reward=-107089676.0 actor_loss=0.3707 critic_loss=142467341243.7333 entropy=16.6425 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 17780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539673.8 mean_steps=13.4
|
|
[Episode 17790] reward=-108919578.9 actor_loss=0.3354 critic_loss=152571500071.3846 entropy=16.6343 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 17800] reward=-106847572.1 actor_loss=0.3853 critic_loss=142409514643.9111 entropy=16.6367 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 17800] success_rate=0.300 qp_infeasible_rate=0.650 mean_return=-527627.8 mean_steps=173.4
|
|
[Episode 17810] reward=-104881707.4 actor_loss=0.2840 critic_loss=138686452829.0909 entropy=16.6533 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 17820] reward=-118912125.2 actor_loss=0.3285 critic_loss=161381389289.2444 entropy=16.6409 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 17820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424038.1 mean_steps=14.6
|
|
[Episode 17830] reward=-116964367.6 actor_loss=0.2775 critic_loss=153999286454.0444 entropy=16.6445 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 17840] reward=-115867450.7 actor_loss=0.2380 critic_loss=152944248149.3333 entropy=16.6429 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 17840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-570284.1 mean_steps=12.8
|
|
[Episode 17850] reward=-115075941.6 actor_loss=0.2895 critic_loss=158270131299.0968 entropy=16.6598 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 17860] reward=-113548551.9 actor_loss=0.2947 critic_loss=151410205218.1333 entropy=16.6590 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 17860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510067.9 mean_steps=13.2
|
|
[Episode 17870] reward=-107969586.0 actor_loss=0.3934 critic_loss=143842560500.6222 entropy=16.6576 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 17880] reward=-108740219.1 actor_loss=0.3215 critic_loss=142463181346.1333 entropy=16.6573 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 17880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471786.9 mean_steps=13.8
|
|
[Episode 17890] reward=-111244627.2 actor_loss=0.2963 critic_loss=149017273344.0000 entropy=16.6547 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 17900] reward=-107299210.2 actor_loss=0.4158 critic_loss=138934687425.4222 entropy=16.6448 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 17900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515728.3 mean_steps=14.2
|
|
[Episode 17910] reward=-111825800.9 actor_loss=0.2669 critic_loss=151574069760.0000 entropy=16.6314 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 17920] reward=-111072938.7 actor_loss=0.4139 critic_loss=149193184597.3333 entropy=16.6270 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 17920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-495002.5 mean_steps=13.4
|
|
[Episode 17930] reward=-111990991.2 actor_loss=0.2604 critic_loss=150006355649.4222 entropy=16.6351 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 17940] reward=-115388939.7 actor_loss=0.2772 critic_loss=151932335991.4667 entropy=16.6409 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 17940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431187.6 mean_steps=15.7
|
|
[Episode 17950] reward=-107388833.6 actor_loss=0.4325 critic_loss=139762995655.1111 entropy=16.6360 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 17960] reward=-110658143.8 actor_loss=0.3974 critic_loss=152874050446.2222 entropy=16.6343 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 17960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489361.2 mean_steps=14.1
|
|
[Episode 17970] reward=-113114637.6 actor_loss=0.2472 critic_loss=150802986507.3778 entropy=16.6360 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 17980] reward=-107637429.0 actor_loss=0.3461 critic_loss=142269676657.7778 entropy=16.6548 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 17980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501967.3 mean_steps=13.3
|
|
[Episode 17990] reward=-111666608.9 actor_loss=0.3700 critic_loss=152254924208.3556 entropy=16.6636 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 18000] reward=-106205243.8 actor_loss=0.4056 critic_loss=140593898382.2222 entropy=16.6862 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 18000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562475.1 mean_steps=12.9
|
|
[Episode 18010] reward=-116845689.4 actor_loss=0.2940 critic_loss=151661475521.4222 entropy=16.6984 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 18020] reward=-111718220.1 actor_loss=0.2826 critic_loss=145366856863.2889 entropy=16.7095 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 18020] success_rate=0.200 qp_infeasible_rate=0.750 mean_return=-585152.1 mean_steps=171.6
|
|
[Episode 18030] reward=-111110543.5 actor_loss=0.3311 critic_loss=146142021586.4889 entropy=16.7155 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 18040] reward=-112080413.0 actor_loss=0.2793 critic_loss=147831867528.5333 entropy=16.7318 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 18040] success_rate=0.250 qp_infeasible_rate=0.700 mean_return=-533432.0 mean_steps=172.2
|
|
[Episode 18050] reward=-110505459.7 actor_loss=0.2450 critic_loss=149384668228.2667 entropy=16.7293 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 18060] reward=-113005098.4 actor_loss=0.2610 critic_loss=152048327856.5517 entropy=16.7162 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 18060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463923.7 mean_steps=15.3
|
|
[Episode 18070] reward=-110699715.7 actor_loss=0.4170 critic_loss=147347741857.6842 entropy=16.7016 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 18080] reward=-103609988.9 actor_loss=0.3387 critic_loss=139817788302.2222 entropy=16.6931 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 18080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535849.1 mean_steps=13.9
|
|
[Episode 18090] reward=-103514519.2 actor_loss=0.3129 critic_loss=146477911972.9778 entropy=16.6775 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 18100] reward=-112775194.4 actor_loss=0.3639 critic_loss=154187487277.5111 entropy=16.6796 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 18100] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-651005.0 mean_steps=10.3
|
|
[Episode 18110] reward=-104581861.9 actor_loss=0.4125 critic_loss=136818051208.5333 entropy=16.6748 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 18120] reward=-110104444.4 actor_loss=0.3409 critic_loss=144889611825.5484 entropy=16.6797 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 18120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-543305.0 mean_steps=12.6
|
|
[Episode 18130] reward=-106781667.8 actor_loss=0.3354 critic_loss=142650150638.9333 entropy=16.6806 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 18140] reward=-113406731.7 actor_loss=0.3094 critic_loss=151670902620.1600 entropy=16.6838 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 18140] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-709464.9 mean_steps=10.8
|
|
[Episode 18150] reward=-108247836.5 actor_loss=0.3201 critic_loss=142305569996.8000 entropy=16.6894 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 18160] reward=-112774079.2 actor_loss=0.2798 critic_loss=152727343377.0667 entropy=16.6921 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 18160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-542421.4 mean_steps=12.2
|
|
[Episode 18170] reward=-108862690.1 actor_loss=0.3489 critic_loss=142049513745.0667 entropy=16.7080 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 18180] reward=-117861575.3 actor_loss=0.2822 critic_loss=159774057722.3111 entropy=16.7129 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 18180] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-285245.7 mean_steps=18.0
|
|
[Episode 18190] reward=-113186693.7 actor_loss=0.3834 critic_loss=156012997290.6667 entropy=16.7211 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 18200] reward=-112000955.4 actor_loss=0.3057 critic_loss=153302998493.8667 entropy=16.7315 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 18200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-414298.5 mean_steps=14.7
|
|
[Episode 18210] reward=-111412182.7 actor_loss=0.3086 critic_loss=146666467419.0222 entropy=16.7370 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 18220] reward=-108136861.4 actor_loss=0.2916 critic_loss=145967376702.5778 entropy=16.7339 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 18220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405602.8 mean_steps=15.3
|
|
[Episode 18230] reward=-110513367.8 actor_loss=0.3632 critic_loss=149364397033.2444 entropy=16.7306 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 18240] reward=-111094926.2 actor_loss=0.3610 critic_loss=153549540631.2727 entropy=16.7163 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 18240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540357.4 mean_steps=13.3
|
|
[Episode 18250] reward=-104942204.3 actor_loss=0.3623 critic_loss=141487530530.1333 entropy=16.7061 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 18260] reward=-106668350.3 actor_loss=0.3073 critic_loss=142881876587.1628 entropy=16.7092 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 18260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524542.0 mean_steps=14.1
|
|
[Episode 18270] reward=-108688303.2 actor_loss=0.3652 critic_loss=142847260717.5111 entropy=16.7231 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 18280] reward=-116834231.3 actor_loss=0.3160 critic_loss=157714670478.2222 entropy=16.7292 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 18280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563420.7 mean_steps=14.0
|
|
[Episode 18290] reward=-113769472.9 actor_loss=0.3654 critic_loss=151499887957.3333 entropy=16.7379 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 18300] reward=-108468398.1 actor_loss=0.3691 critic_loss=145868505816.1778 entropy=16.7696 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 18300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481546.3 mean_steps=14.3
|
|
[Episode 18310] reward=-104661745.8 actor_loss=0.3670 critic_loss=141348583424.0000 entropy=16.7694 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 18320] reward=-113695201.7 actor_loss=0.3146 critic_loss=150376193024.0000 entropy=16.7658 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 18320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-516613.4 mean_steps=15.6
|
|
[Episode 18330] reward=-115860695.8 actor_loss=0.3040 critic_loss=160421419417.6000 entropy=16.7687 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 18340] reward=-116598798.3 actor_loss=0.2990 critic_loss=156390055073.6842 entropy=16.7580 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 18340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536672.1 mean_steps=14.1
|
|
[Episode 18350] reward=-111471678.6 actor_loss=0.4088 critic_loss=153282865470.5778 entropy=16.7539 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 18360] reward=-112867396.6 actor_loss=0.2946 critic_loss=150868210119.1111 entropy=16.7590 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 18360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402632.6 mean_steps=16.4
|
|
[Episode 18370] reward=-115247312.8 actor_loss=0.3887 critic_loss=151066419681.8824 entropy=16.7758 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 18380] reward=-107540002.2 actor_loss=0.3043 critic_loss=139365726344.5333 entropy=16.7755 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 18380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-495804.1 mean_steps=13.4
|
|
[Episode 18390] reward=-103875561.9 actor_loss=0.4115 critic_loss=145482031650.1333 entropy=16.7845 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 18400] reward=-108488930.9 actor_loss=0.3091 critic_loss=144640995783.1111 entropy=16.7855 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 18400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436248.2 mean_steps=16.4
|
|
[Episode 18410] reward=-109935139.0 actor_loss=0.4450 critic_loss=147241428036.2667 entropy=16.8278 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 18420] reward=-105934246.4 actor_loss=0.3207 critic_loss=147134063372.1905 entropy=16.8252 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 18420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-440791.7 mean_steps=17.1
|
|
[Episode 18430] reward=-115634199.1 actor_loss=0.2521 critic_loss=160302488234.6667 entropy=16.8356 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 18440] reward=-114911620.2 actor_loss=0.2650 critic_loss=156233011837.1555 entropy=16.8330 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 18440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-599274.0 mean_steps=13.8
|
|
[Episode 18450] reward=-116521263.5 actor_loss=0.3031 critic_loss=155136166496.8649 entropy=16.8339 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 18460] reward=-109749435.4 actor_loss=0.3193 critic_loss=142921811922.4889 entropy=16.8210 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 18460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462369.6 mean_steps=14.2
|
|
[Episode 18470] reward=-114697055.7 actor_loss=0.3137 critic_loss=158968148878.2222 entropy=16.8111 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 18480] reward=-113126245.3 actor_loss=0.2914 critic_loss=149227197053.1555 entropy=16.8095 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 18480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434074.9 mean_steps=14.8
|
|
[Episode 18490] reward=-115195297.6 actor_loss=0.3188 critic_loss=156432237636.2667 entropy=16.8121 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 18500] reward=-106817334.4 actor_loss=0.3052 critic_loss=142968191021.5111 entropy=16.7881 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 18500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463199.2 mean_steps=16.2
|
|
[Episode 18510] reward=-113350674.1 actor_loss=0.3407 critic_loss=152398692898.1333 entropy=16.7982 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 18520] reward=-111293938.0 actor_loss=0.3209 critic_loss=147417685168.5517 entropy=16.8060 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 18520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433447.8 mean_steps=16.1
|
|
[Episode 18530] reward=-118734859.5 actor_loss=0.3355 critic_loss=161605847904.7111 entropy=16.7944 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 18540] reward=-116793772.6 actor_loss=0.2797 critic_loss=158348256506.3111 entropy=16.7874 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 18540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481441.7 mean_steps=14.1
|
|
[Episode 18550] reward=-117042306.5 actor_loss=0.2868 critic_loss=153395868467.2000 entropy=16.7957 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 18560] reward=-109783412.2 actor_loss=0.3048 critic_loss=150295689443.5555 entropy=16.8086 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 18560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-510290.4 mean_steps=15.4
|
|
[Episode 18570] reward=-102061694.1 actor_loss=0.3339 critic_loss=138712900364.1905 entropy=16.8285 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 18580] reward=-114398518.7 actor_loss=0.2805 critic_loss=152862845246.5778 entropy=16.8314 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 18580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424937.4 mean_steps=14.4
|
|
[Episode 18590] reward=-114206855.3 actor_loss=0.3693 critic_loss=154151887394.1333 entropy=16.8429 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 18600] reward=-109234448.4 actor_loss=0.3301 critic_loss=148478572407.4667 entropy=16.8477 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 18600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-426277.2 mean_steps=15.2
|
|
[Episode 18610] reward=-113531339.8 actor_loss=0.3545 critic_loss=200326496984.1778 entropy=16.8566 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 18620] reward=-115434192.9 actor_loss=0.3591 critic_loss=170802590641.2308 entropy=16.8738 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 18620] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-387431.0 mean_steps=17.2
|
|
[Episode 18630] reward=-114682859.3 actor_loss=0.2981 critic_loss=161781912274.8235 entropy=16.8677 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 18640] reward=-111400368.2 actor_loss=0.3769 critic_loss=149883167630.2222 entropy=16.8559 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 18640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-443116.2 mean_steps=14.7
|
|
[Episode 18650] reward=-111793381.1 actor_loss=0.3351 critic_loss=153651215291.7333 entropy=16.8471 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 18660] reward=-113627788.6 actor_loss=0.2727 critic_loss=150801819602.4889 entropy=16.8597 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 18660] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-258482.8 mean_steps=18.0
|
|
[Episode 18670] reward=-112976160.6 actor_loss=0.3082 critic_loss=155530943237.6889 entropy=16.8618 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 18680] reward=-113020345.4 actor_loss=0.3036 critic_loss=155205278105.6000 entropy=16.8703 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 18680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528077.6 mean_steps=14.5
|
|
[Episode 18690] reward=-115776241.0 actor_loss=0.2562 critic_loss=155879532134.4000 entropy=16.8607 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 18700] reward=-115296131.2 actor_loss=0.3454 critic_loss=154422786275.5555 entropy=16.8516 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 18700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445290.7 mean_steps=15.8
|
|
[Episode 18710] reward=-109888179.4 actor_loss=0.4826 critic_loss=145580872317.1555 entropy=16.8290 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1510 front_blocked=0
|
|
[Episode 18720] reward=-117672343.6 actor_loss=0.2447 critic_loss=155757233617.4546 entropy=16.8008 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 18720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-339785.2 mean_steps=16.1
|
|
[Episode 18730] reward=-104729363.7 actor_loss=0.4162 critic_loss=141395880800.7111 entropy=16.7806 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 18740] reward=-106113191.6 actor_loss=0.2830 critic_loss=143692835986.2857 entropy=16.7997 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 18740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409442.0 mean_steps=16.5
|
|
[Episode 18750] reward=-109481339.8 actor_loss=0.2911 critic_loss=140362963535.6444 entropy=16.7729 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 18760] reward=-113750378.6 actor_loss=0.5332 critic_loss=155358628704.7111 entropy=16.7991 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1536 front_blocked=0
|
|
[Eval 18760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584266.0 mean_steps=13.1
|
|
[Episode 18770] reward=-111046930.4 actor_loss=0.3536 critic_loss=146401290740.6222 entropy=16.8052 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 18780] reward=-114273982.5 actor_loss=0.2223 critic_loss=156783491299.5555 entropy=16.8304 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 18780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-629741.2 mean_steps=13.4
|
|
[Episode 18790] reward=-114423959.8 actor_loss=0.2981 critic_loss=152301536560.4324 entropy=16.8269 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 18800] reward=-110825287.4 actor_loss=0.3468 critic_loss=145654472400.5926 entropy=16.8343 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 18800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563286.6 mean_steps=13.4
|
|
[Episode 18810] reward=-117345312.9 actor_loss=0.2693 critic_loss=162705581670.4000 entropy=16.8541 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 18820] reward=-113036433.8 actor_loss=0.2775 critic_loss=149634672230.4000 entropy=16.8360 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 18820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573677.5 mean_steps=13.8
|
|
[Episode 18830] reward=-109756601.7 actor_loss=0.2878 critic_loss=147818137372.4445 entropy=16.8410 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 18840] reward=-111327481.5 actor_loss=0.3593 critic_loss=142899461597.8667 entropy=16.8257 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 18840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-591190.3 mean_steps=13.2
|
|
[Episode 18850] reward=-110472464.2 actor_loss=0.3280 critic_loss=147153481090.8445 entropy=16.8213 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 18860] reward=-112889425.5 actor_loss=0.2497 critic_loss=147848078222.2222 entropy=16.8230 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 18860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553850.7 mean_steps=13.6
|
|
[Episode 18870] reward=-117246323.2 actor_loss=0.2613 critic_loss=155615503974.4000 entropy=16.8226 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 18880] reward=-114179906.7 actor_loss=0.3058 critic_loss=146583985629.8667 entropy=16.8295 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 18880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-625692.9 mean_steps=12.8
|
|
[Episode 18890] reward=-110939314.0 actor_loss=0.2609 critic_loss=142340651053.5111 entropy=16.8485 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 18900] reward=-116489096.7 actor_loss=0.2762 critic_loss=158008015803.7333 entropy=16.8440 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 18900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535765.2 mean_steps=13.8
|
|
[Episode 18910] reward=-111313703.9 actor_loss=0.3309 critic_loss=143161456321.4222 entropy=16.8554 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 18920] reward=-113489418.5 actor_loss=0.3784 critic_loss=149939626894.2222 entropy=16.8464 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 18920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564000.7 mean_steps=13.9
|
|
[Episode 18930] reward=-111130180.5 actor_loss=0.3283 critic_loss=139828154459.0222 entropy=16.8202 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 18940] reward=-117697266.3 actor_loss=0.2729 critic_loss=153423118336.0000 entropy=16.8045 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 18940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505090.0 mean_steps=14.3
|
|
[Episode 18950] reward=-108971101.1 actor_loss=0.2652 critic_loss=144093308154.3111 entropy=16.8232 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 18960] reward=-114526276.3 actor_loss=0.3432 critic_loss=149023923833.9048 entropy=16.8235 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 18960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-424464.1 mean_steps=13.8
|
|
[Episode 18970] reward=-115396148.2 actor_loss=0.2791 critic_loss=153217090992.3556 entropy=16.8292 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 18980] reward=-109455272.5 actor_loss=0.3105 critic_loss=140674674688.0000 entropy=16.8237 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 18980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528841.4 mean_steps=13.2
|
|
[Episode 18990] reward=-115041769.8 actor_loss=0.2628 critic_loss=153458656324.2667 entropy=16.8319 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 19000] reward=-118382105.3 actor_loss=0.3639 critic_loss=159320789937.2308 entropy=16.8335 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 19000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543747.7 mean_steps=13.6
|
|
[Episode 19010] reward=-113845959.3 actor_loss=0.2879 critic_loss=148296765952.0000 entropy=16.8425 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 19020] reward=-118788512.5 actor_loss=0.3059 critic_loss=157951262720.0000 entropy=16.8487 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 19020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-475711.7 mean_steps=15.6
|
|
[Episode 19030] reward=-108812721.9 actor_loss=0.3737 critic_loss=142346187651.1219 entropy=16.8347 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 19040] reward=-108336302.2 actor_loss=0.3648 critic_loss=140287497739.3778 entropy=16.8393 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 19040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540657.2 mean_steps=13.3
|
|
[Episode 19050] reward=-113394115.1 actor_loss=0.2931 critic_loss=147175653558.0444 entropy=16.8433 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 19060] reward=-116874706.9 actor_loss=0.2516 critic_loss=159404838412.4878 entropy=16.8450 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 19060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507194.3 mean_steps=13.2
|
|
[Episode 19070] reward=-112795314.4 actor_loss=0.2411 critic_loss=147453815552.0000 entropy=16.8715 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 19080] reward=-115054010.8 actor_loss=0.2384 critic_loss=151675357803.1628 entropy=16.8417 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 19080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-583321.2 mean_steps=12.6
|
|
[Episode 19090] reward=-117354808.4 actor_loss=0.3619 critic_loss=157765440853.3333 entropy=16.8449 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 19100] reward=-113620928.8 actor_loss=0.3020 critic_loss=151983187558.4000 entropy=16.8612 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 19100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538468.5 mean_steps=13.9
|
|
[Episode 19110] reward=-111342442.9 actor_loss=0.2730 critic_loss=141705221643.3778 entropy=16.8559 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 19120] reward=-113405464.1 actor_loss=0.3128 critic_loss=151986079880.5333 entropy=16.8736 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 19120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475597.9 mean_steps=14.7
|
|
[Episode 19130] reward=-110324733.5 actor_loss=0.3156 critic_loss=141796676221.1555 entropy=16.8785 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 19140] reward=-117919392.0 actor_loss=0.2541 critic_loss=152481654374.4000 entropy=16.8955 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 19140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553707.3 mean_steps=13.2
|
|
[Episode 19150] reward=-114437853.1 actor_loss=0.3501 critic_loss=157643189270.7556 entropy=16.9063 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 19160] reward=-120317505.6 actor_loss=0.2457 critic_loss=169606759310.2222 entropy=16.9001 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 19160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488894.8 mean_steps=14.6
|
|
[Episode 19170] reward=-116564646.6 actor_loss=0.3860 critic_loss=149738971682.1333 entropy=16.9257 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 19180] reward=-117771672.5 actor_loss=0.2784 critic_loss=160790125317.6889 entropy=16.9312 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 19180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430796.0 mean_steps=14.6
|
|
[Episode 19190] reward=-114490128.9 actor_loss=0.2742 critic_loss=154282108928.0000 entropy=16.9161 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 19200] reward=-114008535.3 actor_loss=0.3446 critic_loss=162680174425.9460 entropy=16.9525 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 19200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-361584.9 mean_steps=15.9
|
|
[Episode 19210] reward=-112041113.6 actor_loss=0.2083 critic_loss=146444223427.7647 entropy=16.9563 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 19220] reward=-112617998.0 actor_loss=0.3503 critic_loss=159421872061.9355 entropy=16.9453 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 19220] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-381153.4 mean_steps=17.1
|
|
[Episode 19230] reward=-113647760.6 actor_loss=0.2702 critic_loss=147171114279.8222 entropy=16.9753 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 19240] reward=-116328352.5 actor_loss=0.3079 critic_loss=154355248911.0588 entropy=17.0069 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 19240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-494027.6 mean_steps=12.9
|
|
[Episode 19250] reward=-114986366.5 actor_loss=0.2656 critic_loss=154177971086.2222 entropy=17.0087 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 19260] reward=-114587693.7 actor_loss=0.2682 critic_loss=156075912457.4815 entropy=17.0036 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 19260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434664.2 mean_steps=14.2
|
|
[Episode 19270] reward=-116050884.6 actor_loss=0.2519 critic_loss=155009630736.5161 entropy=17.0049 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 19280] reward=-116441307.6 actor_loss=0.4436 critic_loss=155733273395.2000 entropy=17.0363 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 19280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-645701.8 mean_steps=11.9
|
|
[Episode 19290] reward=-114009432.1 actor_loss=0.3694 critic_loss=149192500163.7647 entropy=17.0448 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 19300] reward=-120106890.7 actor_loss=0.2045 critic_loss=158849176917.3333 entropy=17.0341 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 19300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585254.4 mean_steps=13.2
|
|
[Episode 19310] reward=-114311448.5 actor_loss=0.3843 critic_loss=151108820081.7778 entropy=17.0395 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 19320] reward=-106234681.0 actor_loss=0.3792 critic_loss=140584324066.7429 entropy=17.0520 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 19320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440682.2 mean_steps=15.8
|
|
[Episode 19330] reward=-117877548.8 actor_loss=0.2991 critic_loss=155457866020.5714 entropy=17.0624 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 19340] reward=-106505772.6 actor_loss=0.2746 critic_loss=150023120850.4889 entropy=17.0641 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 19340] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-665713.5 mean_steps=11.3
|
|
[Episode 19350] reward=-113189591.8 actor_loss=0.2441 critic_loss=152144613284.9778 entropy=17.0846 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 19360] reward=-115696202.1 actor_loss=0.2864 critic_loss=174505409299.6923 entropy=17.0801 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 19360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597601.8 mean_steps=12.6
|
|
[Episode 19370] reward=-114088083.6 actor_loss=0.2167 critic_loss=152556809602.8445 entropy=17.0815 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 19380] reward=-113928647.3 actor_loss=0.2951 critic_loss=156648780396.6060 entropy=17.0834 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 19380] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-313812.6 mean_steps=18.5
|
|
[Episode 19390] reward=-113787109.4 actor_loss=0.3227 critic_loss=158005427010.3704 entropy=17.0690 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 19400] reward=-117280592.1 actor_loss=0.3030 critic_loss=158944653721.6000 entropy=17.0811 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 19400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-460721.1 mean_steps=15.6
|
|
[Episode 19410] reward=-108806724.5 actor_loss=0.3533 critic_loss=150229333515.3778 entropy=17.0677 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 19420] reward=-119230517.9 actor_loss=0.1661 critic_loss=158137258257.0667 entropy=17.0637 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 19420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-509183.5 mean_steps=14.8
|
|
[Episode 19430] reward=-115356180.6 actor_loss=0.4012 critic_loss=156819811714.8445 entropy=17.0560 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 19440] reward=-117752059.4 actor_loss=0.3162 critic_loss=162116479502.6286 entropy=17.0732 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 19440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541187.0 mean_steps=14.1
|
|
[Episode 19450] reward=-109454072.4 actor_loss=0.4358 critic_loss=143873014351.6444 entropy=17.0749 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 19460] reward=-116557963.9 actor_loss=0.3403 critic_loss=156017521152.0000 entropy=17.0474 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 19460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553311.6 mean_steps=14.4
|
|
[Episode 19470] reward=-114521947.7 actor_loss=0.3241 critic_loss=158995523538.4889 entropy=17.0361 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 19480] reward=-111821625.4 actor_loss=0.3871 critic_loss=148120633799.1111 entropy=17.0379 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 19480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505938.1 mean_steps=14.2
|
|
[Episode 19490] reward=-114847788.8 actor_loss=0.2794 critic_loss=157436783638.7556 entropy=17.0369 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 19500] reward=-124046985.1 actor_loss=0.1953 critic_loss=179588537093.6889 entropy=17.0244 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 19500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429414.0 mean_steps=15.0
|
|
[Episode 19510] reward=-115643111.1 actor_loss=0.3497 critic_loss=152402023037.1555 entropy=17.0309 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 19520] reward=-113826381.8 actor_loss=0.3970 critic_loss=155407081472.0000 entropy=17.0530 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 19520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476745.8 mean_steps=13.6
|
|
[Episode 19530] reward=-119067346.8 actor_loss=0.3302 critic_loss=161867733765.6889 entropy=17.0385 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 19540] reward=-110146740.4 actor_loss=0.3392 critic_loss=149599985078.8571 entropy=17.0511 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 19540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494059.0 mean_steps=14.2
|
|
[Episode 19550] reward=-113772510.4 actor_loss=0.2713 critic_loss=163224527030.0444 entropy=17.0422 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 19560] reward=-110902597.2 actor_loss=0.2952 critic_loss=151632695933.1555 entropy=17.0508 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 19560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587175.7 mean_steps=13.9
|
|
[Episode 19570] reward=-114117640.5 actor_loss=0.2994 critic_loss=151512958020.2667 entropy=17.0276 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 19580] reward=-116090119.0 actor_loss=0.3005 critic_loss=154594791575.7037 entropy=17.0397 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 19580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-617859.8 mean_steps=12.2
|
|
[Episode 19590] reward=-120883827.9 actor_loss=0.2940 critic_loss=161082062848.0000 entropy=17.0411 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 19600] reward=-109619779.7 actor_loss=0.3213 critic_loss=147374925141.3333 entropy=17.0356 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 19600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466169.1 mean_steps=15.2
|
|
[Episode 19610] reward=-121198032.0 actor_loss=0.2139 critic_loss=165588464515.1219 entropy=17.0552 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 19620] reward=-112223874.3 actor_loss=0.2483 critic_loss=147737083483.8974 entropy=17.0504 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 19620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529068.3 mean_steps=14.7
|
|
[Episode 19630] reward=-119777738.3 actor_loss=0.2274 critic_loss=161994754184.5333 entropy=17.0425 approx_kl=0.0103 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 19640] reward=-112970213.1 actor_loss=0.2785 critic_loss=151296070997.3333 entropy=17.0499 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 19640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512483.3 mean_steps=14.3
|
|
[Episode 19650] reward=-112872017.1 actor_loss=0.3857 critic_loss=153886928404.4800 entropy=17.0605 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 19660] reward=-108896359.9 actor_loss=0.3415 critic_loss=144298622464.0000 entropy=17.0485 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 19660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-618024.6 mean_steps=13.2
|
|
[Episode 19670] reward=-109467964.1 actor_loss=0.3240 critic_loss=144228163948.0889 entropy=17.0544 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 19680] reward=-117507436.5 actor_loss=0.2739 critic_loss=155927824520.5333 entropy=17.0486 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 19680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-333550.5 mean_steps=15.9
|
|
[Episode 19690] reward=-116939187.1 actor_loss=0.2905 critic_loss=154745734576.3556 entropy=17.0399 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 19700] reward=-113966614.3 actor_loss=0.3461 critic_loss=149810482380.8000 entropy=17.0411 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 19700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476010.7 mean_steps=15.3
|
|
[Episode 19710] reward=-111515310.4 actor_loss=0.2923 critic_loss=155999881898.6667 entropy=17.0513 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 19720] reward=-112226479.6 actor_loss=0.3146 critic_loss=153266432445.2174 entropy=17.0610 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 19720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-633410.6 mean_steps=12.2
|
|
[Episode 19730] reward=-113454892.4 actor_loss=0.2844 critic_loss=149939604684.8000 entropy=17.0743 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 19740] reward=-110691267.5 actor_loss=0.2943 critic_loss=148233209992.5333 entropy=17.0992 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 19740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-540198.5 mean_steps=12.6
|
|
[Episode 19750] reward=-112970743.7 actor_loss=0.3530 critic_loss=158182348845.5111 entropy=17.0959 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 19760] reward=-121666354.5 actor_loss=0.2744 critic_loss=163974872715.6364 entropy=17.1019 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 19760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576156.9 mean_steps=13.7
|
|
[Episode 19770] reward=-110653929.0 actor_loss=0.3229 critic_loss=185169442679.4667 entropy=17.0901 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 19780] reward=-120831997.0 actor_loss=0.3372 critic_loss=164499362793.2444 entropy=17.0979 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 19780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459826.1 mean_steps=14.9
|
|
[Episode 19790] reward=-113141600.7 actor_loss=0.2296 critic_loss=154262246134.5185 entropy=17.0799 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 19800] reward=-138775995.4 actor_loss=0.2969 critic_loss=2088121994288.7620 entropy=17.0982 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 19800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537017.6 mean_steps=13.8
|
|
[Episode 19810] reward=-117944781.4 actor_loss=0.4045 critic_loss=157167665152.0000 entropy=17.0940 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 19820] reward=-114338277.9 actor_loss=0.3426 critic_loss=151020438232.1778 entropy=17.1028 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 19820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-436377.8 mean_steps=14.8
|
|
[Episode 19830] reward=-112869417.7 actor_loss=0.3416 critic_loss=149604231668.6222 entropy=17.0887 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 19840] reward=-116449352.3 actor_loss=0.3333 critic_loss=160639449245.5385 entropy=17.0893 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 19840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-374725.4 mean_steps=14.8
|
|
[Episode 19850] reward=-117807013.8 actor_loss=0.3490 critic_loss=159144528858.0741 entropy=17.0946 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 19860] reward=-116333798.4 actor_loss=0.3071 critic_loss=154186699753.2444 entropy=17.0893 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 19860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482459.1 mean_steps=14.8
|
|
[Episode 19870] reward=-111155136.8 actor_loss=0.2944 critic_loss=156318920004.6829 entropy=17.1032 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 19880] reward=-116020203.0 actor_loss=0.2859 critic_loss=153981244393.2444 entropy=17.0839 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 19880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-395537.9 mean_steps=15.7
|
|
[Episode 19890] reward=-115350908.4 actor_loss=0.3412 critic_loss=155543701640.5333 entropy=17.0799 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 19900] reward=-116916161.0 actor_loss=0.2160 critic_loss=171598995720.2581 entropy=17.0778 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 19900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-516823.1 mean_steps=15.4
|
|
[Episode 19910] reward=-111187095.4 actor_loss=0.3757 critic_loss=154969710309.5172 entropy=17.0667 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 19920] reward=-119361066.3 actor_loss=0.2877 critic_loss=165919544164.8485 entropy=17.0805 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 19920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-646665.2 mean_steps=12.6
|
|
[Episode 19930] reward=-119143415.4 actor_loss=0.3717 critic_loss=169576451780.9231 entropy=17.0825 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 19940] reward=-112842208.7 actor_loss=0.3712 critic_loss=146437269094.4000 entropy=17.0927 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 19940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469806.7 mean_steps=15.2
|
|
[Episode 19950] reward=-117723242.9 actor_loss=0.1846 critic_loss=151507245465.6000 entropy=17.0869 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 19960] reward=-113942404.9 actor_loss=0.2954 critic_loss=151648535620.2667 entropy=17.0787 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 19960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562759.7 mean_steps=13.3
|
|
[Episode 19970] reward=-115087415.8 actor_loss=0.2725 critic_loss=156145627451.0769 entropy=17.0820 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 19980] reward=-113332388.8 actor_loss=0.2979 critic_loss=152287022284.8000 entropy=17.0741 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 19980] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-713907.9 mean_steps=11.5
|
|
[Episode 19990] reward=-122216385.2 actor_loss=0.3001 critic_loss=162227017669.4857 entropy=17.0746 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 20000] reward=-115096811.6 actor_loss=0.3292 critic_loss=153180977754.3529 entropy=17.0709 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 20000] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-277978.7 mean_steps=16.4
|
|
[Episode 20010] reward=-117624464.2 actor_loss=0.3814 critic_loss=159411159040.0000 entropy=17.0694 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 20020] reward=-113943633.5 actor_loss=0.3282 critic_loss=146240765132.8000 entropy=17.0706 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 20020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-622928.4 mean_steps=11.9
|
|
[Episode 20030] reward=-121977139.4 actor_loss=0.3152 critic_loss=167552548208.6400 entropy=17.0888 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 20040] reward=-110377645.8 actor_loss=0.3234 critic_loss=142865102893.5111 entropy=17.0954 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 20040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-362373.4 mean_steps=16.2
|
|
[Episode 20050] reward=-123886731.5 actor_loss=0.2891 critic_loss=166457897415.1111 entropy=17.0986 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 20060] reward=-114590740.0 actor_loss=0.4231 critic_loss=152875242837.3333 entropy=17.0911 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 20060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-429948.2 mean_steps=17.8
|
|
[Episode 20070] reward=-112386172.3 actor_loss=0.3671 critic_loss=151338986154.6667 entropy=17.0803 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 20080] reward=-113077859.2 actor_loss=0.3227 critic_loss=146149058878.5778 entropy=17.0781 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 20080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-481998.5 mean_steps=13.2
|
|
[Episode 20090] reward=-114512627.7 actor_loss=0.3960 critic_loss=156394941518.7692 entropy=17.0888 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 20100] reward=-113759661.4 actor_loss=0.2703 critic_loss=147827382164.2105 entropy=17.1024 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 20100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398442.7 mean_steps=16.6
|
|
[Episode 20110] reward=-112660990.1 actor_loss=0.3720 critic_loss=151935544706.8445 entropy=17.1070 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 20120] reward=-116432647.4 actor_loss=0.2477 critic_loss=153817776492.0889 entropy=17.1330 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 20120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586051.9 mean_steps=12.9
|
|
[Episode 20130] reward=-111340827.9 actor_loss=0.3639 critic_loss=147050437745.7778 entropy=17.1267 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 20140] reward=-113988338.3 actor_loss=0.4193 critic_loss=150194125482.6667 entropy=17.1401 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 20140] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-696857.5 mean_steps=10.7
|
|
[Episode 20150] reward=-117373653.6 actor_loss=0.2830 critic_loss=159849970654.9677 entropy=17.1317 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 20160] reward=-114816792.1 actor_loss=0.2263 critic_loss=145396497885.8667 entropy=17.1395 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 20160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-469233.1 mean_steps=15.6
|
|
[Episode 20170] reward=-120365080.1 actor_loss=0.2899 critic_loss=158300720696.8889 entropy=17.1333 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 20180] reward=-113625973.2 actor_loss=0.3931 critic_loss=153741112479.2889 entropy=17.1445 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 20180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-543961.8 mean_steps=14.8
|
|
[Episode 20190] reward=-116198453.7 actor_loss=0.2778 critic_loss=151014434328.3810 entropy=17.1508 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 20200] reward=-114016360.8 actor_loss=0.3987 critic_loss=156697822640.3556 entropy=17.1618 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 20200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465022.7 mean_steps=14.8
|
|
[Episode 20210] reward=-116417998.9 actor_loss=0.3764 critic_loss=156636942609.0667 entropy=17.1807 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 20220] reward=-118700867.2 actor_loss=0.3057 critic_loss=157787676240.8421 entropy=17.1652 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 20220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486317.9 mean_steps=15.1
|
|
[Episode 20230] reward=-115545094.1 actor_loss=0.3363 critic_loss=154856702714.0465 entropy=17.1991 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 20240] reward=-116399077.5 actor_loss=0.3014 critic_loss=156832264086.9744 entropy=17.2027 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 20240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507486.7 mean_steps=14.3
|
|
[Episode 20250] reward=-113406314.3 actor_loss=0.3510 critic_loss=156077967132.4445 entropy=17.2050 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 20260] reward=-121544085.2 actor_loss=0.2936 critic_loss=164171140995.8788 entropy=17.1988 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 20260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425302.6 mean_steps=16.1
|
|
[Episode 20270] reward=-119004922.4 actor_loss=0.2553 critic_loss=157653709085.7675 entropy=17.2137 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 20280] reward=-111180165.3 actor_loss=0.3910 critic_loss=146196377144.8889 entropy=17.2159 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 20280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-336427.3 mean_steps=17.3
|
|
[Episode 20290] reward=-116315005.4 actor_loss=0.3335 critic_loss=156306698649.6000 entropy=17.2261 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 20300] reward=-115439470.4 actor_loss=0.2350 critic_loss=151491298540.3077 entropy=17.2169 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 20300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-491516.7 mean_steps=13.5
|
|
[Episode 20310] reward=-120931177.0 actor_loss=0.2214 critic_loss=165903315035.0222 entropy=17.2196 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 20320] reward=-120567378.9 actor_loss=0.2752 critic_loss=157109115037.5385 entropy=17.2200 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 20320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574899.2 mean_steps=14.1
|
|
[Episode 20330] reward=-121148817.6 actor_loss=0.3033 critic_loss=164834046634.6667 entropy=17.2268 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 20340] reward=-117965695.2 actor_loss=0.2242 critic_loss=155429919129.6000 entropy=17.2404 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 20340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585238.9 mean_steps=12.8
|
|
[Episode 20350] reward=-113085547.2 actor_loss=0.2827 critic_loss=148635698426.3111 entropy=17.2466 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 20360] reward=-113053135.8 actor_loss=0.2922 critic_loss=150961453974.0690 entropy=17.2571 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 20360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-447850.5 mean_steps=13.6
|
|
[Episode 20370] reward=-118284186.6 actor_loss=0.2840 critic_loss=159341890218.6667 entropy=17.2591 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 20380] reward=-118229641.0 actor_loss=0.2479 critic_loss=153664117873.7778 entropy=17.2518 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 20380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518793.1 mean_steps=14.3
|
|
[Episode 20390] reward=-124362786.1 actor_loss=0.2762 critic_loss=173298160981.3333 entropy=17.2417 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 20400] reward=-116251875.6 actor_loss=0.2716 critic_loss=158283477133.2414 entropy=17.2667 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 20400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-529846.2 mean_steps=12.4
|
|
[Episode 20410] reward=-109112807.6 actor_loss=0.3772 critic_loss=143753388032.0000 entropy=17.2671 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 20420] reward=-119327562.6 actor_loss=0.3277 critic_loss=163292862873.6000 entropy=17.2669 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 20420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513518.5 mean_steps=14.6
|
|
[Episode 20430] reward=-123543499.3 actor_loss=0.2543 critic_loss=161598800236.0889 entropy=17.2674 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 20440] reward=-118001306.2 actor_loss=0.2482 critic_loss=162261429452.8000 entropy=17.2791 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 20440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-406835.8 mean_steps=14.2
|
|
[Episode 20450] reward=-113469006.4 actor_loss=0.3850 critic_loss=147950814640.3556 entropy=17.2834 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 20460] reward=-114295953.2 actor_loss=0.2465 critic_loss=154325239974.0540 entropy=17.3004 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 20460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505816.7 mean_steps=13.4
|
|
[Episode 20470] reward=-118218424.4 actor_loss=0.2763 critic_loss=178563207805.1555 entropy=17.2955 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 20480] reward=-114009888.9 actor_loss=0.3615 critic_loss=152123750462.0606 entropy=17.2882 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 20480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619959.5 mean_steps=12.8
|
|
[Episode 20490] reward=-118162569.6 actor_loss=0.2011 critic_loss=157419807788.5217 entropy=17.2921 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 20500] reward=-111190705.0 actor_loss=0.2210 critic_loss=142763677013.3333 entropy=17.2836 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 20500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592535.7 mean_steps=14.3
|
|
[Episode 20510] reward=-118027299.9 actor_loss=0.3450 critic_loss=153394356770.1333 entropy=17.2943 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 20520] reward=-120246448.3 actor_loss=0.2948 critic_loss=163492292926.5778 entropy=17.2803 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 20520] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-692206.6 mean_steps=12.7
|
|
[Episode 20530] reward=-118359185.1 actor_loss=0.3346 critic_loss=162196007594.6667 entropy=17.2794 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 20540] reward=-115443951.2 actor_loss=0.3526 critic_loss=155817054966.5185 entropy=17.2849 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 20540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-621304.9 mean_steps=13.2
|
|
[Episode 20550] reward=-115370634.4 actor_loss=0.3328 critic_loss=165406680795.4286 entropy=17.2796 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 20560] reward=-115736477.0 actor_loss=0.3086 critic_loss=152059060224.0000 entropy=17.2884 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 20560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-363828.6 mean_steps=16.3
|
|
[Episode 20570] reward=-119957029.2 actor_loss=0.3347 critic_loss=161638287132.4445 entropy=17.2940 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 20580] reward=-113932125.3 actor_loss=0.2654 critic_loss=148871186204.4445 entropy=17.3270 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 20580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521670.0 mean_steps=15.2
|
|
[Episode 20590] reward=-111385613.3 actor_loss=0.3237 critic_loss=154024902656.0000 entropy=17.3446 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 20600] reward=-117220031.4 actor_loss=0.2323 critic_loss=170952248888.8889 entropy=17.3311 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 20600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531618.5 mean_steps=14.4
|
|
[Episode 20610] reward=-123541353.0 actor_loss=0.3388 critic_loss=170318698177.4222 entropy=17.3139 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 20620] reward=-118755382.1 actor_loss=0.2352 critic_loss=156739363726.2222 entropy=17.3011 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 20620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-373374.2 mean_steps=16.3
|
|
[Episode 20630] reward=-112812462.7 actor_loss=0.2877 critic_loss=153182692966.4000 entropy=17.3025 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 20640] reward=-113369416.7 actor_loss=0.2999 critic_loss=158903105763.5555 entropy=17.3015 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 20640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440851.5 mean_steps=15.8
|
|
[Episode 20650] reward=-113855010.8 actor_loss=0.3134 critic_loss=153076411830.8571 entropy=17.2906 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 20660] reward=-114873521.5 actor_loss=0.3179 critic_loss=156341717978.0741 entropy=17.2779 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 20660] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-682303.1 mean_steps=11.7
|
|
[Episode 20670] reward=-113014871.4 actor_loss=0.3862 critic_loss=150137829309.9355 entropy=17.2667 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 20680] reward=-114594941.0 actor_loss=0.4258 critic_loss=159273616452.2667 entropy=17.2583 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 20680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619084.2 mean_steps=13.0
|
|
[Episode 20690] reward=-109938674.9 actor_loss=0.3142 critic_loss=173631232773.6889 entropy=17.2558 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 20700] reward=-113693313.8 actor_loss=0.3373 critic_loss=156483839226.3111 entropy=17.2556 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 20700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-617268.4 mean_steps=11.9
|
|
[Episode 20710] reward=-116483769.8 actor_loss=0.3069 critic_loss=154588278307.7209 entropy=17.2914 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 20720] reward=-113796671.5 actor_loss=0.2819 critic_loss=150745144797.8667 entropy=17.2819 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 20720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-582470.0 mean_steps=11.8
|
|
[Episode 20730] reward=-113682515.2 actor_loss=0.3494 critic_loss=150938433399.4667 entropy=17.2839 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 20740] reward=-121105951.8 actor_loss=0.2298 critic_loss=174300163822.9333 entropy=17.2991 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 20740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414066.6 mean_steps=15.6
|
|
[Episode 20750] reward=-121684651.6 actor_loss=0.2456 critic_loss=168477720120.8889 entropy=17.2841 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 20760] reward=-117349279.6 actor_loss=0.3645 critic_loss=160426505011.2000 entropy=17.2821 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 20760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442421.0 mean_steps=14.9
|
|
[Episode 20770] reward=-115509433.1 actor_loss=0.1867 critic_loss=148779957840.8421 entropy=17.3104 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 20780] reward=-115645896.1 actor_loss=0.2012 critic_loss=146437607332.9778 entropy=17.3076 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 20780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458429.7 mean_steps=15.1
|
|
[Episode 20790] reward=-121335558.0 actor_loss=0.2851 critic_loss=159413484748.8000 entropy=17.3057 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 20800] reward=-114241741.6 actor_loss=0.3493 critic_loss=153556854473.6970 entropy=17.3014 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 20800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-471943.2 mean_steps=12.9
|
|
[Episode 20810] reward=-119757406.5 actor_loss=0.2355 critic_loss=159399136162.9091 entropy=17.3192 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 20820] reward=-116699801.0 actor_loss=0.2241 critic_loss=154519036450.1333 entropy=17.2997 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 20820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420840.7 mean_steps=15.7
|
|
[Episode 20830] reward=-119140882.2 actor_loss=0.3190 critic_loss=245062808234.6667 entropy=17.2917 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 20840] reward=-118842324.8 actor_loss=0.3379 critic_loss=175640439974.6977 entropy=17.2856 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 20840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491901.8 mean_steps=15.2
|
|
[Episode 20850] reward=-115563261.3 actor_loss=0.2119 critic_loss=159160116955.4286 entropy=17.2686 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 20860] reward=-116022428.0 actor_loss=0.2539 critic_loss=159623636218.3111 entropy=17.2799 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 20860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-631416.6 mean_steps=12.0
|
|
[Episode 20870] reward=-120144114.8 actor_loss=0.2889 critic_loss=168153224078.2222 entropy=17.2809 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 20880] reward=-112852783.5 actor_loss=0.3852 critic_loss=156117016185.9048 entropy=17.2897 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 20880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-362580.0 mean_steps=15.4
|
|
[Episode 20890] reward=-119713008.8 actor_loss=0.3266 critic_loss=159853647917.5111 entropy=17.2704 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 20900] reward=-115957571.1 actor_loss=0.2847 critic_loss=155900912786.2857 entropy=17.2622 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 20900] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-299069.4 mean_steps=16.9
|
|
[Episode 20910] reward=-118811393.3 actor_loss=0.2466 critic_loss=154412464420.5714 entropy=17.2701 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 20920] reward=-112407383.1 actor_loss=0.3802 critic_loss=152298414080.0000 entropy=17.2665 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 20920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434209.9 mean_steps=14.5
|
|
[Episode 20930] reward=-115312128.2 actor_loss=0.3285 critic_loss=152205076230.2439 entropy=17.2855 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 20940] reward=-120410279.2 actor_loss=0.2009 critic_loss=162384097735.1111 entropy=17.2796 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 20940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-606906.1 mean_steps=11.9
|
|
[Episode 20950] reward=-119721460.4 actor_loss=0.3688 critic_loss=157156929357.9131 entropy=17.2903 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 20960] reward=-117817895.0 actor_loss=0.3032 critic_loss=155872166687.2195 entropy=17.2900 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 20960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563559.1 mean_steps=12.9
|
|
[Episode 20970] reward=-115379357.6 actor_loss=0.3440 critic_loss=157530606369.3913 entropy=17.2851 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 20980] reward=-119255227.9 actor_loss=0.3219 critic_loss=158449453899.2941 entropy=17.3039 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 20980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379415.7 mean_steps=16.1
|
|
[Episode 20990] reward=-120395366.1 actor_loss=0.2509 critic_loss=244503901059.1219 entropy=17.2950 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 21000] reward=-118124882.1 actor_loss=0.3246 critic_loss=155178581651.9111 entropy=17.2935 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 21000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452601.8 mean_steps=16.1
|
|
[Episode 21010] reward=-118008701.9 actor_loss=0.2850 critic_loss=156665312530.7317 entropy=17.2903 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 21020] reward=-113103998.9 actor_loss=0.3726 critic_loss=154938790161.0667 entropy=17.2845 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 21020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-423566.3 mean_steps=16.8
|
|
[Episode 21030] reward=-116717242.0 actor_loss=0.3205 critic_loss=155800988330.6667 entropy=17.2854 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 21040] reward=-122498541.0 actor_loss=0.2066 critic_loss=164542588660.8696 entropy=17.2738 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 21040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-615835.3 mean_steps=13.1
|
|
[Episode 21050] reward=-115709937.4 actor_loss=0.3050 critic_loss=152682755557.0526 entropy=17.2975 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 21060] reward=-116477563.0 actor_loss=0.3422 critic_loss=153871369485.4737 entropy=17.2931 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 21060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493927.9 mean_steps=14.2
|
|
[Episode 21070] reward=-114041804.3 actor_loss=0.2784 critic_loss=152461024369.7778 entropy=17.2857 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 21080] reward=-118647196.4 actor_loss=0.2970 critic_loss=156850360980.6452 entropy=17.2782 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 21080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525672.8 mean_steps=13.1
|
|
[Episode 21090] reward=-118115440.9 actor_loss=0.2813 critic_loss=160843097063.0244 entropy=17.2763 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 21100] reward=-116479653.1 actor_loss=0.2785 critic_loss=159697613917.0909 entropy=17.2703 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 21100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-544974.4 mean_steps=12.5
|
|
[Episode 21110] reward=-116860853.9 actor_loss=0.4079 critic_loss=162455563468.8000 entropy=17.2857 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 21120] reward=-115520793.4 actor_loss=0.3783 critic_loss=156566668709.6471 entropy=17.2964 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 21120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475920.0 mean_steps=14.0
|
|
[Episode 21130] reward=-123090092.3 actor_loss=0.2829 critic_loss=165494470519.4667 entropy=17.3073 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 21140] reward=-117600776.0 actor_loss=0.3453 critic_loss=154425840981.3333 entropy=17.3064 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 21140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483598.2 mean_steps=14.0
|
|
[Episode 21150] reward=-116991795.2 actor_loss=0.3496 critic_loss=154709799367.1111 entropy=17.3033 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 21160] reward=-119432931.6 actor_loss=0.2298 critic_loss=157719942485.3333 entropy=17.3066 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 21160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539794.3 mean_steps=14.8
|
|
[Episode 21170] reward=-117313963.7 actor_loss=0.2889 critic_loss=155691544849.0667 entropy=17.2971 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 21180] reward=-116480974.4 actor_loss=0.3086 critic_loss=150676678168.3810 entropy=17.3110 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 21180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-512380.2 mean_steps=12.3
|
|
[Episode 21190] reward=-116886362.7 actor_loss=0.3944 critic_loss=151325892790.0444 entropy=17.3239 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 21200] reward=-124148650.4 actor_loss=0.3550 critic_loss=169827288600.3810 entropy=17.3170 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 21200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-665255.5 mean_steps=11.9
|
|
[Episode 21210] reward=-116213854.0 actor_loss=0.2789 critic_loss=156074363562.6667 entropy=17.3004 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 21220] reward=-113064518.4 actor_loss=0.3139 critic_loss=153683364704.7111 entropy=17.2944 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 21220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-579165.6 mean_steps=13.9
|
|
[Episode 21230] reward=-117686357.5 actor_loss=0.2795 critic_loss=154922995396.9231 entropy=17.2981 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 21240] reward=-116424208.3 actor_loss=0.2675 critic_loss=151031888463.6444 entropy=17.3216 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 21240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-658771.3 mean_steps=12.3
|
|
[Episode 21250] reward=-112534884.9 actor_loss=0.2626 critic_loss=148001447025.7778 entropy=17.3270 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 21260] reward=-113618524.0 actor_loss=0.2756 critic_loss=150984646473.9556 entropy=17.3136 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 21260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537262.2 mean_steps=14.2
|
|
[Episode 21270] reward=-117845419.6 actor_loss=0.3444 critic_loss=166350141889.5610 entropy=17.2942 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 21280] reward=-118377098.2 actor_loss=0.3209 critic_loss=170803938736.3556 entropy=17.2940 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 21280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-505904.8 mean_steps=14.9
|
|
[Episode 21290] reward=-117140983.7 actor_loss=0.2510 critic_loss=153868777110.5882 entropy=17.3012 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 21300] reward=-113889812.8 actor_loss=0.3177 critic_loss=158121220141.5111 entropy=17.2993 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 21300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-614002.5 mean_steps=14.1
|
|
[Episode 21310] reward=-121305627.0 actor_loss=0.2406 critic_loss=158470418747.0769 entropy=17.2908 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 21320] reward=-118852217.7 actor_loss=0.3332 critic_loss=156851215837.8667 entropy=17.3006 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 21320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-452108.4 mean_steps=16.4
|
|
[Episode 21330] reward=-123331420.9 actor_loss=0.2135 critic_loss=165654907744.7111 entropy=17.3090 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 21340] reward=-118020342.9 actor_loss=0.4243 critic_loss=157614577527.4667 entropy=17.3085 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 21340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551991.7 mean_steps=13.0
|
|
[Episode 21350] reward=-121806120.0 actor_loss=0.2647 critic_loss=164553268701.8667 entropy=17.3231 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 21360] reward=-120295831.6 actor_loss=0.2425 critic_loss=159094741401.6000 entropy=17.3270 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 21360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454346.4 mean_steps=15.2
|
|
[Episode 21370] reward=-115426535.8 actor_loss=0.3423 critic_loss=153770368231.2258 entropy=17.3402 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 21380] reward=-119540027.9 actor_loss=0.3569 critic_loss=156433395126.8571 entropy=17.3321 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 21380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529247.2 mean_steps=162.1
|
|
[Episode 21390] reward=-115875035.5 actor_loss=0.3525 critic_loss=154746270967.1724 entropy=17.3432 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 21400] reward=-120918694.6 actor_loss=0.3186 critic_loss=163892237458.2857 entropy=17.3598 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 21400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423742.9 mean_steps=15.7
|
|
[Episode 21410] reward=-122767480.9 actor_loss=0.1716 critic_loss=166419417560.6154 entropy=17.3744 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 21420] reward=-114807592.7 actor_loss=0.2923 critic_loss=152542432460.8000 entropy=17.3857 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 21420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470296.0 mean_steps=15.1
|
|
[Episode 21430] reward=-114032417.7 actor_loss=0.3679 critic_loss=148736188962.1333 entropy=17.4021 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 21440] reward=-117538546.4 actor_loss=0.2540 critic_loss=158366413040.9412 entropy=17.4229 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 21440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488272.7 mean_steps=15.1
|
|
[Episode 21450] reward=-123384680.9 actor_loss=0.3211 critic_loss=165666401666.8445 entropy=17.4328 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 21460] reward=-118752162.0 actor_loss=0.3038 critic_loss=157612514417.7778 entropy=17.4221 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 21460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-512954.5 mean_steps=16.3
|
|
[Episode 21470] reward=-113977911.6 actor_loss=0.4197 critic_loss=151399574186.6667 entropy=17.4229 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 21480] reward=-117716082.6 actor_loss=0.2892 critic_loss=155130841088.0000 entropy=17.4191 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 21480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481182.2 mean_steps=15.0
|
|
[Episode 21490] reward=-122807230.9 actor_loss=0.2816 critic_loss=165481200103.6190 entropy=17.4132 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 21500] reward=-110116987.6 actor_loss=0.3530 critic_loss=150246564386.1333 entropy=17.4240 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 21500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544815.9 mean_steps=13.7
|
|
[Episode 21510] reward=-121864000.0 actor_loss=0.2206 critic_loss=162364690080.9143 entropy=17.4361 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 21520] reward=-121167524.9 actor_loss=0.2453 critic_loss=161384971195.7333 entropy=17.4304 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 21520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-356335.4 mean_steps=15.7
|
|
[Episode 21530] reward=-116492523.8 actor_loss=0.2161 critic_loss=158499116646.4000 entropy=17.4333 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 21540] reward=-115988961.6 actor_loss=0.3181 critic_loss=152414143192.1778 entropy=17.4265 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 21540] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-400302.4 mean_steps=17.4
|
|
[Episode 21550] reward=-119024686.4 actor_loss=0.3686 critic_loss=158736888093.7675 entropy=17.4128 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 21560] reward=-113957509.0 actor_loss=0.2697 critic_loss=150221694855.5294 entropy=17.4294 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 21560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-420555.3 mean_steps=14.6
|
|
[Episode 21570] reward=-114722284.1 actor_loss=0.4109 critic_loss=157122850907.0222 entropy=17.4332 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 21580] reward=-114571338.1 actor_loss=0.2111 critic_loss=155812679094.8571 entropy=17.4276 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 21580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441523.3 mean_steps=15.3
|
|
[Episode 21590] reward=-112436433.6 actor_loss=0.4814 critic_loss=149621443309.2683 entropy=17.4252 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 21600] reward=-116399409.8 actor_loss=0.2871 critic_loss=162609861778.2857 entropy=17.3895 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 21600] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-305059.4 mean_steps=17.3
|
|
[Episode 21610] reward=-114128193.0 actor_loss=0.2760 critic_loss=152714867858.2857 entropy=17.3916 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 21620] reward=-117206768.7 actor_loss=0.3265 critic_loss=154147914508.1905 entropy=17.4012 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 21620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551167.6 mean_steps=12.4
|
|
[Episode 21630] reward=-118454822.2 actor_loss=0.2452 critic_loss=160349004686.2222 entropy=17.4281 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 21640] reward=-119035253.5 actor_loss=0.2694 critic_loss=157917239237.4857 entropy=17.4244 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 21640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589283.0 mean_steps=12.6
|
|
[Episode 21650] reward=-121558100.3 actor_loss=0.2922 critic_loss=168026378148.9778 entropy=17.4296 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 21660] reward=-112975377.7 actor_loss=0.2374 critic_loss=146020242773.3333 entropy=17.4130 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 21660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-630877.6 mean_steps=12.8
|
|
[Episode 21670] reward=-120508179.9 actor_loss=0.2296 critic_loss=160306629290.6667 entropy=17.4358 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 21680] reward=-109911789.1 actor_loss=0.2333 critic_loss=142281563648.0000 entropy=17.4533 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 21680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-479758.5 mean_steps=16.0
|
|
[Episode 21690] reward=-124632572.9 actor_loss=0.1811 critic_loss=166164644386.1333 entropy=17.4494 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 21700] reward=-116395475.1 actor_loss=0.2728 critic_loss=160289065171.8621 entropy=17.4427 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 21700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576575.7 mean_steps=13.4
|
|
[Episode 21710] reward=-113416009.8 actor_loss=0.2033 critic_loss=148658389504.0000 entropy=17.4417 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 21720] reward=-123813684.1 actor_loss=0.2094 critic_loss=174709199667.2000 entropy=17.4476 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 21720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-579716.3 mean_steps=11.8
|
|
[Episode 21730] reward=-121304190.3 actor_loss=0.2923 critic_loss=163161604096.0000 entropy=17.4368 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 21740] reward=-120474557.5 actor_loss=0.2299 critic_loss=166548445125.4857 entropy=17.4373 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 21740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537539.0 mean_steps=14.6
|
|
[Episode 21750] reward=-117391657.4 actor_loss=0.2237 critic_loss=154533398078.4390 entropy=17.4238 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 21760] reward=-114868120.9 actor_loss=0.3029 critic_loss=151120919171.6571 entropy=17.4415 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 21760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-556348.4 mean_steps=14.2
|
|
[Episode 21770] reward=-117761748.3 actor_loss=0.3058 critic_loss=160080872880.3556 entropy=17.4398 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 21780] reward=-112321942.3 actor_loss=0.3488 critic_loss=153208448614.4000 entropy=17.4495 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 21780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496688.1 mean_steps=14.0
|
|
[Episode 21790] reward=-121094960.0 actor_loss=0.3031 critic_loss=160766227251.2000 entropy=17.4427 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 21800] reward=-121963617.8 actor_loss=0.3050 critic_loss=164475250688.0000 entropy=17.4287 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 21800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548351.3 mean_steps=14.2
|
|
[Episode 21810] reward=-116155696.4 actor_loss=0.3501 critic_loss=159011276889.0435 entropy=17.4175 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 21820] reward=-120796694.3 actor_loss=0.2497 critic_loss=161153373525.3333 entropy=17.4193 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 21820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426456.7 mean_steps=15.4
|
|
[Episode 21830] reward=-121566279.2 actor_loss=0.2799 critic_loss=162550053819.7333 entropy=17.4143 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 21840] reward=-118471790.9 actor_loss=0.2686 critic_loss=154624619315.2000 entropy=17.4189 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 21840] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-348142.8 mean_steps=16.6
|
|
[Episode 21850] reward=-118880527.6 actor_loss=0.2995 critic_loss=163396171275.3778 entropy=17.4277 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 21860] reward=-119356004.6 actor_loss=0.3250 critic_loss=160072327714.1333 entropy=17.4213 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 21860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492558.8 mean_steps=15.1
|
|
[Episode 21870] reward=-114088480.1 actor_loss=0.2873 critic_loss=148187526667.3778 entropy=17.4303 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 21880] reward=-122857689.2 actor_loss=0.3182 critic_loss=164217184438.0444 entropy=17.4262 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 21880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-598455.6 mean_steps=12.9
|
|
[Episode 21890] reward=-123301740.7 actor_loss=0.2084 critic_loss=161901125222.4000 entropy=17.4127 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 21900] reward=-115402198.2 actor_loss=0.3764 critic_loss=155486925544.7273 entropy=17.3932 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 21900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513957.9 mean_steps=14.1
|
|
[Episode 21910] reward=-118186234.5 actor_loss=0.2786 critic_loss=161801292068.5714 entropy=17.3942 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 21920] reward=-122946982.1 actor_loss=0.2022 critic_loss=169514340937.1429 entropy=17.3861 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 21920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575695.0 mean_steps=13.7
|
|
[Episode 21930] reward=-113068260.3 actor_loss=0.3847 critic_loss=151823609483.6364 entropy=17.3881 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 21940] reward=-121677894.3 actor_loss=0.3611 critic_loss=180619645574.7368 entropy=17.3849 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 21940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-635154.1 mean_steps=12.1
|
|
[Episode 21950] reward=-120379376.8 actor_loss=0.3328 critic_loss=161766991098.3111 entropy=17.3744 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 21960] reward=-116385666.8 actor_loss=0.3178 critic_loss=155218318313.2444 entropy=17.3696 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 21960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-319466.3 mean_steps=16.6
|
|
[Episode 21970] reward=-122128580.9 actor_loss=0.2624 critic_loss=167486060953.6000 entropy=17.3732 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 21980] reward=-115821093.4 actor_loss=0.2726 critic_loss=154862945348.2667 entropy=17.3666 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 21980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559471.1 mean_steps=12.6
|
|
[Episode 21990] reward=-115217875.5 actor_loss=0.2454 critic_loss=146130666746.3111 entropy=17.3712 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 22000] reward=-121131347.8 actor_loss=0.3119 critic_loss=165510896025.6000 entropy=17.3801 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 22000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491335.0 mean_steps=14.3
|
|
[Episode 22010] reward=-119106673.6 actor_loss=0.3029 critic_loss=159708003696.6400 entropy=17.3983 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 22020] reward=-125491090.1 actor_loss=0.2648 critic_loss=167477558567.8222 entropy=17.4090 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 22020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-366230.3 mean_steps=16.1
|
|
[Episode 22030] reward=-118175642.0 actor_loss=0.3030 critic_loss=159720367354.3111 entropy=17.4076 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 22040] reward=-116812219.4 actor_loss=0.3364 critic_loss=149169139438.9333 entropy=17.4087 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 22040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-627144.3 mean_steps=12.6
|
|
[Episode 22050] reward=-119287492.4 actor_loss=0.2932 critic_loss=161032085738.0571 entropy=17.4094 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 22060] reward=-116653348.7 actor_loss=0.3220 critic_loss=159236855853.5111 entropy=17.4090 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 22060] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-690967.5 mean_steps=11.6
|
|
[Episode 22070] reward=-123816130.5 actor_loss=0.3506 critic_loss=168304921258.6667 entropy=17.4154 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 22080] reward=-120130380.1 actor_loss=0.2779 critic_loss=160038359412.3636 entropy=17.4170 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 22080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-584572.3 mean_steps=11.9
|
|
[Episode 22090] reward=-114546726.2 actor_loss=0.4323 critic_loss=148726679688.5333 entropy=17.4264 approx_kl=0.0103 kl_stop=0 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 22100] reward=-121965855.6 actor_loss=0.2483 critic_loss=160701763049.7391 entropy=17.4307 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 22100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447490.2 mean_steps=15.7
|
|
[Episode 22110] reward=-110912714.7 actor_loss=0.3494 critic_loss=146450038363.8974 entropy=17.4362 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 22120] reward=-115745107.3 actor_loss=0.2487 critic_loss=148624969272.8889 entropy=17.4426 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 22120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540824.4 mean_steps=13.6
|
|
[Episode 22130] reward=-120645572.6 actor_loss=0.3111 critic_loss=161757883050.6667 entropy=17.4448 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 22140] reward=-119973243.0 actor_loss=0.2928 critic_loss=157521298063.3600 entropy=17.4429 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 22140] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-274590.5 mean_steps=17.5
|
|
[Episode 22150] reward=-116594675.9 actor_loss=0.2822 critic_loss=157542298596.3243 entropy=17.4451 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 22160] reward=-117103717.1 actor_loss=0.3975 critic_loss=158830596587.5200 entropy=17.4444 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 22160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-513477.1 mean_steps=14.9
|
|
[Episode 22170] reward=-112438234.9 actor_loss=0.3599 critic_loss=149638645532.4445 entropy=17.4507 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 22180] reward=-121722014.2 actor_loss=0.2786 critic_loss=166837222955.8857 entropy=17.4756 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 22180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448994.0 mean_steps=16.1
|
|
[Episode 22190] reward=-120863058.6 actor_loss=0.2986 critic_loss=161491376810.6667 entropy=17.4641 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 22200] reward=-118098675.1 actor_loss=0.2357 critic_loss=156615637583.6444 entropy=17.4650 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 22200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477118.4 mean_steps=15.1
|
|
[Episode 22210] reward=-118296455.0 actor_loss=0.2385 critic_loss=155762433137.7778 entropy=17.4719 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 22220] reward=-117234564.7 actor_loss=0.3227 critic_loss=159037553595.7333 entropy=17.4695 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 22220] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-672523.9 mean_steps=11.4
|
|
[Episode 22230] reward=-115677570.1 actor_loss=0.3126 critic_loss=148224167116.8000 entropy=17.4534 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 22240] reward=-113656052.0 actor_loss=0.2725 critic_loss=149456470447.1579 entropy=17.4587 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 22240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516388.1 mean_steps=13.9
|
|
[Episode 22250] reward=-122096800.6 actor_loss=0.2605 critic_loss=159858983276.0889 entropy=17.4585 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 22260] reward=-120480370.3 actor_loss=0.3960 critic_loss=162909955072.0000 entropy=17.4452 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 22260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520676.6 mean_steps=13.8
|
|
[Episode 22270] reward=-125521041.1 actor_loss=0.2701 critic_loss=171543295426.5600 entropy=17.4585 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 22280] reward=-113355048.7 actor_loss=0.2628 critic_loss=147860345105.0667 entropy=17.4644 approx_kl=0.0112 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 22280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510018.8 mean_steps=14.1
|
|
[Episode 22290] reward=-118824444.7 actor_loss=0.3386 critic_loss=162424853117.1555 entropy=17.4649 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 22300] reward=-115505385.8 actor_loss=0.4798 critic_loss=156876480760.2424 entropy=17.4756 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 22300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455148.8 mean_steps=15.2
|
|
[Episode 22310] reward=-122663280.4 actor_loss=0.3097 critic_loss=160640987648.0000 entropy=17.4814 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 22320] reward=-117688823.7 actor_loss=0.2626 critic_loss=156719342738.2857 entropy=17.5058 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 22320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-371761.9 mean_steps=15.0
|
|
[Episode 22330] reward=-120204323.4 actor_loss=0.3443 critic_loss=160371462144.0000 entropy=17.4972 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 22340] reward=-118952034.6 actor_loss=0.3325 critic_loss=164504718540.8000 entropy=17.4891 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 22340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536373.3 mean_steps=14.2
|
|
[Episode 22350] reward=-120573327.2 actor_loss=0.2563 critic_loss=175041762645.3333 entropy=17.4848 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 22360] reward=-118207424.1 actor_loss=0.3653 critic_loss=151383277203.9111 entropy=17.4901 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 22360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416198.0 mean_steps=15.2
|
|
[Episode 22370] reward=-119008815.0 actor_loss=0.3244 critic_loss=158285332480.0000 entropy=17.5028 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 22380] reward=-118762848.5 actor_loss=0.2903 critic_loss=155601994941.6296 entropy=17.5054 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 22380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475159.3 mean_steps=14.8
|
|
[Episode 22390] reward=-120042062.9 actor_loss=0.3699 critic_loss=161847087377.0667 entropy=17.4975 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 22400] reward=-122609253.9 actor_loss=0.3670 critic_loss=170874037071.4483 entropy=17.4868 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 22400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-467990.4 mean_steps=15.9
|
|
[Episode 22410] reward=-117999240.2 actor_loss=0.2750 critic_loss=163471733555.2000 entropy=17.4782 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 22420] reward=-121616077.1 actor_loss=0.2951 critic_loss=165608512512.0000 entropy=17.4866 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 22420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510320.8 mean_steps=14.1
|
|
[Episode 22430] reward=-117916945.2 actor_loss=0.3361 critic_loss=160822457794.5600 entropy=17.4661 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 22440] reward=-117926713.0 actor_loss=0.3035 critic_loss=165135999522.1333 entropy=17.4957 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 22440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383658.9 mean_steps=15.8
|
|
[Episode 22450] reward=-121548979.4 actor_loss=0.2899 critic_loss=162436863122.2857 entropy=17.4931 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 22460] reward=-123072012.2 actor_loss=0.2830 critic_loss=167165282222.0800 entropy=17.5033 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 22460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465486.3 mean_steps=14.7
|
|
[Episode 22470] reward=-116625204.4 actor_loss=0.2990 critic_loss=158066551974.6977 entropy=17.5187 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 22480] reward=-115795329.2 actor_loss=0.2861 critic_loss=154751566188.0889 entropy=17.5313 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 22480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451074.3 mean_steps=15.2
|
|
[Episode 22490] reward=-115173643.8 actor_loss=0.3206 critic_loss=157481987185.7778 entropy=17.5216 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 22500] reward=-120619624.2 actor_loss=0.3200 critic_loss=154755947269.6889 entropy=17.5166 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 22500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476387.2 mean_steps=15.2
|
|
[Episode 22510] reward=-114955719.1 actor_loss=0.2977 critic_loss=154121224192.0000 entropy=17.5129 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 22520] reward=-120819665.5 actor_loss=0.3694 critic_loss=162839073041.0667 entropy=17.5270 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 22520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-499626.5 mean_steps=15.1
|
|
[Episode 22530] reward=-110299639.3 actor_loss=0.4658 critic_loss=148799627556.5714 entropy=17.5236 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 22540] reward=-122392380.6 actor_loss=0.2044 critic_loss=165310498905.0435 entropy=17.5216 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 22540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479930.2 mean_steps=14.1
|
|
[Episode 22550] reward=-116947708.7 actor_loss=0.3210 critic_loss=161244645052.6316 entropy=17.5046 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 22560] reward=-119133000.7 actor_loss=0.1952 critic_loss=156164474288.3556 entropy=17.4890 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 22560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447821.6 mean_steps=15.6
|
|
[Episode 22570] reward=-115500704.5 actor_loss=0.3780 critic_loss=147979305402.8108 entropy=17.4836 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 22580] reward=-116325789.7 actor_loss=0.3144 critic_loss=152484362103.4667 entropy=17.4879 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 22580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-442779.1 mean_steps=13.8
|
|
[Episode 22590] reward=-123857263.0 actor_loss=0.4186 critic_loss=168299115395.1219 entropy=17.4656 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 22600] reward=-121317296.0 actor_loss=0.2906 critic_loss=153129213952.0000 entropy=17.4530 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 22600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458434.2 mean_steps=15.1
|
|
[Episode 22610] reward=-121870372.4 actor_loss=0.3144 critic_loss=167298566436.5714 entropy=17.4538 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 22620] reward=-119109222.0 actor_loss=0.2910 critic_loss=156758512071.1111 entropy=17.4645 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 22620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464085.2 mean_steps=14.8
|
|
[Episode 22630] reward=-116153501.8 actor_loss=0.3350 critic_loss=156483903297.4884 entropy=17.4704 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 22640] reward=-123205816.8 actor_loss=0.2955 critic_loss=158578259618.3415 entropy=17.4757 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 22640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592291.3 mean_steps=13.8
|
|
[Episode 22650] reward=-125230662.8 actor_loss=0.3271 critic_loss=211266548916.7059 entropy=17.4831 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 22660] reward=-122181226.7 actor_loss=0.2523 critic_loss=164457074315.6364 entropy=17.4797 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 22660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-359949.0 mean_steps=16.1
|
|
[Episode 22670] reward=-115751849.4 actor_loss=0.3611 critic_loss=155567866246.0952 entropy=17.4931 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 22680] reward=-119428925.0 actor_loss=0.2522 critic_loss=153865745997.5757 entropy=17.4885 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 22680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-675986.0 mean_steps=12.1
|
|
[Episode 22690] reward=-116854765.2 actor_loss=0.3610 critic_loss=151925701485.7143 entropy=17.5043 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 22700] reward=-118927453.8 actor_loss=0.3554 critic_loss=159669223033.9048 entropy=17.5030 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 22700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584777.8 mean_steps=13.0
|
|
[Episode 22710] reward=-112387515.9 actor_loss=0.3866 critic_loss=148087234349.9487 entropy=17.4951 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 22720] reward=-115998887.7 actor_loss=0.3606 critic_loss=153666259416.6154 entropy=17.5137 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 22720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446217.5 mean_steps=15.7
|
|
[Episode 22730] reward=-120856945.9 actor_loss=0.3142 critic_loss=155793247744.0000 entropy=17.5167 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 22740] reward=-119470774.4 actor_loss=0.2129 critic_loss=153327808967.1111 entropy=17.5288 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 22740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-362889.4 mean_steps=17.4
|
|
[Episode 22750] reward=-120869057.9 actor_loss=0.3118 critic_loss=155248421794.9091 entropy=17.5291 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 22760] reward=-118602824.4 actor_loss=0.3208 critic_loss=156314585682.5807 entropy=17.5138 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 22760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482154.0 mean_steps=14.9
|
|
[Episode 22770] reward=-118434734.2 actor_loss=0.3455 critic_loss=159224523434.6667 entropy=17.5187 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 22780] reward=-117069027.6 actor_loss=0.3869 critic_loss=154353684480.0000 entropy=17.5238 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 22780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527474.9 mean_steps=14.4
|
|
[Episode 22790] reward=-117376258.5 actor_loss=0.3038 critic_loss=156165458056.5333 entropy=17.5265 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 22800] reward=-120303199.1 actor_loss=0.1663 critic_loss=153172829070.2222 entropy=17.5309 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 22800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-625755.6 mean_steps=13.0
|
|
[Episode 22810] reward=-113875371.5 actor_loss=0.3620 critic_loss=153603579576.3200 entropy=17.5341 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 22820] reward=-121405567.1 actor_loss=0.2540 critic_loss=161384169472.0000 entropy=17.5340 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 22820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-403383.6 mean_steps=16.4
|
|
[Episode 22830] reward=-116617146.8 actor_loss=0.4117 critic_loss=158659473993.1429 entropy=17.5423 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 22840] reward=-119268872.6 actor_loss=0.3498 critic_loss=158803179019.3778 entropy=17.5580 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 22840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-605952.6 mean_steps=13.1
|
|
[Episode 22850] reward=-117016147.2 actor_loss=0.2295 critic_loss=154380162522.5366 entropy=17.5587 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 22860] reward=-119300024.5 actor_loss=0.3482 critic_loss=157571930885.6889 entropy=17.5483 approx_kl=0.0106 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 22860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395440.1 mean_steps=16.4
|
|
[Episode 22870] reward=-118372483.9 actor_loss=0.2946 critic_loss=151044392125.6296 entropy=17.5277 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 22880] reward=-116786316.2 actor_loss=0.2798 critic_loss=155773453016.1778 entropy=17.5290 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 22880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428385.1 mean_steps=15.8
|
|
[Episode 22890] reward=-120094584.8 actor_loss=0.3908 critic_loss=159694173070.2222 entropy=17.5277 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 22900] reward=-116725557.6 actor_loss=0.2596 critic_loss=151794881565.2571 entropy=17.5143 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 22900] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-316938.1 mean_steps=17.8
|
|
[Episode 22910] reward=-117877820.0 actor_loss=0.3098 critic_loss=155510829371.0769 entropy=17.5180 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 22920] reward=-120517865.7 actor_loss=0.2157 critic_loss=164109671876.4651 entropy=17.5121 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 22920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463555.1 mean_steps=15.7
|
|
[Episode 22930] reward=-122926787.6 actor_loss=0.3622 critic_loss=167009657939.0270 entropy=17.5022 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 22940] reward=-119765649.9 actor_loss=0.3413 critic_loss=157512711134.9677 entropy=17.4973 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 22940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456711.1 mean_steps=14.8
|
|
[Episode 22950] reward=-118946226.6 actor_loss=0.3417 critic_loss=155128117036.1379 entropy=17.5027 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 22960] reward=-118307901.1 actor_loss=0.2870 critic_loss=155995420171.3778 entropy=17.5010 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 22960] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-238718.3 mean_steps=17.3
|
|
[Episode 22970] reward=-118192432.8 actor_loss=0.2867 critic_loss=151326838784.0000 entropy=17.5055 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 22980] reward=-114920339.0 actor_loss=0.2555 critic_loss=162960742649.7561 entropy=17.4975 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 22980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444952.1 mean_steps=15.7
|
|
[Episode 22990] reward=-118277390.7 actor_loss=0.2503 critic_loss=152586031104.0000 entropy=17.4967 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 23000] reward=-119567990.9 actor_loss=0.1943 critic_loss=152016061629.6296 entropy=17.4940 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 23000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586916.7 mean_steps=12.4
|
|
[Episode 23010] reward=-118881068.6 actor_loss=0.3507 critic_loss=153629621101.7143 entropy=17.4969 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 23020] reward=-122047179.1 actor_loss=0.3368 critic_loss=160481974539.1304 entropy=17.4963 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 23020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-574970.6 mean_steps=12.8
|
|
[Episode 23030] reward=-122370286.4 actor_loss=0.2326 critic_loss=161643468273.3714 entropy=17.5057 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 23040] reward=-113900772.4 actor_loss=0.2943 critic_loss=149699444371.9111 entropy=17.5027 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 23040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517106.4 mean_steps=14.5
|
|
[Episode 23050] reward=-122721059.0 actor_loss=0.1824 critic_loss=163534947793.4546 entropy=17.5177 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 23060] reward=-119941204.3 actor_loss=0.1431 critic_loss=162667312128.0000 entropy=17.5087 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 23060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-506297.2 mean_steps=13.6
|
|
[Episode 23070] reward=-121416915.8 actor_loss=0.2079 critic_loss=160727836113.4546 entropy=17.5105 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 23080] reward=-122211678.9 actor_loss=0.2119 critic_loss=163000592856.6154 entropy=17.5045 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 23080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562884.0 mean_steps=12.8
|
|
[Episode 23090] reward=-113871970.9 actor_loss=0.3759 critic_loss=148738544981.3333 entropy=17.4869 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 23100] reward=-116166731.7 actor_loss=0.2800 critic_loss=154368262144.0000 entropy=17.4925 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 23100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-481352.1 mean_steps=13.1
|
|
[Episode 23110] reward=-112240646.2 actor_loss=0.2643 critic_loss=144717497463.0698 entropy=17.4897 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 23120] reward=-112735230.5 actor_loss=0.3374 critic_loss=147643717677.5111 entropy=17.5024 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 23120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-410350.5 mean_steps=16.9
|
|
[Episode 23130] reward=-118019572.4 actor_loss=0.2772 critic_loss=156642669727.2889 entropy=17.5092 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 23140] reward=-116550402.8 actor_loss=0.2550 critic_loss=153857934622.7200 entropy=17.5137 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 23140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526208.0 mean_steps=13.5
|
|
[Episode 23150] reward=-118343297.5 actor_loss=0.3158 critic_loss=157469890048.0000 entropy=17.5176 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 23160] reward=-119647009.7 actor_loss=0.3130 critic_loss=161386113706.6667 entropy=17.5017 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 23160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-521512.6 mean_steps=13.6
|
|
[Episode 23170] reward=-109432292.8 actor_loss=0.3878 critic_loss=142512161314.1333 entropy=17.4840 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 23180] reward=-117200135.5 actor_loss=0.2649 critic_loss=154329704220.4445 entropy=17.4754 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 23180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-358128.9 mean_steps=15.8
|
|
[Episode 23190] reward=-119129275.0 actor_loss=0.2845 critic_loss=156130222080.0000 entropy=17.4625 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 23200] reward=-111787911.0 actor_loss=0.3026 critic_loss=145016803643.0769 entropy=17.4475 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 23200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487037.7 mean_steps=13.8
|
|
[Episode 23210] reward=-121195379.1 actor_loss=0.2741 critic_loss=157876272311.7949 entropy=17.4476 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 23220] reward=-114743440.4 actor_loss=0.3529 critic_loss=155869330242.3704 entropy=17.4389 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 23220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-574646.8 mean_steps=12.7
|
|
[Episode 23230] reward=-121360300.8 actor_loss=0.2794 critic_loss=157284357334.3256 entropy=17.4310 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 23240] reward=-113698196.4 actor_loss=0.3040 critic_loss=146636449450.6667 entropy=17.4190 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 23240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492571.4 mean_steps=14.9
|
|
[Episode 23250] reward=-117934755.1 actor_loss=0.2724 critic_loss=152665464285.8667 entropy=17.4176 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 23260] reward=-119521151.8 actor_loss=0.3306 critic_loss=158746074453.3333 entropy=17.4384 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 23260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475597.2 mean_steps=14.2
|
|
[Episode 23270] reward=-116530239.6 actor_loss=0.3385 critic_loss=155634210861.5111 entropy=17.4318 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 23280] reward=-117844150.7 actor_loss=0.1801 critic_loss=156750982758.4000 entropy=17.4478 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 23280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599253.2 mean_steps=12.8
|
|
[Episode 23290] reward=-118956777.5 actor_loss=0.3721 critic_loss=157111858517.3333 entropy=17.4454 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 23300] reward=-119003829.6 actor_loss=0.2152 critic_loss=156473738308.2667 entropy=17.4488 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 23300] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-608367.1 mean_steps=12.2
|
|
[Episode 23310] reward=-114743875.5 actor_loss=0.3814 critic_loss=153742881923.2820 entropy=17.4659 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 23320] reward=-118170010.1 actor_loss=0.3297 critic_loss=158923557213.6585 entropy=17.4790 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 23320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-370290.0 mean_steps=15.6
|
|
[Episode 23330] reward=-115890562.8 actor_loss=0.3597 critic_loss=149970760704.0000 entropy=17.4816 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 23340] reward=-121151625.3 actor_loss=0.3765 critic_loss=163895569612.8000 entropy=17.5022 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 23340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478215.6 mean_steps=14.8
|
|
[Episode 23350] reward=-119604688.3 actor_loss=0.3590 critic_loss=161356853096.2963 entropy=17.5023 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 23360] reward=-118047184.1 actor_loss=0.3164 critic_loss=156859430502.4000 entropy=17.4947 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 23360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483576.2 mean_steps=14.9
|
|
[Episode 23370] reward=-118781993.3 actor_loss=0.2658 critic_loss=153972569819.4286 entropy=17.4860 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 23380] reward=-120021958.0 actor_loss=0.2304 critic_loss=159843671799.7419 entropy=17.4889 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 23380] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-662529.9 mean_steps=12.0
|
|
[Episode 23390] reward=-120291906.6 actor_loss=0.2784 critic_loss=154832763835.7333 entropy=17.4939 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 23400] reward=-125970284.2 actor_loss=0.2093 critic_loss=170060320061.7931 entropy=17.5189 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 23400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468539.4 mean_steps=14.7
|
|
[Episode 23410] reward=-114922441.7 actor_loss=0.2836 critic_loss=151349539635.2000 entropy=17.5417 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 23420] reward=-117662028.2 actor_loss=0.3005 critic_loss=159439148646.4000 entropy=17.5423 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 23420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-425968.5 mean_steps=14.4
|
|
[Episode 23430] reward=-122272803.9 actor_loss=0.2602 critic_loss=168146213741.7143 entropy=17.5358 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 23440] reward=-118813117.8 actor_loss=0.2742 critic_loss=162695508650.6667 entropy=17.5138 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 23440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485825.7 mean_steps=14.0
|
|
[Episode 23450] reward=-119686512.9 actor_loss=0.1893 critic_loss=155757384681.2444 entropy=17.5083 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 23460] reward=-117390233.4 actor_loss=0.2754 critic_loss=153978776598.7556 entropy=17.5237 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 23460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-509733.3 mean_steps=12.9
|
|
[Episode 23470] reward=-119824167.8 actor_loss=0.3385 critic_loss=160081365835.2941 entropy=17.5218 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 23480] reward=-117979620.7 actor_loss=0.3235 critic_loss=156977780417.4222 entropy=17.5347 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 23480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547742.4 mean_steps=14.4
|
|
[Episode 23490] reward=-124188334.6 actor_loss=0.2664 critic_loss=160555942980.2667 entropy=17.5364 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 23500] reward=-119703559.3 actor_loss=0.2778 critic_loss=154404167680.0000 entropy=17.5352 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 23500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469588.5 mean_steps=15.1
|
|
[Episode 23510] reward=-121635840.8 actor_loss=0.3524 critic_loss=168237611235.5555 entropy=17.5290 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 23520] reward=-119970647.5 actor_loss=0.2988 critic_loss=159797110010.3111 entropy=17.5148 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 23520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543769.6 mean_steps=13.3
|
|
[Episode 23530] reward=-116726924.9 actor_loss=0.2812 critic_loss=157257646080.0000 entropy=17.5083 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 23540] reward=-117134580.8 actor_loss=0.2826 critic_loss=159325741627.5349 entropy=17.5023 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 23540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-605143.0 mean_steps=12.9
|
|
[Episode 23550] reward=-119225387.9 actor_loss=0.2536 critic_loss=160824524071.8222 entropy=17.5042 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 23560] reward=-112409411.4 actor_loss=0.4018 critic_loss=148690843511.4667 entropy=17.4882 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 23560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473298.1 mean_steps=13.8
|
|
[Episode 23570] reward=-117749495.6 actor_loss=0.3347 critic_loss=156248238266.1818 entropy=17.4996 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 23580] reward=-116109511.9 actor_loss=0.3683 critic_loss=157046319349.7600 entropy=17.4935 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 23580] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-341100.7 mean_steps=17.5
|
|
[Episode 23590] reward=-121273412.4 actor_loss=0.3111 critic_loss=177468999403.2433 entropy=17.4726 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 23600] reward=-119095411.9 actor_loss=0.2855 critic_loss=173557683293.0909 entropy=17.4584 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 23600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433930.5 mean_steps=14.9
|
|
[Episode 23610] reward=-120324087.1 actor_loss=0.2800 critic_loss=160746870708.1482 entropy=17.4589 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 23620] reward=-120186994.5 actor_loss=0.3605 critic_loss=196047278080.0000 entropy=17.4662 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 23620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547988.4 mean_steps=14.7
|
|
[Episode 23630] reward=-125270169.4 actor_loss=0.3640 critic_loss=179936358563.8400 entropy=17.4548 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 23640] reward=-115283810.5 actor_loss=0.4450 critic_loss=163475200682.6667 entropy=17.4660 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 23640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-527698.1 mean_steps=15.8
|
|
[Episode 23650] reward=-118109635.6 actor_loss=0.3351 critic_loss=151870746038.8571 entropy=17.4577 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 23660] reward=-116720142.8 actor_loss=0.3029 critic_loss=156685866037.8947 entropy=17.4413 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 23660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551053.7 mean_steps=13.3
|
|
[Episode 23670] reward=-118425907.4 actor_loss=0.3505 critic_loss=153302237964.1905 entropy=17.4534 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 23680] reward=-121246044.3 actor_loss=0.3842 critic_loss=164800604842.6667 entropy=17.4555 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 23680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462887.5 mean_steps=13.7
|
|
[Episode 23690] reward=-118274606.8 actor_loss=0.2327 critic_loss=155955501862.7879 entropy=17.4619 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 23700] reward=-121709953.4 actor_loss=0.2431 critic_loss=169914931758.5454 entropy=17.4716 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 23700] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-263425.6 mean_steps=17.7
|
|
[Episode 23710] reward=-120348310.2 actor_loss=0.2492 critic_loss=160713734467.3684 entropy=17.4665 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 23720] reward=-117784150.2 actor_loss=0.3145 critic_loss=154481652895.2889 entropy=17.4717 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 23720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550744.9 mean_steps=13.2
|
|
[Episode 23730] reward=-122004198.7 actor_loss=0.2484 critic_loss=162502786291.8095 entropy=17.4853 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 23740] reward=-121250385.0 actor_loss=0.2305 critic_loss=159326301835.6364 entropy=17.4974 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 23740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574704.3 mean_steps=13.6
|
|
[Episode 23750] reward=-114070639.0 actor_loss=0.3810 critic_loss=151310429835.6364 entropy=17.5034 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 23760] reward=-120206031.4 actor_loss=0.3266 critic_loss=161675639011.5555 entropy=17.4985 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 23760] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-309940.0 mean_steps=18.2
|
|
[Episode 23770] reward=-114686529.0 actor_loss=0.3144 critic_loss=149127722077.0909 entropy=17.4985 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 23780] reward=-117585363.5 actor_loss=0.4119 critic_loss=160531564134.4000 entropy=17.5156 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 23780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397185.1 mean_steps=15.4
|
|
[Episode 23790] reward=-124796452.3 actor_loss=0.2557 critic_loss=169359392256.0000 entropy=17.5003 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 23800] reward=-119036596.9 actor_loss=0.2889 critic_loss=159872404257.3913 entropy=17.4917 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 23800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518329.0 mean_steps=15.1
|
|
[Episode 23810] reward=-124830610.9 actor_loss=0.2564 critic_loss=164443842150.4000 entropy=17.4975 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 23820] reward=-117048865.9 actor_loss=0.2782 critic_loss=150113787576.3200 entropy=17.5003 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 23820] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-666042.6 mean_steps=11.5
|
|
[Episode 23830] reward=-124478724.6 actor_loss=0.1859 critic_loss=168221166933.3333 entropy=17.4994 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 23840] reward=-122290756.4 actor_loss=0.2638 critic_loss=159208423424.0000 entropy=17.4994 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 23840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469722.5 mean_steps=14.1
|
|
[Episode 23850] reward=-116606401.1 actor_loss=0.2553 critic_loss=158983048819.6129 entropy=17.5062 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 23860] reward=-119819385.7 actor_loss=0.3287 critic_loss=160338355497.2903 entropy=17.5022 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 23860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-617216.3 mean_steps=13.9
|
|
[Episode 23870] reward=-114802867.4 actor_loss=0.3593 critic_loss=156000232745.2903 entropy=17.5151 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 23880] reward=-122205086.0 actor_loss=0.3436 critic_loss=164186360490.6667 entropy=17.5229 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 23880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496657.4 mean_steps=14.1
|
|
[Episode 23890] reward=-121086439.2 actor_loss=0.2714 critic_loss=177039182506.6667 entropy=17.5262 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 23900] reward=-122810038.3 actor_loss=0.2422 critic_loss=166581559777.8824 entropy=17.5061 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 23900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429071.6 mean_steps=15.6
|
|
[Episode 23910] reward=-111950776.9 actor_loss=0.2862 critic_loss=150285330139.4286 entropy=17.5133 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 23920] reward=-109514442.4 actor_loss=0.4325 critic_loss=141544382281.9556 entropy=17.5154 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 23920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548698.2 mean_steps=14.2
|
|
[Episode 23930] reward=-123511023.6 actor_loss=0.2223 critic_loss=162725675372.0889 entropy=17.5216 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 23940] reward=-119868650.7 actor_loss=0.2136 critic_loss=166242979748.9778 entropy=17.5079 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 23940] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-384205.5 mean_steps=17.2
|
|
[Episode 23950] reward=-118506335.0 actor_loss=0.2956 critic_loss=153965409426.2857 entropy=17.5247 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 23960] reward=-117896978.0 actor_loss=0.2248 critic_loss=155446319261.5385 entropy=17.5018 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 23960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-545363.4 mean_steps=12.7
|
|
[Episode 23970] reward=-111113199.3 actor_loss=0.3711 critic_loss=147444019456.0000 entropy=17.5164 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 23980] reward=-121767579.7 actor_loss=0.2752 critic_loss=171557929672.3478 entropy=17.5275 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 23980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-610101.6 mean_steps=12.1
|
|
[Episode 23990] reward=-118273825.6 actor_loss=0.2245 critic_loss=158324340443.4286 entropy=17.5309 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 24000] reward=-120137643.1 actor_loss=0.3555 critic_loss=192190061992.5854 entropy=17.5367 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 24000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-577984.2 mean_steps=14.7
|
|
[Episode 24010] reward=-120462692.9 actor_loss=0.3252 critic_loss=182135820503.5789 entropy=17.5362 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 24020] reward=-116641966.8 actor_loss=0.3364 critic_loss=155790749286.4000 entropy=17.5352 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 24020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468150.3 mean_steps=14.9
|
|
[Episode 24030] reward=-119441810.6 actor_loss=0.3194 critic_loss=158239872705.4222 entropy=17.5348 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 24040] reward=-117224877.6 actor_loss=0.2635 critic_loss=153364089514.6667 entropy=17.5276 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 24040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-626203.2 mean_steps=13.7
|
|
[Episode 24050] reward=-116396294.5 actor_loss=0.3775 critic_loss=155219188314.3529 entropy=17.5345 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 24060] reward=-119594895.2 actor_loss=0.3080 critic_loss=159996334592.0000 entropy=17.5399 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 24060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553078.1 mean_steps=13.3
|
|
[Episode 24070] reward=-119941977.8 actor_loss=0.3265 critic_loss=155116647947.3778 entropy=17.5261 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 24080] reward=-119362990.3 actor_loss=0.2573 critic_loss=158512433652.6222 entropy=17.5213 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 24080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415608.5 mean_steps=16.8
|
|
[Episode 24090] reward=-117869051.3 actor_loss=0.4161 critic_loss=151154666882.8445 entropy=17.5290 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 24100] reward=-119554107.6 actor_loss=0.2546 critic_loss=156669974937.6000 entropy=17.5490 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 24100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517555.4 mean_steps=15.7
|
|
[Episode 24110] reward=-113606594.1 actor_loss=0.3292 critic_loss=152036226463.1351 entropy=17.5516 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 24120] reward=-123084069.0 actor_loss=0.1987 critic_loss=161234675513.8065 entropy=17.5617 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 24120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508551.9 mean_steps=14.1
|
|
[Episode 24130] reward=-114960600.8 actor_loss=0.3109 critic_loss=145195460096.0000 entropy=17.5588 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 24140] reward=-119584899.1 actor_loss=0.2809 critic_loss=155969352386.2069 entropy=17.5736 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 24140] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-386075.1 mean_steps=17.2
|
|
[Episode 24150] reward=-115510457.8 actor_loss=0.3486 critic_loss=149866212693.3333 entropy=17.5645 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 24160] reward=-116701957.8 actor_loss=0.3447 critic_loss=157252796416.0000 entropy=17.5444 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 24160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-476127.4 mean_steps=15.9
|
|
[Episode 24170] reward=-124918073.9 actor_loss=0.3605 critic_loss=170204864229.5172 entropy=17.5624 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 24180] reward=-117162746.3 actor_loss=0.3605 critic_loss=156828802486.8571 entropy=17.5612 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 24180] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-262491.8 mean_steps=17.1
|
|
[Episode 24190] reward=-120394474.4 actor_loss=0.3030 critic_loss=161832481414.7368 entropy=17.5804 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 24200] reward=-116631761.8 actor_loss=0.3202 critic_loss=154265727067.0222 entropy=17.5698 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 24200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515644.8 mean_steps=14.2
|
|
[Episode 24210] reward=-119236035.6 actor_loss=0.3634 critic_loss=160114308437.3333 entropy=17.5709 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 24220] reward=-123098966.9 actor_loss=0.2375 critic_loss=165176025998.2222 entropy=17.5775 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 24220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-529727.8 mean_steps=15.2
|
|
[Episode 24230] reward=-112906167.3 actor_loss=0.4273 critic_loss=149533926636.3077 entropy=17.5683 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 24240] reward=-120367011.3 actor_loss=0.3327 critic_loss=158710022144.0000 entropy=17.5740 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 24240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-502837.2 mean_steps=13.5
|
|
[Episode 24250] reward=-118954787.2 actor_loss=0.2272 critic_loss=153938395136.0000 entropy=17.5771 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 24260] reward=-118672949.1 actor_loss=0.3883 critic_loss=157756889497.6000 entropy=17.5529 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 24260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497250.8 mean_steps=14.5
|
|
[Episode 24270] reward=-120430594.5 actor_loss=0.2871 critic_loss=155319005424.9412 entropy=17.5593 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 24280] reward=-117765251.7 actor_loss=0.2913 critic_loss=154111971601.0667 entropy=17.5746 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 24280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542973.4 mean_steps=13.6
|
|
[Episode 24290] reward=-121308379.1 actor_loss=0.2758 critic_loss=166731645690.0465 entropy=17.5642 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 24300] reward=-120563128.2 actor_loss=0.1684 critic_loss=164063908598.5185 entropy=17.5610 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 24300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-546915.1 mean_steps=14.3
|
|
[Episode 24310] reward=-121338898.2 actor_loss=0.3882 critic_loss=164523535892.4800 entropy=17.5693 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 24320] reward=-121022803.5 actor_loss=0.3204 critic_loss=161376775606.8571 entropy=17.5889 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 24320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453903.1 mean_steps=15.5
|
|
[Episode 24330] reward=-114406007.9 actor_loss=0.3910 critic_loss=155556435285.3333 entropy=17.5843 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 24340] reward=-112686488.3 actor_loss=0.4218 critic_loss=148327018222.9333 entropy=17.5886 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 24340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485176.6 mean_steps=14.1
|
|
[Episode 24350] reward=-118936513.5 actor_loss=0.1686 critic_loss=154800929359.6444 entropy=17.5721 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 24360] reward=-116721289.5 actor_loss=0.3047 critic_loss=150513579101.0909 entropy=17.5680 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 24360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447210.9 mean_steps=15.1
|
|
[Episode 24370] reward=-120120231.5 actor_loss=0.3113 critic_loss=161020377575.6190 entropy=17.5816 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 24380] reward=-114406276.6 actor_loss=0.3450 critic_loss=150119550884.9778 entropy=17.5922 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 24380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529964.1 mean_steps=13.6
|
|
[Episode 24390] reward=-117094445.5 actor_loss=0.3747 critic_loss=162691779546.0741 entropy=17.5803 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 24400] reward=-125740075.9 actor_loss=0.1194 critic_loss=165740536320.0000 entropy=17.5876 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 24400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-413848.6 mean_steps=16.6
|
|
[Episode 24410] reward=-120990684.2 actor_loss=0.2123 critic_loss=157191163904.0000 entropy=17.5726 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 24420] reward=-118930204.3 actor_loss=0.3570 critic_loss=158823704478.4762 entropy=17.5774 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 24420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-507327.3 mean_steps=15.8
|
|
[Episode 24430] reward=-118863794.0 actor_loss=0.1600 critic_loss=156507126637.7143 entropy=17.5746 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 24440] reward=-122689549.4 actor_loss=0.2597 critic_loss=160012794760.9302 entropy=17.5834 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 24440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427398.8 mean_steps=15.3
|
|
[Episode 24450] reward=-119644480.2 actor_loss=0.2194 critic_loss=159118364945.0667 entropy=17.5858 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 24460] reward=-116126681.0 actor_loss=0.2377 critic_loss=152607076807.1111 entropy=17.5751 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 24460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-457478.5 mean_steps=14.1
|
|
[Episode 24470] reward=-115822981.8 actor_loss=0.3096 critic_loss=157378605147.0222 entropy=17.5925 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 24480] reward=-114734596.0 actor_loss=0.2121 critic_loss=156910954682.1818 entropy=17.5919 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 24480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-428010.5 mean_steps=16.2
|
|
[Episode 24490] reward=-118176281.2 actor_loss=0.2747 critic_loss=154765632821.5814 entropy=17.6099 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 24500] reward=-113551190.2 actor_loss=0.3304 critic_loss=148380777130.6667 entropy=17.6022 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 24500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425271.0 mean_steps=15.8
|
|
[Episode 24510] reward=-118562369.8 actor_loss=0.2670 critic_loss=152285321216.0000 entropy=17.5950 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 24520] reward=-122554747.3 actor_loss=0.2924 critic_loss=159991633474.7826 entropy=17.6067 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 24520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500144.3 mean_steps=15.2
|
|
[Episode 24530] reward=-120775832.1 actor_loss=0.2932 critic_loss=162593137095.1111 entropy=17.6198 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 24540] reward=-121692668.1 actor_loss=0.2471 critic_loss=164820268694.5882 entropy=17.6244 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 24540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-658401.4 mean_steps=12.4
|
|
[Episode 24550] reward=-122162366.5 actor_loss=0.1893 critic_loss=159012083757.5111 entropy=17.6259 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 24560] reward=-120050808.7 actor_loss=0.2775 critic_loss=159911996451.3103 entropy=17.6330 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 24560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413258.6 mean_steps=15.2
|
|
[Episode 24570] reward=-118847009.0 actor_loss=0.3331 critic_loss=157699884100.2667 entropy=17.6156 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 24580] reward=-121665866.9 actor_loss=0.2672 critic_loss=159729653077.3333 entropy=17.6080 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 24580] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-343389.1 mean_steps=16.6
|
|
[Episode 24590] reward=-119934326.7 actor_loss=0.2270 critic_loss=160902981586.4889 entropy=17.5997 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 24600] reward=-120750341.0 actor_loss=0.2577 critic_loss=156823674631.7576 entropy=17.5939 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 24600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-438648.6 mean_steps=16.6
|
|
[Episode 24610] reward=-117321629.3 actor_loss=0.4398 critic_loss=159268524889.3023 entropy=17.5940 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 24620] reward=-120849932.0 actor_loss=0.2648 critic_loss=157950523255.4667 entropy=17.5911 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 24620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532180.6 mean_steps=13.3
|
|
[Episode 24630] reward=-115910795.4 actor_loss=0.2890 critic_loss=149512479703.0400 entropy=17.5987 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 24640] reward=-119861314.4 actor_loss=0.3164 critic_loss=163035508736.0000 entropy=17.6038 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 24640] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-675400.9 mean_steps=11.4
|
|
[Episode 24650] reward=-120895859.8 actor_loss=0.2988 critic_loss=155631202152.2963 entropy=17.5875 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 24660] reward=-116674140.8 actor_loss=0.3148 critic_loss=149287023616.0000 entropy=17.5871 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 24660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456579.3 mean_steps=14.9
|
|
[Episode 24670] reward=-119793366.1 actor_loss=0.2616 critic_loss=164150504834.8445 entropy=17.5883 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 24680] reward=-117579918.0 actor_loss=0.3386 critic_loss=151898190530.2069 entropy=17.5913 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 24680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474354.8 mean_steps=14.8
|
|
[Episode 24690] reward=-118644863.5 actor_loss=0.2968 critic_loss=160010993664.0000 entropy=17.5774 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 24700] reward=-118303070.5 actor_loss=0.2858 critic_loss=155145999337.2444 entropy=17.5683 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 24700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-447520.4 mean_steps=16.9
|
|
[Episode 24710] reward=-122211274.3 actor_loss=0.2388 critic_loss=162798682112.0000 entropy=17.5672 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 24720] reward=-121405677.7 actor_loss=0.3552 critic_loss=193151899693.5111 entropy=17.5630 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 24720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-375097.5 mean_steps=15.8
|
|
[Episode 24730] reward=-121028603.3 actor_loss=0.2507 critic_loss=158835140853.7600 entropy=17.5708 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 24740] reward=-116221104.6 actor_loss=0.4661 critic_loss=152224251221.3333 entropy=17.5586 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 24740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-360777.6 mean_steps=16.2
|
|
[Episode 24750] reward=-119632513.0 actor_loss=0.3225 critic_loss=156529265019.2592 entropy=17.5635 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 24760] reward=-120298540.4 actor_loss=0.1835 critic_loss=158493311606.1538 entropy=17.5608 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 24760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474810.8 mean_steps=14.8
|
|
[Episode 24770] reward=-119699650.0 actor_loss=0.3536 critic_loss=160808444152.2424 entropy=17.5483 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 24780] reward=-119695607.7 actor_loss=0.2474 critic_loss=156816503490.2069 entropy=17.5588 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 24780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565986.6 mean_steps=13.8
|
|
[Episode 24790] reward=-115293094.4 actor_loss=0.3391 critic_loss=151699483096.6154 entropy=17.5591 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 24800] reward=-121120545.7 actor_loss=0.3175 critic_loss=157175164436.4800 entropy=17.5655 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 24800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-600837.3 mean_steps=12.1
|
|
[Episode 24810] reward=-119346990.5 actor_loss=0.2476 critic_loss=150224235490.7429 entropy=17.5961 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 24820] reward=-122638098.0 actor_loss=0.2622 critic_loss=160331209339.5862 entropy=17.6022 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 24820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-410242.8 mean_steps=17.0
|
|
[Episode 24830] reward=-125272242.3 actor_loss=0.1965 critic_loss=165778336426.6667 entropy=17.5864 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 24840] reward=-119714814.8 actor_loss=0.3016 critic_loss=157659229317.5652 entropy=17.5920 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 24840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397831.2 mean_steps=15.4
|
|
[Episode 24850] reward=-120974402.8 actor_loss=0.3288 critic_loss=160938817859.3684 entropy=17.5835 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 24860] reward=-121662866.3 actor_loss=0.2489 critic_loss=164211016499.2000 entropy=17.5790 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 24860] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-361432.6 mean_steps=17.4
|
|
[Episode 24870] reward=-126748560.8 actor_loss=0.2195 critic_loss=163351021080.3810 entropy=17.5788 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 24880] reward=-121789297.1 actor_loss=0.2566 critic_loss=161020479624.5333 entropy=17.5866 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 24880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416619.0 mean_steps=15.8
|
|
[Episode 24890] reward=-120185929.8 actor_loss=0.2596 critic_loss=154784623274.6667 entropy=17.5851 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 24900] reward=-124052168.4 actor_loss=0.2725 critic_loss=163622728797.0909 entropy=17.5864 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 24900] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-758019.0 mean_steps=10.0
|
|
[Episode 24910] reward=-124675629.7 actor_loss=0.2548 critic_loss=165384522020.5714 entropy=17.5742 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 24920] reward=-119514412.5 actor_loss=0.2487 critic_loss=158962358998.7097 entropy=17.5805 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 24920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-424590.3 mean_steps=16.4
|
|
[Episode 24930] reward=-120281717.2 actor_loss=0.2933 critic_loss=157391825942.7556 entropy=17.5677 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 24940] reward=-119476636.9 actor_loss=0.2867 critic_loss=152918645800.9600 entropy=17.5657 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 24940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568099.9 mean_steps=13.7
|
|
[Episode 24950] reward=-121230230.9 actor_loss=0.3073 critic_loss=164646367547.0769 entropy=17.5636 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 24960] reward=-114166272.9 actor_loss=0.3679 critic_loss=150411751318.9744 entropy=17.5399 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 24960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481895.0 mean_steps=15.1
|
|
[Episode 24970] reward=-115983002.2 actor_loss=0.2916 critic_loss=152378856945.3714 entropy=17.5214 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 24980] reward=-115855586.1 actor_loss=0.3563 critic_loss=154379872665.6000 entropy=17.5187 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 24980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485920.6 mean_steps=14.2
|
|
[Episode 24990] reward=-120077248.4 actor_loss=0.2976 critic_loss=166262409323.7895 entropy=17.5232 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 25000] reward=-121636717.8 actor_loss=0.3804 critic_loss=158922832164.5714 entropy=17.5208 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 25000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493475.1 mean_steps=15.2
|
|
[Episode 25010] reward=-119133028.4 actor_loss=0.3520 critic_loss=154725080905.9556 entropy=17.5144 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 25020] reward=-119902346.6 actor_loss=0.2928 critic_loss=173372108526.9333 entropy=17.5056 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 25020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-306870.5 mean_steps=16.8
|
|
[Episode 25030] reward=-122757039.4 actor_loss=0.2681 critic_loss=158148748585.2903 entropy=17.5045 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 25040] reward=-113866918.9 actor_loss=0.2884 critic_loss=146737667739.8261 entropy=17.4964 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 25040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506459.6 mean_steps=14.4
|
|
[Episode 25050] reward=-118014720.3 actor_loss=0.2032 critic_loss=153046336000.0000 entropy=17.4969 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 25060] reward=-121107531.5 actor_loss=0.3067 critic_loss=158933046325.8947 entropy=17.4891 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 25060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443314.3 mean_steps=15.4
|
|
[Episode 25070] reward=-117440666.7 actor_loss=0.3337 critic_loss=153748309392.6956 entropy=17.4781 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 25080] reward=-118432994.0 actor_loss=0.3335 critic_loss=158827544576.0000 entropy=17.4866 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 25080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428602.5 mean_steps=15.0
|
|
[Episode 25090] reward=-121637447.3 actor_loss=0.2597 critic_loss=156238760025.0435 entropy=17.4934 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 25100] reward=-124299770.7 actor_loss=0.2393 critic_loss=166928595899.7333 entropy=17.4856 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 25100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417188.5 mean_steps=15.3
|
|
[Episode 25110] reward=-113751309.5 actor_loss=0.4024 critic_loss=151415659633.7778 entropy=17.4742 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 25120] reward=-121412322.1 actor_loss=0.3111 critic_loss=166423872658.2857 entropy=17.4808 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 25120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532429.0 mean_steps=14.4
|
|
[Episode 25130] reward=-112757763.3 actor_loss=0.2742 critic_loss=146744883791.6444 entropy=17.4799 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 25140] reward=-118425107.6 actor_loss=0.2727 critic_loss=154988800682.6667 entropy=17.4825 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 25140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526163.1 mean_steps=14.3
|
|
[Episode 25150] reward=-117430349.1 actor_loss=0.3355 critic_loss=150076939013.6889 entropy=17.4993 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 25160] reward=-118696903.1 actor_loss=0.2648 critic_loss=155550061621.8947 entropy=17.4850 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 25160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-497567.0 mean_steps=13.3
|
|
[Episode 25170] reward=-116990326.6 actor_loss=0.3737 critic_loss=156530440005.8182 entropy=17.4992 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 25180] reward=-116985192.9 actor_loss=0.2417 critic_loss=148958733653.3333 entropy=17.5080 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 25180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-487760.9 mean_steps=15.6
|
|
[Episode 25190] reward=-121050307.7 actor_loss=0.3219 critic_loss=154695931828.1482 entropy=17.5221 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 25200] reward=-121164734.5 actor_loss=0.3829 critic_loss=162883133767.6800 entropy=17.5208 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 25200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539657.0 mean_steps=14.4
|
|
[Episode 25210] reward=-119745973.1 actor_loss=0.1826 critic_loss=151743735053.4737 entropy=17.5287 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 25220] reward=-124633490.1 actor_loss=0.2134 critic_loss=168321406645.6774 entropy=17.5250 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 25220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-393851.9 mean_steps=15.2
|
|
[Episode 25230] reward=-119888933.7 actor_loss=0.3372 critic_loss=156246723530.1053 entropy=17.5208 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 25240] reward=-120873934.5 actor_loss=0.3303 critic_loss=159612514838.2609 entropy=17.5140 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 25240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502729.2 mean_steps=15.2
|
|
[Episode 25250] reward=-123614207.8 actor_loss=0.3361 critic_loss=161240286276.2667 entropy=17.4994 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 25260] reward=-114711602.8 actor_loss=0.2874 critic_loss=146399674864.4849 entropy=17.5013 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 25260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409234.4 mean_steps=15.2
|
|
[Episode 25270] reward=-118380855.9 actor_loss=0.3273 critic_loss=174622989516.8000 entropy=17.5057 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 25280] reward=-122650624.8 actor_loss=0.2254 critic_loss=166651749096.7273 entropy=17.4983 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 25280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-342475.6 mean_steps=16.9
|
|
[Episode 25290] reward=-120574972.0 actor_loss=0.3692 critic_loss=167393442977.6842 entropy=17.5132 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 25300] reward=-118073882.5 actor_loss=0.3724 critic_loss=156936574530.7826 entropy=17.5131 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 25300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476283.5 mean_steps=15.2
|
|
[Episode 25310] reward=-119207890.5 actor_loss=0.4118 critic_loss=166764778460.6897 entropy=17.5090 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 25320] reward=-115752432.1 actor_loss=0.4122 critic_loss=151912255324.1600 entropy=17.5134 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 25320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-608505.5 mean_steps=13.7
|
|
[Episode 25330] reward=-118900201.9 actor_loss=0.2531 critic_loss=157028941824.0000 entropy=17.5121 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 25340] reward=-115527934.9 actor_loss=0.3915 critic_loss=152890800038.9565 entropy=17.5108 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 25340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427788.0 mean_steps=15.4
|
|
[Episode 25350] reward=-120359898.5 actor_loss=0.1739 critic_loss=160316607083.1628 entropy=17.5147 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 25360] reward=-117365277.8 actor_loss=0.3216 critic_loss=149098823248.8421 entropy=17.5227 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 25360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-606211.7 mean_steps=14.0
|
|
[Episode 25370] reward=-122780672.0 actor_loss=0.1736 critic_loss=155514985006.5454 entropy=17.5274 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 25380] reward=-118180356.6 actor_loss=0.3232 critic_loss=152668837361.3714 entropy=17.5213 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 25380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444468.4 mean_steps=15.5
|
|
[Episode 25390] reward=-117439135.9 actor_loss=0.2510 critic_loss=153184345784.3200 entropy=17.5329 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 25400] reward=-118152687.0 actor_loss=0.3290 critic_loss=156886936234.6667 entropy=17.5439 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 25400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539629.3 mean_steps=13.3
|
|
[Episode 25410] reward=-117220187.0 actor_loss=0.1687 critic_loss=152972610638.7692 entropy=17.5393 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 25420] reward=-125811581.9 actor_loss=0.1986 critic_loss=163342324203.5200 entropy=17.5465 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 25420] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-348066.6 mean_steps=18.0
|
|
[Episode 25430] reward=-121069640.7 actor_loss=0.3202 critic_loss=158869476903.3846 entropy=17.5704 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 25440] reward=-119153578.2 actor_loss=0.3064 critic_loss=158085172302.7692 entropy=17.5863 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 25440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-411760.5 mean_steps=16.5
|
|
[Episode 25450] reward=-120347629.4 actor_loss=0.1542 critic_loss=164790777976.4706 entropy=17.5830 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 25460] reward=-120162817.0 actor_loss=0.2654 critic_loss=162527146299.0769 entropy=17.5765 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 25460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561039.5 mean_steps=13.7
|
|
[Episode 25470] reward=-124935603.1 actor_loss=0.2413 critic_loss=162068652942.2222 entropy=17.5986 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 25480] reward=-117670092.0 actor_loss=0.3702 critic_loss=163925568418.9091 entropy=17.5876 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 25480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-570624.8 mean_steps=12.7
|
|
[Episode 25490] reward=-122896314.2 actor_loss=0.2852 critic_loss=160386456616.9600 entropy=17.5845 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 25500] reward=-123838604.6 actor_loss=0.1856 critic_loss=162065824593.1707 entropy=17.5865 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 25500] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-370410.7 mean_steps=17.1
|
|
[Episode 25510] reward=-121082883.9 actor_loss=0.4157 critic_loss=158382291431.6190 entropy=17.5875 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 25520] reward=-124196524.4 actor_loss=0.3181 critic_loss=165573411840.0000 entropy=17.5874 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 25520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463835.5 mean_steps=15.7
|
|
[Episode 25530] reward=-123052647.9 actor_loss=0.2549 critic_loss=158514095718.4000 entropy=17.5940 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 25540] reward=-121633146.1 actor_loss=0.2401 critic_loss=157291884784.9412 entropy=17.5978 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 25540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-330265.7 mean_steps=15.8
|
|
[Episode 25550] reward=-120740314.6 actor_loss=0.2206 critic_loss=158762627072.0000 entropy=17.5996 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 25560] reward=-120569014.8 actor_loss=0.2541 critic_loss=157914056380.6316 entropy=17.5996 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 25560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-464829.5 mean_steps=15.5
|
|
[Episode 25570] reward=-119893057.8 actor_loss=0.2827 critic_loss=162753348190.8148 entropy=17.5945 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 25580] reward=-126570323.1 actor_loss=0.3076 critic_loss=169413158138.3111 entropy=17.6014 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 25580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504601.9 mean_steps=15.6
|
|
[Episode 25590] reward=-113647721.5 actor_loss=0.3562 critic_loss=146291370302.5778 entropy=17.5860 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 25600] reward=-118169452.0 actor_loss=0.2796 critic_loss=152306989332.7567 entropy=17.5818 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 25600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471997.5 mean_steps=14.1
|
|
[Episode 25610] reward=-117976927.7 actor_loss=0.2322 critic_loss=154762524052.8372 entropy=17.5829 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 25620] reward=-118861613.4 actor_loss=0.1420 critic_loss=154031238299.1515 entropy=17.5935 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 25620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441587.6 mean_steps=14.6
|
|
[Episode 25630] reward=-124058567.2 actor_loss=0.1864 critic_loss=162364224759.1724 entropy=17.5942 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 25640] reward=-119073164.9 actor_loss=0.3710 critic_loss=160510248401.4546 entropy=17.5868 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 25640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457883.0 mean_steps=15.8
|
|
[Episode 25650] reward=-119490137.4 actor_loss=0.2909 critic_loss=157431527833.6000 entropy=17.5892 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 25660] reward=-121390360.1 actor_loss=0.2791 critic_loss=156809566406.1935 entropy=17.6032 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 25660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551161.6 mean_steps=12.5
|
|
[Episode 25670] reward=-117590829.3 actor_loss=0.2875 critic_loss=159033011222.7556 entropy=17.6028 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 25680] reward=-121036917.5 actor_loss=0.2510 critic_loss=158270812615.1111 entropy=17.5895 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 25680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-634126.3 mean_steps=12.1
|
|
[Episode 25690] reward=-119429213.2 actor_loss=0.2933 critic_loss=153808238819.5555 entropy=17.5937 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 25700] reward=-115885910.8 actor_loss=0.3685 critic_loss=155239219655.1111 entropy=17.5992 approx_kl=0.0115 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 25700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480846.5 mean_steps=15.0
|
|
[Episode 25710] reward=-120738729.1 actor_loss=0.2630 critic_loss=166427482004.2105 entropy=17.5937 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 25720] reward=-122364073.7 actor_loss=0.1947 critic_loss=158336857247.2889 entropy=17.5938 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 25720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-606365.1 mean_steps=12.8
|
|
[Episode 25730] reward=-121280819.8 actor_loss=0.3015 critic_loss=162989040857.2121 entropy=17.5891 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 25740] reward=-121083797.2 actor_loss=0.3018 critic_loss=154957423957.3333 entropy=17.5953 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 25740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505956.3 mean_steps=14.2
|
|
[Episode 25750] reward=-120846851.4 actor_loss=0.2468 critic_loss=160599326288.8421 entropy=17.5992 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 25760] reward=-119558757.3 actor_loss=0.3086 critic_loss=157186634183.1111 entropy=17.5848 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 25760] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-290318.3 mean_steps=17.4
|
|
[Episode 25770] reward=-115277544.3 actor_loss=0.2800 critic_loss=154881037890.7826 entropy=17.5808 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 25780] reward=-114460442.3 actor_loss=0.3212 critic_loss=149089262913.8286 entropy=17.5731 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 25780] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-249905.5 mean_steps=18.3
|
|
[Episode 25790] reward=-122027185.0 actor_loss=0.3099 critic_loss=161316614235.0222 entropy=17.5592 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 25800] reward=-121886151.8 actor_loss=0.3658 critic_loss=158050107938.1333 entropy=17.5606 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 25800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441894.4 mean_steps=14.4
|
|
[Episode 25810] reward=-115424228.4 actor_loss=0.3001 critic_loss=149045902367.0303 entropy=17.5526 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 25820] reward=-122649413.1 actor_loss=0.2895 critic_loss=159042905338.3111 entropy=17.5570 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 25820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533401.0 mean_steps=13.2
|
|
[Episode 25830] reward=-118763654.2 actor_loss=0.3188 critic_loss=155583554796.3077 entropy=17.5762 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 25840] reward=-118774969.9 actor_loss=0.1551 critic_loss=150741201351.1111 entropy=17.5689 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 25840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437187.9 mean_steps=14.3
|
|
[Episode 25850] reward=-119232783.8 actor_loss=0.3279 critic_loss=159369534857.8462 entropy=17.5733 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 25860] reward=-120723179.6 actor_loss=0.3304 critic_loss=156430943768.3810 entropy=17.5702 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 25860] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-381696.2 mean_steps=17.1
|
|
[Episode 25870] reward=-126538895.7 actor_loss=0.2664 critic_loss=175966986240.0000 entropy=17.5687 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 25880] reward=-118294772.3 actor_loss=0.2163 critic_loss=153489428658.0869 entropy=17.5744 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 25880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-605987.3 mean_steps=13.8
|
|
[Episode 25890] reward=-123423824.4 actor_loss=0.2426 critic_loss=166752600064.0000 entropy=17.5825 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 25900] reward=-118236084.4 actor_loss=0.2511 critic_loss=154459655036.7180 entropy=17.5800 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 25900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-544697.4 mean_steps=15.2
|
|
[Episode 25910] reward=-112817665.0 actor_loss=0.1899 critic_loss=149608688142.6286 entropy=17.5609 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 25920] reward=-115613455.2 actor_loss=0.3514 critic_loss=149887953797.1200 entropy=17.5537 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 25920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417626.5 mean_steps=15.7
|
|
[Episode 25930] reward=-120585517.0 actor_loss=0.3148 critic_loss=159039679926.8571 entropy=17.5474 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 25940] reward=-117545573.4 actor_loss=0.3497 critic_loss=154970406001.7778 entropy=17.5405 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 25940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390464.0 mean_steps=15.4
|
|
[Episode 25950] reward=-120139567.0 actor_loss=0.2518 critic_loss=156902008422.4000 entropy=17.5504 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 25960] reward=-114159885.2 actor_loss=0.3841 critic_loss=149863883571.2000 entropy=17.5607 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 25960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523549.5 mean_steps=13.2
|
|
[Episode 25970] reward=-122022321.7 actor_loss=0.2686 critic_loss=156035118211.2820 entropy=17.5741 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 25980] reward=-116633355.7 actor_loss=0.2826 critic_loss=154600609889.5238 entropy=17.5933 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 25980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-379097.6 mean_steps=14.9
|
|
[Episode 25990] reward=-125186598.3 actor_loss=0.2620 critic_loss=165990335757.4737 entropy=17.5939 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 26000] reward=-116743554.0 actor_loss=0.2340 critic_loss=154097521012.3636 entropy=17.5801 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 26000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535779.8 mean_steps=14.4
|
|
[Episode 26010] reward=-119317038.0 actor_loss=0.1845 critic_loss=156625927695.5151 entropy=17.5794 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 26020] reward=-115555050.6 actor_loss=0.3292 critic_loss=150299744376.4706 entropy=17.5738 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 26020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-436700.6 mean_steps=14.8
|
|
[Episode 26030] reward=-119119512.0 actor_loss=0.3029 critic_loss=161091936737.8824 entropy=17.5730 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 26040] reward=-126405913.0 actor_loss=0.1770 critic_loss=284730509312.0000 entropy=17.5732 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 26040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448808.3 mean_steps=14.9
|
|
[Episode 26050] reward=-124717526.0 actor_loss=0.3086 critic_loss=275193719193.6000 entropy=17.5769 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 26060] reward=-117719189.2 actor_loss=0.2504 critic_loss=150623383236.9231 entropy=17.5780 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 26060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-416437.6 mean_steps=14.4
|
|
[Episode 26070] reward=-122558582.5 actor_loss=0.1233 critic_loss=162591318343.6800 entropy=17.5799 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 26080] reward=-115888285.0 actor_loss=0.4093 critic_loss=152753263616.0000 entropy=17.5884 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 26080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453682.6 mean_steps=14.7
|
|
[Episode 26090] reward=-117825554.4 actor_loss=0.2032 critic_loss=154535873740.8000 entropy=17.5868 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 26100] reward=-114442589.3 actor_loss=0.3319 critic_loss=146799127853.1765 entropy=17.5849 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 26100] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-578178.3 mean_steps=11.9
|
|
[Episode 26110] reward=-110744730.8 actor_loss=0.2778 critic_loss=143083277312.0000 entropy=17.5958 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 26120] reward=-116003206.5 actor_loss=0.4162 critic_loss=149194921472.0000 entropy=17.6059 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 26120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489825.1 mean_steps=14.8
|
|
[Episode 26130] reward=-123062444.5 actor_loss=0.2999 critic_loss=160497060942.7692 entropy=17.6213 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 26140] reward=-120943239.0 actor_loss=0.1907 critic_loss=161156237498.1818 entropy=17.6241 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 26140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-453453.0 mean_steps=16.8
|
|
[Episode 26150] reward=-121147810.1 actor_loss=0.2895 critic_loss=160546161900.3077 entropy=17.6236 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 26160] reward=-121467459.3 actor_loss=0.2940 critic_loss=159225661319.5294 entropy=17.6382 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 26160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-412761.6 mean_steps=16.4
|
|
[Episode 26170] reward=-115132838.5 actor_loss=0.3028 critic_loss=185107452416.0000 entropy=17.6209 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 26180] reward=-110921393.1 actor_loss=0.3448 critic_loss=142653533561.2632 entropy=17.6079 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 26180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-560706.0 mean_steps=15.2
|
|
[Episode 26190] reward=-120122811.0 actor_loss=0.3809 critic_loss=154797480742.7879 entropy=17.6121 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 26200] reward=-117431268.8 actor_loss=0.3065 critic_loss=152776964006.9565 entropy=17.6130 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 26200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602418.3 mean_steps=12.8
|
|
[Episode 26210] reward=-114136700.6 actor_loss=0.2789 critic_loss=141999900262.4000 entropy=17.6099 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 26220] reward=-124762908.1 actor_loss=0.2229 critic_loss=160390449561.6000 entropy=17.6110 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 26220] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-678117.9 mean_steps=12.6
|
|
[Episode 26230] reward=-120027684.8 actor_loss=0.4283 critic_loss=161525338020.9778 entropy=17.6244 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 26240] reward=-116307210.0 actor_loss=0.3049 critic_loss=151696160194.5600 entropy=17.6311 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 26240] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-201028.7 mean_steps=18.9
|
|
[Episode 26250] reward=-124384290.7 actor_loss=0.3207 critic_loss=163445878605.9131 entropy=17.6268 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 26260] reward=-119523334.7 actor_loss=0.2643 critic_loss=159519570147.5555 entropy=17.6270 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 26260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445652.1 mean_steps=16.1
|
|
[Episode 26270] reward=-120692367.3 actor_loss=0.2776 critic_loss=160718686374.0540 entropy=17.6383 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 26280] reward=-119221981.0 actor_loss=0.2370 critic_loss=156983731814.4000 entropy=17.6400 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 26280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-377195.8 mean_steps=17.4
|
|
[Episode 26290] reward=-116944606.3 actor_loss=0.2786 critic_loss=156650616989.5385 entropy=17.6515 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 26300] reward=-114066473.1 actor_loss=0.3036 critic_loss=148250738688.0000 entropy=17.6682 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 26300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590270.5 mean_steps=14.1
|
|
[Episode 26310] reward=-119510309.9 actor_loss=0.2562 critic_loss=152839513338.3111 entropy=17.6839 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 26320] reward=-107965789.2 actor_loss=0.4922 critic_loss=142963247962.8387 entropy=17.6682 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 26320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624884.3 mean_steps=13.6
|
|
[Episode 26330] reward=-123107075.1 actor_loss=0.2608 critic_loss=160948663455.2889 entropy=17.6670 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 26340] reward=-117162481.1 actor_loss=0.2684 critic_loss=152088154290.0869 entropy=17.6900 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 26340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600739.1 mean_steps=12.7
|
|
[Episode 26350] reward=-119819873.6 actor_loss=0.2223 critic_loss=159094371487.2889 entropy=17.6778 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 26360] reward=-114761974.2 actor_loss=0.3150 critic_loss=150360175775.2889 entropy=17.6815 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 26360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548675.1 mean_steps=13.6
|
|
[Episode 26370] reward=-124224947.3 actor_loss=0.3209 critic_loss=166751154080.7442 entropy=17.6749 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 26380] reward=-113185823.0 actor_loss=0.3051 critic_loss=145444724536.1951 entropy=17.6718 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 26380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-396335.3 mean_steps=16.4
|
|
[Episode 26390] reward=-111102064.6 actor_loss=0.2364 critic_loss=144026729235.6923 entropy=17.6746 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 26400] reward=-121642898.8 actor_loss=0.3722 critic_loss=160206979072.0000 entropy=17.6674 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 26400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-405006.8 mean_steps=16.1
|
|
[Episode 26410] reward=-120185349.3 actor_loss=0.4282 critic_loss=163359855957.3333 entropy=17.6688 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 26420] reward=-116489177.8 actor_loss=0.2954 critic_loss=155210541443.4595 entropy=17.6630 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 26420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475457.0 mean_steps=13.9
|
|
[Episode 26430] reward=-119622952.3 actor_loss=0.3663 critic_loss=155371046288.6956 entropy=17.6547 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 26440] reward=-121998111.5 actor_loss=0.2233 critic_loss=156073855162.1818 entropy=17.6553 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 26440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549101.9 mean_steps=14.2
|
|
[Episode 26450] reward=-116014540.7 actor_loss=0.2665 critic_loss=152918947157.3333 entropy=17.6766 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 26460] reward=-117279343.1 actor_loss=0.3012 critic_loss=154981140616.5333 entropy=17.6784 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 26460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446640.8 mean_steps=15.8
|
|
[Episode 26470] reward=-116283145.8 actor_loss=1.4696 critic_loss=160865672305.7778 entropy=17.6718 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 26480] reward=-120360891.8 actor_loss=0.2511 critic_loss=157783696998.4000 entropy=17.6660 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 26480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418833.3 mean_steps=15.5
|
|
[Episode 26490] reward=-123195751.0 actor_loss=0.3184 critic_loss=162478978311.3143 entropy=17.6695 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 26500] reward=-123086362.6 actor_loss=0.2475 critic_loss=163404573961.4815 entropy=17.6712 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 26500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487080.5 mean_steps=15.2
|
|
[Episode 26510] reward=-125405848.0 actor_loss=0.3331 critic_loss=169377305161.1429 entropy=17.6599 approx_kl=0.0116 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 26520] reward=-117724217.8 actor_loss=0.3365 critic_loss=159190431243.3778 entropy=17.6613 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 26520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504779.9 mean_steps=15.1
|
|
[Episode 26530] reward=-120554607.1 actor_loss=0.2232 critic_loss=153528827904.0000 entropy=17.6717 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 26540] reward=-118848236.1 actor_loss=0.3524 critic_loss=154119672698.4348 entropy=17.6667 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 26540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524934.2 mean_steps=14.4
|
|
[Episode 26550] reward=-118196664.0 actor_loss=0.3002 critic_loss=157163792226.4615 entropy=17.6610 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 26560] reward=-124016996.2 actor_loss=0.3144 critic_loss=166962566197.8947 entropy=17.6728 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 26560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-510109.4 mean_steps=15.3
|
|
[Episode 26570] reward=-121782061.9 actor_loss=0.2819 critic_loss=164372326845.2174 entropy=17.6759 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 26580] reward=-125407676.2 actor_loss=0.2502 critic_loss=168848017115.4286 entropy=17.6616 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 26580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602166.0 mean_steps=13.4
|
|
[Episode 26590] reward=-117127384.1 actor_loss=0.2880 critic_loss=146683880407.0400 entropy=17.6635 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 26600] reward=-115543550.3 actor_loss=0.3864 critic_loss=148279538910.6087 entropy=17.6627 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 26600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515262.3 mean_steps=14.5
|
|
[Episode 26610] reward=-122960391.5 actor_loss=0.2125 critic_loss=161132837091.5555 entropy=17.6591 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 26620] reward=-120292837.3 actor_loss=0.2851 critic_loss=158382631594.6667 entropy=17.6707 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 26620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-396643.3 mean_steps=16.6
|
|
[Episode 26630] reward=-123437857.6 actor_loss=0.2448 critic_loss=163990960537.6000 entropy=17.6905 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 26640] reward=-123211678.8 actor_loss=0.2806 critic_loss=164479986565.1200 entropy=17.7080 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 26640] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-388043.1 mean_steps=16.6
|
|
[Episode 26650] reward=-115402127.1 actor_loss=0.3175 critic_loss=154393165111.6522 entropy=17.7111 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 26660] reward=-119202793.2 actor_loss=0.3310 critic_loss=163053291362.4615 entropy=17.6884 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 26660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573655.7 mean_steps=13.7
|
|
[Episode 26670] reward=-123104267.7 actor_loss=0.3575 critic_loss=169568297807.4483 entropy=17.6875 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 26680] reward=-123078489.7 actor_loss=0.1911 critic_loss=173483762328.2162 entropy=17.6975 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 26680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-411169.3 mean_steps=15.4
|
|
[Episode 26690] reward=-118434283.1 actor_loss=0.3484 critic_loss=160821496490.6667 entropy=17.6939 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 26700] reward=-116449361.4 actor_loss=0.3441 critic_loss=161172527396.5714 entropy=17.6928 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 26700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-472084.8 mean_steps=13.9
|
|
[Episode 26710] reward=-119766815.7 actor_loss=0.2920 critic_loss=161221290861.7143 entropy=17.7054 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 26720] reward=-120481976.5 actor_loss=0.2208 critic_loss=156823047545.2632 entropy=17.7040 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 26720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-419233.9 mean_steps=16.8
|
|
[Episode 26730] reward=-118374797.8 actor_loss=0.2445 critic_loss=156605461065.1429 entropy=17.7164 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 26740] reward=-112156287.1 actor_loss=0.2409 critic_loss=147227191796.6222 entropy=17.7132 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 26740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-642288.9 mean_steps=12.2
|
|
[Episode 26750] reward=-123225681.4 actor_loss=0.2678 critic_loss=164961945600.0000 entropy=17.7069 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 26760] reward=-120303590.0 actor_loss=0.3053 critic_loss=160602228053.3333 entropy=17.7118 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 26760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394337.8 mean_steps=16.4
|
|
[Episode 26770] reward=-120028172.5 actor_loss=0.2268 critic_loss=158895293976.3810 entropy=17.7300 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 26780] reward=-120193528.8 actor_loss=0.2841 critic_loss=155846934528.0000 entropy=17.7266 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 26780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-468121.8 mean_steps=15.7
|
|
[Episode 26790] reward=-117736352.4 actor_loss=0.2826 critic_loss=161196744347.8261 entropy=17.7323 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 26800] reward=-115442418.0 actor_loss=0.3422 critic_loss=148914047658.6667 entropy=17.7365 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 26800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-530526.3 mean_steps=14.9
|
|
[Episode 26810] reward=-119486769.6 actor_loss=0.2715 critic_loss=156877800734.7200 entropy=17.7340 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 26820] reward=-122677876.8 actor_loss=0.2993 critic_loss=160823126846.2703 entropy=17.7271 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 26820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-368696.2 mean_steps=16.9
|
|
[Episode 26830] reward=-115343808.2 actor_loss=0.2671 critic_loss=151724756536.8889 entropy=17.7447 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 26840] reward=-122900281.9 actor_loss=0.2648 critic_loss=161113589005.4737 entropy=17.7392 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 26840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-443152.3 mean_steps=17.1
|
|
[Episode 26850] reward=-115780838.4 actor_loss=0.3821 critic_loss=158751978207.1795 entropy=17.7135 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 26860] reward=-124208509.1 actor_loss=0.2794 critic_loss=167904673185.1852 entropy=17.7292 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 26860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-551513.9 mean_steps=14.2
|
|
[Episode 26870] reward=-120762322.2 actor_loss=0.3716 critic_loss=160413116734.5778 entropy=17.7211 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 26880] reward=-117009956.3 actor_loss=0.2326 critic_loss=159344106536.9600 entropy=17.7178 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 26880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469840.2 mean_steps=13.8
|
|
[Episode 26890] reward=-121191525.5 actor_loss=0.2390 critic_loss=159093330235.0769 entropy=17.7263 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 26900] reward=-120241015.6 actor_loss=0.2626 critic_loss=152253605252.4138 entropy=17.7256 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 26900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-351768.4 mean_steps=15.9
|
|
[Episode 26910] reward=-127814248.1 actor_loss=0.1804 critic_loss=178137171324.3429 entropy=17.7295 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 26920] reward=-125117111.3 actor_loss=0.3098 critic_loss=172737531588.9231 entropy=17.7230 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 26920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-376716.5 mean_steps=16.6
|
|
[Episode 26930] reward=-117856206.1 actor_loss=0.2830 critic_loss=157155635785.1429 entropy=17.7289 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 26940] reward=-118378987.4 actor_loss=0.3353 critic_loss=155428154709.3333 entropy=17.7296 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 26940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-483661.1 mean_steps=15.9
|
|
[Episode 26950] reward=-121501197.4 actor_loss=0.3449 critic_loss=164835702647.4667 entropy=17.7372 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 26960] reward=-119042464.7 actor_loss=0.2491 critic_loss=152488131615.0303 entropy=17.7266 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 26960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556173.6 mean_steps=13.7
|
|
[Episode 26970] reward=-116595732.9 actor_loss=0.3599 critic_loss=158665487397.9259 entropy=17.7381 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 26980] reward=-117741328.1 actor_loss=0.2577 critic_loss=157283998896.5517 entropy=17.7420 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 26980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-404685.5 mean_steps=15.6
|
|
[Episode 26990] reward=-120823992.4 actor_loss=0.2972 critic_loss=160480744061.1555 entropy=17.7345 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 27000] reward=-123341022.7 actor_loss=0.1955 critic_loss=165045159568.4102 entropy=17.7391 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 27000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462711.9 mean_steps=16.4
|
|
[Episode 27010] reward=-117146273.3 actor_loss=0.3417 critic_loss=155302653291.3548 entropy=17.7516 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 27020] reward=-125580898.5 actor_loss=0.2060 critic_loss=170044377626.9474 entropy=17.7598 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 27020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-372900.6 mean_steps=15.4
|
|
[Episode 27030] reward=-118987228.1 actor_loss=0.3174 critic_loss=160152389278.8965 entropy=17.7665 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 27040] reward=-122862939.1 actor_loss=0.3241 critic_loss=159258497969.2308 entropy=17.7789 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 27040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525161.0 mean_steps=14.2
|
|
[Episode 27050] reward=-117936062.8 actor_loss=0.3568 critic_loss=156285221782.0690 entropy=17.7843 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 27060] reward=-121752571.6 actor_loss=0.1855 critic_loss=164052702003.2000 entropy=17.7830 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 27060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457228.4 mean_steps=15.0
|
|
[Episode 27070] reward=-123429459.1 actor_loss=0.2334 critic_loss=202547439518.4762 entropy=17.7801 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 27080] reward=-123492199.2 actor_loss=0.3241 critic_loss=381420988006.4000 entropy=17.7777 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 27080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542816.0 mean_steps=13.3
|
|
[Episode 27090] reward=-117909184.9 actor_loss=0.3261 critic_loss=168158636168.5333 entropy=17.7638 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 27100] reward=-120526194.7 actor_loss=0.2245 critic_loss=262274873967.3044 entropy=17.7756 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 27100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401697.7 mean_steps=16.3
|
|
[Episode 27110] reward=-117389443.6 actor_loss=0.2630 critic_loss=152349509404.4445 entropy=17.7612 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 27120] reward=-112944927.0 actor_loss=0.3812 critic_loss=148408351920.5517 entropy=17.7491 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 27120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-601414.1 mean_steps=12.2
|
|
[Episode 27130] reward=-120277569.2 actor_loss=0.2545 critic_loss=157146573630.2703 entropy=17.7286 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 27140] reward=-118183415.1 actor_loss=0.3845 critic_loss=153495154050.8445 entropy=17.7026 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 27140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-364812.1 mean_steps=16.1
|
|
[Episode 27150] reward=-118018804.2 actor_loss=0.3181 critic_loss=161415259022.2222 entropy=17.7002 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 27160] reward=-117172096.2 actor_loss=0.3630 critic_loss=152400709924.5714 entropy=17.7106 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 27160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467403.2 mean_steps=14.9
|
|
[Episode 27170] reward=-115595031.2 actor_loss=0.3937 critic_loss=153374740388.9778 entropy=17.7284 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 27180] reward=-118666810.3 actor_loss=0.2366 critic_loss=155835218273.1035 entropy=17.7414 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 27180] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-339012.6 mean_steps=16.8
|
|
[Episode 27190] reward=-122622213.7 actor_loss=0.2786 critic_loss=159679216065.5610 entropy=17.7303 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 27200] reward=-117805424.5 actor_loss=0.3323 critic_loss=155639788885.3333 entropy=17.7413 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 27200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463717.4 mean_steps=16.1
|
|
[Episode 27210] reward=-117147487.8 actor_loss=0.2861 critic_loss=175437111113.9556 entropy=17.7427 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 27220] reward=-122679942.3 actor_loss=0.2311 critic_loss=171537268736.0000 entropy=17.7350 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 27220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450835.9 mean_steps=15.1
|
|
[Episode 27230] reward=-115224308.5 actor_loss=0.3492 critic_loss=154002895793.2308 entropy=17.7307 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 27240] reward=-126327742.8 actor_loss=0.3375 critic_loss=170058389549.5111 entropy=17.7383 approx_kl=0.0115 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 27240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577206.5 mean_steps=13.7
|
|
[Episode 27250] reward=-117448170.2 actor_loss=0.4207 critic_loss=156830692050.8235 entropy=17.7263 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 27260] reward=-123157914.9 actor_loss=0.2807 critic_loss=159786518664.5333 entropy=17.7416 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 27260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-473709.9 mean_steps=15.9
|
|
[Episode 27270] reward=-122636648.4 actor_loss=0.2836 critic_loss=161804290548.6222 entropy=17.7484 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 27280] reward=-121442630.0 actor_loss=0.3588 critic_loss=160512207530.6667 entropy=17.7400 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 27280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438514.8 mean_steps=14.4
|
|
[Episode 27290] reward=-111551769.5 actor_loss=0.3653 critic_loss=145826174293.3333 entropy=17.7457 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 27300] reward=-120058282.8 actor_loss=0.2440 critic_loss=159387902498.1333 entropy=17.7700 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 27300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494117.4 mean_steps=13.8
|
|
[Episode 27310] reward=-118135592.9 actor_loss=0.3303 critic_loss=156552936015.6444 entropy=17.7622 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 27320] reward=-114433848.6 actor_loss=0.2854 critic_loss=148446450483.2000 entropy=17.7456 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 27320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551297.5 mean_steps=13.7
|
|
[Episode 27330] reward=-123218747.4 actor_loss=0.2302 critic_loss=161007360045.5111 entropy=17.7418 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 27340] reward=-116650169.7 actor_loss=0.3654 critic_loss=154247494041.6000 entropy=17.7334 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 27340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539451.2 mean_steps=14.2
|
|
[Episode 27350] reward=-119458907.1 actor_loss=0.2283 critic_loss=155902952152.1778 entropy=17.7443 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 27360] reward=-117271102.6 actor_loss=0.2822 critic_loss=157701702360.1778 entropy=17.7490 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 27360] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-366472.5 mean_steps=17.6
|
|
[Episode 27370] reward=-124172375.0 actor_loss=0.3086 critic_loss=164817855326.3158 entropy=17.7540 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 27380] reward=-121390130.7 actor_loss=0.2748 critic_loss=155638161590.0444 entropy=17.7524 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 27380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496260.5 mean_steps=14.2
|
|
[Episode 27390] reward=-124801790.9 actor_loss=0.2548 critic_loss=165824474498.8445 entropy=17.7612 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 27400] reward=-121444583.1 actor_loss=0.3182 critic_loss=159107804182.7556 entropy=17.7472 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 27400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561767.0 mean_steps=13.1
|
|
[Episode 27410] reward=-119317525.6 actor_loss=0.2880 critic_loss=156399763456.0000 entropy=17.7250 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 27420] reward=-122192699.1 actor_loss=0.2717 critic_loss=160618817378.4615 entropy=17.7234 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 27420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554950.5 mean_steps=13.4
|
|
[Episode 27430] reward=-126811038.6 actor_loss=0.2415 critic_loss=202068863238.5641 entropy=17.7137 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 27440] reward=-119049847.4 actor_loss=0.3824 critic_loss=178937985858.3704 entropy=17.7205 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 27440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417646.9 mean_steps=15.4
|
|
[Episode 27450] reward=-121980576.9 actor_loss=0.2987 critic_loss=159174897390.9333 entropy=17.7290 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 27460] reward=-125790009.8 actor_loss=0.1553 critic_loss=168109280033.3913 entropy=17.7291 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 27460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425661.5 mean_steps=15.8
|
|
[Episode 27470] reward=-120321028.9 actor_loss=0.2697 critic_loss=156931091114.6667 entropy=17.7292 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 27480] reward=-118952197.7 actor_loss=0.1786 critic_loss=153841270272.0000 entropy=17.7347 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 27480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-650319.9 mean_steps=13.2
|
|
[Episode 27490] reward=-118496142.6 actor_loss=0.2202 critic_loss=192504186470.4000 entropy=17.7215 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 27500] reward=-121561159.4 actor_loss=0.2382 critic_loss=162935914496.0000 entropy=17.7106 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 27500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549689.3 mean_steps=13.7
|
|
[Episode 27510] reward=-119240963.5 actor_loss=0.2039 critic_loss=158232837415.8222 entropy=17.7036 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 27520] reward=-121294823.4 actor_loss=0.2728 critic_loss=165132674048.0000 entropy=17.7035 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 27520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400791.3 mean_steps=16.6
|
|
[Episode 27530] reward=-116657056.8 actor_loss=0.2520 critic_loss=154201784832.0000 entropy=17.6952 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 27540] reward=-124742699.9 actor_loss=0.2352 critic_loss=166234882048.0000 entropy=17.6814 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 27540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-607137.9 mean_steps=11.8
|
|
[Episode 27550] reward=-118458235.2 actor_loss=0.2455 critic_loss=157391359908.9778 entropy=17.6918 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 27560] reward=-123388494.9 actor_loss=0.2258 critic_loss=166481129256.4211 entropy=17.7046 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 27560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418587.5 mean_steps=15.2
|
|
[Episode 27570] reward=-114903799.6 actor_loss=0.3253 critic_loss=152136557714.2857 entropy=17.6983 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 27580] reward=-115291706.8 actor_loss=0.2669 critic_loss=153621670818.9091 entropy=17.7013 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 27580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-578444.0 mean_steps=14.3
|
|
[Episode 27590] reward=-117041654.1 actor_loss=0.2597 critic_loss=150447084339.2000 entropy=17.6855 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 27600] reward=-121670385.4 actor_loss=0.2370 critic_loss=154192074433.4222 entropy=17.6892 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 27600] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-316898.5 mean_steps=17.5
|
|
[Episode 27610] reward=-123524036.3 actor_loss=0.2837 critic_loss=168042233435.8974 entropy=17.6941 approx_kl=0.0114 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 27620] reward=-123075550.8 actor_loss=0.2557 critic_loss=165267341312.0000 entropy=17.7121 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 27620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521133.0 mean_steps=15.2
|
|
[Episode 27630] reward=-116490863.6 actor_loss=0.3381 critic_loss=160730980783.1579 entropy=17.7170 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 27640] reward=-121660425.0 actor_loss=0.3250 critic_loss=160790917575.1111 entropy=17.6971 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 27640] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-276524.9 mean_steps=17.4
|
|
[Episode 27650] reward=-122297294.6 actor_loss=0.3367 critic_loss=158632277442.5600 entropy=17.6891 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 27660] reward=-116906624.1 actor_loss=0.3206 critic_loss=151297606724.2667 entropy=17.6819 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 27660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529694.6 mean_steps=13.6
|
|
[Episode 27670] reward=-122409612.1 actor_loss=0.2639 critic_loss=155740169830.4000 entropy=17.6785 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 27680] reward=-115276505.3 actor_loss=0.2527 critic_loss=149774048038.7879 entropy=17.6642 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 27680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540231.5 mean_steps=14.6
|
|
[Episode 27690] reward=-121373627.9 actor_loss=0.2990 critic_loss=161834763500.3077 entropy=17.6777 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 27700] reward=-120680342.1 actor_loss=0.3491 critic_loss=153904181411.8400 entropy=17.6738 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 27700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-374374.6 mean_steps=16.0
|
|
[Episode 27710] reward=-116841683.9 actor_loss=0.3310 critic_loss=148219871663.1579 entropy=17.6792 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 27720] reward=-125338471.6 actor_loss=0.2594 critic_loss=171168144315.7333 entropy=17.6873 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 27720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-373864.1 mean_steps=16.3
|
|
[Episode 27730] reward=-114151608.0 actor_loss=0.3484 critic_loss=146771006586.8800 entropy=17.6772 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 27740] reward=-121407662.1 actor_loss=0.2786 critic_loss=159585950105.6000 entropy=17.6738 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 27740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422189.0 mean_steps=15.8
|
|
[Episode 27750] reward=-123172532.7 actor_loss=0.2551 critic_loss=161106083059.8095 entropy=17.6658 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 27760] reward=-125117096.7 actor_loss=0.2328 critic_loss=159582162833.2973 entropy=17.6655 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 27760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578230.5 mean_steps=12.8
|
|
[Episode 27770] reward=-118092414.5 actor_loss=0.1499 critic_loss=153566312497.9512 entropy=17.6773 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 27780] reward=-118430696.8 actor_loss=0.2829 critic_loss=156933925194.3226 entropy=17.6777 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 27780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493193.2 mean_steps=14.7
|
|
[Episode 27790] reward=-118999365.5 actor_loss=0.3231 critic_loss=156174980073.2444 entropy=17.6507 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 27800] reward=-119602534.1 actor_loss=0.3212 critic_loss=154054821595.4286 entropy=17.6598 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 27800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392227.0 mean_steps=16.6
|
|
[Episode 27810] reward=-118584703.4 actor_loss=0.2235 critic_loss=154855868006.4000 entropy=17.6694 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 27820] reward=-124810714.9 actor_loss=0.2643 critic_loss=161470706574.2222 entropy=17.6725 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 27820] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-283968.1 mean_steps=17.2
|
|
[Episode 27830] reward=-119751265.4 actor_loss=0.3111 critic_loss=155210027463.1111 entropy=17.6772 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 27840] reward=-120441063.0 actor_loss=0.2270 critic_loss=151970809036.8000 entropy=17.6777 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 27840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-610672.0 mean_steps=12.2
|
|
[Episode 27850] reward=-118099781.7 actor_loss=0.3381 critic_loss=157203943282.7586 entropy=17.6984 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 27860] reward=-118250103.7 actor_loss=0.3075 critic_loss=157296522854.4000 entropy=17.6996 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 27860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-433793.1 mean_steps=14.0
|
|
[Episode 27870] reward=-119478748.3 actor_loss=0.3439 critic_loss=153668498525.0909 entropy=17.6992 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 27880] reward=-125120670.5 actor_loss=0.1851 critic_loss=162181800218.4828 entropy=17.6901 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 27880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480435.1 mean_steps=15.1
|
|
[Episode 27890] reward=-121545876.4 actor_loss=0.3244 critic_loss=163154412830.7200 entropy=17.6819 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 27900] reward=-117576988.9 actor_loss=0.2784 critic_loss=160289144832.0000 entropy=17.6666 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 27900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552801.9 mean_steps=14.2
|
|
[Episode 27910] reward=-120779405.9 actor_loss=0.2929 critic_loss=158594042450.5807 entropy=17.6585 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 27920] reward=-122537422.8 actor_loss=0.2881 critic_loss=157115159347.2000 entropy=17.6689 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 27920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567809.3 mean_steps=12.9
|
|
[Episode 27930] reward=-121485412.8 actor_loss=0.3809 critic_loss=157253627904.0000 entropy=17.6644 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 27940] reward=-118093551.4 actor_loss=0.3029 critic_loss=165768272749.7143 entropy=17.6701 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 27940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581453.0 mean_steps=12.7
|
|
[Episode 27950] reward=-120983453.8 actor_loss=0.3430 critic_loss=161140111223.4667 entropy=17.6652 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 27960] reward=-117834262.0 actor_loss=0.2858 critic_loss=158905546251.3778 entropy=17.6578 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 27960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-318540.8 mean_steps=16.8
|
|
[Episode 27970] reward=-121138339.9 actor_loss=0.2461 critic_loss=158778501074.4889 entropy=17.6458 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 27980] reward=-125176541.1 actor_loss=0.3549 critic_loss=166806012534.1538 entropy=17.6520 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 27980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485278.5 mean_steps=14.7
|
|
[Episode 27990] reward=-116990041.8 actor_loss=0.2404 critic_loss=154855868006.4000 entropy=17.6427 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 28000] reward=-112918158.9 actor_loss=0.2918 critic_loss=149288599187.9111 entropy=17.6187 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 28000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532696.3 mean_steps=14.4
|
|
[Episode 28010] reward=-121319975.5 actor_loss=0.2535 critic_loss=161189456164.5714 entropy=17.6236 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 28020] reward=-122254552.9 actor_loss=0.2712 critic_loss=207914108518.4000 entropy=17.6280 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 28020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-447531.2 mean_steps=16.9
|
|
[Episode 28030] reward=-138678497.0 actor_loss=0.4028 critic_loss=1137393206846.4390 entropy=17.6366 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 28040] reward=-122017438.6 actor_loss=0.2422 critic_loss=159491744256.0000 entropy=17.6345 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 28040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424284.3 mean_steps=14.7
|
|
[Episode 28050] reward=-119704053.9 actor_loss=0.2864 critic_loss=155698768190.5778 entropy=17.6267 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 28060] reward=-121506373.5 actor_loss=0.3357 critic_loss=181581054862.2222 entropy=17.6059 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 28060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-367489.9 mean_steps=16.8
|
|
[Episode 28070] reward=-115808481.5 actor_loss=0.3125 critic_loss=162281500146.8718 entropy=17.5976 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 28080] reward=-121618708.3 actor_loss=0.2736 critic_loss=155673916098.2069 entropy=17.5968 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 28080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534347.1 mean_steps=13.5
|
|
[Episode 28090] reward=-120949417.5 actor_loss=0.3996 critic_loss=161144244360.5333 entropy=17.6111 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 28100] reward=-119941956.9 actor_loss=0.2207 critic_loss=155362447732.3636 entropy=17.5968 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 28100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409632.8 mean_steps=15.8
|
|
[Episode 28110] reward=-121297011.4 actor_loss=0.3271 critic_loss=156985966592.0000 entropy=17.6001 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 28120] reward=-120974612.3 actor_loss=0.2998 critic_loss=157344442660.5714 entropy=17.6221 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 28120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-593876.1 mean_steps=13.8
|
|
[Episode 28130] reward=-122878771.1 actor_loss=0.3132 critic_loss=160610853494.1538 entropy=17.6191 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 28140] reward=-117645135.3 actor_loss=0.3000 critic_loss=150896934547.9111 entropy=17.6031 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 28140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-423176.3 mean_steps=14.8
|
|
[Episode 28150] reward=-118827986.3 actor_loss=0.3005 critic_loss=155039730098.4243 entropy=17.5959 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 28160] reward=-122579085.7 actor_loss=0.3110 critic_loss=156858931467.1304 entropy=17.5930 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 28160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-425082.6 mean_steps=14.6
|
|
[Episode 28170] reward=-118500240.3 actor_loss=0.3268 critic_loss=161720864312.8889 entropy=17.5908 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 28180] reward=-122839568.8 actor_loss=0.2600 critic_loss=158049645468.9032 entropy=17.5861 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 28180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-393770.1 mean_steps=16.6
|
|
[Episode 28190] reward=-125023401.1 actor_loss=0.2833 critic_loss=167538254370.1333 entropy=17.5856 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 28200] reward=-117819721.9 actor_loss=0.3162 critic_loss=151869381911.2727 entropy=17.5881 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 28200] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-577605.1 mean_steps=10.9
|
|
[Episode 28210] reward=-122438012.3 actor_loss=0.2059 critic_loss=153218502656.0000 entropy=17.5775 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 28220] reward=-119084924.7 actor_loss=0.3355 critic_loss=157954877801.4118 entropy=17.5726 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 28220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439206.8 mean_steps=14.8
|
|
[Episode 28230] reward=-123543705.6 actor_loss=0.2104 critic_loss=160387739999.0857 entropy=17.5740 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 28240] reward=-123413767.4 actor_loss=0.2034 critic_loss=161554916966.4000 entropy=17.5624 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 28240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-416468.5 mean_steps=16.6
|
|
[Episode 28250] reward=-119554134.7 actor_loss=0.3531 critic_loss=152959312554.6667 entropy=17.5647 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 28260] reward=-121609199.6 actor_loss=0.2592 critic_loss=158236183210.6667 entropy=17.5531 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 28260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518264.5 mean_steps=14.1
|
|
[Episode 28270] reward=-119224754.8 actor_loss=0.3125 critic_loss=149964939264.0000 entropy=17.5521 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 28280] reward=-123762327.4 actor_loss=0.3101 critic_loss=163327773536.7111 entropy=17.5607 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 28280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-620216.3 mean_steps=12.2
|
|
[Episode 28290] reward=-120308799.1 actor_loss=0.2760 critic_loss=151885852398.9333 entropy=17.5691 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 28300] reward=-122634715.1 actor_loss=0.3146 critic_loss=182878731377.7778 entropy=17.5630 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 28300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545913.8 mean_steps=13.3
|
|
[Episode 28310] reward=-120881439.6 actor_loss=0.2376 critic_loss=153864587946.6667 entropy=17.5591 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 28320] reward=-118183211.3 actor_loss=0.3816 critic_loss=152038021012.2105 entropy=17.5565 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 28320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500541.5 mean_steps=14.2
|
|
[Episode 28330] reward=-118205972.0 actor_loss=0.4131 critic_loss=154993219291.4286 entropy=17.5593 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 28340] reward=-114111386.0 actor_loss=0.3177 critic_loss=145657373876.7059 entropy=17.5690 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 28340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470435.5 mean_steps=14.1
|
|
[Episode 28350] reward=-117171300.0 actor_loss=0.3507 critic_loss=152107427237.6471 entropy=17.5787 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 28360] reward=-121736614.9 actor_loss=0.3313 critic_loss=160479178536.4211 entropy=17.5807 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 28360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-576621.4 mean_steps=12.8
|
|
[Episode 28370] reward=-118961301.5 actor_loss=0.2377 critic_loss=153461975401.4118 entropy=17.5838 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 28380] reward=-120744435.6 actor_loss=0.2548 critic_loss=153235604626.2857 entropy=17.5885 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 28380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500383.7 mean_steps=15.5
|
|
[Episode 28390] reward=-121450488.1 actor_loss=0.2440 critic_loss=156616989478.7879 entropy=17.5688 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 28400] reward=-121983359.0 actor_loss=0.3042 critic_loss=160607448064.0000 entropy=17.5855 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 28400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553424.6 mean_steps=13.5
|
|
[Episode 28410] reward=-118342241.6 actor_loss=0.3385 critic_loss=248580081956.5714 entropy=17.5733 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 28420] reward=-121589340.1 actor_loss=0.2480 critic_loss=157326450991.4074 entropy=17.5718 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 28420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504429.1 mean_steps=15.2
|
|
[Episode 28430] reward=-118355364.5 actor_loss=0.3334 critic_loss=158842327625.1429 entropy=17.5719 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 28440] reward=-120857850.8 actor_loss=0.3733 critic_loss=154600612271.1579 entropy=17.5732 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 28440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528845.8 mean_steps=13.3
|
|
[Episode 28450] reward=-121979866.7 actor_loss=0.1473 critic_loss=159085388946.2857 entropy=17.5707 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 28460] reward=-114990467.7 actor_loss=0.3720 critic_loss=165305282344.4211 entropy=17.5746 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 28460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477634.1 mean_steps=14.1
|
|
[Episode 28470] reward=-117771232.4 actor_loss=0.3680 critic_loss=153356847786.6667 entropy=17.5785 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 28480] reward=-114539945.2 actor_loss=0.3302 critic_loss=147620627062.1538 entropy=17.5679 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 28480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-450646.6 mean_steps=16.8
|
|
[Episode 28490] reward=-119222007.4 actor_loss=0.1505 critic_loss=159666474188.8000 entropy=17.5572 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 28500] reward=-118884168.5 actor_loss=0.2617 critic_loss=151264703199.1795 entropy=17.5565 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 28500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523508.5 mean_steps=14.4
|
|
[Episode 28510] reward=-114886663.2 actor_loss=0.3200 critic_loss=145177968904.2581 entropy=17.5508 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 28520] reward=-120589375.7 actor_loss=0.2444 critic_loss=160057996447.2889 entropy=17.5426 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 28520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413796.0 mean_steps=15.4
|
|
[Episode 28530] reward=-121046007.3 actor_loss=0.3003 critic_loss=159117568318.5778 entropy=17.5418 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 28540] reward=-118176244.0 actor_loss=0.3579 critic_loss=150872816753.7778 entropy=17.5354 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 28540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-446604.8 mean_steps=13.9
|
|
[Episode 28550] reward=-124338943.7 actor_loss=0.2759 critic_loss=173714325504.0000 entropy=17.5358 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 28560] reward=-117562653.2 actor_loss=0.3774 critic_loss=154490345472.0000 entropy=17.5299 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 28560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-485897.6 mean_steps=16.2
|
|
[Episode 28570] reward=-117738523.4 actor_loss=0.3009 critic_loss=152117584262.0952 entropy=17.5300 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 28580] reward=-116718721.8 actor_loss=0.4291 critic_loss=153515240106.6667 entropy=17.5199 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 28580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-365293.4 mean_steps=16.5
|
|
[Episode 28590] reward=-113035248.7 actor_loss=0.3352 critic_loss=142399783470.5454 entropy=17.5330 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 28600] reward=-119920816.7 actor_loss=0.2762 critic_loss=155135311451.8974 entropy=17.5218 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 28600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444293.1 mean_steps=15.6
|
|
[Episode 28610] reward=-119868899.2 actor_loss=0.2524 critic_loss=160735532373.3333 entropy=17.5388 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 28620] reward=-120470060.5 actor_loss=0.2476 critic_loss=155487216360.7273 entropy=17.5448 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 28620] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-376942.2 mean_steps=17.3
|
|
[Episode 28630] reward=-118392847.9 actor_loss=0.2980 critic_loss=147890304812.1379 entropy=17.5564 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 28640] reward=-113869635.6 actor_loss=0.2268 critic_loss=154380254412.8000 entropy=17.5486 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 28640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530246.2 mean_steps=13.3
|
|
[Episode 28650] reward=-119690400.2 actor_loss=0.2489 critic_loss=149072094822.4000 entropy=17.5440 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 28660] reward=-119761036.8 actor_loss=0.3842 critic_loss=157147692635.8974 entropy=17.5399 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 28660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439482.6 mean_steps=15.8
|
|
[Episode 28670] reward=-121314752.7 actor_loss=0.2072 critic_loss=157368838046.4762 entropy=17.5274 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 28680] reward=-116121535.2 actor_loss=0.2850 critic_loss=152601892788.1482 entropy=17.5183 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 28680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-474867.8 mean_steps=15.9
|
|
[Episode 28690] reward=-119443956.2 actor_loss=0.2698 critic_loss=155275487339.7895 entropy=17.5031 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 28700] reward=-124410069.7 actor_loss=0.1621 critic_loss=164342200230.9565 entropy=17.5048 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 28700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425213.4 mean_steps=15.7
|
|
[Episode 28710] reward=-114569761.1 actor_loss=0.3488 critic_loss=152869420119.7714 entropy=17.4981 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 28720] reward=-120822204.8 actor_loss=0.2476 critic_loss=160359981537.8824 entropy=17.5174 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 28720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527957.3 mean_steps=14.4
|
|
[Episode 28730] reward=-120286658.5 actor_loss=0.3000 critic_loss=155259073783.1724 entropy=17.5370 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 28740] reward=-124957537.8 actor_loss=0.3304 critic_loss=165316722688.0000 entropy=17.5440 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 28740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-331519.1 mean_steps=16.9
|
|
[Episode 28750] reward=-115359782.2 actor_loss=0.2804 critic_loss=154256695296.0000 entropy=17.5500 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 28760] reward=-119531517.8 actor_loss=0.3362 critic_loss=154082830238.4762 entropy=17.5528 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 28760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-597232.3 mean_steps=13.4
|
|
[Episode 28770] reward=-124048760.5 actor_loss=0.2947 critic_loss=168227167709.8667 entropy=17.5347 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 28780] reward=-115502786.5 actor_loss=0.2749 critic_loss=149329110630.4000 entropy=17.5506 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 28780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436812.7 mean_steps=15.8
|
|
[Episode 28790] reward=-123007611.9 actor_loss=0.2582 critic_loss=164855067685.9259 entropy=17.5577 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 28800] reward=-119604737.7 actor_loss=0.3431 critic_loss=166617875212.1905 entropy=17.5751 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 28800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457827.1 mean_steps=16.4
|
|
[Episode 28810] reward=-125085123.7 actor_loss=0.3017 critic_loss=163714874709.3333 entropy=17.5793 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 28820] reward=-118178563.8 actor_loss=0.3091 critic_loss=149168858908.4445 entropy=17.5713 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 28820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-451456.3 mean_steps=16.9
|
|
[Episode 28830] reward=-115643043.6 actor_loss=0.1992 critic_loss=146538019603.6923 entropy=17.5709 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 28840] reward=-113989269.9 actor_loss=0.2921 critic_loss=144294722515.4783 entropy=17.5594 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 28840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-467902.8 mean_steps=15.6
|
|
[Episode 28850] reward=-119890862.5 actor_loss=0.2247 critic_loss=157130115936.7111 entropy=17.5578 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 28860] reward=-120211011.3 actor_loss=0.3128 critic_loss=155529578320.4572 entropy=17.5554 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 28860] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-661571.2 mean_steps=11.7
|
|
[Episode 28870] reward=-119340974.2 actor_loss=0.4017 critic_loss=155240068133.9259 entropy=17.5583 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 28880] reward=-126304092.5 actor_loss=0.2660 critic_loss=473364144559.1579 entropy=17.5587 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 28880] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-665492.4 mean_steps=12.3
|
|
[Episode 28890] reward=-113259162.4 actor_loss=0.3387 critic_loss=148629126686.1176 entropy=17.5483 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 28900] reward=-122169578.8 actor_loss=0.2970 critic_loss=159883982555.4286 entropy=17.5457 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 28900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-411973.4 mean_steps=15.6
|
|
[Episode 28910] reward=-121113369.7 actor_loss=0.2596 critic_loss=152513897358.2222 entropy=17.5340 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 28920] reward=-117512351.9 actor_loss=0.3270 critic_loss=146584318771.2000 entropy=17.5600 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 28920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521584.6 mean_steps=15.1
|
|
[Episode 28930] reward=-118479007.0 actor_loss=0.2749 critic_loss=149301612916.3636 entropy=17.5504 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 28940] reward=-121707604.8 actor_loss=0.2561 critic_loss=158288360711.3143 entropy=17.5489 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 28940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-562231.6 mean_steps=14.5
|
|
[Episode 28950] reward=-116305881.8 actor_loss=0.3285 critic_loss=145592431738.8800 entropy=17.5448 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 28960] reward=-112940673.9 actor_loss=0.3744 critic_loss=149647297461.0732 entropy=17.5367 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 28960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431184.0 mean_steps=15.8
|
|
[Episode 28970] reward=-123708589.9 actor_loss=0.1741 critic_loss=164088424920.6154 entropy=17.5326 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 28980] reward=-116784089.0 actor_loss=0.3444 critic_loss=152731759411.2000 entropy=17.5244 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 28980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504830.6 mean_steps=14.5
|
|
[Episode 28990] reward=-122165564.0 actor_loss=0.3106 critic_loss=161937031168.0000 entropy=17.5272 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 29000] reward=-116030077.1 actor_loss=0.2787 critic_loss=153850237106.0869 entropy=17.5331 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 29000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-591774.1 mean_steps=12.8
|
|
[Episode 29010] reward=-116066397.4 actor_loss=0.3538 critic_loss=146825486767.1579 entropy=17.5284 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 29020] reward=-117588833.6 actor_loss=0.3503 critic_loss=157494012586.6667 entropy=17.5421 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 29020] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-349559.6 mean_steps=17.9
|
|
[Episode 29030] reward=-119688694.1 actor_loss=0.3695 critic_loss=152871066009.6000 entropy=17.5371 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 29040] reward=-116433354.4 actor_loss=0.2650 critic_loss=290940960112.6400 entropy=17.5104 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 29040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395421.9 mean_steps=16.4
|
|
[Episode 29050] reward=-120590254.0 actor_loss=0.2279 critic_loss=153423314944.0000 entropy=17.5132 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 29060] reward=-120832221.2 actor_loss=0.2282 critic_loss=155398280169.2444 entropy=17.5124 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 29060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481777.0 mean_steps=14.6
|
|
[Episode 29070] reward=-115713741.4 actor_loss=0.3034 critic_loss=144312602715.0222 entropy=17.5265 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 29080] reward=-118734506.6 actor_loss=0.3806 critic_loss=157264828097.4222 entropy=17.5224 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 29080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402764.3 mean_steps=15.3
|
|
[Episode 29090] reward=-119637224.4 actor_loss=0.2695 critic_loss=155169879287.1724 entropy=17.5135 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 29100] reward=-118429880.5 actor_loss=0.2689 critic_loss=178438387712.0000 entropy=17.5155 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 29100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-532318.5 mean_steps=15.7
|
|
[Episode 29110] reward=-121006156.2 actor_loss=0.2974 critic_loss=157313238416.6956 entropy=17.5190 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 29120] reward=-123946106.8 actor_loss=0.2304 critic_loss=158499849284.2667 entropy=17.5345 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 29120] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-309675.1 mean_steps=16.4
|
|
[Episode 29130] reward=-113900721.0 actor_loss=0.2574 critic_loss=151305812591.3044 entropy=17.5328 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 29140] reward=-106847582.7 actor_loss=0.2880 critic_loss=141534028068.5714 entropy=17.5351 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 29140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599742.7 mean_steps=12.8
|
|
[Episode 29150] reward=-118710451.8 actor_loss=0.2735 critic_loss=160139267954.7586 entropy=17.5335 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 29160] reward=-111187673.4 actor_loss=0.3299 critic_loss=157829944442.8800 entropy=17.5355 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 29160] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-667703.6 mean_steps=11.6
|
|
[Episode 29170] reward=-119985558.0 actor_loss=0.1965 critic_loss=153040319186.8235 entropy=17.5397 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 29180] reward=-120216638.3 actor_loss=0.3207 critic_loss=156909128908.8000 entropy=17.5377 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 29180] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-396169.0 mean_steps=17.4
|
|
[Episode 29190] reward=-115449073.2 actor_loss=0.2585 critic_loss=148794249707.5200 entropy=17.5262 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 29200] reward=-125118975.4 actor_loss=0.3010 critic_loss=285562200808.7273 entropy=17.5428 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 29200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571494.4 mean_steps=12.9
|
|
[Episode 29210] reward=-117330168.1 actor_loss=0.2743 critic_loss=149420229089.8824 entropy=17.5330 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 29220] reward=-119399874.2 actor_loss=0.3481 critic_loss=155123913781.8947 entropy=17.5146 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 29220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571957.7 mean_steps=12.7
|
|
[Episode 29230] reward=-113432792.5 actor_loss=0.4044 critic_loss=145283818564.2667 entropy=17.5266 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 29240] reward=-120338220.9 actor_loss=0.2551 critic_loss=154515781924.5714 entropy=17.5306 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 29240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513065.1 mean_steps=13.7
|
|
[Episode 29250] reward=-120007105.2 actor_loss=0.2845 critic_loss=157490205354.6667 entropy=17.5405 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 29260] reward=-121398054.4 actor_loss=0.2032 critic_loss=154243416064.0000 entropy=17.5293 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 29260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577348.4 mean_steps=13.4
|
|
[Episode 29270] reward=-114820259.5 actor_loss=0.3340 critic_loss=149160259899.0769 entropy=17.5325 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 29280] reward=-115669397.6 actor_loss=0.3528 critic_loss=149524082211.7209 entropy=17.5113 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 29280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-537550.1 mean_steps=15.4
|
|
[Episode 29290] reward=-119957290.4 actor_loss=0.3010 critic_loss=149714142120.2286 entropy=17.5188 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 29300] reward=-115473355.4 actor_loss=0.3410 critic_loss=145257218048.0000 entropy=17.5240 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 29300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-432721.5 mean_steps=16.6
|
|
[Episode 29310] reward=-118211721.1 actor_loss=0.2626 critic_loss=148605462370.4615 entropy=17.5220 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 29320] reward=-118584009.3 actor_loss=0.3733 critic_loss=164851217050.7907 entropy=17.5284 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 29320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575208.1 mean_steps=12.8
|
|
[Episode 29330] reward=-117624467.1 actor_loss=0.3288 critic_loss=154757241124.5714 entropy=17.5173 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 29340] reward=-121087252.6 actor_loss=0.3259 critic_loss=150147915511.7419 entropy=17.5070 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 29340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-418823.1 mean_steps=16.4
|
|
[Episode 29350] reward=-120859495.6 actor_loss=0.2437 critic_loss=156776432745.9310 entropy=17.5329 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 29360] reward=-114358593.3 actor_loss=0.3485 critic_loss=146275382476.8000 entropy=17.5381 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 29360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400335.4 mean_steps=15.2
|
|
[Episode 29370] reward=-122791021.3 actor_loss=0.1778 critic_loss=155530308078.3448 entropy=17.5360 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 29380] reward=-121326257.8 actor_loss=0.2282 critic_loss=158663884435.9111 entropy=17.5419 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 29380] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-747914.8 mean_steps=10.7
|
|
[Episode 29390] reward=-111928718.6 actor_loss=0.3357 critic_loss=144546454127.3044 entropy=17.5445 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 29400] reward=-115426873.2 actor_loss=0.4802 critic_loss=142709505325.1765 entropy=17.5352 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1536 front_blocked=0
|
|
[Eval 29400] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-773147.4 mean_steps=10.8
|
|
[Episode 29410] reward=-116745046.0 actor_loss=0.3656 critic_loss=152983891321.2632 entropy=17.5379 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 29420] reward=-122539739.6 actor_loss=0.3208 critic_loss=155083889868.8000 entropy=17.5416 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 29420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391912.7 mean_steps=16.1
|
|
[Episode 29430] reward=-117962851.6 actor_loss=0.3563 critic_loss=151484238506.6667 entropy=17.5482 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 29440] reward=-119495781.4 actor_loss=0.2751 critic_loss=155205801984.0000 entropy=17.5397 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 29440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462162.6 mean_steps=15.8
|
|
[Episode 29450] reward=-122448226.3 actor_loss=0.2361 critic_loss=158723844050.4889 entropy=17.5518 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 29460] reward=-119594175.7 actor_loss=0.3428 critic_loss=157043544746.6667 entropy=17.5477 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 29460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454861.8 mean_steps=14.8
|
|
[Episode 29470] reward=-116920102.7 actor_loss=0.2861 critic_loss=147007506022.4000 entropy=17.5450 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 29480] reward=-135665848.2 actor_loss=0.3287 critic_loss=1222109965698.8445 entropy=17.5729 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 29480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-459395.0 mean_steps=16.5
|
|
[Episode 29490] reward=-118010969.0 actor_loss=0.3296 critic_loss=154025839820.8000 entropy=17.5730 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 29500] reward=-118032768.7 actor_loss=0.2661 critic_loss=170918167113.1429 entropy=17.5839 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 29500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-477750.0 mean_steps=16.1
|
|
[Episode 29510] reward=-118209791.1 actor_loss=0.3615 critic_loss=155935562865.7778 entropy=17.5800 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 29520] reward=-115794981.1 actor_loss=0.3732 critic_loss=147010627356.4445 entropy=17.5671 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 29520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-354340.5 mean_steps=17.0
|
|
[Episode 29530] reward=-118470759.4 actor_loss=0.3074 critic_loss=165312765470.1176 entropy=17.5801 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 29540] reward=-129654806.8 actor_loss=0.3058 critic_loss=616958965987.5555 entropy=17.5820 approx_kl=0.0038 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 29540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457806.3 mean_steps=15.2
|
|
[Episode 29550] reward=-120816780.1 actor_loss=0.2607 critic_loss=157795341516.8000 entropy=17.5785 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 29560] reward=-116425727.1 actor_loss=0.3465 critic_loss=149233159469.1765 entropy=17.5784 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 29560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-491727.1 mean_steps=16.1
|
|
[Episode 29570] reward=-118748929.8 actor_loss=0.2231 critic_loss=181226501605.0526 entropy=17.5798 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 29580] reward=-130850968.8 actor_loss=0.2770 critic_loss=752540053876.3636 entropy=17.5711 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 29580] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-406825.7 mean_steps=17.4
|
|
[Episode 29590] reward=-157876259.3 actor_loss=5.4085 critic_loss=2453367384425.4116 entropy=17.5743 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 29600] reward=-120915913.9 actor_loss=0.2228 critic_loss=152692258749.9355 entropy=17.5772 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 29600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490945.8 mean_steps=14.4
|
|
[Episode 29610] reward=-116463394.2 actor_loss=0.2391 critic_loss=151607091785.1429 entropy=17.5884 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 29620] reward=-112391755.3 actor_loss=0.3072 critic_loss=145280297642.6667 entropy=17.5882 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 29620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484303.6 mean_steps=15.2
|
|
[Episode 29630] reward=-115106976.3 actor_loss=0.3891 critic_loss=146897150279.6800 entropy=17.5864 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 29640] reward=-122033554.2 actor_loss=0.3031 critic_loss=158466241331.2000 entropy=17.5841 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 29640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448624.2 mean_steps=14.7
|
|
[Episode 29650] reward=-154092127.2 actor_loss=7.9229 critic_loss=4383887529797.8184 entropy=17.5831 approx_kl=0.0026 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 29660] reward=-116918179.4 actor_loss=0.3095 critic_loss=152197979297.6842 entropy=17.5673 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 29660] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-321928.0 mean_steps=16.7
|
|
[Episode 29670] reward=-117891336.5 actor_loss=0.2722 critic_loss=343945834390.9744 entropy=17.5880 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 29680] reward=-118992943.3 actor_loss=0.2472 critic_loss=155816032413.5385 entropy=17.5965 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 29680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-389040.9 mean_steps=16.7
|
|
[Episode 29690] reward=-121362162.7 actor_loss=0.3250 critic_loss=160828424936.7273 entropy=17.5980 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 29700] reward=-121502997.5 actor_loss=0.2582 critic_loss=157125838620.4445 entropy=17.5960 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 29700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-372857.1 mean_steps=16.3
|
|
[Episode 29710] reward=-124866259.9 actor_loss=0.2736 critic_loss=162414623675.7333 entropy=17.5808 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 29720] reward=-119394420.7 actor_loss=0.2802 critic_loss=158274335175.1111 entropy=17.5807 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 29720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470488.5 mean_steps=13.8
|
|
[Episode 29730] reward=-114913214.5 actor_loss=0.3466 critic_loss=145182248140.8000 entropy=17.5864 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 29740] reward=-112654333.2 actor_loss=0.3424 critic_loss=144290318654.5778 entropy=17.5756 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 29740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487966.1 mean_steps=15.1
|
|
[Episode 29750] reward=-119888460.5 actor_loss=0.2762 critic_loss=153715552451.0476 entropy=17.5727 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 29760] reward=-114408887.2 actor_loss=0.3294 critic_loss=143990864099.5555 entropy=17.5660 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 29760] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-309946.5 mean_steps=18.1
|
|
[Episode 29770] reward=-119262557.9 actor_loss=0.2397 critic_loss=153170102044.4445 entropy=17.5572 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 29780] reward=-115397154.3 actor_loss=0.3634 critic_loss=154981179392.0000 entropy=17.5480 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 29780] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-343024.1 mean_steps=17.8
|
|
[Episode 29790] reward=-126696663.2 actor_loss=0.3360 critic_loss=580149864448.0000 entropy=17.5504 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 29800] reward=-119739938.2 actor_loss=0.3212 critic_loss=155090043335.1111 entropy=17.5435 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 29800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518788.1 mean_steps=15.3
|
|
[Episode 29810] reward=-120049881.9 actor_loss=0.2737 critic_loss=157357203655.8049 entropy=17.5467 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 29820] reward=-124283306.5 actor_loss=0.2931 critic_loss=158360149767.7576 entropy=17.5403 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 29820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437574.1 mean_steps=15.5
|
|
[Episode 29830] reward=-114210994.0 actor_loss=0.4017 critic_loss=144981881287.1111 entropy=17.5588 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 29840] reward=-120137858.2 actor_loss=0.2564 critic_loss=152711739164.4445 entropy=17.5617 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 29840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-390522.7 mean_steps=16.3
|
|
[Episode 29850] reward=-120262630.7 actor_loss=0.1889 critic_loss=195113710855.3143 entropy=17.5645 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 29860] reward=-118028432.2 actor_loss=0.2863 critic_loss=150704894043.0222 entropy=17.5448 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 29860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-433894.7 mean_steps=16.8
|
|
[Episode 29870] reward=-116723804.2 actor_loss=0.3066 critic_loss=146055656877.4193 entropy=17.5490 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 29880] reward=-120564485.3 actor_loss=0.2961 critic_loss=152500616123.7333 entropy=17.5368 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 29880] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-354841.2 mean_steps=17.1
|
|
[Episode 29890] reward=-124647960.5 actor_loss=0.2508 critic_loss=162242255005.5385 entropy=17.5392 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 29900] reward=-124945351.2 actor_loss=0.2089 critic_loss=167554011011.8788 entropy=17.5625 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 29900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-459093.1 mean_steps=14.1
|
|
[Episode 29910] reward=-117103774.3 actor_loss=0.2074 critic_loss=149736920117.8947 entropy=17.5654 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 29920] reward=-117180868.4 actor_loss=0.2066 critic_loss=145802060946.2857 entropy=17.5682 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 29920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522437.1 mean_steps=14.8
|
|
[Episode 29930] reward=-117533696.6 actor_loss=0.3779 critic_loss=149106908160.0000 entropy=17.5587 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 29940] reward=-126604128.1 actor_loss=0.1954 critic_loss=160890915939.0968 entropy=17.5777 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 29940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-580106.5 mean_steps=14.8
|
|
[Episode 29950] reward=-118167170.3 actor_loss=0.4117 critic_loss=147686120001.6410 entropy=17.5872 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 29960] reward=-120096400.6 actor_loss=0.3555 critic_loss=162451204778.6667 entropy=17.5895 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 29960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502477.3 mean_steps=15.6
|
|
[Episode 29970] reward=-118755768.2 actor_loss=0.3808 critic_loss=150476503121.9200 entropy=17.5785 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 29980] reward=-119517598.3 actor_loss=0.2931 critic_loss=148782786969.6000 entropy=17.5675 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 29980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467527.3 mean_steps=14.9
|
|
[Episode 29990] reward=-118757994.6 actor_loss=0.2384 critic_loss=167607024139.9070 entropy=17.5792 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 30000] reward=-118946948.8 actor_loss=0.2670 critic_loss=149405152779.3778 entropy=17.5887 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 30000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-485512.7 mean_steps=13.1
|
|
[Episode 30010] reward=-112382040.0 actor_loss=0.4756 critic_loss=146231693403.0222 entropy=17.5839 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 30020] reward=-120356959.0 actor_loss=0.2098 critic_loss=169840199452.4445 entropy=17.5884 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 30020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520500.0 mean_steps=14.3
|
|
[Episode 30030] reward=-115913239.4 actor_loss=0.3586 critic_loss=161956631717.1613 entropy=17.5950 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 30040] reward=-119105652.9 actor_loss=0.2779 critic_loss=153350621128.6487 entropy=17.6054 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 30040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-346970.4 mean_steps=16.1
|
|
[Episode 30050] reward=-122431662.4 actor_loss=0.3006 critic_loss=164282655464.7273 entropy=17.6074 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 30060] reward=-115181211.0 actor_loss=0.3740 critic_loss=144016193324.1379 entropy=17.5944 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 30060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493355.5 mean_steps=15.4
|
|
[Episode 30070] reward=-120429985.4 actor_loss=0.3437 critic_loss=151792213805.9487 entropy=17.6044 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 30080] reward=-123959968.8 actor_loss=0.2481 critic_loss=161420073005.5111 entropy=17.6209 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 30080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-639410.5 mean_steps=12.1
|
|
[Episode 30090] reward=-118210756.8 actor_loss=0.3210 critic_loss=147316128335.6444 entropy=17.6231 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 30100] reward=-123193064.8 actor_loss=0.2244 critic_loss=157575782096.5926 entropy=17.6330 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 30100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-470228.8 mean_steps=16.1
|
|
[Episode 30110] reward=-122824275.2 actor_loss=0.2238 critic_loss=158602893312.0000 entropy=17.6314 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 30120] reward=-120189815.7 actor_loss=0.2551 critic_loss=155593057621.3333 entropy=17.6253 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 30120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-652736.2 mean_steps=124.2
|
|
[Episode 30130] reward=-119070628.4 actor_loss=0.2819 critic_loss=155459332143.6279 entropy=17.6301 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 30140] reward=-120184999.7 actor_loss=0.2943 critic_loss=174915451617.2800 entropy=17.6367 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 30140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-420668.9 mean_steps=16.4
|
|
[Episode 30150] reward=-119779242.4 actor_loss=0.3436 critic_loss=153859881483.9070 entropy=17.6559 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 30160] reward=-123805479.0 actor_loss=0.2487 critic_loss=246953815341.1765 entropy=17.6577 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 30160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524826.5 mean_steps=14.1
|
|
[Episode 30170] reward=-117385213.5 actor_loss=0.2940 critic_loss=157157397065.1429 entropy=17.6603 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 30180] reward=-119595985.7 actor_loss=0.2439 critic_loss=151348561001.9310 entropy=17.6543 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 30180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507414.4 mean_steps=14.6
|
|
[Episode 30190] reward=-120632169.7 actor_loss=0.3488 critic_loss=159622661916.4445 entropy=17.6543 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 30200] reward=-122572819.6 actor_loss=0.3042 critic_loss=157322628995.8788 entropy=17.6578 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 30200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-595068.9 mean_steps=13.1
|
|
[Episode 30210] reward=-124755943.7 actor_loss=0.2340 critic_loss=164889589248.0000 entropy=17.6529 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 30220] reward=-117614367.3 actor_loss=0.3335 critic_loss=161084549438.5778 entropy=17.6572 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 30220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552871.6 mean_steps=13.3
|
|
[Episode 30230] reward=-123882692.5 actor_loss=0.2541 critic_loss=161963851776.0000 entropy=17.6589 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 30240] reward=-121603452.7 actor_loss=0.2948 critic_loss=151612083278.7692 entropy=17.6595 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 30240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-528571.2 mean_steps=16.1
|
|
[Episode 30250] reward=-119616657.0 actor_loss=0.3065 critic_loss=152256445719.2727 entropy=17.6485 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 30260] reward=-121175673.7 actor_loss=0.2439 critic_loss=152335548416.0000 entropy=17.6430 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 30260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543201.6 mean_steps=13.5
|
|
[Episode 30270] reward=-120992357.7 actor_loss=0.2859 critic_loss=158653490220.5217 entropy=17.6453 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 30280] reward=-117272737.1 actor_loss=0.3463 critic_loss=151185124693.3333 entropy=17.6550 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 30280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-644776.2 mean_steps=12.2
|
|
[Episode 30290] reward=-121071561.7 actor_loss=0.2811 critic_loss=151656356977.7778 entropy=17.6566 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 30300] reward=-126374576.7 actor_loss=0.2404 critic_loss=167378860987.7333 entropy=17.6581 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 30300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586906.0 mean_steps=12.8
|
|
[Episode 30310] reward=-120953639.3 actor_loss=0.3342 critic_loss=159128160467.8621 entropy=17.6506 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 30320] reward=-122074116.8 actor_loss=0.2346 critic_loss=156578580980.6222 entropy=17.6584 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 30320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510220.7 mean_steps=14.4
|
|
[Episode 30330] reward=-115247969.7 actor_loss=0.3500 critic_loss=146883228516.8485 entropy=17.6532 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 30340] reward=-118662188.6 actor_loss=0.3101 critic_loss=156708475392.0000 entropy=17.6616 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 30340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-512982.8 mean_steps=13.2
|
|
[Episode 30350] reward=-119092923.9 actor_loss=0.3420 critic_loss=161659122639.2381 entropy=17.6727 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 30360] reward=-122143197.0 actor_loss=0.3274 critic_loss=160159816817.7778 entropy=17.6882 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 30360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-584640.6 mean_steps=13.9
|
|
[Episode 30370] reward=-117184067.9 actor_loss=0.2814 critic_loss=150714450550.1538 entropy=17.6874 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 30380] reward=-112152121.9 actor_loss=0.2897 critic_loss=147585622926.2222 entropy=17.7007 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 30380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430945.4 mean_steps=14.7
|
|
[Episode 30390] reward=-123531103.3 actor_loss=0.2526 critic_loss=160107491028.2927 entropy=17.6900 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 30400] reward=-126813351.2 actor_loss=0.2524 critic_loss=165375608273.4546 entropy=17.6711 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 30400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513243.7 mean_steps=13.6
|
|
[Episode 30410] reward=-122715918.6 actor_loss=0.2693 critic_loss=164534946084.5714 entropy=17.6659 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 30420] reward=-125068752.7 actor_loss=0.2228 critic_loss=162436238155.2941 entropy=17.6603 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 30420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-390690.7 mean_steps=16.4
|
|
[Episode 30430] reward=-120073715.8 actor_loss=0.3350 critic_loss=153079874398.3158 entropy=17.6576 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 30440] reward=-116039513.1 actor_loss=0.2817 critic_loss=151560047993.2632 entropy=17.6617 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 30440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520227.4 mean_steps=14.3
|
|
[Episode 30450] reward=-122308990.7 actor_loss=0.2089 critic_loss=160878716369.4546 entropy=17.6732 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 30460] reward=-125411961.5 actor_loss=0.3044 critic_loss=218266782168.6154 entropy=17.6826 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 30460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541671.8 mean_steps=13.4
|
|
[Episode 30470] reward=-125474500.4 actor_loss=0.1806 critic_loss=163149684736.0000 entropy=17.6976 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 30480] reward=-123141721.6 actor_loss=0.2413 critic_loss=155506607340.3077 entropy=17.6808 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 30480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-544997.7 mean_steps=12.2
|
|
[Episode 30490] reward=-119164073.8 actor_loss=0.3710 critic_loss=153262446182.4000 entropy=17.6779 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 30500] reward=-126479898.1 actor_loss=0.3351 critic_loss=233147270609.4546 entropy=17.6753 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 30500] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-298514.7 mean_steps=17.8
|
|
[Episode 30510] reward=-119138587.8 actor_loss=0.2849 critic_loss=161493795328.0000 entropy=17.6711 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 30520] reward=-116886773.0 actor_loss=0.2421 critic_loss=144333420005.0526 entropy=17.6593 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 30520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-366545.1 mean_steps=17.1
|
|
[Episode 30530] reward=-117177323.7 actor_loss=0.3743 critic_loss=149656867089.0667 entropy=17.6683 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 30540] reward=-120065967.4 actor_loss=0.2543 critic_loss=153269479610.1818 entropy=17.6637 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 30540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-579507.5 mean_steps=12.1
|
|
[Episode 30550] reward=-123260398.0 actor_loss=0.3048 critic_loss=165877809152.0000 entropy=17.6703 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 30560] reward=-122820870.0 actor_loss=0.2005 critic_loss=156402721450.6667 entropy=17.6693 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 30560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473834.0 mean_steps=15.1
|
|
[Episode 30570] reward=-114240478.9 actor_loss=0.3853 critic_loss=146599163851.4872 entropy=17.6670 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 30580] reward=-118105839.8 actor_loss=0.3895 critic_loss=176747481588.6222 entropy=17.6505 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 30580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463332.6 mean_steps=14.6
|
|
[Episode 30590] reward=-114051404.5 actor_loss=0.4309 critic_loss=155665257130.6667 entropy=17.6465 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 30600] reward=-118875572.3 actor_loss=0.2369 critic_loss=157310690918.4000 entropy=17.6450 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 30600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550333.8 mean_steps=13.6
|
|
[Episode 30610] reward=-123326170.0 actor_loss=0.2535 critic_loss=158580053333.3333 entropy=17.6450 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 30620] reward=-125001737.9 actor_loss=0.2018 critic_loss=159365171266.0645 entropy=17.6377 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 30620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-590351.3 mean_steps=14.7
|
|
[Episode 30630] reward=-120497824.7 actor_loss=0.2755 critic_loss=157494480804.9778 entropy=17.6348 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 30640] reward=-118946871.9 actor_loss=0.3481 critic_loss=153108592453.8182 entropy=17.6217 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 30640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560706.6 mean_steps=12.8
|
|
[Episode 30650] reward=-119594090.0 actor_loss=0.3087 critic_loss=153606517191.1111 entropy=17.6242 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 30660] reward=-116400494.9 actor_loss=0.2847 critic_loss=150662607583.1795 entropy=17.6144 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 30660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-519152.6 mean_steps=13.3
|
|
[Episode 30670] reward=-121058952.3 actor_loss=0.2930 critic_loss=151013850391.2727 entropy=17.6110 approx_kl=0.0125 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 30680] reward=-125065737.6 actor_loss=0.2360 critic_loss=165602011818.6667 entropy=17.6188 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 30680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-404966.0 mean_steps=15.7
|
|
[Episode 30690] reward=-121403377.5 actor_loss=0.2343 critic_loss=150660053947.7333 entropy=17.6192 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 30700] reward=-117476463.4 actor_loss=0.2506 critic_loss=146472925696.0000 entropy=17.6154 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 30700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539914.5 mean_steps=14.4
|
|
[Episode 30710] reward=-122211260.6 actor_loss=0.2922 critic_loss=259745810064.4102 entropy=17.6237 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 30720] reward=-126865117.0 actor_loss=0.2711 critic_loss=311059110661.6889 entropy=17.6340 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 30720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501165.7 mean_steps=15.3
|
|
[Episode 30730] reward=-118940297.8 actor_loss=0.3135 critic_loss=158660553386.6667 entropy=17.6370 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 30740] reward=-115614351.4 actor_loss=0.3307 critic_loss=151481807494.7368 entropy=17.6434 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 30740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463853.5 mean_steps=16.1
|
|
[Episode 30750] reward=-114034532.2 actor_loss=0.2487 critic_loss=147170841941.3333 entropy=17.6380 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 30760] reward=-115250983.6 actor_loss=0.3029 critic_loss=152534034750.5778 entropy=17.6272 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 30760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-514183.7 mean_steps=13.3
|
|
[Episode 30770] reward=-120779052.2 actor_loss=0.2533 critic_loss=158520170177.4222 entropy=17.6406 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 30780] reward=-118748691.9 actor_loss=0.2770 critic_loss=156228361849.9048 entropy=17.6374 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 30780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-545831.3 mean_steps=12.9
|
|
[Episode 30790] reward=-119149445.7 actor_loss=0.3802 critic_loss=154895688424.7273 entropy=17.6334 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 30800] reward=-122724468.2 actor_loss=0.3374 critic_loss=158682240474.5366 entropy=17.6266 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 30800] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-375348.6 mean_steps=17.7
|
|
[Episode 30810] reward=-115273373.5 actor_loss=0.3638 critic_loss=143198450119.1111 entropy=17.6201 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 30820] reward=-119857299.5 actor_loss=0.2808 critic_loss=153747143975.8222 entropy=17.6402 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 30820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437155.1 mean_steps=14.8
|
|
[Episode 30830] reward=-115087819.9 actor_loss=0.3486 critic_loss=156865078272.0000 entropy=17.6375 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 30840] reward=-119531823.9 actor_loss=0.2864 critic_loss=166603644928.0000 entropy=17.6524 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 30840] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-369539.1 mean_steps=19.4
|
|
[Episode 30850] reward=-123163433.7 actor_loss=0.2833 critic_loss=161097920603.0222 entropy=17.6597 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 30860] reward=-115559838.6 actor_loss=0.3128 critic_loss=144663065941.3333 entropy=17.6460 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 30860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523963.3 mean_steps=14.4
|
|
[Episode 30870] reward=-115077808.2 actor_loss=0.3727 critic_loss=144778947788.8000 entropy=17.6655 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 30880] reward=-118284542.7 actor_loss=0.3503 critic_loss=154276889356.1905 entropy=17.6517 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 30880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560477.7 mean_steps=12.9
|
|
[Episode 30890] reward=-123999543.8 actor_loss=0.2600 critic_loss=158900001905.7778 entropy=17.6459 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 30900] reward=-112100371.2 actor_loss=0.4270 critic_loss=137638555010.8445 entropy=17.6529 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 30900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-552837.7 mean_steps=12.8
|
|
[Episode 30910] reward=-114529156.0 actor_loss=0.3021 critic_loss=144699748998.7368 entropy=17.6506 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 30920] reward=-123254788.2 actor_loss=0.3028 critic_loss=160400846475.6364 entropy=17.6521 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 30920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526946.3 mean_steps=13.2
|
|
[Episode 30930] reward=-119349059.6 actor_loss=0.2000 critic_loss=153273004851.2000 entropy=17.6444 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 30940] reward=-116733928.4 actor_loss=0.3677 critic_loss=159817894461.4400 entropy=17.6358 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 30940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427542.3 mean_steps=15.2
|
|
[Episode 30950] reward=-124611788.9 actor_loss=0.1917 critic_loss=166646825441.8824 entropy=17.6228 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 30960] reward=-119603513.2 actor_loss=0.2993 critic_loss=152888894733.4737 entropy=17.6171 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 30960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-377340.8 mean_steps=16.7
|
|
[Episode 30970] reward=-117898963.0 actor_loss=0.3791 critic_loss=154760756519.8222 entropy=17.6205 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 30980] reward=-121205973.4 actor_loss=0.2942 critic_loss=159026658963.9111 entropy=17.6451 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 30980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468182.1 mean_steps=15.2
|
|
[Episode 30990] reward=-119520066.7 actor_loss=0.3577 critic_loss=156415307138.8445 entropy=17.6361 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 31000] reward=-121522804.8 actor_loss=0.3224 critic_loss=159946810254.2222 entropy=17.6468 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 31000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541671.3 mean_steps=14.3
|
|
[Episode 31010] reward=-122495626.6 actor_loss=0.2141 critic_loss=157440036329.7391 entropy=17.6403 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 31020] reward=-117668192.2 actor_loss=0.3240 critic_loss=147959223427.2820 entropy=17.6406 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 31020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410838.0 mean_steps=15.7
|
|
[Episode 31030] reward=-119792835.7 actor_loss=0.3360 critic_loss=156153498828.8000 entropy=17.6285 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 31040] reward=-115834916.3 actor_loss=0.2558 critic_loss=142414606973.1555 entropy=17.6267 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 31040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501442.9 mean_steps=14.2
|
|
[Episode 31050] reward=-119874784.9 actor_loss=0.1982 critic_loss=153014487540.6222 entropy=17.6543 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 31060] reward=-120349380.9 actor_loss=0.2245 critic_loss=156256508313.6000 entropy=17.6522 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 31060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517777.2 mean_steps=13.3
|
|
[Episode 31070] reward=-117894130.5 actor_loss=0.2869 critic_loss=151126007417.9048 entropy=17.6560 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 31080] reward=-118197188.8 actor_loss=0.3236 critic_loss=148554435361.3913 entropy=17.6528 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 31080] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-269287.9 mean_steps=17.4
|
|
[Episode 31090] reward=-110983345.4 actor_loss=0.4013 critic_loss=141990749289.9310 entropy=17.6469 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 31100] reward=-122174767.1 actor_loss=0.2865 critic_loss=161602835069.1555 entropy=17.6560 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 31100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447432.0 mean_steps=15.9
|
|
[Episode 31110] reward=-122023019.5 actor_loss=0.2605 critic_loss=160456090081.8824 entropy=17.6564 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 31120] reward=-118822910.4 actor_loss=0.3870 critic_loss=156212015344.9412 entropy=17.6446 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 31120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407446.9 mean_steps=16.7
|
|
[Episode 31130] reward=-118546193.5 actor_loss=0.2529 critic_loss=152353902807.5789 entropy=17.6492 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 31140] reward=-118568015.6 actor_loss=0.3419 critic_loss=154755014246.4000 entropy=17.6338 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 31140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-527272.2 mean_steps=15.8
|
|
[Episode 31150] reward=-122814267.9 actor_loss=0.2896 critic_loss=159221937493.3333 entropy=17.6263 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 31160] reward=-118389029.1 actor_loss=0.3257 critic_loss=149743050379.6364 entropy=17.6291 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 31160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541017.8 mean_steps=14.8
|
|
[Episode 31170] reward=-115722250.2 actor_loss=0.3402 critic_loss=147579229696.0000 entropy=17.6202 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 31180] reward=-119174986.9 actor_loss=0.2463 critic_loss=149115922432.0000 entropy=17.6296 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 31180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619712.4 mean_steps=13.6
|
|
[Episode 31190] reward=-110359376.3 actor_loss=0.3804 critic_loss=141880490914.9091 entropy=17.6297 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 31200] reward=-119579504.0 actor_loss=0.2451 critic_loss=155090677122.8445 entropy=17.6495 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 31200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435163.9 mean_steps=14.4
|
|
[Episode 31210] reward=-119459075.2 actor_loss=0.2549 critic_loss=154417778041.2632 entropy=17.6677 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 31220] reward=-122734095.7 actor_loss=0.2602 critic_loss=157073665325.1765 entropy=17.6752 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 31220] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-774039.8 mean_steps=10.9
|
|
[Episode 31230] reward=-119210695.7 actor_loss=0.3273 critic_loss=156904756565.3333 entropy=17.6932 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 31240] reward=-118760222.0 actor_loss=0.2856 critic_loss=150837553834.6667 entropy=17.6948 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 31240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-417030.9 mean_steps=14.6
|
|
[Episode 31250] reward=-121320807.5 actor_loss=0.2379 critic_loss=154800699099.4286 entropy=17.6740 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 31260] reward=-123709977.6 actor_loss=0.2603 critic_loss=163094883826.8718 entropy=17.6886 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 31260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440231.4 mean_steps=14.9
|
|
[Episode 31270] reward=-116423408.8 actor_loss=0.3411 critic_loss=150766798524.6316 entropy=17.6771 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 31280] reward=-122529099.2 actor_loss=0.2895 critic_loss=164174306645.3333 entropy=17.6659 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 31280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-611046.6 mean_steps=13.1
|
|
[Episode 31290] reward=-120774294.6 actor_loss=0.2901 critic_loss=153524083916.8000 entropy=17.6683 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 31300] reward=-123953521.8 actor_loss=0.1630 critic_loss=158283277653.3333 entropy=17.6806 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 31300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-434350.2 mean_steps=16.6
|
|
[Episode 31310] reward=-113201835.0 actor_loss=0.3695 critic_loss=146352665629.2571 entropy=17.6644 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 31320] reward=-119722802.6 actor_loss=0.2517 critic_loss=155014158534.1935 entropy=17.6588 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 31320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-585978.8 mean_steps=11.7
|
|
[Episode 31330] reward=-122416907.4 actor_loss=0.3194 critic_loss=159116118308.5714 entropy=17.6619 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 31340] reward=-116737836.6 actor_loss=0.3214 critic_loss=148371861048.8889 entropy=17.6557 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 31340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468616.8 mean_steps=14.3
|
|
[Episode 31350] reward=-120457161.3 actor_loss=0.2698 critic_loss=152683642880.0000 entropy=17.6504 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 31360] reward=-120951336.6 actor_loss=0.2770 critic_loss=156493333904.6956 entropy=17.6366 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 31360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-506607.9 mean_steps=15.2
|
|
[Episode 31370] reward=-125955555.6 actor_loss=0.2119 critic_loss=162019226965.3333 entropy=17.6261 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 31380] reward=-114011196.9 actor_loss=0.2982 critic_loss=143650555611.4286 entropy=17.6251 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 31380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-372120.8 mean_steps=14.3
|
|
[Episode 31390] reward=-116844914.1 actor_loss=0.3269 critic_loss=149133585354.1053 entropy=17.6233 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 31400] reward=-117741213.6 actor_loss=0.2343 critic_loss=153178287445.3333 entropy=17.6128 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 31400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541850.7 mean_steps=14.4
|
|
[Episode 31410] reward=-122316751.3 actor_loss=0.2734 critic_loss=152851255296.0000 entropy=17.6025 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 31420] reward=-115650372.2 actor_loss=0.3677 critic_loss=145962952583.5294 entropy=17.6082 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 31420] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-330678.9 mean_steps=18.2
|
|
[Episode 31430] reward=-112139246.1 actor_loss=0.3154 critic_loss=145464774314.6667 entropy=17.6098 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 31440] reward=-124904317.7 actor_loss=0.2323 critic_loss=163002078208.0000 entropy=17.6167 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 31440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443612.8 mean_steps=15.3
|
|
[Episode 31450] reward=-124460803.4 actor_loss=0.3133 critic_loss=159735632817.2308 entropy=17.6146 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 31460] reward=-118438072.9 actor_loss=0.2684 critic_loss=148935565047.7419 entropy=17.6031 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 31460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534994.7 mean_steps=13.3
|
|
[Episode 31470] reward=-119439318.2 actor_loss=0.3456 critic_loss=150316818863.1579 entropy=17.6000 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 31480] reward=-120977798.0 actor_loss=0.2577 critic_loss=153533688490.6667 entropy=17.6004 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 31480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419104.1 mean_steps=15.6
|
|
[Episode 31490] reward=-122661387.4 actor_loss=0.2161 critic_loss=157194242048.0000 entropy=17.6012 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 31500] reward=-117259351.1 actor_loss=0.2801 critic_loss=150536722659.5555 entropy=17.6158 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 31500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498928.0 mean_steps=14.0
|
|
[Episode 31510] reward=-119958629.8 actor_loss=0.2936 critic_loss=151564944998.4000 entropy=17.6019 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 31520] reward=-120751726.7 actor_loss=0.3023 critic_loss=154049873920.0000 entropy=17.6072 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 31520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-398197.2 mean_steps=15.6
|
|
[Episode 31530] reward=-109834489.4 actor_loss=0.3047 critic_loss=141259508184.6154 entropy=17.5867 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 31540] reward=-120000042.6 actor_loss=0.3678 critic_loss=193108735707.4286 entropy=17.5920 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 31540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566668.3 mean_steps=12.4
|
|
[Episode 31550] reward=-123067001.4 actor_loss=0.2609 critic_loss=154084636847.5428 entropy=17.5963 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 31560] reward=-119143150.6 actor_loss=0.2658 critic_loss=155970763629.7143 entropy=17.5967 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 31560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492718.9 mean_steps=15.1
|
|
[Episode 31570] reward=-119789975.4 actor_loss=0.2684 critic_loss=150857870774.8571 entropy=17.5981 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 31580] reward=-120967527.9 actor_loss=0.3972 critic_loss=156406143730.5263 entropy=17.5999 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 31580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511602.4 mean_steps=14.8
|
|
[Episode 31590] reward=-120604817.4 actor_loss=0.3209 critic_loss=158346716119.0400 entropy=17.5946 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 31600] reward=-126435888.7 actor_loss=0.1582 critic_loss=160956290389.3333 entropy=17.5966 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 31600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490350.8 mean_steps=14.8
|
|
[Episode 31610] reward=-118913557.9 actor_loss=0.2620 critic_loss=148634315161.6000 entropy=17.5958 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 31620] reward=-120399344.0 actor_loss=0.2791 critic_loss=151983585603.3684 entropy=17.5941 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 31620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492708.3 mean_steps=14.0
|
|
[Episode 31630] reward=-117423178.9 actor_loss=0.3378 critic_loss=148378183972.5714 entropy=17.5886 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 31640] reward=-119842585.0 actor_loss=0.3288 critic_loss=152644584913.4546 entropy=17.5833 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 31640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-455020.4 mean_steps=13.2
|
|
[Episode 31650] reward=-123581527.7 actor_loss=0.2065 critic_loss=164125655040.0000 entropy=17.5833 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 31660] reward=-120776476.9 actor_loss=0.2308 critic_loss=150122784085.3333 entropy=17.5891 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 31660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-466565.4 mean_steps=16.3
|
|
[Episode 31670] reward=-124662414.1 actor_loss=0.3069 critic_loss=161154155941.6471 entropy=17.5900 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 31680] reward=-123166152.5 actor_loss=0.2681 critic_loss=159221637120.0000 entropy=17.6127 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 31680] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-368440.6 mean_steps=17.9
|
|
[Episode 31690] reward=-121116459.5 actor_loss=0.3024 critic_loss=154531450060.8000 entropy=17.6219 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 31700] reward=-116828286.5 actor_loss=0.2981 critic_loss=150642957854.1176 entropy=17.6376 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 31700] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-342320.1 mean_steps=18.1
|
|
[Episode 31710] reward=-124595231.4 actor_loss=0.2764 critic_loss=157822760029.0909 entropy=17.6429 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 31720] reward=-123260742.7 actor_loss=0.1899 critic_loss=159202003482.9474 entropy=17.6415 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 31720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472845.0 mean_steps=15.6
|
|
[Episode 31730] reward=-117219305.1 actor_loss=0.3472 critic_loss=149677060336.9412 entropy=17.6368 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 31740] reward=-118477292.4 actor_loss=0.3060 critic_loss=148009865431.5789 entropy=17.6303 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 31740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-580673.7 mean_steps=13.9
|
|
[Episode 31750] reward=-120440132.9 actor_loss=0.2623 critic_loss=155164664438.1538 entropy=17.6321 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 31760] reward=-120610717.3 actor_loss=0.2658 critic_loss=163849147733.3333 entropy=17.6362 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 31760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489149.2 mean_steps=15.1
|
|
[Episode 31770] reward=-118648384.0 actor_loss=0.3487 critic_loss=152554398956.3077 entropy=17.6261 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 31780] reward=-115482198.1 actor_loss=0.2517 critic_loss=145646682824.3478 entropy=17.6217 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 31780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570884.6 mean_steps=13.8
|
|
[Episode 31790] reward=-116202687.6 actor_loss=0.4025 critic_loss=151435167061.3333 entropy=17.6211 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 31800] reward=-121398749.8 actor_loss=0.3133 critic_loss=154694032497.7778 entropy=17.6184 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 31800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577561.8 mean_steps=13.7
|
|
[Episode 31810] reward=-121872331.9 actor_loss=0.3481 critic_loss=157829332445.8667 entropy=17.6358 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 31820] reward=-116854602.0 actor_loss=0.2768 critic_loss=147648062681.2121 entropy=17.6346 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 31820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-422687.2 mean_steps=17.6
|
|
[Episode 31830] reward=-120631039.8 actor_loss=0.2553 critic_loss=157646017877.3333 entropy=17.6433 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 31840] reward=-120057268.0 actor_loss=0.2549 critic_loss=154484837229.7143 entropy=17.6390 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 31840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497506.3 mean_steps=14.9
|
|
[Episode 31850] reward=-120417346.5 actor_loss=0.3473 critic_loss=156338902357.3333 entropy=17.6298 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 31860] reward=-119186812.8 actor_loss=0.2983 critic_loss=152693711238.0952 entropy=17.6207 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 31860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452786.6 mean_steps=15.7
|
|
[Episode 31870] reward=-123161350.9 actor_loss=0.2571 critic_loss=159398151782.4000 entropy=17.6271 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 31880] reward=-114985116.7 actor_loss=0.2503 critic_loss=146992054800.5161 entropy=17.6380 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 31880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457720.2 mean_steps=14.6
|
|
[Episode 31890] reward=-118152068.2 actor_loss=0.4048 critic_loss=150842140964.5714 entropy=17.6449 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 31900] reward=-126584788.8 actor_loss=0.2005 critic_loss=159441819461.8182 entropy=17.6487 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 31900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581346.8 mean_steps=13.7
|
|
[Episode 31910] reward=-120298900.7 actor_loss=0.3543 critic_loss=154065295769.6000 entropy=17.6500 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 31920] reward=-120326599.0 actor_loss=0.3020 critic_loss=155510727566.2222 entropy=17.6543 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 31920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480790.4 mean_steps=14.6
|
|
[Episode 31930] reward=-124125647.1 actor_loss=0.2639 critic_loss=170970677729.8824 entropy=17.6444 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 31940] reward=-122693314.0 actor_loss=0.2846 critic_loss=159850823680.0000 entropy=17.6517 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 31940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532346.3 mean_steps=13.9
|
|
[Episode 31950] reward=-116769788.0 actor_loss=0.2873 critic_loss=151758355543.7714 entropy=17.6599 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 31960] reward=-122640992.8 actor_loss=0.2412 critic_loss=204430403584.0000 entropy=17.6765 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 31960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478167.4 mean_steps=15.1
|
|
[Episode 31970] reward=-123709866.3 actor_loss=0.2260 critic_loss=162627957356.6060 entropy=17.6841 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 31980] reward=-116295534.6 actor_loss=0.4520 critic_loss=147015322880.0000 entropy=17.6791 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 31980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527751.1 mean_steps=14.6
|
|
[Episode 31990] reward=-119764459.1 actor_loss=0.3067 critic_loss=157604963151.4483 entropy=17.6840 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 32000] reward=-119378862.2 actor_loss=0.2872 critic_loss=161827918002.0869 entropy=17.6953 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 32000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-596866.5 mean_steps=14.0
|
|
[Episode 32010] reward=-123120171.0 actor_loss=0.2583 critic_loss=158243592794.3529 entropy=17.7005 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 32020] reward=-118198434.7 actor_loss=0.2648 critic_loss=148636703493.6889 entropy=17.6993 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 32020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-436507.3 mean_steps=14.6
|
|
[Episode 32030] reward=-120379653.2 actor_loss=0.3115 critic_loss=157377142784.0000 entropy=17.6859 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 32040] reward=-124552220.9 actor_loss=0.2560 critic_loss=157251963866.0741 entropy=17.6932 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 32040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554068.7 mean_steps=13.3
|
|
[Episode 32050] reward=-119232685.4 actor_loss=0.3382 critic_loss=151940841472.0000 entropy=17.6802 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 32060] reward=-123364246.8 actor_loss=0.2233 critic_loss=157268313788.6316 entropy=17.6770 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 32060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-459772.0 mean_steps=13.8
|
|
[Episode 32070] reward=-121404879.8 actor_loss=0.2893 critic_loss=159656176298.6667 entropy=17.6804 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 32080] reward=-122255550.8 actor_loss=0.2032 critic_loss=155878627009.4222 entropy=17.6988 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 32080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548524.2 mean_steps=13.8
|
|
[Episode 32090] reward=-119830626.9 actor_loss=0.4184 critic_loss=161593551257.6000 entropy=17.6987 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 32100] reward=-122831790.7 actor_loss=0.2225 critic_loss=156894591096.4706 entropy=17.6870 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 32100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-460459.8 mean_steps=14.0
|
|
[Episode 32110] reward=-118022728.6 actor_loss=0.3007 critic_loss=151679598774.0444 entropy=17.6933 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 32120] reward=-120187395.6 actor_loss=0.2564 critic_loss=150761000406.4865 entropy=17.6781 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 32120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477619.2 mean_steps=14.9
|
|
[Episode 32130] reward=-119427167.4 actor_loss=0.2251 critic_loss=151366025580.0889 entropy=17.6803 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 32140] reward=-117751056.4 actor_loss=0.2525 critic_loss=150020056485.6471 entropy=17.6853 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 32140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-652967.0 mean_steps=13.6
|
|
[Episode 32150] reward=-119054329.8 actor_loss=0.2668 critic_loss=151514758204.2353 entropy=17.6805 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 32160] reward=-121600540.0 actor_loss=0.2648 critic_loss=165532826062.4516 entropy=17.6917 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 32160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-568763.6 mean_steps=12.8
|
|
[Episode 32170] reward=-120991278.5 actor_loss=0.2713 critic_loss=157708900165.8182 entropy=17.7030 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 32180] reward=-122739128.7 actor_loss=0.2924 critic_loss=184478154752.0000 entropy=17.6849 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 32180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472270.1 mean_steps=15.2
|
|
[Episode 32190] reward=-123212606.9 actor_loss=0.3603 critic_loss=159920213924.9778 entropy=17.6784 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 32200] reward=-120712877.5 actor_loss=0.3262 critic_loss=155234108254.3158 entropy=17.6940 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 32200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437767.5 mean_steps=15.9
|
|
[Episode 32210] reward=-119484536.5 actor_loss=0.2776 critic_loss=155216604066.9091 entropy=17.6900 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 32220] reward=-120639297.2 actor_loss=0.3870 critic_loss=190252322652.1600 entropy=17.6899 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 32220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402741.5 mean_steps=16.9
|
|
[Episode 32230] reward=-117355923.3 actor_loss=0.3258 critic_loss=150835647186.8235 entropy=17.6893 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 32240] reward=-115846493.0 actor_loss=0.2917 critic_loss=147670901217.8824 entropy=17.6916 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 32240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458049.5 mean_steps=16.3
|
|
[Episode 32250] reward=-117157780.6 actor_loss=0.4178 critic_loss=156780112802.9091 entropy=17.6989 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 32260] reward=-115305148.1 actor_loss=0.3233 critic_loss=143721186878.4390 entropy=17.6981 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 32260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477482.2 mean_steps=14.8
|
|
[Episode 32270] reward=-117723768.7 actor_loss=0.3127 critic_loss=153697186909.0909 entropy=17.6793 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 32280] reward=-115062797.4 actor_loss=0.2776 critic_loss=151534177484.8000 entropy=17.6791 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 32280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-430518.5 mean_steps=16.7
|
|
[Episode 32290] reward=-115853019.5 actor_loss=0.3620 critic_loss=150075943384.6154 entropy=17.6726 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 32300] reward=-116767118.9 actor_loss=0.3119 critic_loss=147425974317.5111 entropy=17.6667 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 32300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490376.7 mean_steps=14.2
|
|
[Episode 32310] reward=-116375951.6 actor_loss=0.4125 critic_loss=150127008699.7333 entropy=17.6571 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 32320] reward=-120745473.5 actor_loss=0.2158 critic_loss=155003474678.5185 entropy=17.6620 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 32320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459384.9 mean_steps=15.3
|
|
[Episode 32330] reward=-120518800.6 actor_loss=0.2469 critic_loss=155906643285.3333 entropy=17.6574 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 32340] reward=-117771366.5 actor_loss=0.3217 critic_loss=147566176135.5294 entropy=17.6620 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 32340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-351368.3 mean_steps=16.1
|
|
[Episode 32350] reward=-111681509.4 actor_loss=0.2072 critic_loss=138243031222.0444 entropy=17.6690 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 32360] reward=-118307392.8 actor_loss=0.3679 critic_loss=151278301804.6060 entropy=17.6572 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 32360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523876.5 mean_steps=13.3
|
|
[Episode 32370] reward=-115443082.5 actor_loss=0.2611 critic_loss=150268335809.4222 entropy=17.6680 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 32380] reward=-110776049.6 actor_loss=0.4868 critic_loss=143221566610.2857 entropy=17.6744 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 32380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-569505.6 mean_steps=12.6
|
|
[Episode 32390] reward=-121720131.7 actor_loss=0.1793 critic_loss=151717207582.1176 entropy=17.6703 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 32400] reward=-118210389.7 actor_loss=0.2596 critic_loss=152527521978.1818 entropy=17.6634 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 32400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-574644.5 mean_steps=12.9
|
|
[Episode 32410] reward=-123962697.9 actor_loss=0.2213 critic_loss=161603659776.0000 entropy=17.6593 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 32420] reward=-122084402.2 actor_loss=0.2866 critic_loss=154961653304.8889 entropy=17.6592 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 32420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-396337.1 mean_steps=15.8
|
|
[Episode 32430] reward=-117511329.9 actor_loss=0.3436 critic_loss=149702334951.6190 entropy=17.6643 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 32440] reward=-116829656.3 actor_loss=0.3263 critic_loss=149129956556.8000 entropy=17.6550 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 32440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-399351.1 mean_steps=16.1
|
|
[Episode 32450] reward=-117799975.9 actor_loss=0.2176 critic_loss=144203369472.0000 entropy=17.6549 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 32460] reward=-122102448.8 actor_loss=0.2689 critic_loss=156813217336.8889 entropy=17.6590 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 32460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-423380.0 mean_steps=14.6
|
|
[Episode 32470] reward=-121883889.4 actor_loss=0.1728 critic_loss=153497164185.6000 entropy=17.6568 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 32480] reward=-120780462.7 actor_loss=0.3156 critic_loss=157677607321.6000 entropy=17.6678 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 32480] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-396472.6 mean_steps=17.4
|
|
[Episode 32490] reward=-117396489.8 actor_loss=0.3683 critic_loss=162165006767.1579 entropy=17.6595 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 32500] reward=-120401707.5 actor_loss=0.2279 critic_loss=156158485504.0000 entropy=17.6657 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 32500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457505.9 mean_steps=16.1
|
|
[Episode 32510] reward=-119970509.0 actor_loss=0.2963 critic_loss=164277479014.4000 entropy=17.6696 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 32520] reward=-121201787.4 actor_loss=0.2827 critic_loss=162093648802.9091 entropy=17.6629 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 32520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-394642.8 mean_steps=15.4
|
|
[Episode 32530] reward=-116097617.8 actor_loss=0.2729 critic_loss=149340696120.8889 entropy=17.6687 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 32540] reward=-121744266.4 actor_loss=0.3133 critic_loss=154814111262.1176 entropy=17.6688 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 32540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469331.5 mean_steps=14.4
|
|
[Episode 32550] reward=-120206259.8 actor_loss=0.3009 critic_loss=160829108758.2609 entropy=17.6578 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 32560] reward=-117341195.3 actor_loss=0.3236 critic_loss=145824696222.4762 entropy=17.6640 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 32560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483670.2 mean_steps=15.1
|
|
[Episode 32570] reward=-119952291.3 actor_loss=0.3385 critic_loss=153530322488.8889 entropy=17.6813 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 32580] reward=-121261527.6 actor_loss=0.3190 critic_loss=151425516339.2000 entropy=17.6739 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 32580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576502.9 mean_steps=14.2
|
|
[Episode 32590] reward=-117944690.8 actor_loss=0.3853 critic_loss=155018304065.6410 entropy=17.6714 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 32600] reward=-117157627.6 actor_loss=0.3009 critic_loss=154165941589.3333 entropy=17.6752 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 32600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574940.2 mean_steps=14.1
|
|
[Episode 32610] reward=-117762624.6 actor_loss=0.2934 critic_loss=148463506525.0909 entropy=17.6813 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 32620] reward=-112836990.5 actor_loss=0.3563 critic_loss=144929863530.1463 entropy=17.6772 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 32620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459986.5 mean_steps=15.5
|
|
[Episode 32630] reward=-121129180.7 actor_loss=0.2734 critic_loss=152863205218.4615 entropy=17.6663 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 32640] reward=-118773731.0 actor_loss=0.2492 critic_loss=152105802865.7778 entropy=17.6814 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 32640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506782.3 mean_steps=14.3
|
|
[Episode 32650] reward=-111220325.7 actor_loss=0.3649 critic_loss=141608436986.3111 entropy=17.6873 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 32660] reward=-120421032.3 actor_loss=0.2593 critic_loss=161079413760.0000 entropy=17.6816 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 32660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378116.8 mean_steps=16.3
|
|
[Episode 32670] reward=-120922667.0 actor_loss=0.3105 critic_loss=153304212366.2222 entropy=17.6743 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 32680] reward=-117173825.0 actor_loss=0.2313 critic_loss=154736548717.7143 entropy=17.6706 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 32680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-589717.5 mean_steps=13.6
|
|
[Episode 32690] reward=-120696408.2 actor_loss=0.2222 critic_loss=154639335424.0000 entropy=17.6782 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 32700] reward=-113274890.4 actor_loss=0.2924 critic_loss=146145157120.0000 entropy=17.6869 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 32700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429063.8 mean_steps=15.8
|
|
[Episode 32710] reward=-121129945.8 actor_loss=0.1719 critic_loss=153108894913.7297 entropy=17.6751 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 32720] reward=-113207041.0 actor_loss=0.3134 critic_loss=142440446464.0000 entropy=17.6781 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 32720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545370.6 mean_steps=13.9
|
|
[Episode 32730] reward=-124712525.3 actor_loss=0.2816 critic_loss=164657197511.1111 entropy=17.6722 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 32740] reward=-118037822.2 actor_loss=0.3173 critic_loss=146917420236.8000 entropy=17.6728 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 32740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465017.9 mean_steps=14.8
|
|
[Episode 32750] reward=-119306453.6 actor_loss=0.3217 critic_loss=153122969413.8182 entropy=17.6666 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 32760] reward=-113310899.5 actor_loss=0.2431 critic_loss=149088701293.7143 entropy=17.6707 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 32760] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-349535.5 mean_steps=17.1
|
|
[Episode 32770] reward=-116497006.0 actor_loss=0.2271 critic_loss=146850790679.2727 entropy=17.6717 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 32780] reward=-110594738.5 actor_loss=0.3081 critic_loss=140567397351.0244 entropy=17.6826 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 32780] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-422634.9 mean_steps=16.9
|
|
[Episode 32790] reward=-117100508.9 actor_loss=0.3025 critic_loss=151748237721.6000 entropy=17.6759 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 32800] reward=-115470782.6 actor_loss=0.3027 critic_loss=154764923997.0909 entropy=17.6581 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 32800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-413858.8 mean_steps=16.9
|
|
[Episode 32810] reward=-121062814.5 actor_loss=0.2556 critic_loss=158765487133.2571 entropy=17.6534 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 32820] reward=-122928195.9 actor_loss=0.1766 critic_loss=158451405619.2000 entropy=17.6571 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 32820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-506724.1 mean_steps=15.2
|
|
[Episode 32830] reward=-116755136.6 actor_loss=0.1859 critic_loss=151375388672.0000 entropy=17.6607 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 32840] reward=-117525507.0 actor_loss=0.2864 critic_loss=151932042899.9111 entropy=17.6609 approx_kl=0.0103 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 32840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538947.8 mean_steps=14.3
|
|
[Episode 32850] reward=-123516917.8 actor_loss=0.3002 critic_loss=158250504192.0000 entropy=17.6588 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 32860] reward=-122015309.6 actor_loss=0.2767 critic_loss=160012021174.8571 entropy=17.6602 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 32860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395120.1 mean_steps=16.8
|
|
[Episode 32870] reward=-117160066.3 actor_loss=0.3587 critic_loss=151871650876.2353 entropy=17.6512 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 32880] reward=-119920331.6 actor_loss=0.2661 critic_loss=151944709120.0000 entropy=17.6558 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 32880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-402722.2 mean_steps=14.7
|
|
[Episode 32890] reward=-122161247.6 actor_loss=0.2441 critic_loss=157028191609.2632 entropy=17.6665 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 32900] reward=-122168495.7 actor_loss=0.3364 critic_loss=154479481978.8800 entropy=17.6614 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 32900] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-187662.7 mean_steps=18.2
|
|
[Episode 32910] reward=-121106195.6 actor_loss=0.2872 critic_loss=153281745169.0667 entropy=17.6477 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 32920] reward=-114408108.0 actor_loss=0.3738 critic_loss=141288568331.3778 entropy=17.6435 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 32920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-637993.8 mean_steps=12.2
|
|
[Episode 32930] reward=-124069224.2 actor_loss=0.2790 critic_loss=157161942944.7442 entropy=17.6430 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 32940] reward=-120602158.3 actor_loss=0.2504 critic_loss=154834662951.3846 entropy=17.6440 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 32940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-676160.9 mean_steps=11.9
|
|
[Episode 32950] reward=-119544048.7 actor_loss=0.2610 critic_loss=149757520956.2353 entropy=17.6469 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 32960] reward=-122634393.8 actor_loss=0.3179 critic_loss=155263665438.7200 entropy=17.6461 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 32960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-360091.4 mean_steps=16.1
|
|
[Episode 32970] reward=-123399441.6 actor_loss=0.3343 critic_loss=159395920164.5714 entropy=17.6544 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 32980] reward=-115587184.6 actor_loss=0.2813 critic_loss=149254021551.1579 entropy=17.6187 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 32980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470143.3 mean_steps=15.2
|
|
[Episode 32990] reward=-119598001.2 actor_loss=0.2955 critic_loss=154354808452.7408 entropy=17.6107 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 33000] reward=-118183415.4 actor_loss=0.3731 critic_loss=154091260705.3913 entropy=17.6162 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 33000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407948.8 mean_steps=16.6
|
|
[Episode 33010] reward=-115538199.0 actor_loss=0.3119 critic_loss=142094974976.0000 entropy=17.6182 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 33020] reward=-121362259.4 actor_loss=0.3250 critic_loss=156229124096.0000 entropy=17.6127 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 33020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540419.9 mean_steps=13.5
|
|
[Episode 33030] reward=-122937434.5 actor_loss=0.1915 critic_loss=153638905173.3333 entropy=17.6060 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 33040] reward=-118782166.4 actor_loss=0.3423 critic_loss=159121628553.8462 entropy=17.6001 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 33040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469011.7 mean_steps=14.6
|
|
[Episode 33050] reward=-118968005.8 actor_loss=0.3362 critic_loss=152063649867.8518 entropy=17.6043 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 33060] reward=-119779173.0 actor_loss=0.1782 critic_loss=152495090723.3103 entropy=17.6045 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 33060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392637.9 mean_steps=16.3
|
|
[Episode 33070] reward=-122819818.7 actor_loss=0.2895 critic_loss=157596275143.1111 entropy=17.6075 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 33080] reward=-128253502.2 actor_loss=0.3218 critic_loss=170273502108.9032 entropy=17.6060 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 33080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-343720.8 mean_steps=15.8
|
|
[Episode 33090] reward=-122683917.6 actor_loss=0.2828 critic_loss=156166956646.4000 entropy=17.6033 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 33100] reward=-117974565.8 actor_loss=0.2461 critic_loss=149691837741.1765 entropy=17.6022 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 33100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537812.9 mean_steps=13.5
|
|
[Episode 33110] reward=-118834812.2 actor_loss=0.2432 critic_loss=146257290308.2667 entropy=17.6073 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 33120] reward=-122963709.5 actor_loss=0.2576 critic_loss=156984716194.9091 entropy=17.6147 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 33120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624891.9 mean_steps=13.1
|
|
[Episode 33130] reward=-117027969.7 actor_loss=0.4237 critic_loss=149479224964.7408 entropy=17.6218 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 33140] reward=-121344060.4 actor_loss=0.3637 critic_loss=156570573027.5555 entropy=17.6336 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 33140] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-360100.8 mean_steps=17.4
|
|
[Episode 33150] reward=-118568939.9 actor_loss=0.2982 critic_loss=147697967104.0000 entropy=17.6403 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 33160] reward=-118153848.7 actor_loss=0.2842 critic_loss=148661219523.0476 entropy=17.6416 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 33160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518070.4 mean_steps=15.6
|
|
[Episode 33170] reward=-120782912.3 actor_loss=0.2570 critic_loss=154481986937.2632 entropy=17.6485 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 33180] reward=-123840198.0 actor_loss=0.2354 critic_loss=156573249677.2414 entropy=17.6485 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 33180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-563442.3 mean_steps=14.9
|
|
[Episode 33190] reward=-124660023.1 actor_loss=0.2910 critic_loss=162219400192.0000 entropy=17.6526 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 33200] reward=-122657847.0 actor_loss=0.2120 critic_loss=166827112510.0606 entropy=17.6490 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 33200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487437.8 mean_steps=15.3
|
|
[Episode 33210] reward=-116444214.9 actor_loss=0.2515 critic_loss=146989838336.0000 entropy=17.6560 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 33220] reward=-122522832.8 actor_loss=0.3232 critic_loss=158338917420.5217 entropy=17.6730 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 33220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537177.2 mean_steps=14.3
|
|
[Episode 33230] reward=-117816884.6 actor_loss=0.3364 critic_loss=155330578350.0800 entropy=17.6803 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 33240] reward=-129404344.8 actor_loss=0.2632 critic_loss=167861186104.8889 entropy=17.6731 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 33240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417814.9 mean_steps=15.8
|
|
[Episode 33250] reward=-118071718.3 actor_loss=0.3418 critic_loss=151185746250.3226 entropy=17.6594 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 33260] reward=-114657844.1 actor_loss=0.3275 critic_loss=147412865609.1429 entropy=17.6665 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 33260] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-392278.9 mean_steps=17.2
|
|
[Episode 33270] reward=-117192008.7 actor_loss=0.2377 critic_loss=153937477914.4828 entropy=17.6698 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 33280] reward=-119965390.0 actor_loss=0.2694 critic_loss=154454811945.2903 entropy=17.6644 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 33280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-638390.8 mean_steps=12.1
|
|
[Episode 33290] reward=-124577540.4 actor_loss=0.1806 critic_loss=157216830727.3143 entropy=17.6631 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 33300] reward=-116108395.5 actor_loss=0.3262 critic_loss=151434216387.7647 entropy=17.6602 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 33300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549126.7 mean_steps=14.8
|
|
[Episode 33310] reward=-120186632.3 actor_loss=0.2165 critic_loss=150759115161.6000 entropy=17.6667 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 33320] reward=-119012761.7 actor_loss=0.3725 critic_loss=148910363209.1429 entropy=17.6680 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 33320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450564.7 mean_steps=15.8
|
|
[Episode 33330] reward=-118323333.1 actor_loss=0.2380 critic_loss=149340479223.7419 entropy=17.6708 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 33340] reward=-118655862.3 actor_loss=0.3095 critic_loss=157458873986.9767 entropy=17.6778 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 33340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-349000.6 mean_steps=17.0
|
|
[Episode 33350] reward=-117580467.4 actor_loss=0.3233 critic_loss=148427532288.0000 entropy=17.6784 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 33360] reward=-118070253.8 actor_loss=0.3437 critic_loss=147037668588.3077 entropy=17.6752 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 33360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-543399.6 mean_steps=14.2
|
|
[Episode 33370] reward=-118464508.1 actor_loss=0.3806 critic_loss=149340275712.0000 entropy=17.6884 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 33380] reward=-124337407.7 actor_loss=0.3284 critic_loss=157270986536.4211 entropy=17.6815 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 33380] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-281710.6 mean_steps=17.4
|
|
[Episode 33390] reward=-122363650.9 actor_loss=0.3050 critic_loss=157526020004.9778 entropy=17.6841 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 33400] reward=-121449052.2 actor_loss=0.2341 critic_loss=157726446569.2444 entropy=17.6901 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 33400] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-318761.9 mean_steps=17.6
|
|
[Episode 33410] reward=-120404908.7 actor_loss=0.2486 critic_loss=151936553332.3636 entropy=17.6939 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 33420] reward=-121321562.1 actor_loss=0.2403 critic_loss=152802810105.7561 entropy=17.6843 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 33420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590493.4 mean_steps=13.7
|
|
[Episode 33430] reward=-118338666.9 actor_loss=0.2798 critic_loss=149516233386.6667 entropy=17.6998 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 33440] reward=-122311329.0 actor_loss=0.2581 critic_loss=156876505088.0000 entropy=17.6936 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 33440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504698.6 mean_steps=14.1
|
|
[Episode 33450] reward=-121490808.7 actor_loss=0.3912 critic_loss=155366907904.0000 entropy=17.7047 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 33460] reward=-121084133.8 actor_loss=0.3007 critic_loss=159646387541.3333 entropy=17.7078 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 33460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507827.2 mean_steps=13.2
|
|
[Episode 33470] reward=-114996974.0 actor_loss=0.3995 critic_loss=145902072445.1555 entropy=17.7139 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 33480] reward=-115199806.2 actor_loss=0.3494 critic_loss=147699527875.0476 entropy=17.7141 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 33480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458258.2 mean_steps=15.1
|
|
[Episode 33490] reward=-119585119.2 actor_loss=0.2812 critic_loss=152938864375.7419 entropy=17.7195 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 33500] reward=-120907326.9 actor_loss=0.3524 critic_loss=152973252289.4222 entropy=17.7100 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 33500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481202.9 mean_steps=14.1
|
|
[Episode 33510] reward=-117393718.2 actor_loss=0.2508 critic_loss=150091385405.4400 entropy=17.6995 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 33520] reward=-117035997.2 actor_loss=0.2918 critic_loss=154957455360.0000 entropy=17.7085 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 33520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451777.5 mean_steps=15.0
|
|
[Episode 33530] reward=-116508951.1 actor_loss=0.3362 critic_loss=147414476288.0000 entropy=17.7048 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 33540] reward=-121906563.9 actor_loss=0.3342 critic_loss=156047104773.6889 entropy=17.7073 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 33540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-343601.9 mean_steps=15.9
|
|
[Episode 33550] reward=-123407963.0 actor_loss=0.2454 critic_loss=155650700083.2000 entropy=17.7033 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 33560] reward=-117804048.6 actor_loss=0.2908 critic_loss=148371610737.7778 entropy=17.6858 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 33560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-364204.2 mean_steps=15.3
|
|
[Episode 33570] reward=-119611027.8 actor_loss=0.2602 critic_loss=154973234062.2222 entropy=17.6885 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 33580] reward=-119170835.5 actor_loss=0.3417 critic_loss=152991406762.6667 entropy=17.6959 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 33580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-417082.4 mean_steps=16.7
|
|
[Episode 33590] reward=-111740220.2 actor_loss=0.3258 critic_loss=138397962240.0000 entropy=17.6900 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 33600] reward=-117946515.8 actor_loss=0.1740 critic_loss=151190729386.6667 entropy=17.6935 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 33600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521471.9 mean_steps=14.7
|
|
[Episode 33610] reward=-120750779.3 actor_loss=0.3477 critic_loss=160804805395.6923 entropy=17.6802 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 33620] reward=-119897298.3 actor_loss=0.2722 critic_loss=157328476296.5333 entropy=17.6945 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 33620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-363168.8 mean_steps=16.3
|
|
[Episode 33630] reward=-115904222.1 actor_loss=0.3267 critic_loss=147196552442.3111 entropy=17.7000 approx_kl=0.0108 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 33640] reward=-117598727.3 actor_loss=0.2402 critic_loss=145897486090.2400 entropy=17.7166 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 33640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-609131.5 mean_steps=13.3
|
|
[Episode 33650] reward=-115554447.3 actor_loss=0.2805 critic_loss=146019538625.4222 entropy=17.6965 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 33660] reward=-115521577.1 actor_loss=0.3085 critic_loss=149242744964.1290 entropy=17.6881 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 33660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484424.8 mean_steps=15.5
|
|
[Episode 33670] reward=-119179888.9 actor_loss=0.2896 critic_loss=150147047033.9048 entropy=17.6826 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 33680] reward=-120560311.9 actor_loss=0.2704 critic_loss=153246317706.3784 entropy=17.7018 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 33680] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-346192.0 mean_steps=17.6
|
|
[Episode 33690] reward=-118368955.3 actor_loss=0.3339 critic_loss=151098423919.3044 entropy=17.7050 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 33700] reward=-120066671.9 actor_loss=0.2884 critic_loss=150345880991.1351 entropy=17.6993 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 33700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543586.6 mean_steps=13.5
|
|
[Episode 33710] reward=-124556831.9 actor_loss=0.2846 critic_loss=168747017604.4138 entropy=17.7016 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 33720] reward=-116442732.5 actor_loss=0.3241 critic_loss=149857373024.7111 entropy=17.7079 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 33720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529961.3 mean_steps=14.6
|
|
[Episode 33730] reward=-119978314.0 actor_loss=0.3077 critic_loss=152526277451.2941 entropy=17.7083 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 33740] reward=-120533561.2 actor_loss=0.2367 critic_loss=156503024360.7273 entropy=17.7088 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 33740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-653188.5 mean_steps=11.8
|
|
[Episode 33750] reward=-118137618.5 actor_loss=0.1967 critic_loss=145447072381.1555 entropy=17.7115 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 33760] reward=-114915423.3 actor_loss=0.2840 critic_loss=141868941552.9412 entropy=17.7215 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 33760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-406064.9 mean_steps=15.6
|
|
[Episode 33770] reward=-124267448.7 actor_loss=0.2206 critic_loss=162892286464.0000 entropy=17.7230 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 33780] reward=-121161768.1 actor_loss=0.2175 critic_loss=155075650082.1333 entropy=17.7335 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 33780] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-354166.3 mean_steps=17.4
|
|
[Episode 33790] reward=-118801770.2 actor_loss=0.2403 critic_loss=151883035158.2609 entropy=17.7222 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 33800] reward=-120785356.7 actor_loss=0.2252 critic_loss=153858316180.2105 entropy=17.7360 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 33800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-626983.5 mean_steps=11.9
|
|
[Episode 33810] reward=-118215371.4 actor_loss=0.2974 critic_loss=146040622080.0000 entropy=17.7332 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 33820] reward=-114629610.9 actor_loss=0.2508 critic_loss=149274819047.6190 entropy=17.7217 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 33820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-640999.8 mean_steps=13.5
|
|
[Episode 33830] reward=-120380899.7 actor_loss=0.2340 critic_loss=153214748113.4546 entropy=17.7142 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 33840] reward=-116959187.6 actor_loss=0.3083 critic_loss=149116672577.6410 entropy=17.7160 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 33840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407145.1 mean_steps=16.6
|
|
[Episode 33850] reward=-116882585.0 actor_loss=0.2865 critic_loss=148958610500.2667 entropy=17.7126 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 33860] reward=-117944227.2 actor_loss=0.2993 critic_loss=150598426441.9556 entropy=17.6846 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 33860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623943.7 mean_steps=12.2
|
|
[Episode 33870] reward=-122179244.0 actor_loss=0.2829 critic_loss=160900263757.9131 entropy=17.6811 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 33880] reward=-122830419.8 actor_loss=0.2558 critic_loss=158565914669.5111 entropy=17.6724 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 33880] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-290117.4 mean_steps=17.9
|
|
[Episode 33890] reward=-120144870.7 actor_loss=0.3026 critic_loss=151269301288.9600 entropy=17.6708 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 33900] reward=-123567938.0 actor_loss=0.2921 critic_loss=154874343739.0769 entropy=17.6726 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 33900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501054.8 mean_steps=13.4
|
|
[Episode 33910] reward=-117215167.1 actor_loss=0.4129 critic_loss=150147161829.5172 entropy=17.6605 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 33920] reward=-117675985.6 actor_loss=0.2644 critic_loss=152668458866.7586 entropy=17.6628 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 33920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473356.8 mean_steps=14.2
|
|
[Episode 33930] reward=-116443428.7 actor_loss=0.2430 critic_loss=152932491745.8824 entropy=17.6618 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 33940] reward=-121015431.4 actor_loss=0.2379 critic_loss=154049942528.0000 entropy=17.6494 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 33940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-450307.8 mean_steps=13.9
|
|
[Episode 33950] reward=-119291250.5 actor_loss=0.2473 critic_loss=151223789158.4000 entropy=17.6297 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 33960] reward=-114101065.3 actor_loss=0.3554 critic_loss=143350496987.4286 entropy=17.6215 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 33960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581360.2 mean_steps=13.8
|
|
[Episode 33970] reward=-116984922.8 actor_loss=0.3000 critic_loss=158496445644.8000 entropy=17.6207 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 33980] reward=-118020078.2 actor_loss=0.2018 critic_loss=154640299493.0526 entropy=17.6080 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 33980] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-261928.5 mean_steps=16.6
|
|
[Episode 33990] reward=-122162607.4 actor_loss=0.2920 critic_loss=157560860672.0000 entropy=17.6073 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 34000] reward=-121654766.7 actor_loss=0.2383 critic_loss=154985883045.6471 entropy=17.6060 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 34000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510628.3 mean_steps=14.2
|
|
[Episode 34010] reward=-116912271.8 actor_loss=0.3204 critic_loss=144860454456.8889 entropy=17.5986 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 34020] reward=-120027095.0 actor_loss=0.1825 critic_loss=148586614889.0256 entropy=17.5936 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 34020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-389709.5 mean_steps=16.2
|
|
[Episode 34030] reward=-116612950.8 actor_loss=0.2143 critic_loss=147341926400.0000 entropy=17.5951 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 34040] reward=-116982087.7 actor_loss=0.2220 critic_loss=148649757144.6154 entropy=17.5752 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 34040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-444192.7 mean_steps=14.6
|
|
[Episode 34050] reward=-121881558.6 actor_loss=0.2229 critic_loss=161915863752.3478 entropy=17.5710 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 34060] reward=-118109717.5 actor_loss=0.2475 critic_loss=145780088597.9429 entropy=17.5716 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 34060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-479421.2 mean_steps=15.7
|
|
[Episode 34070] reward=-118526375.2 actor_loss=0.2961 critic_loss=145322505947.4286 entropy=17.5805 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 34080] reward=-111456007.8 actor_loss=0.2975 critic_loss=136968358689.3913 entropy=17.5866 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 34080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430901.7 mean_steps=15.2
|
|
[Episode 34090] reward=-121293349.0 actor_loss=0.3053 critic_loss=157682171904.0000 entropy=17.5853 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 34100] reward=-118123458.6 actor_loss=0.2882 critic_loss=145100655360.0000 entropy=17.5783 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 34100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453842.5 mean_steps=15.6
|
|
[Episode 34110] reward=-114171729.5 actor_loss=0.3569 critic_loss=153082744246.8571 entropy=17.5745 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 34120] reward=-118364364.2 actor_loss=0.2368 critic_loss=153378359296.0000 entropy=17.5956 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 34120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423332.0 mean_steps=15.6
|
|
[Episode 34130] reward=-120386130.8 actor_loss=0.2706 critic_loss=150058489173.3333 entropy=17.5981 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 34140] reward=-117474407.2 actor_loss=0.3140 critic_loss=151230494247.3846 entropy=17.6088 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 34140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484828.1 mean_steps=15.3
|
|
[Episode 34150] reward=-119690261.1 actor_loss=0.3860 critic_loss=149925269655.7037 entropy=17.6157 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 34160] reward=-112313019.8 actor_loss=0.3865 critic_loss=145821799765.3333 entropy=17.6168 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 34160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494295.6 mean_steps=14.0
|
|
[Episode 34170] reward=-120915255.8 actor_loss=0.2308 critic_loss=156933950668.8000 entropy=17.6120 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 34180] reward=-117071865.1 actor_loss=0.3603 critic_loss=158141704601.6000 entropy=17.6199 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 34180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-492192.8 mean_steps=15.9
|
|
[Episode 34190] reward=-114906201.3 actor_loss=0.3678 critic_loss=153960910848.0000 entropy=17.6177 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 34200] reward=-121062374.7 actor_loss=0.2603 critic_loss=156036432964.2667 entropy=17.6166 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 34200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-471282.3 mean_steps=13.2
|
|
[Episode 34210] reward=-118126519.9 actor_loss=0.2767 critic_loss=147422922384.4102 entropy=17.6307 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 34220] reward=-119664555.2 actor_loss=0.3029 critic_loss=149030415661.1765 entropy=17.6328 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 34220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548414.7 mean_steps=14.4
|
|
[Episode 34230] reward=-121547577.7 actor_loss=0.3091 critic_loss=156505184148.2105 entropy=17.6266 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 34240] reward=-121442508.8 actor_loss=0.3263 critic_loss=155768982186.6667 entropy=17.6247 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 34240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510826.1 mean_steps=14.7
|
|
[Episode 34250] reward=-121155937.6 actor_loss=0.2775 critic_loss=149450225891.5555 entropy=17.6208 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 34260] reward=-117398029.6 actor_loss=0.3611 critic_loss=145123564657.7778 entropy=17.6070 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 34260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-516410.4 mean_steps=15.1
|
|
[Episode 34270] reward=-113379217.3 actor_loss=0.3927 critic_loss=145961177460.3636 entropy=17.5955 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 34280] reward=-124112335.0 actor_loss=0.2943 critic_loss=151983597977.6000 entropy=17.6003 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 34280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399644.1 mean_steps=15.5
|
|
[Episode 34290] reward=-122018402.6 actor_loss=0.1880 critic_loss=151487496192.0000 entropy=17.6012 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 34300] reward=-121472393.5 actor_loss=0.2877 critic_loss=149750396084.7059 entropy=17.5938 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 34300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563601.3 mean_steps=12.6
|
|
[Episode 34310] reward=-118232711.4 actor_loss=0.2713 critic_loss=148574145194.6667 entropy=17.5892 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 34320] reward=-120861388.8 actor_loss=0.2809 critic_loss=152095276032.0000 entropy=17.5934 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 34320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441331.4 mean_steps=15.8
|
|
[Episode 34330] reward=-121751852.6 actor_loss=0.2828 critic_loss=155686420480.0000 entropy=17.6031 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 34340] reward=-126560824.1 actor_loss=0.1751 critic_loss=160275684894.1176 entropy=17.5946 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 34340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420060.8 mean_steps=15.7
|
|
[Episode 34350] reward=-120307409.0 actor_loss=0.3331 critic_loss=151755999524.5714 entropy=17.5986 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 34360] reward=-120385429.2 actor_loss=0.3449 critic_loss=155175655833.6000 entropy=17.6085 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 34360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541120.5 mean_steps=14.4
|
|
[Episode 34370] reward=-119631750.1 actor_loss=0.2472 critic_loss=151104428786.5263 entropy=17.6033 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 34380] reward=-110772663.6 actor_loss=0.3423 critic_loss=143341561232.6956 entropy=17.6041 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 34380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450933.5 mean_steps=14.7
|
|
[Episode 34390] reward=-122968219.1 actor_loss=0.2696 critic_loss=161201455668.9655 entropy=17.6135 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 34400] reward=-118685677.9 actor_loss=0.3137 critic_loss=152559061284.5714 entropy=17.6121 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 34400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535794.0 mean_steps=13.5
|
|
[Episode 34410] reward=-119788174.4 actor_loss=0.1693 critic_loss=147475702559.2195 entropy=17.6262 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 34420] reward=-116503959.7 actor_loss=0.4143 critic_loss=143137901681.7778 entropy=17.6339 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 34420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-456803.7 mean_steps=15.8
|
|
[Episode 34430] reward=-118484781.6 actor_loss=0.2209 critic_loss=144999558277.5652 entropy=17.6201 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 34440] reward=-122336784.9 actor_loss=0.3024 critic_loss=159391390999.2727 entropy=17.6250 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 34440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454261.0 mean_steps=14.8
|
|
[Episode 34450] reward=-123321062.3 actor_loss=0.3272 critic_loss=163230103552.0000 entropy=17.6174 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 34460] reward=-119632664.0 actor_loss=0.2870 critic_loss=149165015040.0000 entropy=17.6069 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 34460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572336.3 mean_steps=12.4
|
|
[Episode 34470] reward=-117826131.5 actor_loss=0.3504 critic_loss=146992958781.7931 entropy=17.6084 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 34480] reward=-118319825.9 actor_loss=0.3325 critic_loss=147710496475.4286 entropy=17.6081 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 34480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-489450.7 mean_steps=16.1
|
|
[Episode 34490] reward=-124243599.4 actor_loss=0.2724 critic_loss=157042686634.6667 entropy=17.6040 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 34500] reward=-113112105.3 actor_loss=0.3353 critic_loss=149675239833.6000 entropy=17.6072 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 34500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562331.2 mean_steps=13.3
|
|
[Episode 34510] reward=-117417316.1 actor_loss=0.3703 critic_loss=159525433344.0000 entropy=17.6030 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 34520] reward=-114592315.3 actor_loss=0.2647 critic_loss=147199040365.7143 entropy=17.5998 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 34520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532462.5 mean_steps=14.2
|
|
[Episode 34530] reward=-122254091.3 actor_loss=0.3288 critic_loss=154056296537.0435 entropy=17.6025 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 34540] reward=-116292982.5 actor_loss=0.3370 critic_loss=142715199698.0513 entropy=17.5814 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 34540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490105.7 mean_steps=14.1
|
|
[Episode 34550] reward=-119290550.2 actor_loss=0.2538 critic_loss=150079195570.4243 entropy=17.5765 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 34560] reward=-117957759.9 actor_loss=0.2881 critic_loss=150621307699.2000 entropy=17.5757 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 34560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462740.7 mean_steps=14.8
|
|
[Episode 34570] reward=-118143971.2 actor_loss=0.3127 critic_loss=149302280192.0000 entropy=17.5734 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 34580] reward=-122609232.1 actor_loss=0.2627 critic_loss=156306629238.1538 entropy=17.5806 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 34580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-405839.4 mean_steps=14.8
|
|
[Episode 34590] reward=-114765840.0 actor_loss=0.2914 critic_loss=145396139559.3846 entropy=17.5724 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 34600] reward=-126303209.5 actor_loss=0.2953 critic_loss=265477685854.8148 entropy=17.5824 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 34600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-412426.8 mean_steps=16.4
|
|
[Episode 34610] reward=-113900364.5 actor_loss=0.2272 critic_loss=140998694229.3333 entropy=17.5892 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 34620] reward=-122563448.2 actor_loss=0.2198 critic_loss=154492230509.7143 entropy=17.5864 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 34620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519622.8 mean_steps=14.4
|
|
[Episode 34630] reward=-119771601.5 actor_loss=0.1751 critic_loss=150660629342.3158 entropy=17.5882 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 34640] reward=-117729281.5 actor_loss=0.2690 critic_loss=151601101677.7143 entropy=17.5736 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 34640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397288.5 mean_steps=15.4
|
|
[Episode 34650] reward=-119153464.7 actor_loss=0.2759 critic_loss=152178839155.6129 entropy=17.5684 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 34660] reward=-116542252.9 actor_loss=0.3014 critic_loss=144220119586.1333 entropy=17.5594 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 34660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488013.5 mean_steps=14.3
|
|
[Episode 34670] reward=-119012590.1 actor_loss=0.2943 critic_loss=152091478488.6154 entropy=17.5757 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 34680] reward=-114708606.1 actor_loss=0.3656 critic_loss=144562354930.5263 entropy=17.5832 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 34680] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-278057.7 mean_steps=16.9
|
|
[Episode 34690] reward=-114873526.7 actor_loss=0.2891 critic_loss=143917817173.3333 entropy=17.5695 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 34700] reward=-121445383.6 actor_loss=0.3294 critic_loss=149124686506.6667 entropy=17.5714 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 34700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510405.0 mean_steps=13.3
|
|
[Episode 34710] reward=-117048475.2 actor_loss=0.3660 critic_loss=144887831015.6190 entropy=17.5739 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 34720] reward=-117831661.3 actor_loss=0.2383 critic_loss=146034766643.2000 entropy=17.5761 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 34720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-613169.3 mean_steps=12.1
|
|
[Episode 34730] reward=-116688238.5 actor_loss=0.3751 critic_loss=144075250688.0000 entropy=17.5778 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 34740] reward=-115587967.3 actor_loss=0.3039 critic_loss=145078906060.8000 entropy=17.5696 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 34740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-359265.8 mean_steps=16.9
|
|
[Episode 34750] reward=-115673548.6 actor_loss=0.2715 critic_loss=141492697586.1622 entropy=17.5697 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 34760] reward=-123753752.1 actor_loss=0.2529 critic_loss=154922418176.0000 entropy=17.5755 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 34760] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-410854.4 mean_steps=17.4
|
|
[Episode 34770] reward=-121064485.5 actor_loss=0.3017 critic_loss=148517123959.4667 entropy=17.5759 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 34780] reward=-120735476.1 actor_loss=0.2798 critic_loss=148591964475.0769 entropy=17.5759 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 34780] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-232048.1 mean_steps=19.4
|
|
[Episode 34790] reward=-120771896.9 actor_loss=0.3915 critic_loss=158937080135.6800 entropy=17.5930 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 34800] reward=-118340016.5 actor_loss=0.2748 critic_loss=149525097858.8445 entropy=17.5844 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 34800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446822.4 mean_steps=15.7
|
|
[Episode 34810] reward=-116181161.4 actor_loss=0.3338 critic_loss=145816196189.0909 entropy=17.5782 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 34820] reward=-120790954.8 actor_loss=0.2436 critic_loss=149822402651.0222 entropy=17.5811 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 34820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547861.8 mean_steps=13.4
|
|
[Episode 34830] reward=-123234647.4 actor_loss=0.2775 critic_loss=157694160440.8889 entropy=17.5830 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 34840] reward=-118821597.6 actor_loss=0.3290 critic_loss=153713642458.0741 entropy=17.5777 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 34840] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-306813.0 mean_steps=16.8
|
|
[Episode 34850] reward=-119763758.1 actor_loss=0.2451 critic_loss=147869334291.6923 entropy=17.5778 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 34860] reward=-119746253.2 actor_loss=0.2579 critic_loss=150408412091.7333 entropy=17.5730 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 34860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465960.6 mean_steps=14.9
|
|
[Episode 34870] reward=-113087107.7 actor_loss=0.2977 critic_loss=138527632822.8571 entropy=17.5689 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 34880] reward=-119521142.2 actor_loss=0.1743 critic_loss=149831385088.0000 entropy=17.5653 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 34880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465272.8 mean_steps=16.0
|
|
[Episode 34890] reward=-119904785.5 actor_loss=0.2477 critic_loss=153012877019.4286 entropy=17.5603 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 34900] reward=-117097115.3 actor_loss=0.2773 critic_loss=143042275689.4118 entropy=17.5463 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 34900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-535567.0 mean_steps=12.6
|
|
[Episode 34910] reward=-112574617.5 actor_loss=0.3286 critic_loss=149590156902.4000 entropy=17.5442 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 34920] reward=-115681847.7 actor_loss=0.3587 critic_loss=140845922424.4706 entropy=17.5574 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 34920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-542250.3 mean_steps=12.7
|
|
[Episode 34930] reward=-115009730.4 actor_loss=0.2879 critic_loss=148538836204.3077 entropy=17.5533 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 34940] reward=-121630797.3 actor_loss=0.2366 critic_loss=152028170406.0540 entropy=17.5591 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 34940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570793.8 mean_steps=13.4
|
|
[Episode 34950] reward=-123275132.9 actor_loss=0.2364 critic_loss=498733312361.4117 entropy=17.5491 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 34960] reward=-120348353.0 actor_loss=0.2891 critic_loss=152562883242.6667 entropy=17.5577 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 34960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553431.2 mean_steps=13.4
|
|
[Episode 34970] reward=-119686812.8 actor_loss=0.3004 critic_loss=152414700612.2667 entropy=17.5413 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 34980] reward=-122315541.8 actor_loss=0.3195 critic_loss=158863410412.3077 entropy=17.5302 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 34980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-627572.5 mean_steps=13.3
|
|
[Episode 34990] reward=-118382646.8 actor_loss=0.2552 critic_loss=145379247718.4000 entropy=17.5369 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 35000] reward=-114166224.7 actor_loss=0.4499 critic_loss=149956256488.7273 entropy=17.5465 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 35000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511405.5 mean_steps=14.2
|
|
[Episode 35010] reward=-120125765.2 actor_loss=0.2752 critic_loss=151930738005.3333 entropy=17.5540 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 35020] reward=-117859354.6 actor_loss=0.3013 critic_loss=149760147968.0000 entropy=17.5681 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 35020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-611116.5 mean_steps=14.1
|
|
[Episode 35030] reward=-117531275.9 actor_loss=0.2737 critic_loss=148847414649.2632 entropy=17.5806 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 35040] reward=-115839550.8 actor_loss=0.1713 critic_loss=148905074980.5714 entropy=17.5713 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 35040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395861.8 mean_steps=16.6
|
|
[Episode 35050] reward=-115341526.5 actor_loss=0.2879 critic_loss=146013387807.0303 entropy=17.5779 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 35060] reward=-119115540.2 actor_loss=0.3010 critic_loss=148966360350.7200 entropy=17.5892 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 35060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-588997.9 mean_steps=12.8
|
|
[Episode 35070] reward=-118244429.6 actor_loss=0.3018 critic_loss=149348126185.7391 entropy=17.5950 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 35080] reward=-117106815.5 actor_loss=0.2571 critic_loss=147833986161.7778 entropy=17.5960 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 35080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619824.8 mean_steps=13.2
|
|
[Episode 35090] reward=-122396811.4 actor_loss=0.3536 critic_loss=157397246935.0400 entropy=17.5909 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 35100] reward=-120157048.2 actor_loss=0.2691 critic_loss=151410301952.0000 entropy=17.6000 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 35100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-531298.4 mean_steps=15.1
|
|
[Episode 35110] reward=-123932247.5 actor_loss=0.2824 critic_loss=164365628967.3846 entropy=17.6139 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 35120] reward=-119387850.7 actor_loss=0.2313 critic_loss=153016469094.4000 entropy=17.6314 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 35120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-582998.2 mean_steps=12.0
|
|
[Episode 35130] reward=-117083201.3 actor_loss=0.3145 critic_loss=145989148407.7419 entropy=17.6331 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 35140] reward=-116994913.9 actor_loss=0.3376 critic_loss=146550486445.4193 entropy=17.6283 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 35140] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-404119.7 mean_steps=17.1
|
|
[Episode 35150] reward=-121476584.8 actor_loss=0.2511 critic_loss=159365336441.2632 entropy=17.6165 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 35160] reward=-118433265.1 actor_loss=0.3311 critic_loss=151789741812.8696 entropy=17.6229 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 35160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504344.3 mean_steps=15.2
|
|
[Episode 35170] reward=-121885234.8 actor_loss=0.2346 critic_loss=158390273489.4546 entropy=17.6216 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 35180] reward=-117147894.7 actor_loss=0.2571 critic_loss=149171606869.3333 entropy=17.6281 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 35180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523201.5 mean_steps=13.5
|
|
[Episode 35190] reward=-116351969.7 actor_loss=0.3514 critic_loss=148885100357.8182 entropy=17.6269 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 35200] reward=-120533841.2 actor_loss=0.2590 critic_loss=157061643806.1176 entropy=17.6233 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 35200] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-361779.4 mean_steps=16.9
|
|
[Episode 35210] reward=-119945465.5 actor_loss=0.2251 critic_loss=150472844676.4138 entropy=17.6150 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 35220] reward=-119088589.6 actor_loss=0.3641 critic_loss=148227049244.4445 entropy=17.6068 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 35220] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-332401.4 mean_steps=17.6
|
|
[Episode 35230] reward=-117794012.3 actor_loss=0.2905 critic_loss=152955329299.6923 entropy=17.6097 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 35240] reward=-119471365.6 actor_loss=0.2278 critic_loss=152225138777.0435 entropy=17.5923 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 35240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452140.5 mean_steps=15.6
|
|
[Episode 35250] reward=-117982444.7 actor_loss=0.3207 critic_loss=142037816349.2571 entropy=17.5858 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 35260] reward=-114532217.4 actor_loss=0.4011 critic_loss=137326611611.1515 entropy=17.6045 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 35260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398250.2 mean_steps=16.4
|
|
[Episode 35270] reward=-119064049.4 actor_loss=0.3462 critic_loss=172585930379.6364 entropy=17.6060 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 35280] reward=-120014974.6 actor_loss=0.1983 critic_loss=146118157700.4138 entropy=17.6065 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 35280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438015.8 mean_steps=14.5
|
|
[Episode 35290] reward=-115751393.1 actor_loss=0.3032 critic_loss=144134148505.6000 entropy=17.6039 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 35300] reward=-119305556.1 actor_loss=0.1784 critic_loss=153767756458.6667 entropy=17.6123 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 35300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399805.9 mean_steps=15.2
|
|
[Episode 35310] reward=-121381348.1 actor_loss=0.2870 critic_loss=148509851283.9111 entropy=17.6268 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 35320] reward=-114202429.9 actor_loss=0.2957 critic_loss=140788788489.4815 entropy=17.6267 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 35320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-457661.0 mean_steps=14.2
|
|
[Episode 35330] reward=-123118100.1 actor_loss=0.2636 critic_loss=154799820093.7931 entropy=17.6227 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 35340] reward=-118503904.6 actor_loss=0.2984 critic_loss=148425782110.3158 entropy=17.6218 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 35340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532672.3 mean_steps=13.2
|
|
[Episode 35350] reward=-122499533.6 actor_loss=0.2506 critic_loss=152360362356.3636 entropy=17.6139 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 35360] reward=-117997985.0 actor_loss=0.2733 critic_loss=147175822677.3333 entropy=17.6097 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 35360] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-685544.5 mean_steps=11.8
|
|
[Episode 35370] reward=-121503113.6 actor_loss=0.2154 critic_loss=151324560854.4865 entropy=17.6019 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 35380] reward=-112200080.7 actor_loss=0.3454 critic_loss=141810957425.7778 entropy=17.6169 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 35380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468711.6 mean_steps=13.9
|
|
[Episode 35390] reward=-117448263.5 actor_loss=0.3483 critic_loss=148829935479.4667 entropy=17.6244 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 35400] reward=-113965912.0 actor_loss=0.2223 critic_loss=148131365608.7273 entropy=17.6226 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 35400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-594733.9 mean_steps=12.8
|
|
[Episode 35410] reward=-118523360.2 actor_loss=0.2433 critic_loss=149237683253.8947 entropy=17.6353 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 35420] reward=-122485757.0 actor_loss=0.2991 critic_loss=159349442560.0000 entropy=17.6333 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 35420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553626.7 mean_steps=13.7
|
|
[Episode 35430] reward=-118546746.5 actor_loss=0.2148 critic_loss=149510545152.0000 entropy=17.6443 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 35440] reward=-122560398.9 actor_loss=0.2239 critic_loss=151113486921.1429 entropy=17.6479 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 35440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385634.1 mean_steps=16.1
|
|
[Episode 35450] reward=-119278591.2 actor_loss=0.4584 critic_loss=155834544368.9412 entropy=17.6447 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 35460] reward=-126236144.5 actor_loss=0.2427 critic_loss=161054950088.3478 entropy=17.6569 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 35460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-691414.3 mean_steps=12.3
|
|
[Episode 35470] reward=-124857427.6 actor_loss=0.2438 critic_loss=157500014592.0000 entropy=17.6589 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 35480] reward=-116152570.2 actor_loss=0.4101 critic_loss=148865669643.9070 entropy=17.6600 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 35480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470610.8 mean_steps=15.1
|
|
[Episode 35490] reward=-118475742.2 actor_loss=0.3008 critic_loss=150598758400.0000 entropy=17.6488 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 35500] reward=-122490866.1 actor_loss=0.3032 critic_loss=156139944423.6190 entropy=17.6531 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 35500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-393967.7 mean_steps=16.1
|
|
[Episode 35510] reward=-120031425.0 actor_loss=0.2567 critic_loss=153543712950.0444 entropy=17.6484 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 35520] reward=-119487190.3 actor_loss=0.3496 critic_loss=156394352054.8571 entropy=17.6522 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 35520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385160.0 mean_steps=16.6
|
|
[Episode 35530] reward=-117695475.1 actor_loss=0.2262 critic_loss=152670383854.9333 entropy=17.6320 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 35540] reward=-121098588.1 actor_loss=0.2777 critic_loss=149826812391.6190 entropy=17.6278 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 35540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540273.9 mean_steps=14.0
|
|
[Episode 35550] reward=-117619314.3 actor_loss=0.3752 critic_loss=165659185834.6667 entropy=17.6253 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 35560] reward=-116731455.5 actor_loss=0.3687 critic_loss=150155513036.8000 entropy=17.6285 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 35560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-538377.5 mean_steps=12.7
|
|
[Episode 35570] reward=-118858418.0 actor_loss=0.2453 critic_loss=150659741696.0000 entropy=17.6311 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 35580] reward=-115747029.1 actor_loss=0.3593 critic_loss=145335225314.7429 entropy=17.6314 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 35580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-626071.7 mean_steps=12.4
|
|
[Episode 35590] reward=-115429736.5 actor_loss=0.3421 critic_loss=151376951854.5454 entropy=17.6317 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 35600] reward=-120961950.9 actor_loss=0.2838 critic_loss=155802580805.8182 entropy=17.6472 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 35600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-367881.5 mean_steps=16.5
|
|
[Episode 35610] reward=-123942237.6 actor_loss=0.2877 critic_loss=161928088700.8781 entropy=17.6452 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 35620] reward=-118925944.6 actor_loss=0.3914 critic_loss=150519245902.7692 entropy=17.6357 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 35620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430989.1 mean_steps=14.8
|
|
[Episode 35630] reward=-116529096.0 actor_loss=0.2667 critic_loss=147598465181.5385 entropy=17.6456 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 35640] reward=-122392257.9 actor_loss=0.2698 critic_loss=151676062626.9091 entropy=17.6582 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 35640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455550.8 mean_steps=15.9
|
|
[Episode 35650] reward=-113262718.7 actor_loss=0.2645 critic_loss=134266785300.4800 entropy=17.6590 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 35660] reward=-118435993.4 actor_loss=0.2990 critic_loss=149167905555.6923 entropy=17.6543 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 35660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479227.6 mean_steps=15.4
|
|
[Episode 35670] reward=-120268939.8 actor_loss=0.2942 critic_loss=153205661461.9429 entropy=17.6527 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 35680] reward=-120272974.7 actor_loss=0.2209 critic_loss=154536531441.3714 entropy=17.6508 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 35680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544270.9 mean_steps=13.8
|
|
[Episode 35690] reward=-122234677.1 actor_loss=0.2557 critic_loss=155739603412.1143 entropy=17.6464 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 35700] reward=-118241246.1 actor_loss=0.2928 critic_loss=150085206016.0000 entropy=17.6421 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 35700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-364968.9 mean_steps=16.4
|
|
[Episode 35710] reward=-119721324.7 actor_loss=0.2916 critic_loss=151425993634.9091 entropy=17.6267 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 35720] reward=-116812919.3 actor_loss=0.2441 critic_loss=154236536508.6316 entropy=17.6301 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 35720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432811.0 mean_steps=15.9
|
|
[Episode 35730] reward=-127666688.5 actor_loss=0.2547 critic_loss=158474028974.0800 entropy=17.6349 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 35740] reward=-123312779.8 actor_loss=0.1469 critic_loss=293763434632.5333 entropy=17.6378 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 35740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446861.6 mean_steps=15.8
|
|
[Episode 35750] reward=-117584568.5 actor_loss=0.3222 critic_loss=151646832286.8965 entropy=17.6404 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 35760] reward=-116658335.8 actor_loss=0.3050 critic_loss=142414232780.8000 entropy=17.6417 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 35760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-417728.0 mean_steps=16.6
|
|
[Episode 35770] reward=-119652112.3 actor_loss=0.2917 critic_loss=152859076198.4000 entropy=17.6448 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 35780] reward=-118754785.4 actor_loss=0.3300 critic_loss=147976443221.3333 entropy=17.6158 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 35780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-458596.1 mean_steps=14.2
|
|
[Episode 35790] reward=-119887146.2 actor_loss=0.2492 critic_loss=151328575703.5789 entropy=17.6175 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 35800] reward=-120705899.6 actor_loss=0.3079 critic_loss=150716966034.2857 entropy=17.6061 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 35800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383444.1 mean_steps=16.5
|
|
[Episode 35810] reward=-122838039.8 actor_loss=0.2695 critic_loss=157847599591.6190 entropy=17.5896 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 35820] reward=-124515694.7 actor_loss=0.1803 critic_loss=159187239367.1111 entropy=17.5865 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 35820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566735.0 mean_steps=12.8
|
|
[Episode 35830] reward=-117217998.0 actor_loss=0.2696 critic_loss=157280091504.6400 entropy=17.5904 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 35840] reward=-119539162.5 actor_loss=0.3061 critic_loss=151483116677.5652 entropy=17.5947 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 35840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-432275.2 mean_steps=13.7
|
|
[Episode 35850] reward=-117952093.8 actor_loss=0.3211 critic_loss=142986881280.0000 entropy=17.5995 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 35860] reward=-119764540.6 actor_loss=0.3117 critic_loss=152854291692.3077 entropy=17.5907 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 35860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545355.3 mean_steps=13.8
|
|
[Episode 35870] reward=-120759239.5 actor_loss=0.2709 critic_loss=150386989810.5263 entropy=17.5893 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 35880] reward=-120316959.5 actor_loss=0.3632 critic_loss=149423194697.1429 entropy=17.5943 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 35880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599848.3 mean_steps=13.1
|
|
[Episode 35890] reward=-121423101.8 actor_loss=0.3196 critic_loss=169018493220.5714 entropy=17.5942 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 35900] reward=-114765493.4 actor_loss=0.2144 critic_loss=142455533568.0000 entropy=17.5995 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 35900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-511013.3 mean_steps=12.7
|
|
[Episode 35910] reward=-122481119.4 actor_loss=0.3017 critic_loss=173701679880.8276 entropy=17.5889 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 35920] reward=-117449436.7 actor_loss=0.2274 critic_loss=146534699380.3636 entropy=17.6030 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 35920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585711.3 mean_steps=12.8
|
|
[Episode 35930] reward=-125436775.0 actor_loss=0.1861 critic_loss=161531856542.8965 entropy=17.6036 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 35940] reward=-123849337.9 actor_loss=0.2675 critic_loss=161581059389.7931 entropy=17.6007 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 35940] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-687422.4 mean_steps=11.5
|
|
[Episode 35950] reward=-117841035.5 actor_loss=0.3517 critic_loss=149752771677.0909 entropy=17.6154 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 35960] reward=-117769399.3 actor_loss=0.4419 critic_loss=150357068458.6667 entropy=17.6127 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 35960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-511489.4 mean_steps=13.4
|
|
[Episode 35970] reward=-121842819.2 actor_loss=0.2964 critic_loss=151796789069.9131 entropy=17.6201 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 35980] reward=-122152764.2 actor_loss=0.2993 critic_loss=157096356864.0000 entropy=17.6236 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 35980] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-306894.2 mean_steps=18.1
|
|
[Episode 35990] reward=-118291941.4 actor_loss=0.2763 critic_loss=161886537240.3810 entropy=17.6277 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 36000] reward=-120298718.0 actor_loss=0.2434 critic_loss=156505341152.7805 entropy=17.6388 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 36000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510611.1 mean_steps=14.6
|
|
[Episode 36010] reward=-120102013.4 actor_loss=0.3486 critic_loss=163961903149.5111 entropy=17.6378 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 36020] reward=-118764394.0 actor_loss=0.3469 critic_loss=146858825045.3333 entropy=17.6495 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 36020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517061.9 mean_steps=14.2
|
|
[Episode 36030] reward=-120830017.7 actor_loss=0.1869 critic_loss=153229310464.0000 entropy=17.6425 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 36040] reward=-119943432.7 actor_loss=0.3049 critic_loss=157723555653.8182 entropy=17.6367 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 36040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-421970.2 mean_steps=16.5
|
|
[Episode 36050] reward=-118419069.9 actor_loss=0.4137 critic_loss=149236889127.3846 entropy=17.6362 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 36060] reward=-124218280.2 actor_loss=0.2268 critic_loss=159170002944.0000 entropy=17.6402 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 36060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-519354.3 mean_steps=16.4
|
|
[Episode 36070] reward=-119230471.3 actor_loss=0.3577 critic_loss=150846122302.5778 entropy=17.6450 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 36080] reward=-119978512.9 actor_loss=0.3883 critic_loss=151941210112.0000 entropy=17.6471 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 36080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513008.4 mean_steps=13.6
|
|
[Episode 36090] reward=-123898400.9 actor_loss=0.2529 critic_loss=151407966435.5555 entropy=17.6430 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 36100] reward=-120893200.7 actor_loss=0.2291 critic_loss=153226044666.3111 entropy=17.6628 approx_kl=0.0107 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 36100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528792.9 mean_steps=14.6
|
|
[Episode 36110] reward=-125728896.1 actor_loss=0.2906 critic_loss=160816779537.0667 entropy=17.6637 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 36120] reward=-119541506.9 actor_loss=0.3021 critic_loss=154453376581.1892 entropy=17.6556 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 36120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556937.9 mean_steps=13.4
|
|
[Episode 36130] reward=-119380341.1 actor_loss=0.3652 critic_loss=146495237142.7556 entropy=17.6646 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 36140] reward=-116765389.7 actor_loss=0.3282 critic_loss=144630116352.0000 entropy=17.6662 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 36140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540181.1 mean_steps=13.8
|
|
[Episode 36150] reward=-128901020.5 actor_loss=0.2934 critic_loss=259773109589.3333 entropy=17.6612 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 36160] reward=-118204586.2 actor_loss=0.3575 critic_loss=146064440631.6522 entropy=17.6796 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 36160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-545328.7 mean_steps=14.8
|
|
[Episode 36170] reward=-119747919.6 actor_loss=0.3483 critic_loss=149423588752.6956 entropy=17.6835 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 36180] reward=-123682340.0 actor_loss=0.2801 critic_loss=156315381760.0000 entropy=17.6822 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 36180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392659.9 mean_steps=16.6
|
|
[Episode 36190] reward=-118162872.2 actor_loss=0.3610 critic_loss=150714105856.0000 entropy=17.6836 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 36200] reward=-121213491.0 actor_loss=0.2201 critic_loss=146274515968.0000 entropy=17.7004 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 36200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577203.9 mean_steps=12.8
|
|
[Episode 36210] reward=-123100690.9 actor_loss=0.3057 critic_loss=164786684586.6667 entropy=17.7035 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 36220] reward=-118246673.3 actor_loss=0.2501 critic_loss=155115334602.1053 entropy=17.7142 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 36220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-382388.8 mean_steps=16.4
|
|
[Episode 36230] reward=-125792233.7 actor_loss=0.2237 critic_loss=160783681182.8965 entropy=17.7079 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 36240] reward=-121091681.5 actor_loss=0.4092 critic_loss=167691529216.0000 entropy=17.7129 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 36240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535167.3 mean_steps=14.4
|
|
[Episode 36250] reward=-119253097.2 actor_loss=0.2917 critic_loss=143459625642.6667 entropy=17.7265 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 36260] reward=-112320439.5 actor_loss=0.2679 critic_loss=138972806582.8571 entropy=17.7304 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 36260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-605653.7 mean_steps=12.8
|
|
[Episode 36270] reward=-122312763.6 actor_loss=0.3282 critic_loss=160173725403.4286 entropy=17.7368 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 36280] reward=-119399103.7 actor_loss=0.2447 critic_loss=145366506074.3529 entropy=17.7432 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 36280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-610469.5 mean_steps=13.9
|
|
[Episode 36290] reward=-118781644.6 actor_loss=0.4063 critic_loss=151676772352.0000 entropy=17.7314 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 36300] reward=-123466987.9 actor_loss=0.2302 critic_loss=159783699160.1778 entropy=17.7254 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 36300] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-340672.0 mean_steps=17.0
|
|
[Episode 36310] reward=-120865575.2 actor_loss=0.2921 critic_loss=154232258078.1176 entropy=17.7369 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 36320] reward=-121981731.8 actor_loss=0.3129 critic_loss=153144724992.0000 entropy=17.7348 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 36320] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-382695.6 mean_steps=17.5
|
|
[Episode 36330] reward=-123947696.7 actor_loss=0.2948 critic_loss=158041257797.8182 entropy=17.7423 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 36340] reward=-118244413.1 actor_loss=0.1508 critic_loss=145844615031.4667 entropy=17.7477 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 36340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-360940.6 mean_steps=16.2
|
|
[Episode 36350] reward=-125080682.5 actor_loss=0.3346 critic_loss=406267296481.2800 entropy=17.7380 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 36360] reward=-122281132.2 actor_loss=0.2386 critic_loss=150832336310.8571 entropy=17.7551 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 36360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578012.9 mean_steps=13.8
|
|
[Episode 36370] reward=-119518511.2 actor_loss=0.3249 critic_loss=147165875248.7619 entropy=17.7567 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 36380] reward=-115444922.2 actor_loss=0.2863 critic_loss=147301114402.1333 entropy=17.7536 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 36380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508855.2 mean_steps=14.7
|
|
[Episode 36390] reward=-123519047.3 actor_loss=0.2373 critic_loss=155947816810.1463 entropy=17.7496 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 36400] reward=-121045082.3 actor_loss=0.2322 critic_loss=178757462698.6667 entropy=17.7395 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 36400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407783.1 mean_steps=16.6
|
|
[Episode 36410] reward=-119316343.1 actor_loss=0.3784 critic_loss=161307032289.2800 entropy=17.7344 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 36420] reward=-120909028.9 actor_loss=0.3427 critic_loss=156277102273.4222 entropy=17.7340 approx_kl=0.0108 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 36420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568130.4 mean_steps=13.4
|
|
[Episode 36430] reward=-115673059.9 actor_loss=0.3715 critic_loss=139882784954.1818 entropy=17.7307 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 36440] reward=-120025694.0 actor_loss=0.3656 critic_loss=154372407296.0000 entropy=17.7237 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 36440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-483689.6 mean_steps=12.9
|
|
[Episode 36450] reward=-118590280.3 actor_loss=0.1819 critic_loss=150270985648.3556 entropy=17.7159 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 36460] reward=-114798519.5 actor_loss=0.2603 critic_loss=155981769750.7556 entropy=17.7087 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 36460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433118.3 mean_steps=14.7
|
|
[Episode 36470] reward=-118649306.1 actor_loss=0.2144 critic_loss=155889048064.0000 entropy=17.6905 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 36480] reward=-116609813.5 actor_loss=0.3208 critic_loss=143054507520.0000 entropy=17.6838 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 36480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491220.4 mean_steps=15.0
|
|
[Episode 36490] reward=-123873862.6 actor_loss=0.2245 critic_loss=151401905265.7778 entropy=17.6943 approx_kl=0.0117 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 36500] reward=-118353597.2 actor_loss=0.3759 critic_loss=147817598976.0000 entropy=17.7012 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 36500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434689.6 mean_steps=15.8
|
|
[Episode 36510] reward=-114864788.4 actor_loss=0.3643 critic_loss=144110806285.4737 entropy=17.6934 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 36520] reward=-117250631.5 actor_loss=0.2250 critic_loss=138897947528.9302 entropy=17.6897 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 36520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547492.2 mean_steps=14.2
|
|
[Episode 36530] reward=-116862185.7 actor_loss=0.2310 critic_loss=144156688384.0000 entropy=17.6982 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 36540] reward=-117708152.1 actor_loss=0.3619 critic_loss=146092713707.2433 entropy=17.6871 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 36540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554661.1 mean_steps=13.6
|
|
[Episode 36550] reward=-121201368.6 actor_loss=0.2422 critic_loss=153229739349.3333 entropy=17.6947 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 36560] reward=-120061530.1 actor_loss=0.2891 critic_loss=149980978471.8222 entropy=17.7018 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 36560] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-320989.0 mean_steps=17.1
|
|
[Episode 36570] reward=-119313440.1 actor_loss=0.3503 critic_loss=145130587386.3111 entropy=17.7043 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 36580] reward=-118890095.0 actor_loss=0.1731 critic_loss=142639238348.8000 entropy=17.7041 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 36580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-429617.5 mean_steps=14.8
|
|
[Episode 36590] reward=-117862125.4 actor_loss=0.2735 critic_loss=148404099395.3684 entropy=17.6981 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 36600] reward=-117848502.7 actor_loss=0.2876 critic_loss=144568919381.3333 entropy=17.7096 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 36600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507949.9 mean_steps=14.6
|
|
[Episode 36610] reward=-121221828.5 actor_loss=0.1458 critic_loss=148137542144.0000 entropy=17.7170 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 36620] reward=-126208057.1 actor_loss=0.2370 critic_loss=187311194112.0000 entropy=17.7150 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 36620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501089.5 mean_steps=14.1
|
|
[Episode 36630] reward=-117146959.5 actor_loss=0.2604 critic_loss=146249131961.3793 entropy=17.7130 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 36640] reward=-121044629.1 actor_loss=0.3284 critic_loss=167536206180.1739 entropy=17.7230 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 36640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561342.7 mean_steps=13.3
|
|
[Episode 36650] reward=-130053280.9 actor_loss=0.3386 critic_loss=349942054912.0000 entropy=17.7142 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 36660] reward=-125089279.9 actor_loss=0.2623 critic_loss=161058394772.6452 entropy=17.7112 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 36660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486715.6 mean_steps=14.1
|
|
[Episode 36670] reward=-120309614.9 actor_loss=0.2764 critic_loss=148587485076.2105 entropy=17.7063 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 36680] reward=-119682985.4 actor_loss=0.3570 critic_loss=151907588388.5714 entropy=17.7132 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 36680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498760.8 mean_steps=14.4
|
|
[Episode 36690] reward=-122315583.7 actor_loss=0.2865 critic_loss=161204911010.9091 entropy=17.7152 approx_kl=0.0117 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 36700] reward=-119491555.2 actor_loss=0.2890 critic_loss=148496073728.0000 entropy=17.7280 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 36700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553964.9 mean_steps=14.2
|
|
[Episode 36710] reward=-117470462.0 actor_loss=0.3212 critic_loss=149183636275.2000 entropy=17.7246 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 36720] reward=-121197517.9 actor_loss=0.3655 critic_loss=149797630634.6667 entropy=17.7206 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 36720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-336062.4 mean_steps=15.8
|
|
[Episode 36730] reward=-118049147.9 actor_loss=0.2726 critic_loss=142802884608.0000 entropy=17.7142 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 36740] reward=-118683364.4 actor_loss=0.3318 critic_loss=152229996544.0000 entropy=17.7101 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 36740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-479770.0 mean_steps=15.8
|
|
[Episode 36750] reward=-118854161.2 actor_loss=0.2962 critic_loss=147996612113.6552 entropy=17.7047 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 36760] reward=-122328895.6 actor_loss=0.3027 critic_loss=150789295308.8000 entropy=17.7141 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 36760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525825.0 mean_steps=14.3
|
|
[Episode 36770] reward=-119271850.0 actor_loss=0.3048 critic_loss=147632014872.3810 entropy=17.7199 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 36780] reward=-124421950.0 actor_loss=0.2530 critic_loss=158897504256.0000 entropy=17.7270 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 36780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-588093.2 mean_steps=11.9
|
|
[Episode 36790] reward=-119330670.3 actor_loss=0.3911 critic_loss=148871019373.7143 entropy=17.7230 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 36800] reward=-119379647.1 actor_loss=0.2544 critic_loss=146906390528.0000 entropy=17.7194 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 36800] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-272580.3 mean_steps=18.6
|
|
[Episode 36810] reward=-126242126.8 actor_loss=0.2745 critic_loss=160058395033.6000 entropy=17.7314 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 36820] reward=-119886528.3 actor_loss=0.2729 critic_loss=161210322670.9333 entropy=17.7238 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 36820] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-282119.3 mean_steps=18.2
|
|
[Episode 36830] reward=-115802912.1 actor_loss=0.3438 critic_loss=148427590860.8000 entropy=17.7151 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 36840] reward=-125129056.9 actor_loss=0.2812 critic_loss=157391507671.5789 entropy=17.7315 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 36840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-618063.6 mean_steps=12.3
|
|
[Episode 36850] reward=-122149705.8 actor_loss=0.2930 critic_loss=156631348175.2381 entropy=17.7270 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 36860] reward=-125185671.2 actor_loss=0.3283 critic_loss=249508831514.4828 entropy=17.7293 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 36860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-363616.9 mean_steps=15.3
|
|
[Episode 36870] reward=-117433212.1 actor_loss=0.3513 critic_loss=147595935744.0000 entropy=17.7326 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 36880] reward=-122462987.0 actor_loss=0.3228 critic_loss=176328987382.5185 entropy=17.7272 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 36880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523884.3 mean_steps=14.3
|
|
[Episode 36890] reward=-119849493.7 actor_loss=0.3044 critic_loss=149051654144.0000 entropy=17.7277 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 36900] reward=-123120119.3 actor_loss=0.2679 critic_loss=156564020857.9048 entropy=17.7245 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 36900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567594.3 mean_steps=13.8
|
|
[Episode 36910] reward=-121800697.3 actor_loss=0.2689 critic_loss=148360866762.1053 entropy=17.7237 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 36920] reward=-121320234.6 actor_loss=0.3252 critic_loss=152451413333.3333 entropy=17.7279 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 36920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450905.3 mean_steps=16.1
|
|
[Episode 36930] reward=-119359574.6 actor_loss=0.2562 critic_loss=147216580823.5789 entropy=17.7239 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 36940] reward=-122129478.5 actor_loss=0.2783 critic_loss=150978027237.5172 entropy=17.7447 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 36940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518729.8 mean_steps=14.4
|
|
[Episode 36950] reward=-122377142.9 actor_loss=0.2584 critic_loss=148557010167.1724 entropy=17.7521 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 36960] reward=-120896039.4 actor_loss=0.3147 critic_loss=154973000704.0000 entropy=17.7506 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 36960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-510673.8 mean_steps=14.9
|
|
[Episode 36970] reward=-121027131.7 actor_loss=0.2419 critic_loss=150775651328.0000 entropy=17.7538 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 36980] reward=-123399164.2 actor_loss=0.3330 critic_loss=159969443840.0000 entropy=17.7595 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 36980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552019.1 mean_steps=13.6
|
|
[Episode 36990] reward=-117680874.3 actor_loss=0.4328 critic_loss=144612856263.1111 entropy=17.7565 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 37000] reward=-120540089.2 actor_loss=0.3541 critic_loss=153856557056.0000 entropy=17.7674 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 37000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451989.2 mean_steps=14.8
|
|
[Episode 37010] reward=-120605633.8 actor_loss=0.2741 critic_loss=159137695185.4546 entropy=17.7636 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 37020] reward=-126096809.2 actor_loss=0.1882 critic_loss=156847690805.8947 entropy=17.7630 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 37020] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-300476.5 mean_steps=17.6
|
|
[Episode 37030] reward=-119012059.7 actor_loss=0.1894 critic_loss=157346745995.6364 entropy=17.7638 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 37040] reward=-120934845.6 actor_loss=0.5936 critic_loss=292711081886.4762 entropy=17.7623 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 37040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-525580.0 mean_steps=15.2
|
|
[Episode 37050] reward=-121450913.6 actor_loss=0.2800 critic_loss=153211988650.6667 entropy=17.7427 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 37060] reward=-119113329.2 actor_loss=0.2226 critic_loss=147180715287.2727 entropy=17.7299 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 37060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465871.1 mean_steps=14.9
|
|
[Episode 37070] reward=-121777467.3 actor_loss=0.3160 critic_loss=153213307904.0000 entropy=17.7115 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 37080] reward=-121243878.9 actor_loss=0.2672 critic_loss=150350462976.0000 entropy=17.7108 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 37080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540941.9 mean_steps=13.4
|
|
[Episode 37090] reward=-120591354.0 actor_loss=0.2512 critic_loss=148909564928.0000 entropy=17.7143 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 37100] reward=-123931818.8 actor_loss=0.2651 critic_loss=154619120088.6154 entropy=17.7235 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 37100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-519669.3 mean_steps=15.2
|
|
[Episode 37110] reward=-116066914.8 actor_loss=0.3240 critic_loss=146728119266.7429 entropy=17.7279 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 37120] reward=-118656395.1 actor_loss=0.2395 critic_loss=145322474556.2353 entropy=17.7454 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 37120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-490729.0 mean_steps=13.4
|
|
[Episode 37130] reward=-117844436.9 actor_loss=0.2603 critic_loss=141837359695.6444 entropy=17.7491 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 37140] reward=-122602939.0 actor_loss=0.3330 critic_loss=155117270109.0909 entropy=17.7459 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 37140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497339.5 mean_steps=14.4
|
|
[Episode 37150] reward=-125366244.0 actor_loss=0.1935 critic_loss=154832304355.5555 entropy=17.7501 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 37160] reward=-121213313.8 actor_loss=0.2341 critic_loss=151892212004.5714 entropy=17.7431 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 37160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597745.7 mean_steps=12.8
|
|
[Episode 37170] reward=-118595259.6 actor_loss=0.3255 critic_loss=164070750208.0000 entropy=17.7307 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 37180] reward=-121938078.4 actor_loss=0.3029 critic_loss=184450985301.3333 entropy=17.7359 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 37180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421798.1 mean_steps=15.5
|
|
[Episode 37190] reward=-120511100.1 actor_loss=0.2064 critic_loss=147223424099.0968 entropy=17.7353 approx_kl=0.0123 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 37200] reward=-121126185.4 actor_loss=0.2769 critic_loss=154194243725.2414 entropy=17.7446 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 37200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449629.3 mean_steps=15.6
|
|
[Episode 37210] reward=-118623496.6 actor_loss=0.3013 critic_loss=153759116537.0811 entropy=17.7465 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 37220] reward=-120679214.6 actor_loss=0.2236 critic_loss=150311524244.2105 entropy=17.7645 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 37220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489977.7 mean_steps=14.7
|
|
[Episode 37230] reward=-121986686.7 actor_loss=0.2719 critic_loss=159187025264.6400 entropy=17.7768 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 37240] reward=-122064712.5 actor_loss=0.3032 critic_loss=161321177978.4348 entropy=17.7752 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 37240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-618971.7 mean_steps=13.0
|
|
[Episode 37250] reward=-119069208.9 actor_loss=0.2059 critic_loss=155006661427.2000 entropy=17.7824 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 37260] reward=-120000881.1 actor_loss=0.4104 critic_loss=150216132987.2592 entropy=17.8047 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 37260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538485.1 mean_steps=13.4
|
|
[Episode 37270] reward=-120478161.1 actor_loss=0.2031 critic_loss=154128974336.0000 entropy=17.7889 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 37280] reward=-119591924.0 actor_loss=0.2844 critic_loss=151229140796.9524 entropy=17.7926 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 37280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481408.9 mean_steps=14.2
|
|
[Episode 37290] reward=-119193525.7 actor_loss=0.2390 critic_loss=157376170522.9474 entropy=17.7895 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 37300] reward=-120189599.0 actor_loss=0.2607 critic_loss=153932689603.0476 entropy=17.7957 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 37300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-480473.2 mean_steps=15.9
|
|
[Episode 37310] reward=-114517846.2 actor_loss=0.3105 critic_loss=154965875097.6000 entropy=17.7812 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 37320] reward=-118891258.4 actor_loss=0.3064 critic_loss=151845431068.4445 entropy=17.7627 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 37320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503385.8 mean_steps=15.5
|
|
[Episode 37330] reward=-121538129.2 actor_loss=0.3008 critic_loss=154301563562.6667 entropy=17.7599 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 37340] reward=-120873121.9 actor_loss=0.2416 critic_loss=152153297884.6897 entropy=17.7487 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 37340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438187.4 mean_steps=15.5
|
|
[Episode 37350] reward=-124574855.7 actor_loss=0.3295 critic_loss=171337607122.4889 entropy=17.7414 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 37360] reward=-117568193.0 actor_loss=0.2538 critic_loss=150186265195.1628 entropy=17.7207 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 37360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-327517.6 mean_steps=16.0
|
|
[Episode 37370] reward=-118154756.9 actor_loss=0.3425 critic_loss=147536189440.0000 entropy=17.7221 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 37380] reward=-115083628.9 actor_loss=0.2963 critic_loss=150433420209.2308 entropy=17.7358 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 37380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-479256.3 mean_steps=16.1
|
|
[Episode 37390] reward=-117935135.0 actor_loss=0.3767 critic_loss=148861992215.2727 entropy=17.7477 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 37400] reward=-115167453.9 actor_loss=0.3621 critic_loss=147640849905.3714 entropy=17.7643 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 37400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-628733.8 mean_steps=14.2
|
|
[Episode 37410] reward=-119426672.6 actor_loss=0.2968 critic_loss=154117295152.7619 entropy=17.7573 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 37420] reward=-119084505.3 actor_loss=0.2889 critic_loss=145031253125.5652 entropy=17.7574 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 37420] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-349143.1 mean_steps=17.6
|
|
[Episode 37430] reward=-122897667.0 actor_loss=0.3211 critic_loss=157820306897.4546 entropy=17.7424 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 37440] reward=-118115771.0 actor_loss=0.3249 critic_loss=153334085093.0526 entropy=17.7283 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 37440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534964.1 mean_steps=13.3
|
|
[Episode 37450] reward=-120835967.3 actor_loss=0.2590 critic_loss=154167982984.9302 entropy=17.7233 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 37460] reward=-116539863.3 actor_loss=0.4555 critic_loss=146235906785.2800 entropy=17.7304 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 37460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439291.7 mean_steps=15.7
|
|
[Episode 37470] reward=-114482564.1 actor_loss=0.2514 critic_loss=136958820923.5349 entropy=17.7316 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 37480] reward=-123783530.0 actor_loss=0.3302 critic_loss=273012422883.5555 entropy=17.7340 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 37480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451076.3 mean_steps=14.8
|
|
[Episode 37490] reward=-186499841.5 actor_loss=0.3327 critic_loss=15499335186659.5547 entropy=17.7377 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 37500] reward=-131514264.5 actor_loss=0.3033 critic_loss=961068561959.3846 entropy=17.7521 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 37500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572734.8 mean_steps=12.7
|
|
[Episode 37510] reward=-124363488.3 actor_loss=0.1940 critic_loss=154312557012.1143 entropy=17.7596 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 37520] reward=-123105884.0 actor_loss=0.2576 critic_loss=157351670579.2000 entropy=17.7883 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 37520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444152.6 mean_steps=16.0
|
|
[Episode 37530] reward=-121042189.2 actor_loss=0.2714 critic_loss=156928510004.5128 entropy=17.7592 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 37540] reward=-123842038.1 actor_loss=0.2987 critic_loss=173985110173.5385 entropy=17.7524 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 37540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-486722.3 mean_steps=15.6
|
|
[Episode 37550] reward=-120580200.4 actor_loss=0.2139 critic_loss=160715062710.8571 entropy=17.7397 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 37560] reward=-123678523.2 actor_loss=0.2403 critic_loss=154651958227.4783 entropy=17.7354 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 37560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553003.6 mean_steps=13.4
|
|
[Episode 37570] reward=-111858001.8 actor_loss=0.3761 critic_loss=141910517504.0000 entropy=17.7113 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 37580] reward=-120776449.2 actor_loss=0.2616 critic_loss=157500523625.9310 entropy=17.7036 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 37580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-359037.6 mean_steps=16.1
|
|
[Episode 37590] reward=-116063636.1 actor_loss=0.4043 critic_loss=146840042291.2000 entropy=17.7028 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 37600] reward=-122604398.6 actor_loss=0.2599 critic_loss=153048150126.7027 entropy=17.6891 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 37600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475543.6 mean_steps=14.9
|
|
[Episode 37610] reward=-118270234.5 actor_loss=0.2667 critic_loss=251719920298.6667 entropy=17.6982 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 37620] reward=-118200648.6 actor_loss=0.3411 critic_loss=146828219572.7059 entropy=17.6880 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 37620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441020.2 mean_steps=15.7
|
|
[Episode 37630] reward=-119101085.0 actor_loss=0.2893 critic_loss=151466027690.6667 entropy=17.6812 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 37640] reward=-120455747.5 actor_loss=0.2147 critic_loss=149232900388.5714 entropy=17.6985 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 37640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-543171.2 mean_steps=15.5
|
|
[Episode 37650] reward=-116681439.5 actor_loss=0.3048 critic_loss=148822329116.4445 entropy=17.6784 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 37660] reward=-121906062.1 actor_loss=0.3278 critic_loss=156920232398.4516 entropy=17.6805 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 37660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-456003.3 mean_steps=16.9
|
|
[Episode 37670] reward=-116567720.9 actor_loss=0.3699 critic_loss=142719061138.2857 entropy=17.6818 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 37680] reward=-116222075.2 actor_loss=0.2414 critic_loss=146860637608.5854 entropy=17.6710 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 37680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524787.1 mean_steps=13.9
|
|
[Episode 37690] reward=-113376200.7 actor_loss=0.2930 critic_loss=137847751773.0909 entropy=17.6528 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 37700] reward=-119526124.1 actor_loss=0.2657 critic_loss=146526127970.4615 entropy=17.6594 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 37700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-349585.5 mean_steps=17.0
|
|
[Episode 37710] reward=-122812314.5 actor_loss=0.2328 critic_loss=155298553309.8667 entropy=17.6462 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 37720] reward=-119325276.2 actor_loss=0.3537 critic_loss=147759053423.3044 entropy=17.6650 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 37720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442858.3 mean_steps=14.9
|
|
[Episode 37730] reward=-114749467.0 actor_loss=0.2864 critic_loss=146629919890.2857 entropy=17.6891 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 37740] reward=-122236938.0 actor_loss=0.1859 critic_loss=155610154780.4445 entropy=17.6871 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 37740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-344323.2 mean_steps=15.8
|
|
[Episode 37750] reward=-121460363.8 actor_loss=0.1412 critic_loss=146149558784.0000 entropy=17.6752 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 37760] reward=-121017221.6 actor_loss=0.3678 critic_loss=152328447441.4546 entropy=17.6719 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 37760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487375.9 mean_steps=15.2
|
|
[Episode 37770] reward=-117293444.8 actor_loss=0.2467 critic_loss=139617753861.6889 entropy=17.6663 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 37780] reward=-119973929.1 actor_loss=0.2058 critic_loss=150398661142.2609 entropy=17.6712 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 37780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442494.4 mean_steps=15.8
|
|
[Episode 37790] reward=-121396896.8 actor_loss=0.3044 critic_loss=155348750336.0000 entropy=17.6691 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 37800] reward=-121954241.1 actor_loss=0.1546 critic_loss=154045244837.6471 entropy=17.6686 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 37800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536221.5 mean_steps=14.6
|
|
[Episode 37810] reward=-122148398.2 actor_loss=0.2654 critic_loss=153779743766.7556 entropy=17.6782 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 37820] reward=-118682268.8 actor_loss=0.3150 critic_loss=148580551338.6667 entropy=17.6814 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 37820] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-317675.8 mean_steps=17.5
|
|
[Episode 37830] reward=-119082369.3 actor_loss=0.2174 critic_loss=147775975424.0000 entropy=17.6751 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 37840] reward=-114957790.9 actor_loss=0.2221 critic_loss=145608716072.4211 entropy=17.6746 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 37840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448439.2 mean_steps=14.6
|
|
[Episode 37850] reward=-115327005.7 actor_loss=0.3861 critic_loss=141381472138.9714 entropy=17.6729 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 37860] reward=-139348079.0 actor_loss=0.2572 critic_loss=1696365304685.7144 entropy=17.6621 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 37860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-419071.0 mean_steps=14.7
|
|
[Episode 37870] reward=-119454741.4 actor_loss=0.1807 critic_loss=166329535977.7391 entropy=17.6530 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 37880] reward=-122343483.3 actor_loss=0.2848 critic_loss=159701673004.5217 entropy=17.6485 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 37880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454091.3 mean_steps=14.8
|
|
[Episode 37890] reward=-117549755.7 actor_loss=0.3678 critic_loss=149313649825.6842 entropy=17.6388 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 37900] reward=-118011124.1 actor_loss=0.2666 critic_loss=151817211708.9524 entropy=17.6423 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 37900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-575034.5 mean_steps=14.6
|
|
[Episode 37910] reward=-120160930.7 actor_loss=0.2292 critic_loss=147018879720.7273 entropy=17.6413 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 37920] reward=-121316983.7 actor_loss=0.2620 critic_loss=150220286397.2174 entropy=17.6447 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 37920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-511601.3 mean_steps=15.2
|
|
[Episode 37930] reward=-122777435.5 actor_loss=0.3086 critic_loss=152403938707.3940 entropy=17.6497 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 37940] reward=-119781670.2 actor_loss=0.2542 critic_loss=147988848861.4054 entropy=17.6374 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 37940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537171.9 mean_steps=13.2
|
|
[Episode 37950] reward=-119919687.2 actor_loss=0.3056 critic_loss=151878429144.6154 entropy=17.6335 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 37960] reward=-119622884.8 actor_loss=0.3059 critic_loss=153786371150.7692 entropy=17.6299 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 37960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-366407.1 mean_steps=16.2
|
|
[Episode 37970] reward=-118089022.9 actor_loss=0.2732 critic_loss=146106830848.0000 entropy=17.6227 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 37980] reward=-119678802.5 actor_loss=0.3021 critic_loss=158009874724.5714 entropy=17.6281 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 37980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500323.0 mean_steps=13.6
|
|
[Episode 37990] reward=-119321321.6 actor_loss=0.3082 critic_loss=152438087680.0000 entropy=17.6374 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 38000] reward=-123508593.9 actor_loss=0.2674 critic_loss=154943725021.8667 entropy=17.6381 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 38000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481242.9 mean_steps=14.7
|
|
[Episode 38010] reward=-120250780.1 actor_loss=0.2747 critic_loss=147549127368.3478 entropy=17.6322 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 38020] reward=-115243638.2 actor_loss=0.3436 critic_loss=142824281793.4222 entropy=17.6336 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 38020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546849.1 mean_steps=13.6
|
|
[Episode 38030] reward=-123799853.8 actor_loss=0.2761 critic_loss=153575652631.2727 entropy=17.6354 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 38040] reward=-118850493.6 actor_loss=0.3166 critic_loss=146017113281.7297 entropy=17.6302 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 38040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-508204.2 mean_steps=15.0
|
|
[Episode 38050] reward=-124427113.4 actor_loss=0.3500 critic_loss=161594044731.0769 entropy=17.6360 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 38060] reward=-116843640.3 actor_loss=0.3443 critic_loss=150492953941.3333 entropy=17.6256 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 38060] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-319883.4 mean_steps=17.9
|
|
[Episode 38070] reward=-112200694.0 actor_loss=0.3111 critic_loss=144364310869.3333 entropy=17.6215 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 38080] reward=-119493558.8 actor_loss=0.2505 critic_loss=144520493283.5555 entropy=17.6398 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 38080] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-353038.8 mean_steps=17.6
|
|
[Episode 38090] reward=-119901847.0 actor_loss=0.2709 critic_loss=145463142809.6000 entropy=17.6283 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 38100] reward=-120833568.0 actor_loss=0.3139 critic_loss=158806907859.4783 entropy=17.6260 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 38100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562365.2 mean_steps=13.8
|
|
[Episode 38110] reward=-119529547.0 actor_loss=0.2806 critic_loss=157393427894.8571 entropy=17.6236 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 38120] reward=-124700838.2 actor_loss=0.3695 critic_loss=264617400729.6000 entropy=17.6127 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 38120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499575.6 mean_steps=14.1
|
|
[Episode 38130] reward=-118361038.6 actor_loss=0.2844 critic_loss=146445622665.8462 entropy=17.6133 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 38140] reward=-123123380.7 actor_loss=0.3157 critic_loss=153873652849.7778 entropy=17.6264 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 38140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-526140.6 mean_steps=15.8
|
|
[Episode 38150] reward=-117381882.1 actor_loss=0.2294 critic_loss=156287137412.7408 entropy=17.6328 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 38160] reward=-117729723.3 actor_loss=0.2881 critic_loss=149584296618.6667 entropy=17.6483 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 38160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517698.5 mean_steps=14.7
|
|
[Episode 38170] reward=-116716616.7 actor_loss=0.3214 critic_loss=150733184341.3333 entropy=17.6547 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 38180] reward=-120839783.0 actor_loss=0.3212 critic_loss=153558189592.3810 entropy=17.6544 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 38180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501673.9 mean_steps=14.0
|
|
[Episode 38190] reward=-122651186.5 actor_loss=0.3437 critic_loss=153734103040.0000 entropy=17.6558 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 38200] reward=-119130195.7 actor_loss=0.3071 critic_loss=153989181067.6364 entropy=17.6553 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 38200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-496848.1 mean_steps=16.2
|
|
[Episode 38210] reward=-117786916.9 actor_loss=0.3202 critic_loss=149338092495.2381 entropy=17.6465 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 38220] reward=-116535914.6 actor_loss=0.2723 critic_loss=160818770505.1429 entropy=17.6413 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 38220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-429865.5 mean_steps=13.4
|
|
[Episode 38230] reward=-124090066.8 actor_loss=0.2025 critic_loss=163649636644.5714 entropy=17.6413 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 38240] reward=-115930040.4 actor_loss=0.3435 critic_loss=149391029733.0526 entropy=17.6462 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 38240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431078.3 mean_steps=15.2
|
|
[Episode 38250] reward=-117413112.2 actor_loss=0.2684 critic_loss=148650868345.9048 entropy=17.6562 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 38260] reward=-113378996.7 actor_loss=0.4761 critic_loss=144725756928.0000 entropy=17.6469 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 38260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549289.8 mean_steps=14.3
|
|
[Episode 38270] reward=-119268039.4 actor_loss=0.3619 critic_loss=151112482071.2727 entropy=17.6518 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 38280] reward=-118319876.4 actor_loss=0.2494 critic_loss=152205123256.3200 entropy=17.6536 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 38280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555290.8 mean_steps=12.7
|
|
[Episode 38290] reward=-119761575.1 actor_loss=0.2215 critic_loss=150020993558.2609 entropy=17.6585 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 38300] reward=-118128542.3 actor_loss=0.2109 critic_loss=152610666945.5610 entropy=17.6652 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 38300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431253.5 mean_steps=14.6
|
|
[Episode 38310] reward=-123829002.4 actor_loss=0.1884 critic_loss=165438065198.5454 entropy=17.6822 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 38320] reward=-121915817.2 actor_loss=0.3092 critic_loss=152203762619.7333 entropy=17.6812 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 38320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433926.5 mean_steps=15.6
|
|
[Episode 38330] reward=-118566121.5 actor_loss=0.3513 critic_loss=154422556829.5385 entropy=17.6887 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 38340] reward=-120798099.8 actor_loss=0.4332 critic_loss=158633089778.5263 entropy=17.6871 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 38340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-631369.4 mean_steps=11.9
|
|
[Episode 38350] reward=-117711774.5 actor_loss=0.3564 critic_loss=148297995185.2308 entropy=17.6941 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 38360] reward=-117865248.3 actor_loss=0.2696 critic_loss=149918497555.6923 entropy=17.6853 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 38360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-499434.8 mean_steps=13.6
|
|
[Episode 38370] reward=-116929620.4 actor_loss=0.3988 critic_loss=147529250523.4286 entropy=17.6704 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 38380] reward=-118837129.3 actor_loss=0.2384 critic_loss=153053015381.3333 entropy=17.6689 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 38380] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-350119.8 mean_steps=17.4
|
|
[Episode 38390] reward=-119322735.2 actor_loss=0.3848 critic_loss=152003485878.0444 entropy=17.6703 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 38400] reward=-125194810.4 actor_loss=0.3275 critic_loss=159479220435.8621 entropy=17.6599 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 38400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-521054.5 mean_steps=13.4
|
|
[Episode 38410] reward=-112524665.0 actor_loss=0.3809 critic_loss=140129979112.7273 entropy=17.6557 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 38420] reward=-118919162.4 actor_loss=0.2401 critic_loss=146647233194.6667 entropy=17.6483 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 38420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-627914.3 mean_steps=13.2
|
|
[Episode 38430] reward=-119664358.8 actor_loss=0.2965 critic_loss=154160797509.8182 entropy=17.6489 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 38440] reward=-123188505.8 actor_loss=0.2271 critic_loss=171360768585.1429 entropy=17.6406 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 38440] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-683484.5 mean_steps=11.4
|
|
[Episode 38450] reward=-124149479.7 actor_loss=0.2602 critic_loss=164760963832.6857 entropy=17.6422 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 38460] reward=-125802966.4 actor_loss=0.2822 critic_loss=175500178612.7059 entropy=17.6389 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 38460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520002.7 mean_steps=15.3
|
|
[Episode 38470] reward=-116745352.8 actor_loss=0.2712 critic_loss=145762369536.0000 entropy=17.6316 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 38480] reward=-122026931.3 actor_loss=0.2541 critic_loss=158075534995.9111 entropy=17.6313 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 38480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435495.0 mean_steps=15.4
|
|
[Episode 38490] reward=-119016370.7 actor_loss=0.2021 critic_loss=153644296601.6000 entropy=17.6385 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 38500] reward=-115436194.5 actor_loss=0.3283 critic_loss=149339928486.9565 entropy=17.6127 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 38500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-664175.5 mean_steps=12.6
|
|
[Episode 38510] reward=-122142697.1 actor_loss=0.2947 critic_loss=155358790412.1905 entropy=17.6123 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 38520] reward=-123104539.9 actor_loss=0.2985 critic_loss=220971216987.0222 entropy=17.6322 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 38520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531382.1 mean_steps=13.6
|
|
[Episode 38530] reward=-120706843.9 actor_loss=0.3898 critic_loss=157002780299.6364 entropy=17.6317 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 38540] reward=-115299511.4 actor_loss=0.3066 critic_loss=145763538582.5882 entropy=17.6278 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 38540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386763.3 mean_steps=15.9
|
|
[Episode 38550] reward=-117852185.6 actor_loss=0.2478 critic_loss=151318247911.6190 entropy=17.6421 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 38560] reward=-121550113.6 actor_loss=0.2177 critic_loss=158392174778.1818 entropy=17.6461 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 38560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-469317.4 mean_steps=15.8
|
|
[Episode 38570] reward=-120115034.4 actor_loss=0.1828 critic_loss=150912923710.0606 entropy=17.6474 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 38580] reward=-117960197.7 actor_loss=0.3904 critic_loss=151637517107.2000 entropy=17.6440 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 38580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571105.5 mean_steps=14.2
|
|
[Episode 38590] reward=-120113550.1 actor_loss=0.3166 critic_loss=148192963242.6667 entropy=17.6495 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 38600] reward=-117914780.3 actor_loss=0.2852 critic_loss=146418195186.5263 entropy=17.6590 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 38600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-377305.0 mean_steps=15.5
|
|
[Episode 38610] reward=-119015680.0 actor_loss=0.3518 critic_loss=148167411898.1818 entropy=17.6656 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 38620] reward=-114473456.9 actor_loss=0.3434 critic_loss=143582049460.7059 entropy=17.6622 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 38620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537068.5 mean_steps=14.8
|
|
[Episode 38630] reward=-120425794.6 actor_loss=0.4141 critic_loss=158791790376.4211 entropy=17.6582 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 38640] reward=-121561211.3 actor_loss=0.2158 critic_loss=152475770880.0000 entropy=17.6571 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 38640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466905.4 mean_steps=15.1
|
|
[Episode 38650] reward=-117731464.3 actor_loss=0.2891 critic_loss=149093325677.7143 entropy=17.6487 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 38660] reward=-121226937.1 actor_loss=0.2034 critic_loss=149221100384.7111 entropy=17.6359 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 38660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-596498.0 mean_steps=13.1
|
|
[Episode 38670] reward=-114670421.2 actor_loss=0.2524 critic_loss=145703060318.3158 entropy=17.6288 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 38680] reward=-123658152.0 actor_loss=0.2447 critic_loss=156906252190.4762 entropy=17.6349 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 38680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-440119.2 mean_steps=16.4
|
|
[Episode 38690] reward=-120506539.1 actor_loss=0.2932 critic_loss=149669125351.2258 entropy=17.6655 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 38700] reward=-123264488.1 actor_loss=0.1927 critic_loss=161697567305.1429 entropy=17.6724 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 38700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500786.6 mean_steps=15.2
|
|
[Episode 38710] reward=-121221594.0 actor_loss=0.3395 critic_loss=155862626986.6667 entropy=17.6638 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 38720] reward=-121079070.2 actor_loss=0.1429 critic_loss=152529549312.0000 entropy=17.6476 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 38720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513232.9 mean_steps=14.2
|
|
[Episode 38730] reward=-125923940.0 actor_loss=0.2523 critic_loss=158062445256.3478 entropy=17.6529 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 38740] reward=-121372830.9 actor_loss=0.2599 critic_loss=151816609792.0000 entropy=17.6557 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 38740] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-689942.1 mean_steps=11.7
|
|
[Episode 38750] reward=-119547820.5 actor_loss=0.3050 critic_loss=153237585920.0000 entropy=17.6745 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 38760] reward=-121463611.3 actor_loss=0.2071 critic_loss=149209401116.4445 entropy=17.6875 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 38760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419531.9 mean_steps=15.5
|
|
[Episode 38770] reward=-120181234.1 actor_loss=0.3960 critic_loss=149384727990.8571 entropy=17.6812 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 38780] reward=-122863261.0 actor_loss=0.3365 critic_loss=157525000556.0889 entropy=17.6925 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 38780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-483199.8 mean_steps=15.9
|
|
[Episode 38790] reward=-126126143.5 actor_loss=0.2178 critic_loss=267360501760.0000 entropy=17.6936 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 38800] reward=-120815556.6 actor_loss=0.2644 critic_loss=157850221961.8462 entropy=17.6957 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 38800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507312.8 mean_steps=14.1
|
|
[Episode 38810] reward=-115397756.0 actor_loss=0.3257 critic_loss=144939728896.0000 entropy=17.6851 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 38820] reward=-122343654.7 actor_loss=0.3500 critic_loss=150298779830.0444 entropy=17.6687 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 38820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-403977.0 mean_steps=15.6
|
|
[Episode 38830] reward=-117500585.0 actor_loss=0.3452 critic_loss=143878497894.4000 entropy=17.6861 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 38840] reward=-121874674.9 actor_loss=0.3351 critic_loss=151349660186.9474 entropy=17.7009 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 38840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-467645.8 mean_steps=15.6
|
|
[Episode 38850] reward=-120660985.2 actor_loss=0.3074 critic_loss=148762495549.4400 entropy=17.6968 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 38860] reward=-124564234.2 actor_loss=0.3132 critic_loss=156890797085.2571 entropy=17.7041 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 38860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552571.0 mean_steps=14.2
|
|
[Episode 38870] reward=-120808273.5 actor_loss=0.2053 critic_loss=153304402392.6154 entropy=17.6912 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 38880] reward=-119626129.0 actor_loss=0.2809 critic_loss=148968639450.0741 entropy=17.6968 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 38880] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-705996.9 mean_steps=11.8
|
|
[Episode 38890] reward=-124511911.2 actor_loss=0.2023 critic_loss=155819284480.0000 entropy=17.6875 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 38900] reward=-116999096.4 actor_loss=0.2983 critic_loss=139974945060.5714 entropy=17.6873 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 38900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544375.6 mean_steps=13.2
|
|
[Episode 38910] reward=-112970452.0 actor_loss=0.3763 critic_loss=136436240501.0286 entropy=17.6874 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 38920] reward=-119947115.4 actor_loss=0.2730 critic_loss=148725707676.9032 entropy=17.6870 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 38920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-380171.6 mean_steps=15.1
|
|
[Episode 38930] reward=-122286365.7 actor_loss=0.2469 critic_loss=157781994859.3548 entropy=17.6887 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 38940] reward=-125479364.0 actor_loss=0.1662 critic_loss=159447106846.7200 entropy=17.6990 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 38940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525214.4 mean_steps=14.4
|
|
[Episode 38950] reward=-118717337.2 actor_loss=0.2284 critic_loss=144986593962.6667 entropy=17.7139 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 38960] reward=-119538158.0 actor_loss=0.2707 critic_loss=145074202985.4118 entropy=17.7045 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 38960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525262.0 mean_steps=13.8
|
|
[Episode 38970] reward=-122147361.1 actor_loss=0.2119 critic_loss=154321447594.6667 entropy=17.7087 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 38980] reward=-119500567.2 actor_loss=0.1932 critic_loss=142752473415.6800 entropy=17.7042 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 38980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-440867.7 mean_steps=13.4
|
|
[Episode 38990] reward=-121141670.4 actor_loss=0.2303 critic_loss=154154429098.6667 entropy=17.7084 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 39000] reward=-123360596.8 actor_loss=0.3013 critic_loss=154808348867.0476 entropy=17.7090 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 39000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500481.0 mean_steps=14.1
|
|
[Episode 39010] reward=-121741207.0 actor_loss=0.2646 critic_loss=155912623226.8800 entropy=17.7023 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 39020] reward=-122187701.4 actor_loss=0.3316 critic_loss=153694124152.4706 entropy=17.6979 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 39020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-616767.9 mean_steps=12.1
|
|
[Episode 39030] reward=-120933820.6 actor_loss=0.1835 critic_loss=152921000618.6667 entropy=17.7060 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 39040] reward=-124497484.0 actor_loss=0.1862 critic_loss=153088607337.9310 entropy=17.7057 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 39040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530574.2 mean_steps=13.1
|
|
[Episode 39050] reward=-118962142.3 actor_loss=0.2574 critic_loss=147740362524.4445 entropy=17.7056 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 39060] reward=-117193619.8 actor_loss=0.3301 critic_loss=142715791509.8537 entropy=17.7107 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 39060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497254.3 mean_steps=14.9
|
|
[Episode 39070] reward=-123158659.1 actor_loss=0.2687 critic_loss=148985143842.1333 entropy=17.7210 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 39080] reward=-119694090.9 actor_loss=0.2877 critic_loss=152378273955.8400 entropy=17.7153 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 39080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-499502.1 mean_steps=15.0
|
|
[Episode 39090] reward=-120650394.8 actor_loss=0.2859 critic_loss=150571793703.8222 entropy=17.7256 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 39100] reward=-122858123.5 actor_loss=0.3228 critic_loss=151670079123.9111 entropy=17.7118 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 39100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512790.7 mean_steps=13.9
|
|
[Episode 39110] reward=-119977804.1 actor_loss=0.2361 critic_loss=149613859188.3636 entropy=17.7077 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 39120] reward=-120951569.9 actor_loss=0.3517 critic_loss=150143574497.8824 entropy=17.7159 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 39120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-640085.6 mean_steps=13.1
|
|
[Episode 39130] reward=-116592966.3 actor_loss=0.2795 critic_loss=139302860998.1935 entropy=17.7017 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 39140] reward=-120685768.4 actor_loss=0.3252 critic_loss=158675491756.9730 entropy=17.6948 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 39140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-415737.1 mean_steps=16.0
|
|
[Episode 39150] reward=-124322375.1 actor_loss=0.3006 critic_loss=152449063321.6000 entropy=17.6919 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 39160] reward=-119786530.3 actor_loss=0.2759 critic_loss=151972065006.9333 entropy=17.7054 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 39160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-515956.6 mean_steps=14.7
|
|
[Episode 39170] reward=-122658373.0 actor_loss=0.2925 critic_loss=152384656564.7059 entropy=17.7004 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 39180] reward=-125447806.2 actor_loss=0.2599 critic_loss=155072583725.5111 entropy=17.6949 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 39180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-382777.5 mean_steps=14.8
|
|
[Episode 39190] reward=-122270626.8 actor_loss=0.2982 critic_loss=153371474602.6667 entropy=17.6891 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 39200] reward=-121083054.7 actor_loss=0.2629 critic_loss=153278355137.4222 entropy=17.6926 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 39200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588110.1 mean_steps=13.4
|
|
[Episode 39210] reward=-126336792.7 actor_loss=0.2078 critic_loss=159816464197.8182 entropy=17.6902 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 39220] reward=-119647380.7 actor_loss=0.2901 critic_loss=151950901794.1333 entropy=17.6952 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 39220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419216.5 mean_steps=15.2
|
|
[Episode 39230] reward=-117129045.0 actor_loss=0.2542 critic_loss=146529570360.8889 entropy=17.6935 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 39240] reward=-123971319.7 actor_loss=0.2866 critic_loss=152622516724.6222 entropy=17.6913 approx_kl=0.0109 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 39240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545584.0 mean_steps=13.8
|
|
[Episode 39250] reward=-119502378.8 actor_loss=0.3341 critic_loss=152306772413.2174 entropy=17.6826 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 39260] reward=-126048866.8 actor_loss=0.2226 critic_loss=160394990569.2444 entropy=17.6777 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 39260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-521873.2 mean_steps=12.4
|
|
[Episode 39270] reward=-119971813.4 actor_loss=0.2347 critic_loss=145113889450.6667 entropy=17.6677 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 39280] reward=-120641434.2 actor_loss=0.3454 critic_loss=152224075776.0000 entropy=17.6512 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 39280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487289.1 mean_steps=14.1
|
|
[Episode 39290] reward=-122132651.7 actor_loss=0.2590 critic_loss=154744988647.0244 entropy=17.6622 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 39300] reward=-124571621.7 actor_loss=0.2745 critic_loss=155962058938.1818 entropy=17.6602 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 39300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-413501.3 mean_steps=16.6
|
|
[Episode 39310] reward=-119720297.2 actor_loss=0.3683 critic_loss=150583049420.8000 entropy=17.6636 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 39320] reward=-125577953.6 actor_loss=0.2762 critic_loss=159853203634.0869 entropy=17.6622 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 39320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-418748.6 mean_steps=14.7
|
|
[Episode 39330] reward=-125221175.1 actor_loss=0.2390 critic_loss=156479438483.9111 entropy=17.6602 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 39340] reward=-118919031.4 actor_loss=0.2806 critic_loss=150309058641.9200 entropy=17.6629 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 39340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-460164.4 mean_steps=16.8
|
|
[Episode 39350] reward=-119032321.4 actor_loss=0.3130 critic_loss=164079277634.7826 entropy=17.6645 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 39360] reward=-114990757.7 actor_loss=0.3779 critic_loss=139554267136.0000 entropy=17.6615 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 39360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-350447.2 mean_steps=16.2
|
|
[Episode 39370] reward=-127726684.2 actor_loss=0.3414 critic_loss=303460900864.0000 entropy=17.6598 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 39380] reward=-115450098.5 actor_loss=0.2513 critic_loss=194042106493.1555 entropy=17.6713 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 39380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560656.2 mean_steps=12.8
|
|
[Episode 39390] reward=-120506117.1 actor_loss=0.2005 critic_loss=152510536817.7778 entropy=17.6613 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 39400] reward=-119590289.1 actor_loss=0.3618 critic_loss=149320293034.6667 entropy=17.6661 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 39400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520790.9 mean_steps=14.2
|
|
[Episode 39410] reward=-123164511.2 actor_loss=0.2544 critic_loss=154332297126.9565 entropy=17.6607 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 39420] reward=-121715660.1 actor_loss=0.2562 critic_loss=144746899956.6222 entropy=17.6474 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 39420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-380720.1 mean_steps=15.9
|
|
[Episode 39430] reward=-121591909.2 actor_loss=0.3161 critic_loss=153666275494.0540 entropy=17.6483 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 39440] reward=-125404033.0 actor_loss=0.2677 critic_loss=157422253093.9259 entropy=17.6528 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 39440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-493783.3 mean_steps=33.5
|
|
[Episode 39450] reward=-123737348.1 actor_loss=0.2517 critic_loss=150817982964.6222 entropy=17.6435 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 39460] reward=-117291263.7 actor_loss=0.4792 critic_loss=195822920499.2000 entropy=17.6419 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 39460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368795.9 mean_steps=15.9
|
|
[Episode 39470] reward=-119877467.0 actor_loss=0.3076 critic_loss=152819737897.2903 entropy=17.6297 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 39480] reward=-116594440.9 actor_loss=0.3158 critic_loss=141114470084.9231 entropy=17.6323 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 39480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435134.1 mean_steps=15.7
|
|
[Episode 39490] reward=-117432660.3 actor_loss=0.3722 critic_loss=147116562245.8182 entropy=17.6200 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 39500] reward=-115812761.9 actor_loss=0.3064 critic_loss=145363519867.2592 entropy=17.6347 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 39500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460752.2 mean_steps=15.1
|
|
[Episode 39510] reward=-122168694.2 actor_loss=0.3160 critic_loss=155521376477.4054 entropy=17.6198 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 39520] reward=-119197353.9 actor_loss=0.3338 critic_loss=153561299353.6000 entropy=17.6202 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 39520] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-661943.8 mean_steps=11.1
|
|
[Episode 39530] reward=-121981574.6 actor_loss=0.3490 critic_loss=153660240691.2000 entropy=17.6235 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 39540] reward=-123441677.0 actor_loss=0.1601 critic_loss=151353492184.1778 entropy=17.6240 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 39540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-493320.5 mean_steps=15.8
|
|
[Episode 39550] reward=-122793846.3 actor_loss=0.2841 critic_loss=154258948096.0000 entropy=17.6145 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 39560] reward=-120998599.0 actor_loss=0.3042 critic_loss=151692755656.3478 entropy=17.6271 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 39560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-409856.4 mean_steps=14.7
|
|
[Episode 39570] reward=-113969532.0 actor_loss=0.3447 critic_loss=135013009362.4889 entropy=17.6189 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 39580] reward=-124301446.8 actor_loss=0.3471 critic_loss=158741176320.0000 entropy=17.6238 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 39580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-622862.4 mean_steps=13.3
|
|
[Episode 39590] reward=-124243571.8 actor_loss=0.2377 critic_loss=152581673292.1081 entropy=17.6164 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 39600] reward=-120955850.4 actor_loss=0.2845 critic_loss=148980415087.3044 entropy=17.6197 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 39600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547862.3 mean_steps=13.6
|
|
[Episode 39610] reward=-121946531.6 actor_loss=0.3247 critic_loss=152033684480.0000 entropy=17.6261 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 39620] reward=-125710015.5 actor_loss=0.2116 critic_loss=152648882585.6000 entropy=17.6216 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 39620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536586.4 mean_steps=13.7
|
|
[Episode 39630] reward=-116170457.8 actor_loss=0.2885 critic_loss=143120123676.4445 entropy=17.6192 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 39640] reward=-119411608.0 actor_loss=0.3917 critic_loss=229986950099.4783 entropy=17.6210 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 39640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523113.7 mean_steps=14.2
|
|
[Episode 39650] reward=-122211135.6 actor_loss=0.2178 critic_loss=146303810323.6923 entropy=17.6203 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 39660] reward=-122318001.0 actor_loss=0.3375 critic_loss=150680457974.5185 entropy=17.6216 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 39660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488605.7 mean_steps=14.2
|
|
[Episode 39670] reward=-122301898.1 actor_loss=0.2968 critic_loss=152373730344.9600 entropy=17.6180 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 39680] reward=-124654434.1 actor_loss=0.2283 critic_loss=153194471424.0000 entropy=17.5998 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 39680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391741.0 mean_steps=16.1
|
|
[Episode 39690] reward=-120251052.4 actor_loss=0.2850 critic_loss=149828851939.5555 entropy=17.6066 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 39700] reward=-120661617.7 actor_loss=0.2486 critic_loss=148094405500.7180 entropy=17.6048 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 39700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-575452.6 mean_steps=14.8
|
|
[Episode 39710] reward=-119222227.7 actor_loss=0.2871 critic_loss=148733840203.2941 entropy=17.6045 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 39720] reward=-123573413.9 actor_loss=0.2814 critic_loss=149986937370.9474 entropy=17.5935 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 39720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470609.6 mean_steps=13.9
|
|
[Episode 39730] reward=-118722205.7 actor_loss=0.3259 critic_loss=145857450257.0667 entropy=17.5879 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 39740] reward=-119147741.5 actor_loss=0.2689 critic_loss=146846181108.8696 entropy=17.5864 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 39740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-352060.9 mean_steps=17.3
|
|
[Episode 39750] reward=-122299898.1 actor_loss=0.2787 critic_loss=148755886665.1429 entropy=17.5919 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 39760] reward=-117391988.6 actor_loss=0.2514 critic_loss=146001592320.0000 entropy=17.6010 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 39760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551239.1 mean_steps=13.8
|
|
[Episode 39770] reward=-123061127.0 actor_loss=0.2816 critic_loss=149857815756.8000 entropy=17.6025 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 39780] reward=-116050361.0 actor_loss=0.2979 critic_loss=145089764101.6889 entropy=17.5935 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 39780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449570.2 mean_steps=16.1
|
|
[Episode 39790] reward=-126627487.2 actor_loss=0.2172 critic_loss=157958179659.2941 entropy=17.6112 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 39800] reward=-120233175.9 actor_loss=0.3220 critic_loss=149253496285.8667 entropy=17.6070 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 39800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432485.6 mean_steps=15.8
|
|
[Episode 39810] reward=-119198093.5 actor_loss=0.2425 critic_loss=149981137245.6585 entropy=17.6084 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 39820] reward=-118257150.5 actor_loss=0.3334 critic_loss=147001672704.0000 entropy=17.6145 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 39820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-420347.1 mean_steps=14.8
|
|
[Episode 39830] reward=-117117502.6 actor_loss=0.3362 critic_loss=146565560818.1622 entropy=17.6300 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 39840] reward=-119628382.8 actor_loss=0.1786 critic_loss=146666559232.0000 entropy=17.6303 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 39840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498871.5 mean_steps=15.2
|
|
[Episode 39850] reward=-117940629.8 actor_loss=0.2354 critic_loss=144229694707.8095 entropy=17.6333 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 39860] reward=-120779005.9 actor_loss=0.3707 critic_loss=146602553716.3636 entropy=17.6343 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 39860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-383647.1 mean_steps=15.2
|
|
[Episode 39870] reward=-122642676.7 actor_loss=0.2840 critic_loss=153408220968.4211 entropy=17.6303 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 39880] reward=-111495922.9 actor_loss=0.4136 critic_loss=138892702586.4348 entropy=17.6230 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 39880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-426463.2 mean_steps=14.6
|
|
[Episode 39890] reward=-114896900.4 actor_loss=0.2806 critic_loss=143404261586.0513 entropy=17.6238 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 39900] reward=-124550485.9 actor_loss=0.2852 critic_loss=156192285842.2857 entropy=17.6191 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 39900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-500669.0 mean_steps=13.4
|
|
[Episode 39910] reward=-120071669.3 actor_loss=0.2850 critic_loss=156450839756.8000 entropy=17.6135 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 39920] reward=-129042390.3 actor_loss=1.8697 critic_loss=339431520337.9200 entropy=17.6153 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 39920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-560799.7 mean_steps=14.4
|
|
[Episode 39930] reward=-124586487.9 actor_loss=0.2230 critic_loss=150965665792.0000 entropy=17.6285 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 39940] reward=-119968483.1 actor_loss=0.2779 critic_loss=144289776857.2121 entropy=17.6227 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 39940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-562562.4 mean_steps=14.6
|
|
[Episode 39950] reward=-123211066.4 actor_loss=0.2465 critic_loss=153859445396.6452 entropy=17.6262 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 39960] reward=-118640452.5 actor_loss=0.1872 critic_loss=145265039951.6444 entropy=17.6265 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 39960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-499394.1 mean_steps=16.4
|
|
[Episode 39970] reward=-115247596.6 actor_loss=0.3751 critic_loss=142398346945.4222 entropy=17.6466 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 39980] reward=-122202200.1 actor_loss=0.2932 critic_loss=148264306777.0435 entropy=17.6392 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 39980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479555.0 mean_steps=14.1
|
|
[Episode 39990] reward=-122060616.5 actor_loss=0.3400 critic_loss=150145823175.1111 entropy=17.6361 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 40000] reward=-114916531.0 actor_loss=0.3094 critic_loss=147625805470.8965 entropy=17.6356 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 40000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409942.6 mean_steps=15.8
|
|
[Episode 40010] reward=-121020284.6 actor_loss=0.2977 critic_loss=156645130240.0000 entropy=17.6288 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 40020] reward=-121098229.9 actor_loss=0.2354 critic_loss=149002897408.0000 entropy=17.6236 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 40020] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-680173.5 mean_steps=11.6
|
|
[Episode 40030] reward=-123189499.5 actor_loss=0.3283 critic_loss=155672351665.2308 entropy=17.6210 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 40040] reward=-124699903.0 actor_loss=0.3444 critic_loss=154737214220.1905 entropy=17.6331 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 40040] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-638778.2 mean_steps=11.6
|
|
[Episode 40050] reward=-121424845.6 actor_loss=0.2719 critic_loss=149197235712.0000 entropy=17.6194 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 40060] reward=-115995445.1 actor_loss=0.2568 critic_loss=138978382994.2857 entropy=17.6233 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 40060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-620710.7 mean_steps=12.3
|
|
[Episode 40070] reward=-121228931.8 actor_loss=0.2577 critic_loss=149286221687.4667 entropy=17.5963 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 40080] reward=-121335897.6 actor_loss=0.2294 critic_loss=149103713393.7778 entropy=17.6075 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 40080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542184.8 mean_steps=14.0
|
|
[Episode 40090] reward=-121231776.4 actor_loss=0.2514 critic_loss=150362456792.1778 entropy=17.6181 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 40100] reward=-121658535.2 actor_loss=0.2935 critic_loss=145244301691.2592 entropy=17.6136 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 40100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421835.5 mean_steps=15.7
|
|
[Episode 40110] reward=-114695906.8 actor_loss=0.4822 critic_loss=143459316349.1555 entropy=17.6147 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1530 front_blocked=0
|
|
[Episode 40120] reward=-116991222.4 actor_loss=0.3847 critic_loss=139512655346.8718 entropy=17.6121 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 40120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-375068.3 mean_steps=15.8
|
|
[Episode 40130] reward=-118125292.6 actor_loss=0.3037 critic_loss=144079272891.7333 entropy=17.6194 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 40140] reward=-114917087.0 actor_loss=0.3729 critic_loss=134106245256.5333 entropy=17.6302 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 40140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561592.7 mean_steps=14.2
|
|
[Episode 40150] reward=-119085370.3 actor_loss=0.2439 critic_loss=147773336312.6857 entropy=17.6290 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 40160] reward=-121041839.8 actor_loss=0.2752 critic_loss=146872311326.1176 entropy=17.6308 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 40160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-538593.3 mean_steps=12.4
|
|
[Episode 40170] reward=-119418287.2 actor_loss=0.2478 critic_loss=145287036459.8857 entropy=17.6358 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 40180] reward=-118606741.4 actor_loss=0.3508 critic_loss=145294049280.0000 entropy=17.6341 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 40180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568741.0 mean_steps=13.7
|
|
[Episode 40190] reward=-121485942.4 actor_loss=0.2822 critic_loss=150912539033.6000 entropy=17.6212 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 40200] reward=-120864414.7 actor_loss=0.3332 critic_loss=151043511030.5185 entropy=17.6170 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 40200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575711.2 mean_steps=13.6
|
|
[Episode 40210] reward=-126550300.4 actor_loss=0.1191 critic_loss=154512407210.6667 entropy=17.6134 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 40220] reward=-113562252.1 actor_loss=0.2844 critic_loss=140095328347.0222 entropy=17.6195 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 40220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531644.6 mean_steps=14.6
|
|
[Episode 40230] reward=-120632867.9 actor_loss=0.3197 critic_loss=148023150182.4000 entropy=17.6164 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 40240] reward=-121412902.5 actor_loss=0.2265 critic_loss=151622003214.6286 entropy=17.6215 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 40240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506432.3 mean_steps=14.2
|
|
[Episode 40250] reward=-121611454.4 actor_loss=0.2454 critic_loss=153922886899.8095 entropy=17.6270 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 40260] reward=-116177318.6 actor_loss=0.2685 critic_loss=145103890793.4118 entropy=17.6179 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 40260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540886.4 mean_steps=13.9
|
|
[Episode 40270] reward=-120180738.5 actor_loss=0.2699 critic_loss=149693099030.7556 entropy=17.6240 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 40280] reward=-113378204.1 actor_loss=0.3479 critic_loss=139010158006.8571 entropy=17.5956 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 40280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546622.8 mean_steps=13.9
|
|
[Episode 40290] reward=-122247564.1 actor_loss=0.2621 critic_loss=153156116289.4884 entropy=17.5817 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 40300] reward=-110191428.8 actor_loss=0.2986 critic_loss=131484574068.3636 entropy=17.5711 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 40300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473004.8 mean_steps=15.2
|
|
[Episode 40310] reward=-121926640.2 actor_loss=0.3304 critic_loss=150783129510.9565 entropy=17.5703 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 40320] reward=-115506609.0 actor_loss=0.2454 critic_loss=143918352933.4634 entropy=17.5644 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 40320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-652043.4 mean_steps=12.3
|
|
[Episode 40330] reward=-122156834.3 actor_loss=0.2794 critic_loss=151680525562.3111 entropy=17.5681 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 40340] reward=-118294022.6 actor_loss=0.2124 critic_loss=148648301909.3333 entropy=17.5644 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 40340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537054.4 mean_steps=13.3
|
|
[Episode 40350] reward=-121888146.1 actor_loss=0.3674 critic_loss=472679532633.0435 entropy=17.5581 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 40360] reward=-108952951.2 actor_loss=0.4801 critic_loss=130815953214.5778 entropy=17.5491 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1504 front_blocked=0
|
|
[Eval 40360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490794.9 mean_steps=14.2
|
|
[Episode 40370] reward=-120027317.4 actor_loss=0.3850 critic_loss=151278901172.1482 entropy=17.5630 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 40380] reward=-119786418.0 actor_loss=0.2350 critic_loss=161492532224.0000 entropy=17.5609 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 40380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386484.7 mean_steps=16.5
|
|
[Episode 40390] reward=-119367375.5 actor_loss=0.2440 critic_loss=145351237996.0889 entropy=17.5688 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 40400] reward=-120123189.3 actor_loss=0.2393 critic_loss=146778574723.8788 entropy=17.5772 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 40400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498105.1 mean_steps=14.1
|
|
[Episode 40410] reward=-113884113.8 actor_loss=0.3575 critic_loss=138635480268.8000 entropy=17.5739 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 40420] reward=-117857539.0 actor_loss=0.3205 critic_loss=148423314016.8649 entropy=17.5642 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 40420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-454303.3 mean_steps=14.2
|
|
[Episode 40430] reward=-115369285.4 actor_loss=0.3134 critic_loss=139326878011.0769 entropy=17.5572 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 40440] reward=-120139801.6 actor_loss=0.1659 critic_loss=146202368000.0000 entropy=17.5716 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 40440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508101.0 mean_steps=14.2
|
|
[Episode 40450] reward=-123153788.9 actor_loss=0.2474 critic_loss=158265279244.1905 entropy=17.5715 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 40460] reward=-117900612.2 actor_loss=0.3771 critic_loss=145627392682.6667 entropy=17.5720 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 40460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-645207.9 mean_steps=13.8
|
|
[Episode 40470] reward=-114798631.3 actor_loss=0.2748 critic_loss=146086842368.0000 entropy=17.5734 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 40480] reward=-120634607.5 actor_loss=0.2907 critic_loss=146739161770.6667 entropy=17.5708 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 40480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522677.1 mean_steps=14.2
|
|
[Episode 40490] reward=-120319010.0 actor_loss=0.3474 critic_loss=152514025026.7826 entropy=17.5696 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 40500] reward=-120435128.3 actor_loss=0.2989 critic_loss=145694646587.0769 entropy=17.5704 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 40500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571444.5 mean_steps=13.7
|
|
[Episode 40510] reward=-116659186.3 actor_loss=0.3261 critic_loss=143020699921.0667 entropy=17.5754 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 40520] reward=-121223860.7 actor_loss=0.3319 critic_loss=151464049394.5263 entropy=17.5948 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 40520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554752.5 mean_steps=12.7
|
|
[Episode 40530] reward=-115232214.6 actor_loss=0.3400 critic_loss=147694311131.4286 entropy=17.5980 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 40540] reward=-118308154.6 actor_loss=0.4068 critic_loss=143509509649.6552 entropy=17.5977 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 40540] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-672969.7 mean_steps=11.4
|
|
[Episode 40550] reward=-122954962.9 actor_loss=0.2181 critic_loss=150731832083.6923 entropy=17.5964 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 40560] reward=-117213824.2 actor_loss=0.2281 critic_loss=142783653478.4000 entropy=17.5762 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 40560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-527818.2 mean_steps=11.9
|
|
[Episode 40570] reward=-119471296.0 actor_loss=0.2921 critic_loss=148818467693.7143 entropy=17.5854 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 40580] reward=-117072967.0 actor_loss=0.3078 critic_loss=139859826635.4872 entropy=17.5913 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 40580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-558293.4 mean_steps=14.4
|
|
[Episode 40590] reward=-121378921.4 actor_loss=0.2841 critic_loss=150296662439.7242 entropy=17.5942 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 40600] reward=-121488085.7 actor_loss=0.2499 critic_loss=144898687180.8000 entropy=17.6068 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 40600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503839.4 mean_steps=14.3
|
|
[Episode 40610] reward=-121762647.7 actor_loss=0.2404 critic_loss=145804952399.4483 entropy=17.6048 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 40620] reward=-124607001.2 actor_loss=0.2735 critic_loss=153087328559.4074 entropy=17.5937 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 40620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438881.7 mean_steps=14.8
|
|
[Episode 40630] reward=-122068328.9 actor_loss=0.3340 critic_loss=149800394436.9231 entropy=17.5936 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 40640] reward=-118040814.9 actor_loss=0.3088 critic_loss=164305183151.1579 entropy=17.6033 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 40640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497904.4 mean_steps=14.1
|
|
[Episode 40650] reward=-111785296.5 actor_loss=0.3517 critic_loss=133815203430.4000 entropy=17.6046 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 40660] reward=-116561272.8 actor_loss=0.3783 critic_loss=143964461738.6667 entropy=17.6179 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 40660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408865.9 mean_steps=15.5
|
|
[Episode 40670] reward=-123314322.7 actor_loss=0.2433 critic_loss=156202864772.1290 entropy=17.6283 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 40680] reward=-109956124.0 actor_loss=0.3672 critic_loss=146987289048.6154 entropy=17.6302 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 40680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-628527.5 mean_steps=12.7
|
|
[Episode 40690] reward=-119649201.8 actor_loss=0.2627 critic_loss=147383187269.8182 entropy=17.6289 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 40700] reward=-123151590.4 actor_loss=0.3258 critic_loss=150447521011.8095 entropy=17.6316 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 40700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-476847.4 mean_steps=15.8
|
|
[Episode 40710] reward=-121648624.1 actor_loss=0.2357 critic_loss=150688576605.0909 entropy=17.6303 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 40720] reward=-124488088.6 actor_loss=0.2870 critic_loss=158133368030.6087 entropy=17.6235 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 40720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515033.7 mean_steps=14.4
|
|
[Episode 40730] reward=-117011128.6 actor_loss=0.2532 critic_loss=141001809139.8095 entropy=17.6226 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 40740] reward=-111950162.4 actor_loss=0.4058 critic_loss=143680070494.3158 entropy=17.6261 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 40740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462513.3 mean_steps=14.6
|
|
[Episode 40750] reward=-115493159.9 actor_loss=0.3930 critic_loss=138908558222.2222 entropy=17.6224 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 40760] reward=-115122481.8 actor_loss=0.2967 critic_loss=143180813365.8947 entropy=17.5986 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 40760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469754.3 mean_steps=15.1
|
|
[Episode 40770] reward=-120723043.8 actor_loss=0.2799 critic_loss=148588875069.7931 entropy=17.5993 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 40780] reward=-112940752.5 actor_loss=0.2863 critic_loss=149872403342.2222 entropy=17.6049 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 40780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555956.1 mean_steps=12.7
|
|
[Episode 40790] reward=-121561069.2 actor_loss=0.2637 critic_loss=148107597329.6552 entropy=17.6061 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 40800] reward=-126252725.2 actor_loss=0.2658 critic_loss=160273047005.8667 entropy=17.6097 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 40800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-400368.6 mean_steps=14.7
|
|
[Episode 40810] reward=-114881398.5 actor_loss=0.3534 critic_loss=140822721604.2667 entropy=17.6076 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 40820] reward=-119333368.5 actor_loss=0.2814 critic_loss=145963278080.0000 entropy=17.6146 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 40820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532368.7 mean_steps=13.2
|
|
[Episode 40830] reward=-113441028.1 actor_loss=0.4097 critic_loss=136415214652.2353 entropy=17.6029 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 40840] reward=-119227945.4 actor_loss=0.3168 critic_loss=145693941369.9048 entropy=17.5870 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 40840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-423456.4 mean_steps=16.8
|
|
[Episode 40850] reward=-125554836.1 actor_loss=0.2040 critic_loss=159312243097.6000 entropy=17.5902 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 40860] reward=-120437167.6 actor_loss=0.2465 critic_loss=150570857267.2000 entropy=17.5781 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 40860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-419482.8 mean_steps=16.4
|
|
[Episode 40870] reward=-125636983.3 actor_loss=0.1801 critic_loss=163078228811.2941 entropy=17.5783 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 40880] reward=-122817593.9 actor_loss=0.2175 critic_loss=209114440310.1538 entropy=17.5864 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 40880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465237.7 mean_steps=15.1
|
|
[Episode 40890] reward=-115693230.5 actor_loss=0.3443 critic_loss=148269980135.6190 entropy=17.5876 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 40900] reward=-121533910.8 actor_loss=0.3457 critic_loss=150565479310.2222 entropy=17.5858 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 40900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-516365.3 mean_steps=15.2
|
|
[Episode 40910] reward=-121444870.4 actor_loss=0.2897 critic_loss=150475823331.5555 entropy=17.5946 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 40920] reward=-120371686.1 actor_loss=0.2761 critic_loss=150842714794.6667 entropy=17.5861 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 40920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528801.8 mean_steps=13.2
|
|
[Episode 40930] reward=-119294326.5 actor_loss=0.3298 critic_loss=156681397394.2857 entropy=17.5714 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 40940] reward=-115027388.5 actor_loss=0.3305 critic_loss=140202050796.3077 entropy=17.5715 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 40940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544994.9 mean_steps=14.2
|
|
[Episode 40950] reward=-124019175.6 actor_loss=0.3229 critic_loss=157537020586.6667 entropy=17.5629 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 40960] reward=-114546407.2 actor_loss=0.3033 critic_loss=143189847244.8000 entropy=17.5578 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 40960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546571.5 mean_steps=13.8
|
|
[Episode 40970] reward=-120252862.1 actor_loss=0.2362 critic_loss=150377664512.0000 entropy=17.5672 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 40980] reward=-119064638.2 actor_loss=0.1896 critic_loss=151888317644.8000 entropy=17.5631 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 40980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-633083.3 mean_steps=12.1
|
|
[Episode 40990] reward=-117001620.6 actor_loss=0.4343 critic_loss=149194799427.3684 entropy=17.5677 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 41000] reward=-117441587.3 actor_loss=0.2620 critic_loss=143135047962.4828 entropy=17.5710 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 41000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553685.5 mean_steps=13.1
|
|
[Episode 41010] reward=-119128795.0 actor_loss=0.3249 critic_loss=148618211913.1429 entropy=17.5678 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 41020] reward=-122002346.7 actor_loss=0.3014 critic_loss=151183854592.0000 entropy=17.5761 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 41020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575932.5 mean_steps=13.7
|
|
[Episode 41030] reward=-119211588.5 actor_loss=0.3415 critic_loss=146637644024.2424 entropy=17.5784 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 41040] reward=-116102693.5 actor_loss=0.3089 critic_loss=146566700311.2727 entropy=17.5852 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 41040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517407.7 mean_steps=14.1
|
|
[Episode 41050] reward=-118496392.7 actor_loss=0.2952 critic_loss=144513443430.4000 entropy=17.5778 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 41060] reward=-112462490.9 actor_loss=0.2858 critic_loss=142760199782.4000 entropy=17.5733 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 41060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-416542.5 mean_steps=14.5
|
|
[Episode 41070] reward=-113135994.6 actor_loss=0.3581 critic_loss=140926093642.3226 entropy=17.5738 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 41080] reward=-116122299.7 actor_loss=0.3341 critic_loss=139120696599.2727 entropy=17.5738 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 41080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468400.0 mean_steps=14.9
|
|
[Episode 41090] reward=-119353658.6 actor_loss=0.2791 critic_loss=146298807364.2667 entropy=17.5876 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 41100] reward=-112809297.9 actor_loss=0.3515 critic_loss=142570997005.4737 entropy=17.5907 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 41100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409163.4 mean_steps=15.2
|
|
[Episode 41110] reward=-114544285.7 actor_loss=0.2551 critic_loss=138994014071.4667 entropy=17.5912 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 41120] reward=-119873381.4 actor_loss=0.2550 critic_loss=149768608153.6000 entropy=17.5766 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 41120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616520.1 mean_steps=12.5
|
|
[Episode 41130] reward=-116956599.0 actor_loss=0.3347 critic_loss=143958940876.8000 entropy=17.5722 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 41140] reward=-120462467.3 actor_loss=0.3197 critic_loss=152720587776.0000 entropy=17.5777 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 41140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-429910.8 mean_steps=16.2
|
|
[Episode 41150] reward=-116156522.6 actor_loss=0.3214 critic_loss=141967469772.8000 entropy=17.5690 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 41160] reward=-122217946.8 actor_loss=0.2047 critic_loss=176078427750.4000 entropy=17.5697 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 41160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500722.2 mean_steps=14.2
|
|
[Episode 41170] reward=-114819108.8 actor_loss=0.2639 critic_loss=145139343360.0000 entropy=17.5960 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 41180] reward=-113294716.1 actor_loss=0.1871 critic_loss=131342389156.9778 entropy=17.5974 approx_kl=0.0106 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 41180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-460339.0 mean_steps=13.7
|
|
[Episode 41190] reward=-117002989.5 actor_loss=0.3164 critic_loss=149984744009.1429 entropy=17.5848 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 41200] reward=-121281607.2 actor_loss=0.3046 critic_loss=149380846933.3333 entropy=17.5811 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 41200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-646331.4 mean_steps=12.3
|
|
[Episode 41210] reward=-118227993.9 actor_loss=0.2974 critic_loss=143765541355.5200 entropy=17.5752 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 41220] reward=-120669075.3 actor_loss=0.3004 critic_loss=151669013845.3333 entropy=17.5760 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 41220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-355368.4 mean_steps=16.1
|
|
[Episode 41230] reward=-110498984.9 actor_loss=0.3409 critic_loss=132714997760.0000 entropy=17.5840 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 41240] reward=-119268419.4 actor_loss=0.1889 critic_loss=144350261248.0000 entropy=17.5842 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 41240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-589111.0 mean_steps=13.8
|
|
[Episode 41250] reward=-120420924.0 actor_loss=0.2315 critic_loss=147261438138.1818 entropy=17.5818 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 41260] reward=-117596781.7 actor_loss=0.3834 critic_loss=139658147612.4445 entropy=17.5849 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 41260] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-352352.6 mean_steps=17.1
|
|
[Episode 41270] reward=-115183324.9 actor_loss=0.2116 critic_loss=146332524544.0000 entropy=17.5927 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 41280] reward=-121163116.1 actor_loss=0.4084 critic_loss=152487961486.2222 entropy=17.5873 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 41280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581147.7 mean_steps=13.9
|
|
[Episode 41290] reward=-123637512.1 actor_loss=0.2431 critic_loss=156876070001.7778 entropy=17.5860 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 41300] reward=-117138967.8 actor_loss=0.2862 critic_loss=149784671894.5882 entropy=17.5812 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 41300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544760.4 mean_steps=14.7
|
|
[Episode 41310] reward=-115362492.7 actor_loss=0.2025 critic_loss=141847470942.3158 entropy=17.5706 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 41320] reward=-118991408.6 actor_loss=0.2404 critic_loss=145454211072.0000 entropy=17.5663 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 41320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-311836.9 mean_steps=15.8
|
|
[Episode 41330] reward=-121915124.7 actor_loss=0.3057 critic_loss=152061636152.8889 entropy=17.5778 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 41340] reward=-120814355.4 actor_loss=0.2518 critic_loss=149846343680.0000 entropy=17.5826 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 41340] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-280067.7 mean_steps=18.8
|
|
[Episode 41350] reward=-115479282.0 actor_loss=0.3717 critic_loss=147768790220.8000 entropy=17.5805 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 41360] reward=-121254335.0 actor_loss=0.2373 critic_loss=176617699409.9200 entropy=17.5748 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 41360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-622837.6 mean_steps=11.9
|
|
[Episode 41370] reward=-115096312.7 actor_loss=0.3183 critic_loss=145453844322.4615 entropy=17.5825 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 41380] reward=-113082118.2 actor_loss=0.3187 critic_loss=135837038136.8889 entropy=17.5951 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 41380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496444.0 mean_steps=14.2
|
|
[Episode 41390] reward=-123456699.7 actor_loss=0.3183 critic_loss=151447958674.2857 entropy=17.5779 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 41400] reward=-121747833.9 actor_loss=0.3216 critic_loss=156116010449.4546 entropy=17.5928 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 41400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549258.8 mean_steps=13.3
|
|
[Episode 41410] reward=-120457028.2 actor_loss=0.2969 critic_loss=146977553703.8222 entropy=17.5820 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 41420] reward=-118392303.1 actor_loss=0.2874 critic_loss=144550736607.1795 entropy=17.5767 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 41420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-522799.3 mean_steps=13.7
|
|
[Episode 41430] reward=-119619260.3 actor_loss=0.1723 critic_loss=145271491032.6154 entropy=17.5884 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 41440] reward=-118924921.9 actor_loss=0.2496 critic_loss=143682116539.7333 entropy=17.5865 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 41440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-530543.7 mean_steps=15.2
|
|
[Episode 41450] reward=-113898747.6 actor_loss=0.2688 critic_loss=140688430609.6552 entropy=17.5947 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 41460] reward=-122117569.2 actor_loss=0.2525 critic_loss=152175225162.3226 entropy=17.5895 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 41460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-389175.5 mean_steps=16.4
|
|
[Episode 41470] reward=-120757893.8 actor_loss=0.2915 critic_loss=151685854373.1613 entropy=17.5838 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 41480] reward=-119860245.3 actor_loss=0.2358 critic_loss=154597199708.1600 entropy=17.5781 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 41480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-423062.3 mean_steps=14.5
|
|
[Episode 41490] reward=-116014925.6 actor_loss=0.3941 critic_loss=134364159453.8667 entropy=17.5818 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 41500] reward=-116701820.8 actor_loss=0.2901 critic_loss=144215641115.6757 entropy=17.5864 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 41500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517342.1 mean_steps=15.2
|
|
[Episode 41510] reward=-120547853.4 actor_loss=0.2424 critic_loss=146931345544.5333 entropy=17.5904 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 41520] reward=-116519038.3 actor_loss=0.2983 critic_loss=144174893832.8276 entropy=17.5940 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 41520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552401.0 mean_steps=14.4
|
|
[Episode 41530] reward=-116081644.8 actor_loss=0.3163 critic_loss=140057425165.4737 entropy=17.5770 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 41540] reward=-114518683.9 actor_loss=0.3339 critic_loss=140650590208.0000 entropy=17.5650 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 41540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-507755.0 mean_steps=15.3
|
|
[Episode 41550] reward=-121281115.8 actor_loss=0.2281 critic_loss=148291823348.8696 entropy=17.5797 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 41560] reward=-119834884.4 actor_loss=0.3108 critic_loss=147737199528.2286 entropy=17.5791 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 41560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571820.5 mean_steps=13.6
|
|
[Episode 41570] reward=-119339218.7 actor_loss=0.3087 critic_loss=143225566003.2000 entropy=17.5881 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 41580] reward=-121166635.2 actor_loss=0.3156 critic_loss=147587727360.0000 entropy=17.6012 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 41580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-612135.0 mean_steps=13.6
|
|
[Episode 41590] reward=-121790531.9 actor_loss=0.2024 critic_loss=145460922525.5385 entropy=17.5891 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 41600] reward=-123734243.6 actor_loss=0.2828 critic_loss=155895095808.0000 entropy=17.6083 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 41600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476543.9 mean_steps=14.9
|
|
[Episode 41610] reward=-121509158.3 actor_loss=0.2333 critic_loss=149755853358.5454 entropy=17.6123 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 41620] reward=-120873643.8 actor_loss=0.3960 critic_loss=152082478495.1351 entropy=17.6100 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 41620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-582400.5 mean_steps=12.5
|
|
[Episode 41630] reward=-120310562.5 actor_loss=0.3868 critic_loss=145453889536.0000 entropy=17.6060 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 41640] reward=-118990431.9 actor_loss=0.2342 critic_loss=142165015315.6923 entropy=17.6009 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 41640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518066.2 mean_steps=13.2
|
|
[Episode 41650] reward=-119616192.5 actor_loss=0.2194 critic_loss=146809125487.3044 entropy=17.6086 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 41660] reward=-120678039.6 actor_loss=0.3010 critic_loss=146978460233.1429 entropy=17.6115 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 41660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502942.1 mean_steps=14.0
|
|
[Episode 41670] reward=-119567475.4 actor_loss=0.2938 critic_loss=149984627489.3913 entropy=17.6197 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 41680] reward=-117746882.1 actor_loss=0.2522 critic_loss=152883984022.5882 entropy=17.6115 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 41680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510700.4 mean_steps=13.3
|
|
[Episode 41690] reward=-123748163.0 actor_loss=0.2659 critic_loss=148048284120.6154 entropy=17.6112 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 41700] reward=-114216827.5 actor_loss=0.4122 critic_loss=147086519828.4800 entropy=17.6035 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 41700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527423.3 mean_steps=14.4
|
|
[Episode 41710] reward=-120350145.5 actor_loss=0.3248 critic_loss=144148645806.0800 entropy=17.6181 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 41720] reward=-117988161.1 actor_loss=0.2910 critic_loss=142894817490.0513 entropy=17.6244 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 41720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476340.0 mean_steps=14.6
|
|
[Episode 41730] reward=-117471883.4 actor_loss=0.4161 critic_loss=146443646789.8182 entropy=17.6320 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 41740] reward=-117891826.6 actor_loss=0.3324 critic_loss=147296352665.6000 entropy=17.6419 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 41740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467724.8 mean_steps=13.5
|
|
[Episode 41750] reward=-122547079.4 actor_loss=0.3050 critic_loss=148488899154.5807 entropy=17.6534 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 41760] reward=-121209415.2 actor_loss=0.2805 critic_loss=153677927453.2571 entropy=17.6413 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 41760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573257.5 mean_steps=13.5
|
|
[Episode 41770] reward=-119957411.5 actor_loss=0.2820 critic_loss=142699878324.1482 entropy=17.6422 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 41780] reward=-121862746.4 actor_loss=0.2291 critic_loss=148345279829.3333 entropy=17.6329 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 41780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493394.8 mean_steps=14.0
|
|
[Episode 41790] reward=-111677013.6 actor_loss=0.2605 critic_loss=136369195331.3684 entropy=17.6253 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 41800] reward=-117909289.1 actor_loss=0.3277 critic_loss=143887405465.6000 entropy=17.6207 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 41800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409889.2 mean_steps=16.6
|
|
[Episode 41810] reward=-121844603.4 actor_loss=0.2824 critic_loss=147680393746.9630 entropy=17.6065 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 41820] reward=-115919289.6 actor_loss=0.3647 critic_loss=142303081338.4348 entropy=17.6081 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 41820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492036.9 mean_steps=14.3
|
|
[Episode 41830] reward=-116805928.5 actor_loss=0.3676 critic_loss=143954567623.1111 entropy=17.6082 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 41840] reward=-121929148.1 actor_loss=0.2598 critic_loss=150830888525.5757 entropy=17.5857 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 41840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555971.4 mean_steps=13.6
|
|
[Episode 41850] reward=-115986796.5 actor_loss=0.2095 critic_loss=139471987745.0323 entropy=17.5887 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 41860] reward=-118086056.3 actor_loss=0.3399 critic_loss=142549561461.0286 entropy=17.5922 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 41860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552761.5 mean_steps=13.9
|
|
[Episode 41870] reward=-119353765.2 actor_loss=0.3432 critic_loss=146886186037.8947 entropy=17.5804 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 41880] reward=-121179441.7 actor_loss=0.3138 critic_loss=149462356690.8235 entropy=17.5872 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 41880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428434.4 mean_steps=15.6
|
|
[Episode 41890] reward=-120402563.6 actor_loss=0.3334 critic_loss=148285736487.3846 entropy=17.5856 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 41900] reward=-114525729.1 actor_loss=0.3267 critic_loss=134937967859.8095 entropy=17.5930 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 41900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532043.3 mean_steps=13.6
|
|
[Episode 41910] reward=-116886196.7 actor_loss=0.3325 critic_loss=142955124578.4615 entropy=17.5948 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 41920] reward=-120733237.1 actor_loss=0.3493 critic_loss=148550651318.8571 entropy=17.6115 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 41920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483772.4 mean_steps=14.2
|
|
[Episode 41930] reward=-118318138.6 actor_loss=0.3514 critic_loss=147274307811.5555 entropy=17.6077 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 41940] reward=-120240384.5 actor_loss=0.3367 critic_loss=142183402761.4815 entropy=17.6097 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 41940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-574633.6 mean_steps=12.1
|
|
[Episode 41950] reward=-120543994.9 actor_loss=0.2926 critic_loss=147789571868.4445 entropy=17.6049 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 41960] reward=-118235975.1 actor_loss=0.1776 critic_loss=141783988451.5555 entropy=17.6079 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 41960] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-287805.1 mean_steps=17.9
|
|
[Episode 41970] reward=-121656801.7 actor_loss=0.2834 critic_loss=149366448310.0444 entropy=17.6094 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 41980] reward=-120392710.8 actor_loss=0.2113 critic_loss=146545141561.8065 entropy=17.6155 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 41980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528611.4 mean_steps=14.3
|
|
[Episode 41990] reward=-118747326.7 actor_loss=0.2885 critic_loss=145776011709.2174 entropy=17.6075 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 42000] reward=-121153261.5 actor_loss=0.2855 critic_loss=145088823296.0000 entropy=17.6234 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 42000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578739.6 mean_steps=12.7
|
|
[Episode 42010] reward=-120228715.1 actor_loss=0.3370 critic_loss=149936998692.5714 entropy=17.6223 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 42020] reward=-120610331.8 actor_loss=0.3561 critic_loss=145067140517.6471 entropy=17.6475 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 42020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-596252.7 mean_steps=13.6
|
|
[Episode 42030] reward=-118330232.2 actor_loss=0.3285 critic_loss=140369567516.4445 entropy=17.6493 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 42040] reward=-121406355.6 actor_loss=0.3607 critic_loss=145419352291.5555 entropy=17.6365 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 42040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528185.2 mean_steps=14.1
|
|
[Episode 42050] reward=-117495276.1 actor_loss=0.3205 critic_loss=150293373474.1333 entropy=17.6250 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 42060] reward=-123359115.2 actor_loss=0.1725 critic_loss=155917287424.0000 entropy=17.6326 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 42060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-281470.4 mean_steps=16.6
|
|
[Episode 42070] reward=-120376795.4 actor_loss=0.2627 critic_loss=144777073012.3636 entropy=17.6282 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 42080] reward=-119026677.8 actor_loss=0.2569 critic_loss=144734839808.0000 entropy=17.6237 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 42080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519322.3 mean_steps=14.4
|
|
[Episode 42090] reward=-115866102.1 actor_loss=0.3717 critic_loss=137420922880.0000 entropy=17.6230 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 42100] reward=-118403671.1 actor_loss=0.2284 critic_loss=146909707507.8095 entropy=17.6195 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 42100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525085.6 mean_steps=14.5
|
|
[Episode 42110] reward=-113745651.4 actor_loss=0.3663 critic_loss=133772023222.8571 entropy=17.6189 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 42120] reward=-124479694.0 actor_loss=0.2740 critic_loss=160025223168.0000 entropy=17.6174 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 42120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502466.2 mean_steps=15.1
|
|
[Episode 42130] reward=-121878498.5 actor_loss=0.3176 critic_loss=169487097856.0000 entropy=17.6202 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 42140] reward=-122692774.0 actor_loss=0.2902 critic_loss=148510753353.1429 entropy=17.6260 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 42140] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-704243.3 mean_steps=10.8
|
|
[Episode 42150] reward=-122687290.9 actor_loss=0.2777 critic_loss=146999852714.6667 entropy=17.6443 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 42160] reward=-123467459.9 actor_loss=0.2887 critic_loss=153297216512.0000 entropy=17.6438 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 42160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-664619.8 mean_steps=12.2
|
|
[Episode 42170] reward=-119010595.2 actor_loss=0.2841 critic_loss=152378318848.0000 entropy=17.6475 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 42180] reward=-116662784.2 actor_loss=0.2023 critic_loss=201763772974.5454 entropy=17.6461 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 42180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-602006.1 mean_steps=13.8
|
|
[Episode 42190] reward=-118497669.2 actor_loss=0.3624 critic_loss=144417411287.5789 entropy=17.6386 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 42200] reward=-118131767.3 actor_loss=0.2756 critic_loss=140445063213.5111 entropy=17.6156 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 42200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489807.4 mean_steps=14.2
|
|
[Episode 42210] reward=-120777585.3 actor_loss=0.1924 critic_loss=146668001962.6667 entropy=17.6261 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 42220] reward=-121432608.9 actor_loss=0.3663 critic_loss=149398509041.3714 entropy=17.6252 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 42220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-633105.4 mean_steps=13.3
|
|
[Episode 42230] reward=-115324007.9 actor_loss=0.2138 critic_loss=140152778260.4800 entropy=17.6254 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 42240] reward=-117783728.5 actor_loss=0.3208 critic_loss=149118924117.3333 entropy=17.6299 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 42240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490210.9 mean_steps=14.1
|
|
[Episode 42250] reward=-119411187.4 actor_loss=0.2567 critic_loss=144277436825.6000 entropy=17.6138 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 42260] reward=-121788310.7 actor_loss=0.2101 critic_loss=150095148889.9460 entropy=17.6168 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 42260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567128.0 mean_steps=13.8
|
|
[Episode 42270] reward=-122882379.7 actor_loss=0.2621 critic_loss=150740721232.8421 entropy=17.6187 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 42280] reward=-125936842.5 actor_loss=0.2490 critic_loss=152774909561.9048 entropy=17.6131 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 42280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-443132.4 mean_steps=14.5
|
|
[Episode 42290] reward=-120527940.9 actor_loss=0.3394 critic_loss=145221692757.3333 entropy=17.6163 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 42300] reward=-121034536.5 actor_loss=0.3727 critic_loss=144426992058.8108 entropy=17.6073 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 42300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-703778.0 mean_steps=13.8
|
|
[Episode 42310] reward=-117229460.7 actor_loss=0.1703 critic_loss=143225439325.0909 entropy=17.6125 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 42320] reward=-116229133.7 actor_loss=0.3143 critic_loss=139444516337.3714 entropy=17.6178 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 42320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489047.8 mean_steps=15.0
|
|
[Episode 42330] reward=-120692073.3 actor_loss=0.2619 critic_loss=143039442571.6364 entropy=17.6106 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 42340] reward=-118104292.0 actor_loss=0.2697 critic_loss=142713902031.2381 entropy=17.6038 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 42340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-639241.4 mean_steps=13.1
|
|
[Episode 42350] reward=-114931627.6 actor_loss=0.3192 critic_loss=136533690686.5778 entropy=17.6030 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 42360] reward=-127225162.8 actor_loss=0.2044 critic_loss=155774640128.0000 entropy=17.5932 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 42360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512581.9 mean_steps=14.1
|
|
[Episode 42370] reward=-113755922.0 actor_loss=0.2865 critic_loss=134725561088.0000 entropy=17.5926 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 42380] reward=-117594612.6 actor_loss=0.3073 critic_loss=144594101799.3846 entropy=17.5846 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 42380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489571.2 mean_steps=14.0
|
|
[Episode 42390] reward=-111497335.7 actor_loss=0.2847 critic_loss=139593141899.6364 entropy=17.5959 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 42400] reward=-118968624.7 actor_loss=0.2520 critic_loss=144237773917.0909 entropy=17.6119 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 42400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-380162.2 mean_steps=15.2
|
|
[Episode 42410] reward=-117631452.5 actor_loss=0.3059 critic_loss=141858351891.6923 entropy=17.6126 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 42420] reward=-114289436.1 actor_loss=0.3182 critic_loss=184980048164.5714 entropy=17.6238 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 42420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-589395.6 mean_steps=14.7
|
|
[Episode 42430] reward=-120291589.0 actor_loss=0.2135 critic_loss=142996181937.2308 entropy=17.6119 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 42440] reward=-119672474.2 actor_loss=0.2792 critic_loss=144961772050.9630 entropy=17.6132 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 42440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591484.3 mean_steps=13.6
|
|
[Episode 42450] reward=-113874268.4 actor_loss=0.2962 critic_loss=135545010468.5714 entropy=17.6186 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 42460] reward=-119437208.0 actor_loss=0.3227 critic_loss=141430860276.6222 entropy=17.6114 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 42460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521562.1 mean_steps=15.2
|
|
[Episode 42470] reward=-122328863.3 actor_loss=0.3043 critic_loss=149379573760.0000 entropy=17.6143 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 42480] reward=-114526381.7 actor_loss=0.3083 critic_loss=154330261346.4615 entropy=17.6097 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 42480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543822.8 mean_steps=13.7
|
|
[Episode 42490] reward=-122380874.8 actor_loss=0.2138 critic_loss=173522904003.7647 entropy=17.6192 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 42500] reward=-113491293.6 actor_loss=0.4019 critic_loss=136549176040.7273 entropy=17.6123 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 42500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521110.4 mean_steps=14.4
|
|
[Episode 42510] reward=-126994019.4 actor_loss=0.2809 critic_loss=1059351731593.8462 entropy=17.6159 approx_kl=0.0041 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 42520] reward=-122742748.5 actor_loss=0.3037 critic_loss=148361509187.3684 entropy=17.6152 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 42520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521997.9 mean_steps=14.3
|
|
[Episode 42530] reward=-119908394.0 actor_loss=0.2477 critic_loss=144834153835.3548 entropy=17.6157 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 42540] reward=-118937117.2 actor_loss=0.3683 critic_loss=146667593142.8571 entropy=17.6073 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 42540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524952.7 mean_steps=13.5
|
|
[Episode 42550] reward=-112070924.2 actor_loss=0.2923 critic_loss=133709504184.3200 entropy=17.6112 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 42560] reward=-118713252.6 actor_loss=0.2241 critic_loss=150452203760.9412 entropy=17.6155 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 42560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430251.4 mean_steps=15.8
|
|
[Episode 42570] reward=-115036121.2 actor_loss=0.3679 critic_loss=148200432786.2857 entropy=17.6253 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 42580] reward=-126485997.3 actor_loss=0.2285 critic_loss=159878300194.1333 entropy=17.6135 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 42580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450642.9 mean_steps=15.1
|
|
[Episode 42590] reward=-118030877.5 actor_loss=0.3208 critic_loss=146740730993.7778 entropy=17.6161 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 42600] reward=-121057372.4 actor_loss=0.2628 critic_loss=148032630925.2414 entropy=17.6104 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 42600] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-305095.7 mean_steps=16.9
|
|
[Episode 42610] reward=-121211165.5 actor_loss=0.2808 critic_loss=151820922197.3333 entropy=17.6189 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 42620] reward=-133434455.9 actor_loss=0.2574 critic_loss=723532080742.4000 entropy=17.6280 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 42620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-348153.3 mean_steps=16.2
|
|
[Episode 42630] reward=-130741476.7 actor_loss=0.3160 critic_loss=713710245205.3334 entropy=17.6230 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 42640] reward=-119547437.6 actor_loss=0.3279 critic_loss=146228194417.7778 entropy=17.6084 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 42640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-464154.4 mean_steps=16.1
|
|
[Episode 42650] reward=-117308221.1 actor_loss=0.3478 critic_loss=136050840189.1555 entropy=17.6187 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 42660] reward=-113731258.8 actor_loss=0.3551 critic_loss=140610558361.6000 entropy=17.6154 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 42660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468408.2 mean_steps=15.1
|
|
[Episode 42670] reward=-121580027.1 actor_loss=0.3124 critic_loss=151432900803.0476 entropy=17.6209 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 42680] reward=-121530460.7 actor_loss=0.3059 critic_loss=171490594542.9333 entropy=17.6247 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 42680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-473795.9 mean_steps=13.7
|
|
[Episode 42690] reward=-119053012.0 actor_loss=0.3156 critic_loss=148222706192.5161 entropy=17.6279 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 42700] reward=-118099136.2 actor_loss=0.2650 critic_loss=146235504867.5555 entropy=17.6349 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 42700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-615828.7 mean_steps=14.0
|
|
[Episode 42710] reward=-124287279.5 actor_loss=0.1461 critic_loss=148270743161.9048 entropy=17.6370 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 42720] reward=-119574244.5 actor_loss=0.3395 critic_loss=163870084437.3333 entropy=17.6415 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 42720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-540989.6 mean_steps=12.8
|
|
[Episode 42730] reward=-117538134.8 actor_loss=0.2068 critic_loss=139095791802.1818 entropy=17.6363 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 42740] reward=-123055471.8 actor_loss=0.2582 critic_loss=267948488411.4286 entropy=17.6329 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 42740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386574.3 mean_steps=16.4
|
|
[Episode 42750] reward=-122334924.9 actor_loss=0.2603 critic_loss=177134074479.3044 entropy=17.6275 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 42760] reward=-124227363.3 actor_loss=0.2798 critic_loss=169737414332.6316 entropy=17.6186 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 42760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478100.2 mean_steps=14.9
|
|
[Episode 42770] reward=-124637654.5 actor_loss=0.2559 critic_loss=155959180947.9111 entropy=17.6318 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 42780] reward=-119169152.8 actor_loss=0.2827 critic_loss=143203625779.2000 entropy=17.6250 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 42780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576779.7 mean_steps=13.5
|
|
[Episode 42790] reward=-115452476.2 actor_loss=0.3401 critic_loss=141294015692.8000 entropy=17.6137 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 42800] reward=-111447282.9 actor_loss=0.2865 critic_loss=135597847732.7059 entropy=17.6145 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 42800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-426602.0 mean_steps=17.2
|
|
[Episode 42810] reward=-119006172.4 actor_loss=0.1402 critic_loss=143802790083.0476 entropy=17.6011 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 42820] reward=-122651643.6 actor_loss=0.2349 critic_loss=159031358805.3333 entropy=17.5928 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 42820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-443669.0 mean_steps=14.9
|
|
[Episode 42830] reward=-121076155.8 actor_loss=0.2588 critic_loss=147641475072.0000 entropy=17.6022 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 42840] reward=-126481708.0 actor_loss=0.2799 critic_loss=229022090854.4000 entropy=17.5876 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 42840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-461516.7 mean_steps=14.3
|
|
[Episode 42850] reward=-115774080.8 actor_loss=0.4229 critic_loss=136740225987.7647 entropy=17.5868 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 42860] reward=-121640426.6 actor_loss=0.3278 critic_loss=207001944064.0000 entropy=17.5889 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 42860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445100.7 mean_steps=15.2
|
|
[Episode 42870] reward=-116600461.1 actor_loss=0.3151 critic_loss=138566263226.8108 entropy=17.6121 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 42880] reward=-117952453.0 actor_loss=0.3497 critic_loss=170466012760.2758 entropy=17.6060 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 42880] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-302474.7 mean_steps=17.2
|
|
[Episode 42890] reward=-121145767.1 actor_loss=0.2566 critic_loss=152128880640.0000 entropy=17.6140 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 42900] reward=-121207866.1 actor_loss=0.2480 critic_loss=143410794086.4000 entropy=17.6259 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 42900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-502226.5 mean_steps=13.5
|
|
[Episode 42910] reward=-118248117.8 actor_loss=0.3893 critic_loss=141732619150.2222 entropy=17.6319 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 42920] reward=-118842512.0 actor_loss=0.2553 critic_loss=145063178240.0000 entropy=17.6446 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 42920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490446.2 mean_steps=14.4
|
|
[Episode 42930] reward=-118117224.8 actor_loss=0.2410 critic_loss=141138270720.0000 entropy=17.6434 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 42940] reward=-115933813.1 actor_loss=0.3485 critic_loss=139541946368.0000 entropy=17.6577 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 42940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438770.7 mean_steps=15.8
|
|
[Episode 42950] reward=-117634773.5 actor_loss=0.3420 critic_loss=146623072548.5714 entropy=17.6467 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 42960] reward=-113488257.4 actor_loss=0.3392 critic_loss=142960767658.6667 entropy=17.6448 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 42960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423818.9 mean_steps=15.6
|
|
[Episode 42970] reward=-124891116.8 actor_loss=0.3041 critic_loss=184670387511.6522 entropy=17.6481 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 42980] reward=-119843500.5 actor_loss=0.3060 critic_loss=141499491864.3810 entropy=17.6573 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 42980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-646639.9 mean_steps=13.7
|
|
[Episode 42990] reward=-125569626.9 actor_loss=0.3084 critic_loss=158341606219.2941 entropy=17.6433 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 43000] reward=-120603848.3 actor_loss=0.2362 critic_loss=154425005585.6552 entropy=17.6315 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 43000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-400069.1 mean_steps=14.5
|
|
[Episode 43010] reward=-121478432.1 actor_loss=0.2652 critic_loss=199633184013.4737 entropy=17.6312 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 43020] reward=-120964469.3 actor_loss=0.3021 critic_loss=145005127149.0370 entropy=17.6278 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 43020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481818.2 mean_steps=14.8
|
|
[Episode 43030] reward=-120983883.9 actor_loss=0.3015 critic_loss=148067526585.3793 entropy=17.6356 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 43040] reward=-119475851.3 actor_loss=0.3233 critic_loss=149241776971.2941 entropy=17.6286 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 43040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580836.7 mean_steps=12.8
|
|
[Episode 43050] reward=-118074256.3 actor_loss=0.2772 critic_loss=153495392471.5789 entropy=17.6256 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 43060] reward=-121962109.7 actor_loss=0.3966 critic_loss=175075896621.1765 entropy=17.6208 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 43060] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-662806.9 mean_steps=11.5
|
|
[Episode 43070] reward=-116170802.4 actor_loss=0.3678 critic_loss=148097159346.0869 entropy=17.6313 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 43080] reward=-118964585.4 actor_loss=0.1956 critic_loss=143012772193.1035 entropy=17.6307 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 43080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592099.2 mean_steps=12.9
|
|
[Episode 43090] reward=-120340593.3 actor_loss=0.3389 critic_loss=184088578667.1628 entropy=17.6384 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 43100] reward=-122906672.4 actor_loss=0.2791 critic_loss=152065019904.0000 entropy=17.6289 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 43100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507660.1 mean_steps=14.1
|
|
[Episode 43110] reward=-117251407.0 actor_loss=0.3358 critic_loss=150380266184.3478 entropy=17.6302 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 43120] reward=-123540426.5 actor_loss=0.3099 critic_loss=166539785947.4286 entropy=17.6272 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 43120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-660541.8 mean_steps=12.4
|
|
[Episode 43130] reward=-122936411.9 actor_loss=0.2601 critic_loss=153483831198.4762 entropy=17.6287 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 43140] reward=-122575021.4 actor_loss=0.1986 critic_loss=149691168995.5555 entropy=17.6296 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 43140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-428796.2 mean_steps=16.8
|
|
[Episode 43150] reward=-121624618.6 actor_loss=0.4234 critic_loss=154277468023.4667 entropy=17.6358 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Episode 43160] reward=-115642135.8 actor_loss=0.3970 critic_loss=140640167209.2903 entropy=17.6323 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 43160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488245.3 mean_steps=14.1
|
|
[Episode 43170] reward=-114737072.5 actor_loss=0.2985 critic_loss=138193973365.0286 entropy=17.6267 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 43180] reward=-119216638.0 actor_loss=0.3067 critic_loss=145566484126.8965 entropy=17.6285 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 43180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-413658.9 mean_steps=15.3
|
|
[Episode 43190] reward=-122503297.8 actor_loss=0.3525 critic_loss=148736934980.2667 entropy=17.6200 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 43200] reward=-123243766.2 actor_loss=0.3147 critic_loss=154659916762.0741 entropy=17.6220 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 43200] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-303901.3 mean_steps=18.2
|
|
[Episode 43210] reward=-117856367.0 actor_loss=0.3469 critic_loss=139755480678.4000 entropy=17.6194 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 43220] reward=-120760164.1 actor_loss=0.3173 critic_loss=147090630610.4889 entropy=17.6172 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 43220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-494459.6 mean_steps=15.2
|
|
[Episode 43230] reward=-122379709.6 actor_loss=0.2807 critic_loss=145162300695.2727 entropy=17.6143 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 43240] reward=-121822657.3 actor_loss=0.2586 critic_loss=147625676344.8889 entropy=17.6134 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 43240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-590356.6 mean_steps=12.1
|
|
[Episode 43250] reward=-123583873.8 actor_loss=0.1776 critic_loss=178017105237.3333 entropy=17.6165 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 43260] reward=-120612190.9 actor_loss=0.3602 critic_loss=283900197888.0000 entropy=17.6176 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 43260] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-604785.1 mean_steps=11.9
|
|
[Episode 43270] reward=-118330993.2 actor_loss=0.3529 critic_loss=146789259673.6000 entropy=17.6164 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 43280] reward=-115590103.8 actor_loss=0.1989 critic_loss=140327781262.2222 entropy=17.6265 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 43280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-336143.0 mean_steps=17.2
|
|
[Episode 43290] reward=-115990833.9 actor_loss=0.2894 critic_loss=139841026785.2800 entropy=17.6421 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 43300] reward=-121107193.9 actor_loss=0.2430 critic_loss=152368971145.8462 entropy=17.6508 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 43300] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-274627.6 mean_steps=17.8
|
|
[Episode 43310] reward=-120610023.0 actor_loss=0.2406 critic_loss=149120987376.9412 entropy=17.6489 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 43320] reward=-117430544.8 actor_loss=0.3060 critic_loss=153719474242.0645 entropy=17.6537 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 43320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-577557.1 mean_steps=12.2
|
|
[Episode 43330] reward=-120937095.2 actor_loss=0.2872 critic_loss=146520393794.0645 entropy=17.6545 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 43340] reward=-122429763.6 actor_loss=0.2984 critic_loss=149343400755.2000 entropy=17.6590 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 43340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-543445.9 mean_steps=15.5
|
|
[Episode 43350] reward=-115809245.5 actor_loss=0.3917 critic_loss=141379877707.2941 entropy=17.6575 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 43360] reward=-119517500.7 actor_loss=0.2446 critic_loss=145128762338.7429 entropy=17.6625 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 43360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581992.0 mean_steps=13.2
|
|
[Episode 43370] reward=-115933214.8 actor_loss=0.2097 critic_loss=140454981795.8400 entropy=17.6583 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 43380] reward=-118331527.3 actor_loss=0.2132 critic_loss=151080012458.6667 entropy=17.6681 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 43380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458582.2 mean_steps=15.3
|
|
[Episode 43390] reward=-121568263.3 actor_loss=0.2708 critic_loss=152398919590.9565 entropy=17.6581 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 43400] reward=-122870970.9 actor_loss=0.1953 critic_loss=148080506103.1724 entropy=17.6712 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 43400] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-654859.2 mean_steps=12.7
|
|
[Episode 43410] reward=-120994920.1 actor_loss=0.2836 critic_loss=147166598485.3333 entropy=17.6752 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 43420] reward=-118298648.3 actor_loss=0.3071 critic_loss=144659214172.1600 entropy=17.6616 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 43420] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-350031.5 mean_steps=18.9
|
|
[Episode 43430] reward=-117099379.7 actor_loss=0.2187 critic_loss=177148783820.8000 entropy=17.6612 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 43440] reward=-119416794.3 actor_loss=0.2063 critic_loss=146040553914.8108 entropy=17.6512 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 43440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386877.5 mean_steps=16.8
|
|
[Episode 43450] reward=-123212200.7 actor_loss=0.2983 critic_loss=223840980992.0000 entropy=17.6609 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 43460] reward=-115723118.6 actor_loss=0.2074 critic_loss=156435265035.3778 entropy=17.6457 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 43460] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-349075.3 mean_steps=17.8
|
|
[Episode 43470] reward=-120979602.3 actor_loss=0.3501 critic_loss=148137245627.7333 entropy=17.6456 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 43480] reward=-120594752.8 actor_loss=0.3651 critic_loss=155121807711.0857 entropy=17.6516 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 43480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468502.6 mean_steps=15.7
|
|
[Episode 43490] reward=-122152092.9 actor_loss=0.2417 critic_loss=153191976374.8571 entropy=17.6560 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 43500] reward=-116717531.7 actor_loss=0.3154 critic_loss=151300335691.8518 entropy=17.6466 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 43500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423056.0 mean_steps=15.8
|
|
[Episode 43510] reward=-115424800.3 actor_loss=0.3051 critic_loss=139419842332.4445 entropy=17.6373 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 43520] reward=-124853120.8 actor_loss=0.2543 critic_loss=151947764622.2222 entropy=17.6358 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 43520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-558975.1 mean_steps=14.8
|
|
[Episode 43530] reward=-120458719.1 actor_loss=0.2600 critic_loss=150807203104.8205 entropy=17.6372 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 43540] reward=-115978976.6 actor_loss=0.2488 critic_loss=142852336675.3103 entropy=17.6313 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 43540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521171.5 mean_steps=14.7
|
|
[Episode 43550] reward=-118911208.0 actor_loss=0.3114 critic_loss=142368196120.3810 entropy=17.6210 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 43560] reward=-122940790.7 actor_loss=0.2254 critic_loss=150023501047.1724 entropy=17.6233 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 43560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-496384.0 mean_steps=16.4
|
|
[Episode 43570] reward=-112437660.9 actor_loss=0.3201 critic_loss=139088870513.7778 entropy=17.6206 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 43580] reward=-112278882.9 actor_loss=0.3136 critic_loss=134308278272.0000 entropy=17.6100 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 43580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-517202.1 mean_steps=12.4
|
|
[Episode 43590] reward=-114402559.7 actor_loss=0.3222 critic_loss=139044389632.0000 entropy=17.6014 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 43600] reward=-116597776.5 actor_loss=0.2698 critic_loss=140825286997.3333 entropy=17.5914 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 43600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501398.1 mean_steps=13.6
|
|
[Episode 43610] reward=-120670380.2 actor_loss=0.2955 critic_loss=194640561341.6296 entropy=17.5930 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 43620] reward=-121020022.6 actor_loss=0.2550 critic_loss=146215293588.6452 entropy=17.5871 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 43620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-600180.9 mean_steps=14.1
|
|
[Episode 43630] reward=-120765757.0 actor_loss=0.2218 critic_loss=151353332349.1555 entropy=17.5835 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 43640] reward=-117473617.4 actor_loss=0.2864 critic_loss=142675247286.0444 entropy=17.5766 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 43640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-503471.7 mean_steps=13.4
|
|
[Episode 43650] reward=-119333047.7 actor_loss=0.1708 critic_loss=142653403858.8235 entropy=17.5863 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 43660] reward=-120004675.1 actor_loss=0.2649 critic_loss=149021104537.6000 entropy=17.5762 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 43660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485326.0 mean_steps=14.6
|
|
[Episode 43670] reward=-116390609.8 actor_loss=0.3319 critic_loss=141526230812.4445 entropy=17.5700 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 43680] reward=-118939960.2 actor_loss=0.2952 critic_loss=141263631397.9259 entropy=17.5774 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 43680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449671.3 mean_steps=14.8
|
|
[Episode 43690] reward=-121509805.1 actor_loss=0.2601 critic_loss=149158658404.1739 entropy=17.5684 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 43700] reward=-122718177.8 actor_loss=0.3528 critic_loss=254848307655.1111 entropy=17.5664 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 43700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-365977.0 mean_steps=16.6
|
|
[Episode 43710] reward=-116072785.6 actor_loss=0.3597 critic_loss=144648932352.0000 entropy=17.5732 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 43720] reward=-120408155.6 actor_loss=0.2972 critic_loss=145293196902.4000 entropy=17.5911 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 43720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488358.7 mean_steps=15.6
|
|
[Episode 43730] reward=-120035572.0 actor_loss=0.3004 critic_loss=145313482160.3556 entropy=17.6010 approx_kl=0.0108 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 43740] reward=-117729504.5 actor_loss=0.2420 critic_loss=140895049386.6667 entropy=17.6088 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 43740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501127.8 mean_steps=14.4
|
|
[Episode 43750] reward=-119937600.3 actor_loss=0.2643 critic_loss=145168490811.0769 entropy=17.6213 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 43760] reward=-116740889.6 actor_loss=0.3365 critic_loss=189294812501.3333 entropy=17.6245 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 43760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503929.8 mean_steps=15.7
|
|
[Episode 43770] reward=-121744119.4 actor_loss=0.3011 critic_loss=148384702242.5946 entropy=17.6412 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 43780] reward=-122950887.9 actor_loss=0.2249 critic_loss=148416532889.6000 entropy=17.6445 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 43780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-504701.3 mean_steps=16.6
|
|
[Episode 43790] reward=-117609901.5 actor_loss=0.3594 critic_loss=258987997742.5454 entropy=17.6436 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 43800] reward=-122836593.1 actor_loss=0.3229 critic_loss=203367889408.0000 entropy=17.6562 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 43800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530273.5 mean_steps=13.6
|
|
[Episode 43810] reward=-113154040.0 actor_loss=0.3008 critic_loss=139901597882.1818 entropy=17.6393 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 43820] reward=-125624245.6 actor_loss=0.3152 critic_loss=156210519433.8462 entropy=17.6546 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 43820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402531.6 mean_steps=15.7
|
|
[Episode 43830] reward=-121549764.1 actor_loss=0.2938 critic_loss=147207076717.7143 entropy=17.6494 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 43840] reward=-116662660.1 actor_loss=0.2668 critic_loss=148964291806.6087 entropy=17.6504 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 43840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518287.5 mean_steps=13.8
|
|
[Episode 43850] reward=-116216760.7 actor_loss=0.3453 critic_loss=141454980437.3333 entropy=17.6496 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 43860] reward=-113771939.4 actor_loss=0.3090 critic_loss=140742489429.3333 entropy=17.6550 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 43860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449470.3 mean_steps=15.8
|
|
[Episode 43870] reward=-119733277.5 actor_loss=0.2981 critic_loss=307307013266.2857 entropy=17.6483 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 43880] reward=-117312034.3 actor_loss=0.3067 critic_loss=146881987788.8000 entropy=17.6418 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 43880] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612472.1 mean_steps=12.2
|
|
[Episode 43890] reward=-243306368.0 actor_loss=0.2088 critic_loss=37417444005205.3359 entropy=17.6405 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 43900] reward=-118424004.8 actor_loss=0.2679 critic_loss=144556346900.4800 entropy=17.6481 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 43900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641619.4 mean_steps=12.2
|
|
[Episode 43910] reward=-122446877.5 actor_loss=0.3267 critic_loss=177802536082.2857 entropy=17.6415 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 43920] reward=-116593180.5 actor_loss=0.2332 critic_loss=142175867790.2222 entropy=17.6510 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 43920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-407524.1 mean_steps=14.7
|
|
[Episode 43930] reward=-116451356.9 actor_loss=0.2826 critic_loss=140666727330.9091 entropy=17.6383 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 43940] reward=-121490630.1 actor_loss=0.2653 critic_loss=152577372694.2609 entropy=17.6254 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 43940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-663633.9 mean_steps=12.7
|
|
[Episode 43950] reward=-113582704.4 actor_loss=0.3812 critic_loss=138195931287.7037 entropy=17.6307 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 43960] reward=-114207131.9 actor_loss=0.3146 critic_loss=135472351027.2000 entropy=17.6150 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 43960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448107.9 mean_steps=16.0
|
|
[Episode 43970] reward=-117399344.8 actor_loss=0.2693 critic_loss=141013797156.5714 entropy=17.6210 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 43980] reward=-121229926.9 actor_loss=0.2877 critic_loss=147019852458.6667 entropy=17.6094 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 43980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480203.3 mean_steps=14.4
|
|
[Episode 43990] reward=-120784317.1 actor_loss=0.2584 critic_loss=145245987752.2286 entropy=17.6004 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 44000] reward=-115376880.4 actor_loss=0.3031 critic_loss=142736591282.4243 entropy=17.6223 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 44000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-591705.2 mean_steps=11.8
|
|
[Episode 44010] reward=-115852920.2 actor_loss=0.3034 critic_loss=142124228139.8857 entropy=17.6159 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 44020] reward=-119466453.4 actor_loss=0.3837 critic_loss=162578153040.8421 entropy=17.6144 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 44020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-412604.5 mean_steps=15.6
|
|
[Episode 44030] reward=-116368119.1 actor_loss=0.4097 critic_loss=149421211648.0000 entropy=17.6086 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 44040] reward=-128579846.8 actor_loss=0.3320 critic_loss=504985795242.6667 entropy=17.6140 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 44040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439619.1 mean_steps=15.1
|
|
[Episode 44050] reward=-118807599.3 actor_loss=0.3443 critic_loss=154131420645.0526 entropy=17.6136 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 44060] reward=-124510670.0 actor_loss=0.4192 critic_loss=452873623365.8182 entropy=17.6242 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 44060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555236.5 mean_steps=13.9
|
|
[Episode 44070] reward=-117824918.5 actor_loss=0.2067 critic_loss=141771214028.8000 entropy=17.6267 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 44080] reward=-115441239.3 actor_loss=0.3649 critic_loss=140356021760.0000 entropy=17.6387 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 44080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467870.2 mean_steps=14.3
|
|
[Episode 44090] reward=-120227886.2 actor_loss=0.2711 critic_loss=146269404715.8857 entropy=17.6440 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 44100] reward=-118257867.0 actor_loss=0.2763 critic_loss=144891352157.0909 entropy=17.6422 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 44100] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-652338.2 mean_steps=12.3
|
|
[Episode 44110] reward=-116995482.4 actor_loss=0.3396 critic_loss=157722982400.0000 entropy=17.6409 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 44120] reward=-120237181.7 actor_loss=0.3808 critic_loss=148618976460.8000 entropy=17.6408 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 44120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-484358.4 mean_steps=16.4
|
|
[Episode 44130] reward=-119034816.1 actor_loss=0.4185 critic_loss=225524213532.4445 entropy=17.6484 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 44140] reward=-130124164.2 actor_loss=0.3025 critic_loss=559362685415.6190 entropy=17.6624 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 44140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488210.5 mean_steps=14.2
|
|
[Episode 44150] reward=-118164428.6 actor_loss=0.3314 critic_loss=145313627447.6522 entropy=17.6697 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 44160] reward=-124527131.9 actor_loss=0.3520 critic_loss=156807007072.7111 entropy=17.6725 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 44160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-464213.6 mean_steps=16.1
|
|
[Episode 44170] reward=-120788431.2 actor_loss=0.2419 critic_loss=143709321688.6154 entropy=17.6800 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 44180] reward=-121364349.8 actor_loss=0.3002 critic_loss=142669564222.5778 entropy=17.6791 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 44180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537974.7 mean_steps=13.6
|
|
[Episode 44190] reward=-119773569.1 actor_loss=0.3121 critic_loss=152623046109.8667 entropy=17.6917 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 44200] reward=-124500914.6 actor_loss=0.2413 critic_loss=156374403572.6222 entropy=17.7079 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 44200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619769.7 mean_steps=13.1
|
|
[Episode 44210] reward=-118313868.4 actor_loss=0.2355 critic_loss=154350713241.6000 entropy=17.6913 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 44220] reward=-185248404.7 actor_loss=15.9681 critic_loss=15714963982472.5332 entropy=17.6769 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 44220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486767.6 mean_steps=14.3
|
|
[Episode 44230] reward=-117364600.1 actor_loss=0.3389 critic_loss=143379371417.6000 entropy=17.6785 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 44240] reward=-117279661.4 actor_loss=0.2609 critic_loss=158584796891.4286 entropy=17.6682 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 44240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-437600.1 mean_steps=16.4
|
|
[Episode 44250] reward=-116605062.7 actor_loss=0.2846 critic_loss=145290895177.9556 entropy=17.6514 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 44260] reward=-115979576.9 actor_loss=0.2643 critic_loss=140682963990.7556 entropy=17.6469 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 44260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504936.8 mean_steps=14.6
|
|
[Episode 44270] reward=-120870467.3 actor_loss=0.3366 critic_loss=145991196672.0000 entropy=17.6398 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 44280] reward=-128753178.3 actor_loss=0.2824 critic_loss=372836718405.8182 entropy=17.6438 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 44280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544175.8 mean_steps=14.8
|
|
[Episode 44290] reward=-116447921.7 actor_loss=0.3419 critic_loss=144223799019.2433 entropy=17.6382 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 44300] reward=-105263376.5 actor_loss=0.3486 critic_loss=132101720994.9091 entropy=17.6180 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 44300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462003.9 mean_steps=15.9
|
|
[Episode 44310] reward=-120693165.2 actor_loss=0.1550 critic_loss=174948044153.2632 entropy=17.6195 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 44320] reward=-116497697.3 actor_loss=0.3442 critic_loss=139434241930.9714 entropy=17.6264 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 44320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462029.5 mean_steps=15.2
|
|
[Episode 44330] reward=-111135246.0 actor_loss=0.2368 critic_loss=135321804409.9048 entropy=17.6261 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 44340] reward=-118469680.6 actor_loss=0.1984 critic_loss=167730221371.0769 entropy=17.6355 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 44340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-659695.3 mean_steps=12.4
|
|
[Episode 44350] reward=-114325981.0 actor_loss=0.3181 critic_loss=138436139506.1622 entropy=17.6483 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 44360] reward=-122406402.2 actor_loss=0.2667 critic_loss=145426537585.7778 entropy=17.6541 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 44360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468405.4 mean_steps=14.2
|
|
[Episode 44370] reward=-120254842.9 actor_loss=0.3462 critic_loss=150386423718.9565 entropy=17.6384 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 44380] reward=-117150978.6 actor_loss=0.2466 critic_loss=140427626968.6154 entropy=17.6446 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 44380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-367549.2 mean_steps=16.7
|
|
[Episode 44390] reward=-116736958.3 actor_loss=0.2893 critic_loss=148034733444.4138 entropy=17.6408 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 44400] reward=-114066511.0 actor_loss=0.3684 critic_loss=145863747334.2439 entropy=17.6369 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 44400] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-368267.0 mean_steps=18.6
|
|
[Episode 44410] reward=-119548928.0 actor_loss=0.2867 critic_loss=145222863530.6667 entropy=17.6366 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 44420] reward=-117889493.4 actor_loss=0.2769 critic_loss=146316509790.8148 entropy=17.6201 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 44420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503806.5 mean_steps=14.6
|
|
[Episode 44430] reward=-123186173.4 actor_loss=0.1840 critic_loss=150118497441.6842 entropy=17.6167 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 44440] reward=-114982589.6 actor_loss=0.3125 critic_loss=134596574102.0690 entropy=17.6223 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 44440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478327.0 mean_steps=14.2
|
|
[Episode 44450] reward=-117587304.5 actor_loss=0.3633 critic_loss=140942716776.2963 entropy=17.6229 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 44460] reward=-121433672.6 actor_loss=0.2910 critic_loss=175289034752.0000 entropy=17.6213 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 44460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-432379.7 mean_steps=16.9
|
|
[Episode 44470] reward=-121908178.7 actor_loss=0.2785 critic_loss=144947997448.8276 entropy=17.6335 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 44480] reward=-119238829.7 actor_loss=0.2569 critic_loss=139750397815.4667 entropy=17.6410 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 44480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484696.4 mean_steps=14.2
|
|
[Episode 44490] reward=-120068430.7 actor_loss=0.3067 critic_loss=144232892482.0645 entropy=17.6424 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 44500] reward=-121554906.5 actor_loss=0.2296 critic_loss=150124053904.6956 entropy=17.6347 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 44500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-550300.9 mean_steps=12.8
|
|
[Episode 44510] reward=-117404296.4 actor_loss=0.3680 critic_loss=149440964794.1818 entropy=17.6208 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 44520] reward=-116498914.8 actor_loss=0.2768 critic_loss=140480361946.5366 entropy=17.6351 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 44520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462699.6 mean_steps=14.9
|
|
[Episode 44530] reward=-116215124.8 actor_loss=0.2272 critic_loss=135821922655.0857 entropy=17.6293 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 44540] reward=-118523513.0 actor_loss=0.3188 critic_loss=138742469017.6000 entropy=17.6292 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 44540] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-325966.9 mean_steps=16.6
|
|
[Episode 44550] reward=-117140183.0 actor_loss=0.4010 critic_loss=141691252584.2963 entropy=17.6259 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 44560] reward=-124002814.4 actor_loss=0.2036 critic_loss=149644557555.8095 entropy=17.6287 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 44560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-405997.5 mean_steps=16.1
|
|
[Episode 44570] reward=-122707825.8 actor_loss=0.2947 critic_loss=158313116103.1111 entropy=17.6288 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 44580] reward=-118301227.4 actor_loss=0.3030 critic_loss=153114386432.0000 entropy=17.6344 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 44580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461951.5 mean_steps=15.3
|
|
[Episode 44590] reward=-117313767.7 actor_loss=0.2886 critic_loss=139467404811.3778 entropy=17.6371 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 44600] reward=-123984021.1 actor_loss=0.2231 critic_loss=150643981839.5151 entropy=17.6558 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 44600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-352808.7 mean_steps=16.1
|
|
[Episode 44610] reward=-117776893.1 actor_loss=0.3268 critic_loss=143448343473.2308 entropy=17.6521 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 44620] reward=-128469450.7 actor_loss=0.2370 critic_loss=334287253325.9130 entropy=17.6486 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 44620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-372180.9 mean_steps=14.6
|
|
[Episode 44630] reward=-121347323.2 actor_loss=0.3272 critic_loss=161566441472.0000 entropy=17.6509 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 44640] reward=-116329192.0 actor_loss=0.3818 critic_loss=141383188935.1111 entropy=17.6539 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 44640] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-360183.6 mean_steps=16.4
|
|
[Episode 44650] reward=-118031765.4 actor_loss=0.2447 critic_loss=150916830354.2857 entropy=17.6624 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 44660] reward=-123312777.7 actor_loss=0.2453 critic_loss=153530834616.3200 entropy=17.6616 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 44660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612311.3 mean_steps=12.0
|
|
[Episode 44670] reward=-117826698.8 actor_loss=0.2405 critic_loss=145980794880.0000 entropy=17.6650 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 44680] reward=-116067176.9 actor_loss=0.3076 critic_loss=146063966208.0000 entropy=17.6622 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 44680] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-712154.4 mean_steps=11.2
|
|
[Episode 44690] reward=-120454182.1 actor_loss=0.2790 critic_loss=149509959033.2632 entropy=17.6605 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 44700] reward=-118160663.0 actor_loss=0.2824 critic_loss=144914869283.3103 entropy=17.6646 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 44700] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-708247.4 mean_steps=11.7
|
|
[Episode 44710] reward=-123913146.2 actor_loss=0.2512 critic_loss=147929877552.7619 entropy=17.6637 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 44720] reward=-115653503.4 actor_loss=0.3021 critic_loss=154703892695.5789 entropy=17.6569 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 44720] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-770832.5 mean_steps=10.2
|
|
[Episode 44730] reward=-122215779.9 actor_loss=0.3152 critic_loss=150236822732.8000 entropy=17.6476 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 44740] reward=-121498966.2 actor_loss=0.2443 critic_loss=156624602978.4615 entropy=17.6375 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 44740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531131.2 mean_steps=14.7
|
|
[Episode 44750] reward=-123777890.3 actor_loss=0.3370 critic_loss=297076283284.2105 entropy=17.6435 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 44760] reward=-112853696.1 actor_loss=0.3087 critic_loss=154108187807.2889 entropy=17.6553 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 44760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-505875.6 mean_steps=14.8
|
|
[Episode 44770] reward=-119208275.3 actor_loss=0.3577 critic_loss=147793402265.6000 entropy=17.6811 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 44780] reward=-122500277.6 actor_loss=0.2519 critic_loss=154689921258.0571 entropy=17.6800 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 44780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493916.4 mean_steps=14.9
|
|
[Episode 44790] reward=-117091285.1 actor_loss=0.2511 critic_loss=144760754995.2000 entropy=17.6895 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 44800] reward=-115347176.8 actor_loss=0.3834 critic_loss=143306336938.6667 entropy=17.6950 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 44800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-644722.2 mean_steps=12.6
|
|
[Episode 44810] reward=-123985721.4 actor_loss=0.3797 critic_loss=167790004797.4400 entropy=17.6937 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 44820] reward=-117311419.0 actor_loss=0.3418 critic_loss=210335478837.8947 entropy=17.7020 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 44820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497991.4 mean_steps=14.3
|
|
[Episode 44830] reward=-118821943.9 actor_loss=0.2854 critic_loss=151983387922.7317 entropy=17.6966 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 44840] reward=-117654209.2 actor_loss=0.2579 critic_loss=146784238250.6667 entropy=17.6930 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 44840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478824.1 mean_steps=14.2
|
|
[Episode 44850] reward=-115402492.9 actor_loss=0.3568 critic_loss=144809217807.0588 entropy=17.7042 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 44860] reward=-122670645.0 actor_loss=0.2563 critic_loss=195376975052.8000 entropy=17.6941 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 44860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559522.4 mean_steps=12.8
|
|
[Episode 44870] reward=-116171150.8 actor_loss=0.3980 critic_loss=141272209115.4286 entropy=17.6920 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 44880] reward=-117103089.9 actor_loss=0.2525 critic_loss=142968884766.1176 entropy=17.6924 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 44880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484129.0 mean_steps=14.3
|
|
[Episode 44890] reward=-121137759.5 actor_loss=0.3428 critic_loss=147019879765.3333 entropy=17.6941 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 44900] reward=-122297137.1 actor_loss=0.2022 critic_loss=161274600106.6667 entropy=17.6823 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 44900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450308.9 mean_steps=15.9
|
|
[Episode 44910] reward=-120852467.4 actor_loss=0.1893 critic_loss=146767197962.2400 entropy=17.7003 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 44920] reward=-115338424.1 actor_loss=0.3182 critic_loss=172481419309.5111 entropy=17.7116 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 44920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410287.1 mean_steps=15.6
|
|
[Episode 44930] reward=-117294190.0 actor_loss=0.3257 critic_loss=143572883456.0000 entropy=17.7008 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 44940] reward=-121762803.3 actor_loss=0.1988 critic_loss=149155515596.8000 entropy=17.7025 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 44940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-596509.4 mean_steps=12.7
|
|
[Episode 44950] reward=-122879920.5 actor_loss=0.1996 critic_loss=145284426301.4400 entropy=17.6942 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 44960] reward=-121144736.8 actor_loss=0.2291 critic_loss=180708774260.3636 entropy=17.6939 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 44960] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-652927.9 mean_steps=11.5
|
|
[Episode 44970] reward=-115162424.1 actor_loss=0.3511 critic_loss=136253595648.0000 entropy=17.6914 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 44980] reward=-121672517.4 actor_loss=0.1982 critic_loss=141615286810.9474 entropy=17.6877 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 44980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-435855.9 mean_steps=17.1
|
|
[Episode 44990] reward=-119582579.8 actor_loss=0.2820 critic_loss=182386186295.3513 entropy=17.6812 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 45000] reward=-119454057.2 actor_loss=0.3217 critic_loss=153015915178.6667 entropy=17.6846 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 45000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470019.9 mean_steps=14.9
|
|
[Episode 45010] reward=-118883559.9 actor_loss=0.2718 critic_loss=139638601570.4615 entropy=17.6956 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 45020] reward=-115776948.9 actor_loss=0.2143 critic_loss=140488113395.8095 entropy=17.6816 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 45020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556484.0 mean_steps=13.8
|
|
[Episode 45030] reward=-123111074.6 actor_loss=0.3225 critic_loss=214205047603.2000 entropy=17.6744 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 45040] reward=-119893511.5 actor_loss=0.2817 critic_loss=143907448077.4737 entropy=17.6567 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 45040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438086.6 mean_steps=15.8
|
|
[Episode 45050] reward=-113975575.8 actor_loss=0.3330 critic_loss=163789056186.1818 entropy=17.6417 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 45060] reward=-110851539.6 actor_loss=0.3086 critic_loss=133330461416.7273 entropy=17.6403 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 45060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493242.8 mean_steps=15.3
|
|
[Episode 45070] reward=-118355366.1 actor_loss=0.2649 critic_loss=142273773958.0952 entropy=17.6345 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 45080] reward=-118103878.1 actor_loss=0.1254 critic_loss=143513597269.3333 entropy=17.6359 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 45080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-548362.9 mean_steps=12.8
|
|
[Episode 45090] reward=-115038156.9 actor_loss=0.2776 critic_loss=150421327189.3333 entropy=17.6382 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 45100] reward=-116747785.2 actor_loss=0.4054 critic_loss=145967994197.3333 entropy=17.6343 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 45100] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-276947.7 mean_steps=17.0
|
|
[Episode 45110] reward=-114946096.7 actor_loss=0.2705 critic_loss=140928027232.8649 entropy=17.6320 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 45120] reward=-121889849.4 actor_loss=0.2743 critic_loss=194075374214.7368 entropy=17.6369 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 45120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-344024.7 mean_steps=15.8
|
|
[Episode 45130] reward=-121787188.2 actor_loss=0.1876 critic_loss=165026609015.4667 entropy=17.6434 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 45140] reward=-114466466.1 actor_loss=0.3983 critic_loss=139624332449.6842 entropy=17.6576 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 45140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509196.0 mean_steps=14.3
|
|
[Episode 45150] reward=-112268659.4 actor_loss=0.3089 critic_loss=133977512043.7895 entropy=17.6702 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 45160] reward=-121620651.3 actor_loss=0.2780 critic_loss=248300998200.8889 entropy=17.6762 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 45160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452301.6 mean_steps=15.9
|
|
[Episode 45170] reward=-119668310.3 actor_loss=0.2710 critic_loss=148706706090.6667 entropy=17.6791 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 45180] reward=-120876912.7 actor_loss=0.2662 critic_loss=154134038820.5714 entropy=17.6923 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 45180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-486330.7 mean_steps=16.1
|
|
[Episode 45190] reward=-118224410.9 actor_loss=0.1526 critic_loss=141505783552.0000 entropy=17.6828 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 45200] reward=-117165037.8 actor_loss=0.2173 critic_loss=143664805205.3333 entropy=17.6880 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 45200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509597.4 mean_steps=14.2
|
|
[Episode 45210] reward=-121917603.6 actor_loss=0.1123 critic_loss=162027695812.9231 entropy=17.6763 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 45220] reward=-116845062.3 actor_loss=0.3439 critic_loss=151043995298.3415 entropy=17.6583 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 45220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502387.1 mean_steps=14.5
|
|
[Episode 45230] reward=-121800266.8 actor_loss=0.3771 critic_loss=149780764717.5111 entropy=17.6583 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 45240] reward=-116467768.1 actor_loss=0.4001 critic_loss=147256636939.3778 entropy=17.6406 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 45240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421058.4 mean_steps=15.8
|
|
[Episode 45250] reward=-115409843.7 actor_loss=0.3248 critic_loss=141152539270.7368 entropy=17.6398 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 45260] reward=-120321546.5 actor_loss=0.2577 critic_loss=146537113736.5333 entropy=17.6411 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 45260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-561794.5 mean_steps=14.9
|
|
[Episode 45270] reward=-115578302.5 actor_loss=0.2148 critic_loss=144504696376.8889 entropy=17.6356 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 45280] reward=-132986263.1 actor_loss=0.3912 critic_loss=1551996767940.9231 entropy=17.6269 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 45280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-333882.9 mean_steps=16.6
|
|
[Episode 45290] reward=-116595385.3 actor_loss=0.2318 critic_loss=167248205894.6207 entropy=17.6166 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 45300] reward=-117367808.5 actor_loss=0.3100 critic_loss=140787909700.2667 entropy=17.6219 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 45300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517989.5 mean_steps=15.6
|
|
[Episode 45310] reward=-112757677.2 actor_loss=0.2632 critic_loss=139014304563.2000 entropy=17.6296 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 45320] reward=-1392574758.5 actor_loss=66.2622 critic_loss=1863912175464903.0000 entropy=17.6147 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1003 front_blocked=0
|
|
[Eval 45320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-611016.5 mean_steps=12.9
|
|
[Episode 45330] reward=-124293936.9 actor_loss=0.3912 critic_loss=532028984368.7619 entropy=17.6157 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 45340] reward=-132944830.6 actor_loss=0.2815 critic_loss=1000024665034.1052 entropy=17.6220 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 45340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535898.4 mean_steps=13.3
|
|
[Episode 45350] reward=-250895502.4 actor_loss=54.6066 critic_loss=43145606900394.6641 entropy=17.6162 approx_kl=0.0043 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 45360] reward=-231629080.4 actor_loss=0.2238 critic_loss=21842932551364.9219 entropy=17.6272 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 45360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-654534.8 mean_steps=12.8
|
|
[Episode 45370] reward=-114221309.4 actor_loss=0.2837 critic_loss=141224769846.3030 entropy=17.6181 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 45380] reward=-121761177.6 actor_loss=0.2544 critic_loss=149993536079.6444 entropy=17.6391 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 45380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-381672.5 mean_steps=16.2
|
|
[Episode 45390] reward=-113934199.6 actor_loss=0.3369 critic_loss=136593875353.6000 entropy=17.6348 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 45400] reward=-128484452.4 actor_loss=0.2689 critic_loss=161466874774.0690 entropy=17.6510 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 45400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455062.4 mean_steps=15.1
|
|
[Episode 45410] reward=-113793687.9 actor_loss=0.2741 critic_loss=133710812364.8000 entropy=17.6420 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 45420] reward=-119714508.5 actor_loss=0.2459 critic_loss=148269769031.6800 entropy=17.6557 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 45420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523464.3 mean_steps=13.2
|
|
[Episode 45430] reward=-121115419.2 actor_loss=0.2699 critic_loss=146481832846.2222 entropy=17.6706 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 45440] reward=-113125710.7 actor_loss=0.2781 critic_loss=132295273033.1429 entropy=17.6663 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 45440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549771.0 mean_steps=14.6
|
|
[Episode 45450] reward=-121061561.3 actor_loss=0.3131 critic_loss=151630123593.1429 entropy=17.6774 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 45460] reward=-121470258.3 actor_loss=0.2780 critic_loss=145384559537.2308 entropy=17.6850 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 45460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486112.2 mean_steps=15.2
|
|
[Episode 45470] reward=-121136096.0 actor_loss=0.2286 critic_loss=166007643415.2727 entropy=17.6823 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 45480] reward=-111228121.1 actor_loss=0.2752 critic_loss=138396930867.2000 entropy=17.6897 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 45480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-386132.0 mean_steps=15.1
|
|
[Episode 45490] reward=-115900983.8 actor_loss=0.3552 critic_loss=144937465537.4222 entropy=17.6898 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 45500] reward=-120204570.5 actor_loss=0.2865 critic_loss=152874492723.2000 entropy=17.7005 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 45500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428108.1 mean_steps=15.4
|
|
[Episode 45510] reward=-127175011.2 actor_loss=0.2318 critic_loss=246379341768.6487 entropy=17.6975 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 45520] reward=-233316176.8 actor_loss=1.5483 critic_loss=37948812549597.8672 entropy=17.7064 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 45520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-432699.8 mean_steps=14.9
|
|
[Episode 45530] reward=-122164416.3 actor_loss=0.1286 critic_loss=155912763255.4667 entropy=17.6863 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 45540] reward=-117746270.8 actor_loss=0.3231 critic_loss=188718323782.6207 entropy=17.6986 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 45540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513011.6 mean_steps=14.8
|
|
[Episode 45550] reward=-122237281.3 actor_loss=0.3027 critic_loss=358135190869.3333 entropy=17.6855 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 45560] reward=-125721078.3 actor_loss=0.3347 critic_loss=154196361216.0000 entropy=17.6882 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 45560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502304.9 mean_steps=14.5
|
|
[Episode 45570] reward=-119775715.9 actor_loss=0.3258 critic_loss=149882798735.3600 entropy=17.6935 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 45580] reward=-116403676.7 actor_loss=0.2406 critic_loss=132650570159.1579 entropy=17.6831 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 45580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584636.5 mean_steps=12.5
|
|
[Episode 45590] reward=-123399171.1 actor_loss=0.3218 critic_loss=425215704225.6842 entropy=17.6899 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 45600] reward=-120396828.7 actor_loss=0.2770 critic_loss=146614597339.4286 entropy=17.6987 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 45600] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-348677.2 mean_steps=18.1
|
|
[Episode 45610] reward=-117599381.1 actor_loss=0.2619 critic_loss=155569669734.4000 entropy=17.6951 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 45620] reward=-115043027.0 actor_loss=0.2797 critic_loss=154415993651.2000 entropy=17.7039 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 45620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449856.1 mean_steps=14.7
|
|
[Episode 45630] reward=-3613184834.2 actor_loss=58.3778 critic_loss=10670776577586728.0000 entropy=17.7038 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1061 front_blocked=0
|
|
[Episode 45640] reward=-125606188.4 actor_loss=0.2117 critic_loss=158096115120.3556 entropy=17.7080 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 45640] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-330018.7 mean_steps=17.6
|
|
[Episode 45650] reward=-121024818.5 actor_loss=0.2382 critic_loss=236818935170.8445 entropy=17.7186 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 45660] reward=-119433992.9 actor_loss=0.2090 critic_loss=147638846168.1778 entropy=17.7165 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 45660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441171.0 mean_steps=15.6
|
|
[Episode 45670] reward=-117721114.1 actor_loss=0.2907 critic_loss=152119927694.2222 entropy=17.7202 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 45680] reward=-117659373.0 actor_loss=0.2264 critic_loss=143287838786.0645 entropy=17.7225 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 45680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495868.3 mean_steps=15.5
|
|
[Episode 45690] reward=-115952793.5 actor_loss=0.3244 critic_loss=145059176820.3636 entropy=17.7074 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 45700] reward=-121599498.9 actor_loss=0.2337 critic_loss=150705009459.2000 entropy=17.7002 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 45700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-605864.2 mean_steps=12.1
|
|
[Episode 45710] reward=-125612191.3 actor_loss=0.2635 critic_loss=392778934681.6000 entropy=17.6882 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 45720] reward=-200957712.1 actor_loss=9.7935 critic_loss=14954820347026.2852 entropy=17.7028 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 45720] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-262617.1 mean_steps=18.3
|
|
[Episode 45730] reward=-121165891.3 actor_loss=0.3773 critic_loss=146895152593.4546 entropy=17.6996 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 45740] reward=-121400716.4 actor_loss=0.2097 critic_loss=149210306059.3778 entropy=17.7125 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 45740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431545.5 mean_steps=15.9
|
|
[Episode 45750] reward=-141338500.6 actor_loss=0.4322 critic_loss=2579620233216.0000 entropy=17.7041 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 45760] reward=-117769158.1 actor_loss=0.2524 critic_loss=141989976687.3044 entropy=17.6983 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 45760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-554562.4 mean_steps=15.5
|
|
[Episode 45770] reward=-118365731.2 actor_loss=0.3080 critic_loss=168326027556.5714 entropy=17.6931 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 45780] reward=-750305925.1 actor_loss=6.5046 critic_loss=1014540746411212.7500 entropy=17.7105 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 45780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417933.2 mean_steps=14.9
|
|
[Episode 45790] reward=-122346102.5 actor_loss=0.2220 critic_loss=172080852659.8919 entropy=17.7184 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 45800] reward=-123662522.4 actor_loss=0.2340 critic_loss=173534496209.4546 entropy=17.7162 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 45800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465094.9 mean_steps=14.4
|
|
[Episode 45810] reward=-110784431.5 actor_loss=0.4378 critic_loss=135788948138.6667 entropy=17.7225 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 45820] reward=-115463896.7 actor_loss=0.4178 critic_loss=139030864457.1429 entropy=17.7205 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 45820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532838.7 mean_steps=13.2
|
|
[Episode 45830] reward=-120246443.7 actor_loss=0.3784 critic_loss=169797722468.1739 entropy=17.7283 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 45840] reward=-119185534.6 actor_loss=0.2509 critic_loss=146898570891.6364 entropy=17.7099 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 45840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581525.0 mean_steps=12.4
|
|
[Episode 45850] reward=-124314119.8 actor_loss=0.3141 critic_loss=251153594691.3684 entropy=17.7030 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 45860] reward=-116123324.3 actor_loss=0.4628 critic_loss=186479597613.5111 entropy=17.6975 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 45860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515649.2 mean_steps=13.8
|
|
[Episode 45870] reward=-121140365.3 actor_loss=0.1943 critic_loss=145033512732.4445 entropy=17.6826 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 45880] reward=-118813899.5 actor_loss=0.2555 critic_loss=144811066800.3556 entropy=17.6621 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 45880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-422009.7 mean_steps=14.4
|
|
[Episode 45890] reward=-122732554.0 actor_loss=0.2255 critic_loss=147767547699.2000 entropy=17.6706 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 45900] reward=-120893557.1 actor_loss=0.3164 critic_loss=157895558212.2667 entropy=17.6693 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 45900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-427213.0 mean_steps=16.6
|
|
[Episode 45910] reward=-119671632.2 actor_loss=0.3059 critic_loss=149770925787.4286 entropy=17.6670 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 45920] reward=-116743811.8 actor_loss=0.2757 critic_loss=148429968725.3333 entropy=17.6746 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 45920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-672464.6 mean_steps=13.1
|
|
[Episode 45930] reward=-151552469.6 actor_loss=0.2844 critic_loss=3294070065444.5713 entropy=17.6737 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 45940] reward=-114732346.1 actor_loss=0.3987 critic_loss=167996632678.4000 entropy=17.6790 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 45940] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-430607.7 mean_steps=17.7
|
|
[Episode 45950] reward=-119388558.5 actor_loss=0.3157 critic_loss=210006660892.4445 entropy=17.6859 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 45960] reward=-120678356.8 actor_loss=0.2325 critic_loss=148220312689.7778 entropy=17.6949 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 45960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-359180.2 mean_steps=16.1
|
|
[Episode 45970] reward=-120246368.9 actor_loss=0.3067 critic_loss=148793812012.5217 entropy=17.6778 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 45980] reward=-123381309.8 actor_loss=0.1700 critic_loss=151125631340.0889 entropy=17.6552 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 45980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-389726.4 mean_steps=16.4
|
|
[Episode 45990] reward=-113737915.3 actor_loss=0.2680 critic_loss=132935788710.0540 entropy=17.6632 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 46000] reward=-118007891.9 actor_loss=0.3542 critic_loss=145840781448.5333 entropy=17.6646 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 46000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-444672.5 mean_steps=14.7
|
|
[Episode 46010] reward=-126728977.8 actor_loss=0.2268 critic_loss=152393877454.0488 entropy=17.6629 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 46020] reward=-121138435.9 actor_loss=0.2399 critic_loss=149878569642.6667 entropy=17.6745 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 46020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-307283.3 mean_steps=16.7
|
|
[Episode 46030] reward=-115948637.6 actor_loss=0.3241 critic_loss=184611734674.2857 entropy=17.6899 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 46040] reward=-116693965.1 actor_loss=0.3127 critic_loss=146620183893.3333 entropy=17.6923 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 46040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453652.4 mean_steps=15.8
|
|
[Episode 46050] reward=-116632236.1 actor_loss=0.3312 critic_loss=140669759172.9231 entropy=17.6881 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 46060] reward=-117284965.2 actor_loss=0.2810 critic_loss=144646734892.5217 entropy=17.6879 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 46060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530848.2 mean_steps=14.1
|
|
[Episode 46070] reward=-117325438.9 actor_loss=0.2621 critic_loss=144367747072.0000 entropy=17.6797 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 46080] reward=-122460565.5 actor_loss=0.2348 critic_loss=145928621134.7692 entropy=17.6768 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 46080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488025.0 mean_steps=14.0
|
|
[Episode 46090] reward=-120696870.9 actor_loss=0.2924 critic_loss=148421630275.3684 entropy=17.6675 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 46100] reward=-116775324.4 actor_loss=0.2271 critic_loss=137242953500.4444 entropy=17.6608 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 46100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455127.7 mean_steps=14.8
|
|
[Episode 46110] reward=-118063311.9 actor_loss=0.3299 critic_loss=143318735803.7333 entropy=17.6625 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 46120] reward=-114765149.7 actor_loss=0.2692 critic_loss=139631673974.1538 entropy=17.6703 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 46120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-626443.3 mean_steps=13.0
|
|
[Episode 46130] reward=-124337312.6 actor_loss=0.2417 critic_loss=155812579866.9474 entropy=17.6789 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 46140] reward=-112801024.6 actor_loss=0.2826 critic_loss=136607171677.0909 entropy=17.6859 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 46140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572226.3 mean_steps=12.4
|
|
[Episode 46150] reward=-118349985.3 actor_loss=0.3590 critic_loss=143019185766.4000 entropy=17.6818 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 46160] reward=-114831570.9 actor_loss=0.3218 critic_loss=147003655964.4445 entropy=17.6906 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 46160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-457519.6 mean_steps=14.0
|
|
[Episode 46170] reward=-119949806.3 actor_loss=0.2318 critic_loss=155240833934.2222 entropy=17.7048 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 46180] reward=-120321171.0 actor_loss=0.2898 critic_loss=150926195678.9677 entropy=17.7079 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 46180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501840.3 mean_steps=15.3
|
|
[Episode 46190] reward=-118968652.9 actor_loss=0.3403 critic_loss=166260017995.2941 entropy=17.7106 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 46200] reward=-122413752.2 actor_loss=0.2104 critic_loss=186423057729.8286 entropy=17.7405 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 46200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-614534.0 mean_steps=12.1
|
|
[Episode 46210] reward=-125926705.7 actor_loss=0.3284 critic_loss=1330532743168.0000 entropy=17.7374 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 46220] reward=-118083737.9 actor_loss=0.2478 critic_loss=173902820034.2069 entropy=17.7416 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 46220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-601523.8 mean_steps=14.1
|
|
[Episode 46230] reward=-118592433.7 actor_loss=0.2690 critic_loss=150047031926.1538 entropy=17.7393 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 46240] reward=-115878043.6 actor_loss=0.4215 critic_loss=141889179506.7586 entropy=17.7492 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 46240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-626488.3 mean_steps=13.2
|
|
[Episode 46250] reward=-123157649.0 actor_loss=0.3063 critic_loss=261234918175.2195 entropy=17.7352 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 46260] reward=-122110722.4 actor_loss=0.2709 critic_loss=226699562188.8000 entropy=17.7390 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 46260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501669.7 mean_steps=14.4
|
|
[Episode 46270] reward=-124455857.0 actor_loss=0.2731 critic_loss=295680039789.7143 entropy=17.7389 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 46280] reward=-119643524.4 actor_loss=0.3068 critic_loss=150482064020.6452 entropy=17.7532 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 46280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490889.6 mean_steps=14.4
|
|
[Episode 46290] reward=-115075552.4 actor_loss=0.2605 critic_loss=143884976640.0000 entropy=17.7423 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 46300] reward=-114758752.8 actor_loss=0.2332 critic_loss=139008911018.6667 entropy=17.7383 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 46300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422077.5 mean_steps=15.9
|
|
[Episode 46310] reward=-113370020.5 actor_loss=0.2127 critic_loss=150580902115.5555 entropy=17.7361 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 46320] reward=-116970187.7 actor_loss=0.2838 critic_loss=143477451239.6190 entropy=17.7126 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 46320] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-657471.2 mean_steps=10.2
|
|
[Episode 46330] reward=-116329108.1 actor_loss=0.2659 critic_loss=139428251966.5778 entropy=17.7132 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 46340] reward=-115813762.7 actor_loss=0.3187 critic_loss=143843964499.3488 entropy=17.7123 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 46340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-618924.8 mean_steps=12.0
|
|
[Episode 46350] reward=-118657690.1 actor_loss=0.3092 critic_loss=166802683904.0000 entropy=17.7282 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 46360] reward=-121271836.1 actor_loss=0.2790 critic_loss=144545619337.8462 entropy=17.7275 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 46360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417445.6 mean_steps=15.9
|
|
[Episode 46370] reward=-120148792.6 actor_loss=0.3081 critic_loss=148188603609.2121 entropy=17.7246 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 46380] reward=-117782565.7 actor_loss=0.3295 critic_loss=143410111409.2308 entropy=17.7090 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 46380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502123.0 mean_steps=14.6
|
|
[Episode 46390] reward=-120781004.6 actor_loss=0.2956 critic_loss=147721979728.4572 entropy=17.6924 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 46400] reward=-120617531.6 actor_loss=0.2224 critic_loss=144933004256.9697 entropy=17.6765 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 46400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435555.1 mean_steps=15.9
|
|
[Episode 46410] reward=-117871283.3 actor_loss=0.2905 critic_loss=141075269950.5778 entropy=17.6715 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 46420] reward=-116007656.2 actor_loss=0.3598 critic_loss=145692380774.4000 entropy=17.6681 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 46420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437989.3 mean_steps=15.0
|
|
[Episode 46430] reward=-116490735.9 actor_loss=0.2608 critic_loss=141785897537.6410 entropy=17.6645 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 46440] reward=-106532089.4 actor_loss=0.3222 critic_loss=133264597647.3600 entropy=17.6718 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 46440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518851.9 mean_steps=15.2
|
|
[Episode 46450] reward=-118677988.8 actor_loss=0.3585 critic_loss=142273981644.8000 entropy=17.6734 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 46460] reward=-114219627.8 actor_loss=0.2369 critic_loss=139342620113.4546 entropy=17.6752 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 46460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512283.8 mean_steps=13.8
|
|
[Episode 46470] reward=-115342922.8 actor_loss=0.3066 critic_loss=137754158545.4546 entropy=17.6713 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 46480] reward=-120719405.4 actor_loss=0.2063 critic_loss=144248461448.5333 entropy=17.6795 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 46480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570605.2 mean_steps=13.3
|
|
[Episode 46490] reward=-116609328.8 actor_loss=0.3442 critic_loss=155124267546.9474 entropy=17.6865 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 46500] reward=-117454136.3 actor_loss=0.2829 critic_loss=145908269238.0444 entropy=17.6901 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 46500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-418294.5 mean_steps=14.4
|
|
[Episode 46510] reward=-118885461.5 actor_loss=0.2722 critic_loss=142928231628.8000 entropy=17.6696 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 46520] reward=-119541442.6 actor_loss=0.2978 critic_loss=145317636388.5714 entropy=17.6841 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 46520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-573219.6 mean_steps=14.8
|
|
[Episode 46530] reward=-120340810.8 actor_loss=0.3166 critic_loss=144865252693.3333 entropy=17.6863 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 46540] reward=-119579572.5 actor_loss=0.3214 critic_loss=140210685033.9310 entropy=17.6787 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 46540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-764528.2 mean_steps=13.8
|
|
[Episode 46550] reward=-119209436.9 actor_loss=0.2922 critic_loss=141339885568.0000 entropy=17.6762 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 46560] reward=-115312073.6 actor_loss=0.2792 critic_loss=140287021875.2000 entropy=17.6774 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 46560] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-880501.7 mean_steps=13.2
|
|
[Episode 46570] reward=-112772075.4 actor_loss=0.3062 critic_loss=161310211584.0000 entropy=17.6726 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 46580] reward=-120959786.6 actor_loss=0.3796 critic_loss=262072487116.8000 entropy=17.6721 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 46580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529186.2 mean_steps=13.3
|
|
[Episode 46590] reward=-122140627.2 actor_loss=0.2781 critic_loss=147825120051.2000 entropy=17.6806 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 46600] reward=-119060291.0 actor_loss=0.3034 critic_loss=142024738224.3556 entropy=17.6471 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 46600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437056.5 mean_steps=14.9
|
|
[Episode 46610] reward=-113226585.7 actor_loss=0.2928 critic_loss=134382110674.4889 entropy=17.6527 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 46620] reward=-122819673.3 actor_loss=0.2578 critic_loss=157348583365.4857 entropy=17.6465 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 46620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405048.8 mean_steps=15.9
|
|
[Episode 46630] reward=-121613112.0 actor_loss=0.1795 critic_loss=149723673258.6667 entropy=17.6336 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 46640] reward=-134925785.3 actor_loss=0.2916 critic_loss=1177970665917.2173 entropy=17.6441 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 46640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506477.6 mean_steps=14.6
|
|
[Episode 46650] reward=-113196420.7 actor_loss=0.3198 critic_loss=136247452740.2667 entropy=17.6197 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 46660] reward=-161339497.7 actor_loss=0.5323 critic_loss=6609900987278.2227 entropy=17.6232 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 46660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559129.2 mean_steps=13.8
|
|
[Episode 46670] reward=-120437430.4 actor_loss=0.3021 critic_loss=150459780956.1600 entropy=17.6272 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 46680] reward=-118009498.5 actor_loss=0.2338 critic_loss=139045513716.6222 entropy=17.6407 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 46680] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-359835.3 mean_steps=17.4
|
|
[Episode 46690] reward=-120687074.3 actor_loss=0.2525 critic_loss=147139926546.9630 entropy=17.6381 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 46700] reward=-129608420.1 actor_loss=0.3075 critic_loss=1117917381336.1777 entropy=17.6416 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 46700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502703.4 mean_steps=15.4
|
|
[Episode 46710] reward=-117903105.2 actor_loss=0.3988 critic_loss=147959190016.0000 entropy=17.6340 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 46720] reward=-121066851.8 actor_loss=0.2060 critic_loss=145716447368.5333 entropy=17.6359 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 46720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592516.9 mean_steps=12.9
|
|
[Episode 46730] reward=-119493154.9 actor_loss=0.3084 critic_loss=142821961728.0000 entropy=17.6382 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 46740] reward=-115762799.2 actor_loss=0.3087 critic_loss=141537073906.5263 entropy=17.6308 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 46740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576822.4 mean_steps=13.7
|
|
[Episode 46750] reward=-122318380.0 actor_loss=0.3200 critic_loss=165378593223.1111 entropy=17.6437 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 46760] reward=-113538758.1 actor_loss=0.2020 critic_loss=133827226510.2222 entropy=17.6482 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 46760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-556175.6 mean_steps=14.4
|
|
[Episode 46770] reward=-123071993.5 actor_loss=0.3266 critic_loss=147082125743.1579 entropy=17.6588 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 46780] reward=-117699435.0 actor_loss=0.3587 critic_loss=138245663792.7619 entropy=17.6618 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 46780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422139.2 mean_steps=15.6
|
|
[Episode 46790] reward=-113951917.6 actor_loss=0.2628 critic_loss=141753932399.3044 entropy=17.6649 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 46800] reward=-120426507.6 actor_loss=0.2925 critic_loss=156774028950.5882 entropy=17.6673 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 46800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395307.4 mean_steps=16.4
|
|
[Episode 46810] reward=-115624439.5 actor_loss=0.2586 critic_loss=138109828513.1852 entropy=17.6666 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 46820] reward=-116382444.9 actor_loss=0.3667 critic_loss=135548866078.1176 entropy=17.6623 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 46820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486225.5 mean_steps=14.1
|
|
[Episode 46830] reward=-112825883.2 actor_loss=0.3364 critic_loss=132945095813.5652 entropy=17.6797 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 46840] reward=-113516618.3 actor_loss=0.3187 critic_loss=139950607661.1765 entropy=17.6779 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 46840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518500.3 mean_steps=13.1
|
|
[Episode 46850] reward=-119138688.5 actor_loss=0.2590 critic_loss=142600203299.3103 entropy=17.6900 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 46860] reward=-113973178.6 actor_loss=0.3481 critic_loss=131190658533.0526 entropy=17.6905 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 46860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-605999.9 mean_steps=13.8
|
|
[Episode 46870] reward=-117383323.0 actor_loss=0.2903 critic_loss=142828703861.0286 entropy=17.6892 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 46880] reward=-122918349.2 actor_loss=0.2803 critic_loss=148627897685.3333 entropy=17.7051 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 46880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-565991.9 mean_steps=12.7
|
|
[Episode 46890] reward=-121396468.8 actor_loss=0.3078 critic_loss=152155292861.6296 entropy=17.7078 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 46900] reward=-116779027.7 actor_loss=0.2550 critic_loss=143717695488.0000 entropy=17.6905 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 46900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401371.6 mean_steps=16.4
|
|
[Episode 46910] reward=-121849701.1 actor_loss=0.2311 critic_loss=155832623786.6667 entropy=17.6984 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 46920] reward=-120109849.0 actor_loss=0.2453 critic_loss=143586135244.8000 entropy=17.6858 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 46920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474434.1 mean_steps=14.1
|
|
[Episode 46930] reward=-116973204.6 actor_loss=0.3099 critic_loss=143925625651.2000 entropy=17.6750 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 46940] reward=-116557057.3 actor_loss=0.2319 critic_loss=140052581580.8000 entropy=17.6749 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 46940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504552.2 mean_steps=14.0
|
|
[Episode 46950] reward=-118617953.1 actor_loss=0.3139 critic_loss=146488904704.0000 entropy=17.6781 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 46960] reward=-122727394.8 actor_loss=0.3056 critic_loss=156043758933.3333 entropy=17.6698 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 46960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-364804.1 mean_steps=16.2
|
|
[Episode 46970] reward=-123233817.2 actor_loss=0.3494 critic_loss=146538055452.4445 entropy=17.6569 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 46980] reward=-132228444.1 actor_loss=0.3008 critic_loss=1185214024908.8000 entropy=17.6530 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 46980] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-273956.1 mean_steps=17.6
|
|
[Episode 46990] reward=-119142898.5 actor_loss=0.3794 critic_loss=190718076245.3333 entropy=17.6542 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 47000] reward=-114784196.7 actor_loss=0.2863 critic_loss=140532563968.0000 entropy=17.6396 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 47000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581608.6 mean_steps=13.5
|
|
[Episode 47010] reward=-118691083.5 actor_loss=0.1709 critic_loss=143281149633.4222 entropy=17.6469 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 47020] reward=-121395850.0 actor_loss=0.3182 critic_loss=150589892450.4615 entropy=17.6637 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 47020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507932.9 mean_steps=14.0
|
|
[Episode 47030] reward=-123915540.5 actor_loss=0.3171 critic_loss=425962308312.1778 entropy=17.6702 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 47040] reward=-116635189.9 actor_loss=0.4173 critic_loss=142396415122.2857 entropy=17.6982 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 47040] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-332522.1 mean_steps=17.4
|
|
[Episode 47050] reward=-649310441.4 actor_loss=10.1641 critic_loss=478488406523904.0000 entropy=17.7077 approx_kl=0.0038 kl_stop=1 intervention_rate=0.1152 front_blocked=0
|
|
[Episode 47060] reward=-115382001.2 actor_loss=0.3541 critic_loss=146972745337.9048 entropy=17.6997 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 47060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536059.8 mean_steps=13.4
|
|
[Episode 47070] reward=-119710409.0 actor_loss=0.2619 critic_loss=148275742257.5484 entropy=17.6960 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47080] reward=-119232537.3 actor_loss=0.2753 critic_loss=144549672459.3778 entropy=17.6911 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 47080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407292.5 mean_steps=16.8
|
|
[Episode 47090] reward=-119346622.4 actor_loss=0.2488 critic_loss=141305268435.8621 entropy=17.6903 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 47100] reward=-116402741.3 actor_loss=0.3022 critic_loss=140716438802.7317 entropy=17.7100 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 47100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-563876.3 mean_steps=14.7
|
|
[Episode 47110] reward=-118500051.7 actor_loss=0.2815 critic_loss=146136849019.5862 entropy=17.7029 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 47120] reward=-120710509.5 actor_loss=0.3054 critic_loss=147794942361.6000 entropy=17.7110 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 47120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590636.4 mean_steps=13.6
|
|
[Episode 47130] reward=-113011240.6 actor_loss=0.2857 critic_loss=132506943670.0444 entropy=17.7066 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47140] reward=-116862756.6 actor_loss=0.2295 critic_loss=143852853475.5555 entropy=17.7224 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 47140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-582078.6 mean_steps=13.7
|
|
[Episode 47150] reward=-111602810.5 actor_loss=0.3152 critic_loss=142324135749.8182 entropy=17.7202 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 47160] reward=-113194395.3 actor_loss=0.3750 critic_loss=139612808862.8965 entropy=17.7256 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 47160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473202.7 mean_steps=14.9
|
|
[Episode 47170] reward=-121362220.8 actor_loss=0.2644 critic_loss=154594590720.0000 entropy=17.7057 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47180] reward=-124402175.0 actor_loss=0.2946 critic_loss=149055607239.1111 entropy=17.7138 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 47180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-450747.4 mean_steps=14.3
|
|
[Episode 47190] reward=-125364382.4 actor_loss=0.1988 critic_loss=154453819392.0000 entropy=17.7153 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 47200] reward=-119682139.7 actor_loss=0.2836 critic_loss=143187566299.4286 entropy=17.6966 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 47200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473354.4 mean_steps=14.1
|
|
[Episode 47210] reward=-119325173.7 actor_loss=0.2480 critic_loss=141482460592.3556 entropy=17.6943 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47220] reward=-120673965.0 actor_loss=0.2653 critic_loss=143819413857.1035 entropy=17.6971 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 47220] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-688816.5 mean_steps=11.6
|
|
[Episode 47230] reward=-118942515.0 actor_loss=0.2260 critic_loss=141329001715.8095 entropy=17.6972 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 47240] reward=-116575278.0 actor_loss=0.2627 critic_loss=143320607129.6000 entropy=17.6850 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 47240] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-623837.3 mean_steps=11.1
|
|
[Episode 47250] reward=-117283174.3 actor_loss=0.2928 critic_loss=149820637184.0000 entropy=17.6751 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 47260] reward=-117487988.5 actor_loss=0.4288 critic_loss=167369763810.7429 entropy=17.6712 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 47260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-464964.5 mean_steps=16.1
|
|
[Episode 47270] reward=-117146478.0 actor_loss=0.3002 critic_loss=138458666469.0526 entropy=17.6533 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 47280] reward=-118509647.4 actor_loss=0.3176 critic_loss=142281052797.1555 entropy=17.6584 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 47280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-353166.0 mean_steps=16.5
|
|
[Episode 47290] reward=-119274522.8 actor_loss=0.2776 critic_loss=141854623901.5385 entropy=17.6549 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 47300] reward=-114757020.1 actor_loss=0.2707 critic_loss=135780839915.5200 entropy=17.6673 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 47300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-389937.7 mean_steps=15.3
|
|
[Episode 47310] reward=-112537825.6 actor_loss=0.4206 critic_loss=135536514935.4667 entropy=17.6766 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 47320] reward=-118413640.4 actor_loss=0.3454 critic_loss=148308972202.6667 entropy=17.6650 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 47320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480721.8 mean_steps=15.2
|
|
[Episode 47330] reward=-122370699.6 actor_loss=0.2878 critic_loss=156046413004.8000 entropy=17.6669 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 47340] reward=-124569330.5 actor_loss=0.2833 critic_loss=151324631950.2222 entropy=17.6551 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 47340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417223.3 mean_steps=15.7
|
|
[Episode 47350] reward=-112976801.9 actor_loss=0.4060 critic_loss=138094938112.0000 entropy=17.6616 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 47360] reward=-123861280.7 actor_loss=0.2676 critic_loss=156828387620.5714 entropy=17.6511 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 47360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528262.9 mean_steps=13.2
|
|
[Episode 47370] reward=-118492428.9 actor_loss=0.2946 critic_loss=147056321008.4849 entropy=17.6557 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 47380] reward=-118358114.8 actor_loss=0.2942 critic_loss=139774494134.8571 entropy=17.6473 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 47380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439337.3 mean_steps=15.1
|
|
[Episode 47390] reward=-125142205.6 actor_loss=0.3485 critic_loss=284604138291.2000 entropy=17.6509 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 47400] reward=-121250678.6 actor_loss=0.2269 critic_loss=147332965034.6667 entropy=17.6389 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 47400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523970.2 mean_steps=14.2
|
|
[Episode 47410] reward=-122297734.2 actor_loss=0.2690 critic_loss=150618802468.5714 entropy=17.6315 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47420] reward=-122065130.3 actor_loss=0.3804 critic_loss=146889140906.6667 entropy=17.6399 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 47420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-421243.7 mean_steps=16.7
|
|
[Episode 47430] reward=-115367635.5 actor_loss=0.3074 critic_loss=135569779097.6000 entropy=17.6332 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 47440] reward=-121684460.5 actor_loss=0.2654 critic_loss=142814918246.4000 entropy=17.6406 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 47440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487890.4 mean_steps=14.2
|
|
[Episode 47450] reward=-116144479.8 actor_loss=0.3502 critic_loss=138085997681.7778 entropy=17.6575 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 47460] reward=-114613842.7 actor_loss=0.4465 critic_loss=141668528128.0000 entropy=17.6434 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 47460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580345.2 mean_steps=12.7
|
|
[Episode 47470] reward=-121679227.8 actor_loss=0.3053 critic_loss=145147375748.1290 entropy=17.6481 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 47480] reward=-115432091.8 actor_loss=0.3298 critic_loss=137578434150.4000 entropy=17.6504 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 47480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431505.4 mean_steps=15.8
|
|
[Episode 47490] reward=-122188264.6 actor_loss=0.2371 critic_loss=147452421006.2222 entropy=17.6414 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 47500] reward=-117048213.4 actor_loss=0.1793 critic_loss=144391624614.9565 entropy=17.6449 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 47500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535447.4 mean_steps=14.4
|
|
[Episode 47510] reward=-112102006.2 actor_loss=0.3672 critic_loss=141700845663.2558 entropy=17.6555 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47520] reward=-123640976.5 actor_loss=0.2146 critic_loss=147704838054.9565 entropy=17.6498 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 47520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-606244.0 mean_steps=13.2
|
|
[Episode 47530] reward=-123576790.2 actor_loss=0.3602 critic_loss=294356921344.0000 entropy=17.6431 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 47540] reward=-116307342.5 actor_loss=0.3690 critic_loss=139010277717.3333 entropy=17.6380 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 47540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589985.9 mean_steps=13.2
|
|
[Episode 47550] reward=-113851879.6 actor_loss=0.3407 critic_loss=151380171434.6667 entropy=17.6294 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 47560] reward=-117645751.5 actor_loss=0.2019 critic_loss=138972866402.4615 entropy=17.6355 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 47560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505813.8 mean_steps=14.7
|
|
[Episode 47570] reward=-116784659.4 actor_loss=0.3975 critic_loss=139406293219.5555 entropy=17.6378 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 47580] reward=-117064524.4 actor_loss=0.2701 critic_loss=138396536012.8000 entropy=17.6343 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 47580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451178.2 mean_steps=14.6
|
|
[Episode 47590] reward=-118710039.9 actor_loss=0.3015 critic_loss=154459748608.0000 entropy=17.6317 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 47600] reward=-119757576.5 actor_loss=0.3200 critic_loss=145672245248.0000 entropy=17.6313 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 47600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510290.9 mean_steps=14.6
|
|
[Episode 47610] reward=-119615246.1 actor_loss=0.2480 critic_loss=144039366144.0000 entropy=17.6339 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 47620] reward=-112873253.9 actor_loss=0.1795 critic_loss=133307268871.7576 entropy=17.6260 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 47620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476668.3 mean_steps=15.1
|
|
[Episode 47630] reward=-122106868.1 actor_loss=0.2165 critic_loss=147440541696.0000 entropy=17.6228 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 47640] reward=-117910713.2 actor_loss=0.3777 critic_loss=139936141056.0000 entropy=17.6215 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 47640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-535224.1 mean_steps=15.6
|
|
[Episode 47650] reward=-116568839.4 actor_loss=0.2256 critic_loss=140284092043.6364 entropy=17.6353 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 47660] reward=-114648225.1 actor_loss=0.3385 critic_loss=133561998729.8462 entropy=17.6300 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 47660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435274.1 mean_steps=15.8
|
|
[Episode 47670] reward=-116680429.3 actor_loss=0.3214 critic_loss=135248827538.2857 entropy=17.6329 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 47680] reward=-116450284.6 actor_loss=0.4235 critic_loss=139687091260.2353 entropy=17.6221 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 47680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-582895.0 mean_steps=13.9
|
|
[Episode 47690] reward=-119427435.5 actor_loss=0.2103 critic_loss=148693583248.6956 entropy=17.6114 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 47700] reward=-116095445.1 actor_loss=0.3069 critic_loss=137976811341.9131 entropy=17.6046 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 47700] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-254005.6 mean_steps=17.9
|
|
[Episode 47710] reward=-119591977.5 actor_loss=0.1432 critic_loss=265008466850.9091 entropy=17.6196 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 47720] reward=-117057405.3 actor_loss=0.3566 critic_loss=140079224877.5111 entropy=17.6360 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 47720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463196.0 mean_steps=15.1
|
|
[Episode 47730] reward=-116214624.0 actor_loss=0.2885 critic_loss=143589105664.0000 entropy=17.6382 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 47740] reward=-116481860.6 actor_loss=0.3684 critic_loss=142200211206.2439 entropy=17.6463 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 47740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566396.5 mean_steps=13.8
|
|
[Episode 47750] reward=-117730567.7 actor_loss=0.3633 critic_loss=139875655115.0345 entropy=17.6567 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 47760] reward=-118668829.8 actor_loss=0.2417 critic_loss=148338550663.5294 entropy=17.6571 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 47760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515504.5 mean_steps=14.2
|
|
[Episode 47770] reward=-119371197.1 actor_loss=0.3423 critic_loss=146438569398.8571 entropy=17.6541 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 47780] reward=-114555774.1 actor_loss=0.2361 critic_loss=145224359025.7778 entropy=17.6589 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 47780] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-422928.8 mean_steps=16.4
|
|
[Episode 47790] reward=-119128309.9 actor_loss=0.2727 critic_loss=154420102212.2667 entropy=17.6586 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47800] reward=-119628503.6 actor_loss=0.3019 critic_loss=148458037551.4074 entropy=17.6628 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 47800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-583295.2 mean_steps=12.8
|
|
[Episode 47810] reward=-116449606.5 actor_loss=0.2635 critic_loss=136792698606.9333 entropy=17.6443 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 47820] reward=-120244501.2 actor_loss=0.2165 critic_loss=146855453416.7273 entropy=17.6491 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 47820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-468457.6 mean_steps=13.1
|
|
[Episode 47830] reward=-121319337.4 actor_loss=0.2603 critic_loss=153345898723.5555 entropy=17.6467 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 47840] reward=-119776012.7 actor_loss=0.3329 critic_loss=150026169548.8000 entropy=17.6499 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 47840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592970.4 mean_steps=12.5
|
|
[Episode 47850] reward=-117126338.8 actor_loss=0.3038 critic_loss=153138487777.8824 entropy=17.6527 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 47860] reward=-118250521.0 actor_loss=0.3494 critic_loss=160231651328.0000 entropy=17.6601 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 47860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-364415.7 mean_steps=15.7
|
|
[Episode 47870] reward=-117791154.0 actor_loss=0.3500 critic_loss=142583485676.3077 entropy=17.6716 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 47880] reward=-118002671.5 actor_loss=0.3745 critic_loss=162077445168.7619 entropy=17.6746 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 47880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-334551.5 mean_steps=16.5
|
|
[Episode 47890] reward=-113168847.8 actor_loss=0.3336 critic_loss=136767480685.7143 entropy=17.6670 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47900] reward=-114495079.6 actor_loss=0.3319 critic_loss=140830174916.9231 entropy=17.6614 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 47900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577744.3 mean_steps=13.7
|
|
[Episode 47910] reward=-117029682.5 actor_loss=0.2726 critic_loss=145375712256.0000 entropy=17.6596 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 47920] reward=-122680912.0 actor_loss=0.2287 critic_loss=144840523385.9048 entropy=17.6615 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 47920] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-222186.4 mean_steps=19.1
|
|
[Episode 47930] reward=-121603929.2 actor_loss=0.2501 critic_loss=143884226755.0476 entropy=17.6535 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 47940] reward=-120432303.2 actor_loss=0.2427 critic_loss=155155074947.8788 entropy=17.6336 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 47940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525689.1 mean_steps=14.3
|
|
[Episode 47950] reward=-111177692.7 actor_loss=0.3926 critic_loss=129482043782.0952 entropy=17.6294 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 47960] reward=-122415861.7 actor_loss=0.2839 critic_loss=148054706555.2592 entropy=17.6375 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 47960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-577135.8 mean_steps=15.8
|
|
[Episode 47970] reward=-122878162.8 actor_loss=0.1922 critic_loss=144240878842.3111 entropy=17.6439 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 47980] reward=-117208907.3 actor_loss=0.2724 critic_loss=143152573274.8387 entropy=17.6417 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 47980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-703579.8 mean_steps=13.1
|
|
[Episode 47990] reward=-117915409.9 actor_loss=0.2485 critic_loss=143738758212.2667 entropy=17.6498 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 48000] reward=-117077135.2 actor_loss=0.3158 critic_loss=140130853428.9655 entropy=17.6475 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 48000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572417.5 mean_steps=12.9
|
|
[Episode 48010] reward=-120276320.1 actor_loss=0.2560 critic_loss=142330472220.4445 entropy=17.6379 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 48020] reward=-121624785.4 actor_loss=0.2029 critic_loss=144304811804.4445 entropy=17.6283 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 48020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-618908.6 mean_steps=12.6
|
|
[Episode 48030] reward=-112342335.1 actor_loss=0.3081 critic_loss=139866291313.7778 entropy=17.6384 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 48040] reward=-120556440.3 actor_loss=0.2520 critic_loss=142644411202.3704 entropy=17.6343 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 48040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-377422.2 mean_steps=16.2
|
|
[Episode 48050] reward=-121235051.5 actor_loss=0.3157 critic_loss=141763259392.0000 entropy=17.6129 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 48060] reward=-113312354.1 actor_loss=0.3806 critic_loss=134920060507.8974 entropy=17.6122 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 48060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-365338.7 mean_steps=16.4
|
|
[Episode 48070] reward=-117152274.8 actor_loss=0.2753 critic_loss=143337818014.4762 entropy=17.6174 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 48080] reward=-113810993.0 actor_loss=0.3419 critic_loss=131879210734.9333 entropy=17.6260 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 48080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528630.7 mean_steps=14.2
|
|
[Episode 48090] reward=-114898073.9 actor_loss=0.3296 critic_loss=136488373134.2222 entropy=17.6207 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 48100] reward=-117472626.6 actor_loss=0.2739 critic_loss=134986559272.4211 entropy=17.6148 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 48100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511578.2 mean_steps=14.2
|
|
[Episode 48110] reward=-159581340.2 actor_loss=0.3499 critic_loss=7519019289170.5811 entropy=17.6166 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 48120] reward=-115636799.6 actor_loss=0.2369 critic_loss=142672637132.8000 entropy=17.6230 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 48120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474948.2 mean_steps=15.5
|
|
[Episode 48130] reward=-116772650.2 actor_loss=0.2311 critic_loss=173022626520.1778 entropy=17.6255 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 48140] reward=-119098603.4 actor_loss=0.2948 critic_loss=140891067970.7826 entropy=17.6356 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 48140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-376904.8 mean_steps=15.6
|
|
[Episode 48150] reward=-116153365.1 actor_loss=0.2983 critic_loss=138692332014.3448 entropy=17.6294 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 48160] reward=-124458872.3 actor_loss=0.2199 critic_loss=180687544623.4074 entropy=17.6290 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 48160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-334784.7 mean_steps=16.9
|
|
[Episode 48170] reward=-119171372.5 actor_loss=0.3201 critic_loss=148011838668.8000 entropy=17.6366 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 48180] reward=-119939951.3 actor_loss=0.2850 critic_loss=148448297324.0889 entropy=17.6389 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 48180] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612195.2 mean_steps=11.9
|
|
[Episode 48190] reward=-118463384.9 actor_loss=0.1983 critic_loss=158035658384.4102 entropy=17.6539 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 48200] reward=-114427031.4 actor_loss=0.3050 critic_loss=139973099155.9111 entropy=17.6714 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 48200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548916.9 mean_steps=13.9
|
|
[Episode 48210] reward=-120714543.2 actor_loss=0.3231 critic_loss=150436276585.4118 entropy=17.6761 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 48220] reward=-112669295.7 actor_loss=0.2753 critic_loss=139191398985.1429 entropy=17.6713 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 48220] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-404670.3 mean_steps=17.6
|
|
[Episode 48230] reward=-115214522.5 actor_loss=0.3224 critic_loss=140243378878.1714 entropy=17.6735 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 48240] reward=-126801971.6 actor_loss=0.2736 critic_loss=229766802711.2727 entropy=17.6689 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 48240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448455.5 mean_steps=14.9
|
|
[Episode 48250] reward=-118967865.6 actor_loss=0.1580 critic_loss=145002016085.3333 entropy=17.6733 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 48260] reward=-120316221.4 actor_loss=0.2678 critic_loss=153394626560.0000 entropy=17.6723 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 48260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-541618.9 mean_steps=15.3
|
|
[Episode 48270] reward=-117138602.8 actor_loss=0.2883 critic_loss=135920089861.6889 entropy=17.6635 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 48280] reward=-115702706.9 actor_loss=0.3106 critic_loss=139341446375.2258 entropy=17.6675 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 48280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-588808.4 mean_steps=13.1
|
|
[Episode 48290] reward=-115533292.7 actor_loss=0.2767 critic_loss=151977283762.0869 entropy=17.6544 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 48300] reward=-119624707.3 actor_loss=0.1878 critic_loss=143003234544.9412 entropy=17.6515 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 48300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-422782.8 mean_steps=16.7
|
|
[Episode 48310] reward=-116893914.7 actor_loss=0.2403 critic_loss=138845933476.9778 entropy=17.6623 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 48320] reward=-121098011.5 actor_loss=0.2875 critic_loss=184984021178.1818 entropy=17.6790 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 48320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588127.9 mean_steps=13.9
|
|
[Episode 48330] reward=-116301944.9 actor_loss=0.3996 critic_loss=137687975058.2857 entropy=17.6785 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 48340] reward=-113584291.8 actor_loss=0.3382 critic_loss=129213593941.3333 entropy=17.6855 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 48340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506573.4 mean_steps=14.3
|
|
[Episode 48350] reward=-114408265.0 actor_loss=0.2930 critic_loss=139502931148.8000 entropy=17.6859 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 48360] reward=-117240504.4 actor_loss=0.2684 critic_loss=145077077918.4762 entropy=17.6702 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 48360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564758.6 mean_steps=13.5
|
|
[Episode 48370] reward=-114978025.7 actor_loss=0.3453 critic_loss=148170893401.0435 entropy=17.6695 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 48380] reward=-122215599.0 actor_loss=0.2739 critic_loss=147148339609.6000 entropy=17.6809 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 48380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491474.4 mean_steps=15.2
|
|
[Episode 48390] reward=-121508704.0 actor_loss=0.2075 critic_loss=226420132912.7619 entropy=17.6752 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 48400] reward=-112977200.8 actor_loss=0.3513 critic_loss=148408718677.3333 entropy=17.6832 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 48400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-539006.5 mean_steps=12.5
|
|
[Episode 48410] reward=-115924536.6 actor_loss=0.3455 critic_loss=141887150535.1111 entropy=17.6827 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 48420] reward=-122093539.7 actor_loss=0.2445 critic_loss=153995800576.0000 entropy=17.6787 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 48420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-458261.3 mean_steps=16.7
|
|
[Episode 48430] reward=-123470096.1 actor_loss=0.2176 critic_loss=150081414413.4737 entropy=17.6866 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 48440] reward=-121205047.7 actor_loss=0.1643 critic_loss=155432173568.0000 entropy=17.6867 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 48440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500486.3 mean_steps=15.4
|
|
[Episode 48450] reward=-115799961.3 actor_loss=0.3265 critic_loss=146692563907.7647 entropy=17.6938 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 48460] reward=-121777509.3 actor_loss=0.2823 critic_loss=154298891759.4839 entropy=17.6757 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 48460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517934.1 mean_steps=14.2
|
|
[Episode 48470] reward=-118486187.0 actor_loss=0.2515 critic_loss=141489190725.8182 entropy=17.6813 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 48480] reward=-114124995.6 actor_loss=0.2868 critic_loss=138583458669.7143 entropy=17.6678 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 48480] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-676230.0 mean_steps=11.4
|
|
[Episode 48490] reward=-117767210.2 actor_loss=0.2681 critic_loss=151228739405.9131 entropy=17.6885 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 48500] reward=-121106387.1 actor_loss=0.3460 critic_loss=150573898137.6000 entropy=17.6726 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 48500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540662.8 mean_steps=13.7
|
|
[Episode 48510] reward=-114864949.4 actor_loss=0.2593 critic_loss=150761786026.6667 entropy=17.6706 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 48520] reward=-117058878.9 actor_loss=0.2754 critic_loss=138328041062.4000 entropy=17.6700 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 48520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542777.2 mean_steps=14.6
|
|
[Episode 48530] reward=-119347304.9 actor_loss=0.3462 critic_loss=142916133228.0889 entropy=17.6613 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 48540] reward=-116810044.3 actor_loss=0.3381 critic_loss=147631015448.3810 entropy=17.6658 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 48540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496943.1 mean_steps=14.2
|
|
[Episode 48550] reward=-121403119.1 actor_loss=0.3780 critic_loss=150189090952.5333 entropy=17.6706 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 48560] reward=-115861706.0 actor_loss=0.2629 critic_loss=139412634387.6923 entropy=17.6565 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 48560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426851.8 mean_steps=15.9
|
|
[Episode 48570] reward=-119005504.5 actor_loss=0.2976 critic_loss=139830180522.6667 entropy=17.6700 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 48580] reward=-118521455.0 actor_loss=0.3310 critic_loss=160593647206.4000 entropy=17.6935 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 48580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-411128.2 mean_steps=15.2
|
|
[Episode 48590] reward=-113330240.3 actor_loss=0.3597 critic_loss=138365445356.3077 entropy=17.6992 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 48600] reward=-120607224.0 actor_loss=0.3059 critic_loss=146860771901.4400 entropy=17.7011 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 48600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571329.2 mean_steps=12.8
|
|
[Episode 48610] reward=-117767287.4 actor_loss=0.3507 critic_loss=145928706650.3529 entropy=17.6967 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 48620] reward=-116540609.3 actor_loss=0.3451 critic_loss=141111849779.2000 entropy=17.6997 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 48620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484433.3 mean_steps=14.3
|
|
[Episode 48630] reward=-123572346.0 actor_loss=0.2130 critic_loss=151117087500.1905 entropy=17.6896 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 48640] reward=-117190271.8 actor_loss=0.3469 critic_loss=140722215087.5428 entropy=17.7035 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 48640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579999.4 mean_steps=12.9
|
|
[Episode 48650] reward=-118838887.5 actor_loss=0.3171 critic_loss=144721451643.5862 entropy=17.7174 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 48660] reward=-121287076.2 actor_loss=0.3121 critic_loss=153269273486.2222 entropy=17.7087 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 48660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-679467.5 mean_steps=12.3
|
|
[Episode 48670] reward=-115733323.1 actor_loss=0.3656 critic_loss=137826756380.4445 entropy=17.7153 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 48680] reward=-119716694.9 actor_loss=0.3282 critic_loss=142368853125.5652 entropy=17.7010 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 48680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459635.9 mean_steps=15.2
|
|
[Episode 48690] reward=-118674472.2 actor_loss=0.2487 critic_loss=140453940955.4286 entropy=17.6955 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 48700] reward=-117134086.0 actor_loss=0.3291 critic_loss=134787587218.2857 entropy=17.7108 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 48700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-454057.1 mean_steps=14.1
|
|
[Episode 48710] reward=-123186042.3 actor_loss=0.3124 critic_loss=146559301495.4667 entropy=17.7170 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 48720] reward=-118351766.2 actor_loss=0.2973 critic_loss=141913242935.6522 entropy=17.6913 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 48720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-596874.5 mean_steps=13.8
|
|
[Episode 48730] reward=-119373712.6 actor_loss=0.2696 critic_loss=144855978985.2444 entropy=17.6905 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 48740] reward=-118367657.3 actor_loss=0.2781 critic_loss=138346980649.2903 entropy=17.6983 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 48740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-638096.7 mean_steps=12.2
|
|
[Episode 48750] reward=-122238278.1 actor_loss=0.2758 critic_loss=144221886600.5333 entropy=17.6958 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 48760] reward=-119093872.6 actor_loss=0.3213 critic_loss=143327082496.0000 entropy=17.6926 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 48760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-452860.3 mean_steps=13.7
|
|
[Episode 48770] reward=-121730526.4 actor_loss=0.2252 critic_loss=153097794653.0909 entropy=17.6895 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 48780] reward=-110666079.6 actor_loss=0.3227 critic_loss=132464882408.7273 entropy=17.6868 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 48780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557697.6 mean_steps=13.8
|
|
[Episode 48790] reward=-120514620.2 actor_loss=0.1842 critic_loss=144435607259.4286 entropy=17.6755 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 48800] reward=-115519832.0 actor_loss=0.3617 critic_loss=140335077729.1035 entropy=17.6834 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 48800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-629541.0 mean_steps=12.2
|
|
[Episode 48810] reward=-117864081.4 actor_loss=0.3055 critic_loss=142858333440.0000 entropy=17.6797 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 48820] reward=-113057842.6 actor_loss=0.3076 critic_loss=133161475731.9111 entropy=17.7027 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 48820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-451513.0 mean_steps=13.9
|
|
[Episode 48830] reward=-118416054.7 actor_loss=0.3080 critic_loss=147084788076.0889 entropy=17.7224 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 48840] reward=-118469911.0 actor_loss=0.3178 critic_loss=139596684947.9111 entropy=17.7135 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 48840] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-708085.9 mean_steps=10.7
|
|
[Episode 48850] reward=-118708095.0 actor_loss=0.2817 critic_loss=143836180164.9231 entropy=17.7148 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 48860] reward=-119359695.5 actor_loss=0.3210 critic_loss=144592438067.2000 entropy=17.7191 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 48860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-572572.6 mean_steps=11.8
|
|
[Episode 48870] reward=-119713868.6 actor_loss=0.2509 critic_loss=150789337239.7037 entropy=17.7194 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 48880] reward=-117840003.9 actor_loss=0.2632 critic_loss=145589460601.9048 entropy=17.7189 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 48880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462403.3 mean_steps=15.2
|
|
[Episode 48890] reward=-116956630.4 actor_loss=0.3267 critic_loss=137813190535.5294 entropy=17.7181 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 48900] reward=-119861005.5 actor_loss=0.3087 critic_loss=149656591473.7778 entropy=17.7096 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 48900] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-713796.6 mean_steps=10.7
|
|
[Episode 48910] reward=-122997930.7 actor_loss=0.2569 critic_loss=148343312068.9231 entropy=17.7015 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 48920] reward=-118017506.4 actor_loss=0.3277 critic_loss=138599484211.2000 entropy=17.7046 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 48920] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-392533.6 mean_steps=17.6
|
|
[Episode 48930] reward=-119746246.9 actor_loss=0.3291 critic_loss=142132654633.5135 entropy=17.7030 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 48940] reward=-119655884.4 actor_loss=0.3649 critic_loss=148934462841.2632 entropy=17.7048 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 48940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-640816.1 mean_steps=12.2
|
|
[Episode 48950] reward=-118133697.8 actor_loss=0.2592 critic_loss=136480896341.3333 entropy=17.6911 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 48960] reward=-118662154.8 actor_loss=0.2200 critic_loss=137275174291.3939 entropy=17.6951 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 48960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585690.4 mean_steps=12.9
|
|
[Episode 48970] reward=-117598233.7 actor_loss=0.3184 critic_loss=146355858272.7111 entropy=17.6787 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 48980] reward=-120402982.0 actor_loss=0.3000 critic_loss=147311528779.2941 entropy=17.6571 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 48980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509907.2 mean_steps=14.1
|
|
[Episode 48990] reward=-122747348.4 actor_loss=0.1883 critic_loss=147432661447.1111 entropy=17.6491 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 49000] reward=-120302211.8 actor_loss=0.2506 critic_loss=147562894373.9259 entropy=17.6552 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 49000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-620748.4 mean_steps=12.9
|
|
[Episode 49010] reward=-118769594.5 actor_loss=0.3700 critic_loss=143487451477.3333 entropy=17.6382 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 49020] reward=-122212802.7 actor_loss=0.2781 critic_loss=150164139659.6364 entropy=17.6183 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 49020] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-681897.6 mean_steps=11.7
|
|
[Episode 49030] reward=-114518613.7 actor_loss=0.4399 critic_loss=135938186386.2857 entropy=17.6084 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 49040] reward=-121755345.4 actor_loss=0.2512 critic_loss=143234132286.5778 entropy=17.6078 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 49040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489514.3 mean_steps=14.9
|
|
[Episode 49050] reward=-117850500.5 actor_loss=0.2710 critic_loss=137800831249.0667 entropy=17.6035 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 49060] reward=-123874403.3 actor_loss=0.2410 critic_loss=148061403363.5555 entropy=17.6104 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 49060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-483405.1 mean_steps=13.5
|
|
[Episode 49070] reward=-121363814.3 actor_loss=0.3371 critic_loss=144672895522.1333 entropy=17.6159 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 49080] reward=-122141348.8 actor_loss=0.2568 critic_loss=146550560358.4000 entropy=17.6224 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 49080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-601125.2 mean_steps=13.7
|
|
[Episode 49090] reward=-114702001.7 actor_loss=0.3693 critic_loss=137266682733.7143 entropy=17.6302 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 49100] reward=-122846741.8 actor_loss=0.2157 critic_loss=147035175755.2941 entropy=17.6202 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 49100] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-329357.0 mean_steps=18.0
|
|
[Episode 49110] reward=-119421924.7 actor_loss=0.3854 critic_loss=153342627840.0000 entropy=17.6313 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 49120] reward=-118442231.4 actor_loss=0.2406 critic_loss=138947766347.8518 entropy=17.6286 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 49120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-537512.1 mean_steps=12.8
|
|
[Episode 49130] reward=-125292213.1 actor_loss=0.2996 critic_loss=154129762596.5714 entropy=17.6085 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 49140] reward=-118972900.9 actor_loss=0.2338 critic_loss=138945594368.0000 entropy=17.6030 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 49140] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-648029.0 mean_steps=11.4
|
|
[Episode 49150] reward=-121454618.4 actor_loss=0.2377 critic_loss=143556955574.8571 entropy=17.6049 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 49160] reward=-120125277.1 actor_loss=0.2845 critic_loss=141859168069.8182 entropy=17.6003 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 49160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371508.0 mean_steps=16.1
|
|
[Episode 49170] reward=-119455723.5 actor_loss=0.2168 critic_loss=139120302200.4706 entropy=17.6046 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 49180] reward=-115007815.4 actor_loss=0.3182 critic_loss=133852907640.4706 entropy=17.5972 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 49180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-659535.0 mean_steps=13.1
|
|
[Episode 49190] reward=-121183650.7 actor_loss=0.2060 critic_loss=148767107451.2592 entropy=17.6004 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 49200] reward=-113841891.4 actor_loss=0.3726 critic_loss=133162804645.6471 entropy=17.6051 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 49200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-514235.9 mean_steps=15.4
|
|
[Episode 49210] reward=-121674722.1 actor_loss=0.2970 critic_loss=144270580874.3784 entropy=17.6106 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 49220] reward=-117374518.1 actor_loss=0.3583 critic_loss=135310302841.9048 entropy=17.6108 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 49220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-598017.6 mean_steps=14.8
|
|
[Episode 49230] reward=-122784275.3 actor_loss=0.3554 critic_loss=148375304192.0000 entropy=17.5895 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 49240] reward=-117354915.0 actor_loss=0.3961 critic_loss=138577216580.2667 entropy=17.5953 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 49240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424806.4 mean_steps=15.8
|
|
[Episode 49250] reward=-119694265.6 actor_loss=0.3088 critic_loss=140871265315.3103 entropy=17.6039 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 49260] reward=-117959553.6 actor_loss=0.3135 critic_loss=133562788285.2174 entropy=17.5988 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 49260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484867.2 mean_steps=15.3
|
|
[Episode 49270] reward=-126532386.2 actor_loss=0.3131 critic_loss=152743166680.1778 entropy=17.5893 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 49280] reward=-115492351.2 actor_loss=0.4118 critic_loss=138546885099.5200 entropy=17.5749 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 49280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490904.9 mean_steps=14.2
|
|
[Episode 49290] reward=-120127272.3 actor_loss=0.3229 critic_loss=145791366485.3333 entropy=17.5800 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 49300] reward=-122875482.1 actor_loss=0.3415 critic_loss=148923246324.8696 entropy=17.5852 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 49300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501009.5 mean_steps=13.9
|
|
[Episode 49310] reward=-117769516.1 actor_loss=0.2921 critic_loss=140375044505.6000 entropy=17.5858 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 49320] reward=-118518342.2 actor_loss=0.1450 critic_loss=137636363702.8571 entropy=17.5994 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 49320] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-314119.4 mean_steps=16.9
|
|
[Episode 49330] reward=-115380854.3 actor_loss=0.2450 critic_loss=130935198326.1538 entropy=17.6029 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 49340] reward=-121775756.4 actor_loss=0.1793 critic_loss=145467285147.8261 entropy=17.5963 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 49340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-523917.7 mean_steps=15.7
|
|
[Episode 49350] reward=-122239288.3 actor_loss=0.3199 critic_loss=160152841122.9091 entropy=17.5872 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 49360] reward=-121364510.4 actor_loss=0.3197 critic_loss=144972286853.1200 entropy=17.5837 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 49360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480478.0 mean_steps=14.1
|
|
[Episode 49370] reward=-113228481.0 actor_loss=0.2562 critic_loss=136577124165.8182 entropy=17.5906 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 49380] reward=-115040153.2 actor_loss=0.2952 critic_loss=146030572573.2571 entropy=17.5955 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 49380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-534229.6 mean_steps=12.1
|
|
[Episode 49390] reward=-120222472.2 actor_loss=0.1965 critic_loss=141033312256.0000 entropy=17.5975 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 49400] reward=-115497462.1 actor_loss=0.3893 critic_loss=130543794176.0000 entropy=17.6201 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 49400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478608.3 mean_steps=14.0
|
|
[Episode 49410] reward=-121129905.4 actor_loss=0.2056 critic_loss=140319002272.9143 entropy=17.6278 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 49420] reward=-116650333.3 actor_loss=0.1939 critic_loss=134700694771.8095 entropy=17.6310 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 49420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548068.8 mean_steps=14.4
|
|
[Episode 49430] reward=-121442970.6 actor_loss=0.2858 critic_loss=143735494580.1482 entropy=17.6231 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 49440] reward=-143886337.6 actor_loss=0.2736 critic_loss=2599326746760.5332 entropy=17.6243 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 49440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-366089.4 mean_steps=16.1
|
|
[Episode 49450] reward=-117491353.3 actor_loss=0.2829 critic_loss=138843902582.1538 entropy=17.6384 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 49460] reward=-158315314.6 actor_loss=0.2528 critic_loss=3835510709101.7144 entropy=17.6452 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 49460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-492101.1 mean_steps=15.4
|
|
[Episode 49470] reward=-121134919.4 actor_loss=0.2195 critic_loss=146969969459.2000 entropy=17.6435 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 49480] reward=-117572691.1 actor_loss=0.2505 critic_loss=139345980074.6667 entropy=17.6506 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 49480] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-693532.4 mean_steps=12.5
|
|
[Episode 49490] reward=-117541457.8 actor_loss=0.3266 critic_loss=147994209757.8667 entropy=17.6473 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 49500] reward=-118738759.6 actor_loss=0.2922 critic_loss=151051901701.6889 entropy=17.6484 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 49500] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-626382.4 mean_steps=11.0
|
|
[Episode 49510] reward=-117938809.4 actor_loss=0.3077 critic_loss=139909469525.3333 entropy=17.6632 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 49520] reward=-122697437.8 actor_loss=0.2820 critic_loss=146375754069.3333 entropy=17.6421 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 49520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440909.9 mean_steps=15.4
|
|
[Episode 49530] reward=-115886803.0 actor_loss=0.4181 critic_loss=140147727473.7778 entropy=17.6308 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 49540] reward=-122041379.0 actor_loss=0.3209 critic_loss=151361408099.0968 entropy=17.6291 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 49540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-508590.6 mean_steps=12.7
|
|
[Episode 49550] reward=-118892131.6 actor_loss=0.2308 critic_loss=164438513931.1304 entropy=17.6206 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 49560] reward=-115688357.5 actor_loss=0.2701 critic_loss=136273589187.7647 entropy=17.6244 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 49560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451216.8 mean_steps=15.5
|
|
[Episode 49570] reward=-121538377.5 actor_loss=0.2685 critic_loss=163573280085.3333 entropy=17.6298 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 49580] reward=-122543508.3 actor_loss=0.2115 critic_loss=145884417774.9333 entropy=17.6138 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 49580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575707.2 mean_steps=12.2
|
|
[Episode 49590] reward=-120801606.1 actor_loss=0.2062 critic_loss=170500763863.5789 entropy=17.6191 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 49600] reward=-120787535.2 actor_loss=0.3802 critic_loss=142505692910.9333 entropy=17.6120 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 49600] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-312036.3 mean_steps=17.1
|
|
[Episode 49610] reward=-119801753.4 actor_loss=0.4057 critic_loss=143378199815.3143 entropy=17.6183 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 49620] reward=-123959957.2 actor_loss=0.2192 critic_loss=148558635752.7273 entropy=17.6087 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 49620] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-685735.1 mean_steps=11.4
|
|
[Episode 49630] reward=-120280824.1 actor_loss=0.3519 critic_loss=147046249995.3778 entropy=17.6155 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 49640] reward=-121222003.3 actor_loss=0.2572 critic_loss=144702202985.9310 entropy=17.6187 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 49640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476059.2 mean_steps=13.6
|
|
[Episode 49650] reward=-116114822.1 actor_loss=0.3173 critic_loss=140454136854.7556 entropy=17.6120 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 49660] reward=-122289724.4 actor_loss=0.2357 critic_loss=194706046156.8000 entropy=17.6137 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 49660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484093.1 mean_steps=14.8
|
|
[Episode 49670] reward=-205447136.6 actor_loss=1.4725 critic_loss=19355471341158.3984 entropy=17.6233 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 49680] reward=-119812297.0 actor_loss=0.2193 critic_loss=133809211741.6585 entropy=17.6400 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 49680] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-404221.8 mean_steps=16.6
|
|
[Episode 49690] reward=-112500514.5 actor_loss=0.2982 critic_loss=153840653653.3333 entropy=17.6434 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 49700] reward=-116915208.6 actor_loss=0.3587 critic_loss=139469234540.0889 entropy=17.6414 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 49700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540972.0 mean_steps=13.2
|
|
[Episode 49710] reward=-117593372.0 actor_loss=0.4657 critic_loss=145425722026.6667 entropy=17.6404 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1517 front_blocked=0
|
|
[Episode 49720] reward=-117456621.3 actor_loss=0.3104 critic_loss=138330947349.9429 entropy=17.6636 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 49720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-505812.3 mean_steps=14.3
|
|
[Episode 49730] reward=-121888493.0 actor_loss=0.2159 critic_loss=148660172920.4706 entropy=17.6704 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 49740] reward=-119093597.6 actor_loss=0.3354 critic_loss=166685091157.3333 entropy=17.6712 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 49740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-388402.5 mean_steps=17.1
|
|
[Episode 49750] reward=-118349144.1 actor_loss=0.2493 critic_loss=142397320760.8889 entropy=17.6824 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 49760] reward=-114708458.5 actor_loss=0.3403 critic_loss=141514817740.8000 entropy=17.6833 approx_kl=0.0117 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 49760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-615005.5 mean_steps=12.7
|
|
[Episode 49770] reward=-121757759.7 actor_loss=0.2648 critic_loss=152927433076.3636 entropy=17.6884 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 49780] reward=-121656294.4 actor_loss=0.3344 critic_loss=146648559856.9412 entropy=17.6713 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 49780] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-303143.7 mean_steps=17.6
|
|
[Episode 49790] reward=-123124947.3 actor_loss=0.2311 critic_loss=148647933021.0909 entropy=17.6869 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 49800] reward=-119531438.8 actor_loss=0.2239 critic_loss=134669318371.5556 entropy=17.6851 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 49800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379982.5 mean_steps=15.9
|
|
[Episode 49810] reward=-121241424.5 actor_loss=0.3512 critic_loss=150392904557.7143 entropy=17.6765 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 49820] reward=-124254866.6 actor_loss=0.1466 critic_loss=143923550851.6571 entropy=17.6680 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 49820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567853.6 mean_steps=12.4
|
|
[Episode 49830] reward=-126669090.7 actor_loss=0.2600 critic_loss=223871445530.9474 entropy=17.6600 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 49840] reward=-119968208.4 actor_loss=0.2762 critic_loss=137817423689.9556 entropy=17.6505 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 49840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-437333.9 mean_steps=13.7
|
|
[Episode 49850] reward=-121657287.6 actor_loss=0.3005 critic_loss=139276445575.5294 entropy=17.6627 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 49860] reward=-119310892.2 actor_loss=0.2988 critic_loss=139985351475.2000 entropy=17.6612 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 49860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-470646.9 mean_steps=15.5
|
|
[Episode 49870] reward=-117901834.6 actor_loss=0.2949 critic_loss=135633950515.2000 entropy=17.6558 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 49880] reward=-119993022.7 actor_loss=0.3855 critic_loss=138432804571.4286 entropy=17.6559 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 49880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586103.4 mean_steps=13.2
|
|
[Episode 49890] reward=-115457907.0 actor_loss=0.2752 critic_loss=136512292971.7895 entropy=17.6503 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 49900] reward=-118632587.9 actor_loss=0.3253 critic_loss=138045496433.7778 entropy=17.6452 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 49900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468102.4 mean_steps=14.8
|
|
[Episode 49910] reward=-117808970.0 actor_loss=0.3051 critic_loss=140357773498.1818 entropy=17.6420 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 49920] reward=-121327395.9 actor_loss=0.3763 critic_loss=152359747349.9429 entropy=17.6417 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 49920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537527.4 mean_steps=14.6
|
|
[Episode 49930] reward=-119239134.2 actor_loss=0.3141 critic_loss=170162724278.8571 entropy=17.6473 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 49940] reward=-110854638.0 actor_loss=0.3614 critic_loss=139812980825.0435 entropy=17.6435 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 49940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547737.3 mean_steps=14.8
|
|
[Episode 49950] reward=-117829994.2 actor_loss=0.2861 critic_loss=142568816275.9111 entropy=17.6456 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 49960] reward=-118260940.5 actor_loss=0.2971 critic_loss=134256303217.7778 entropy=17.6581 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 49960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-393951.6 mean_steps=17.3
|
|
[Episode 49970] reward=-114666877.6 actor_loss=0.2465 critic_loss=149274068504.3810 entropy=17.6655 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 49980] reward=-119011474.2 actor_loss=0.2711 critic_loss=154741424128.0000 entropy=17.6748 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 49980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400029.9 mean_steps=15.9
|
|
[Episode 49990] reward=-119101228.9 actor_loss=0.3031 critic_loss=138858414899.2000 entropy=17.6645 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 50000] reward=-121533684.7 actor_loss=0.2873 critic_loss=146583591561.3658 entropy=17.6588 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 50000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371020.2 mean_steps=16.2
|
|
[Episode 50010] reward=-109982639.0 actor_loss=0.3749 critic_loss=132438968858.9474 entropy=17.6526 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 50020] reward=-120847135.0 actor_loss=0.2770 critic_loss=141977998034.8235 entropy=17.6699 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 50020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-405082.3 mean_steps=16.9
|
|
[Episode 50030] reward=-121302907.2 actor_loss=0.3140 critic_loss=145853031517.0909 entropy=17.6702 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 50040] reward=-116254305.1 actor_loss=0.2832 critic_loss=143679314522.3529 entropy=17.6745 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 50040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495827.7 mean_steps=15.0
|
|
[Episode 50050] reward=-117321910.2 actor_loss=0.3304 critic_loss=138262097768.2963 entropy=17.6727 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 50060] reward=-122704789.8 actor_loss=0.2653 critic_loss=159802064896.0000 entropy=17.6646 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 50060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-477091.7 mean_steps=15.7
|
|
[Episode 50070] reward=-116125830.8 actor_loss=0.3173 critic_loss=139022796572.4445 entropy=17.6724 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 50080] reward=-114461872.3 actor_loss=0.3316 critic_loss=163038714643.6923 entropy=17.6810 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 50080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-587672.3 mean_steps=12.8
|
|
[Episode 50090] reward=-120261041.3 actor_loss=0.2567 critic_loss=144508115698.5263 entropy=17.6733 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 50100] reward=-117347297.2 actor_loss=0.3591 critic_loss=140672688947.2000 entropy=17.6743 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 50100] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-582155.9 mean_steps=11.8
|
|
[Episode 50110] reward=-117825253.3 actor_loss=0.3112 critic_loss=148589357641.1429 entropy=17.6625 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 50120] reward=-122005975.5 actor_loss=0.2422 critic_loss=146029834513.0667 entropy=17.6542 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 50120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-421311.5 mean_steps=14.7
|
|
[Episode 50130] reward=-120698261.5 actor_loss=0.3614 critic_loss=145070756546.2069 entropy=17.6453 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 50140] reward=-116356517.0 actor_loss=0.3301 critic_loss=137096682934.8571 entropy=17.6543 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 50140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-573255.0 mean_steps=12.8
|
|
[Episode 50150] reward=-123055144.8 actor_loss=0.2777 critic_loss=150589351526.4000 entropy=17.6566 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 50160] reward=-119454016.3 actor_loss=0.1825 critic_loss=144326335218.5263 entropy=17.6505 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 50160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-603000.9 mean_steps=13.6
|
|
[Episode 50170] reward=-119693779.7 actor_loss=0.2990 critic_loss=137848078020.9231 entropy=17.6557 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 50180] reward=-117235575.2 actor_loss=0.2505 critic_loss=138567283446.5185 entropy=17.6523 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 50180] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-380684.6 mean_steps=16.8
|
|
[Episode 50190] reward=-122555232.9 actor_loss=0.2698 critic_loss=148300815902.1176 entropy=17.6464 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 50200] reward=-118850204.8 actor_loss=0.3189 critic_loss=143007767347.2000 entropy=17.6501 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 50200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478845.0 mean_steps=13.8
|
|
[Episode 50210] reward=-120865402.3 actor_loss=0.2654 critic_loss=142016342334.5778 entropy=17.6447 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 50220] reward=-117531876.3 actor_loss=0.3036 critic_loss=138309860101.6889 entropy=17.6462 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 50220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473257.6 mean_steps=13.8
|
|
[Episode 50230] reward=-121551249.5 actor_loss=0.2450 critic_loss=144603193016.3200 entropy=17.6501 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 50240] reward=-118220774.0 actor_loss=0.2836 critic_loss=135213623657.4118 entropy=17.6430 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 50240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-693907.4 mean_steps=12.4
|
|
[Episode 50250] reward=-118975398.6 actor_loss=0.3707 critic_loss=144194221278.6087 entropy=17.6385 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 50260] reward=-122017157.4 actor_loss=0.2948 critic_loss=142296098343.3846 entropy=17.6338 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 50260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528591.3 mean_steps=13.8
|
|
[Episode 50270] reward=-118813498.5 actor_loss=0.2566 critic_loss=141173612885.3333 entropy=17.6482 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 50280] reward=-118272601.3 actor_loss=0.2396 critic_loss=141256705170.2857 entropy=17.6517 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 50280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-566782.6 mean_steps=14.2
|
|
[Episode 50290] reward=-115645724.9 actor_loss=0.2887 critic_loss=139648155320.3200 entropy=17.6471 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 50300] reward=-118687599.4 actor_loss=0.2629 critic_loss=139039196081.2308 entropy=17.6476 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 50300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-577072.9 mean_steps=14.4
|
|
[Episode 50310] reward=-118052272.7 actor_loss=0.3308 critic_loss=139875747196.3429 entropy=17.6622 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 50320] reward=-126805640.7 actor_loss=0.3026 critic_loss=152529842995.2000 entropy=17.6916 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 50320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-594873.9 mean_steps=13.1
|
|
[Episode 50330] reward=-118328437.6 actor_loss=0.3398 critic_loss=143010510758.9565 entropy=17.7089 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 50340] reward=-122217751.1 actor_loss=0.3597 critic_loss=148035391605.0286 entropy=17.7115 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 50340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-615849.6 mean_steps=12.0
|
|
[Episode 50350] reward=-119726408.3 actor_loss=0.2301 critic_loss=147093702451.2000 entropy=17.7300 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 50360] reward=-120898074.8 actor_loss=0.2616 critic_loss=145767530496.0000 entropy=17.7301 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 50360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550926.5 mean_steps=13.3
|
|
[Episode 50370] reward=-121621238.8 actor_loss=0.3200 critic_loss=142359868547.2820 entropy=17.7349 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 50380] reward=-117542389.9 actor_loss=0.3502 critic_loss=152312438519.7419 entropy=17.7219 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 50380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492305.7 mean_steps=14.6
|
|
[Episode 50390] reward=-123441066.0 actor_loss=0.3300 critic_loss=174675201228.8000 entropy=17.7249 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 50400] reward=-116602915.4 actor_loss=0.3424 critic_loss=143500256768.0000 entropy=17.7287 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 50400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529002.4 mean_steps=14.2
|
|
[Episode 50410] reward=-120166325.6 actor_loss=0.2309 critic_loss=141715981616.4324 entropy=17.7443 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 50420] reward=-120049301.7 actor_loss=0.3406 critic_loss=139946082668.0889 entropy=17.7525 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 50420] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-694887.9 mean_steps=11.4
|
|
[Episode 50430] reward=-118216940.2 actor_loss=0.2588 critic_loss=139329662113.6842 entropy=17.7469 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 50440] reward=-120372939.1 actor_loss=0.2849 critic_loss=139286734060.3077 entropy=17.7388 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 50440] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-570779.6 mean_steps=11.8
|
|
[Episode 50450] reward=-118127890.9 actor_loss=0.3646 critic_loss=155913220986.4348 entropy=17.7341 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 50460] reward=-117176746.8 actor_loss=0.3682 critic_loss=151090742541.4737 entropy=17.7361 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 50460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511734.0 mean_steps=13.8
|
|
[Episode 50470] reward=-120009288.9 actor_loss=0.1906 critic_loss=140006701528.6154 entropy=17.7325 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 50480] reward=-117799699.1 actor_loss=0.3372 critic_loss=146954872890.5143 entropy=17.7327 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 50480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428919.6 mean_steps=15.7
|
|
[Episode 50490] reward=-119833844.5 actor_loss=0.3009 critic_loss=143136479744.0000 entropy=17.7397 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 50500] reward=-124985947.5 actor_loss=0.2049 critic_loss=148666629513.8462 entropy=17.7428 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 50500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368269.1 mean_steps=15.8
|
|
[Episode 50510] reward=-116260644.4 actor_loss=0.2956 critic_loss=137753901056.0000 entropy=17.7422 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 50520] reward=-123428028.6 actor_loss=0.1774 critic_loss=145934904569.0811 entropy=17.7439 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 50520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419696.4 mean_steps=15.3
|
|
[Episode 50530] reward=-116569693.9 actor_loss=0.3830 critic_loss=140572880523.6364 entropy=17.7404 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 50540] reward=-120441177.6 actor_loss=0.1824 critic_loss=139864132686.7692 entropy=17.7284 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 50540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450049.9 mean_steps=15.8
|
|
[Episode 50550] reward=-121597309.0 actor_loss=0.3010 critic_loss=142943382272.0000 entropy=17.7079 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 50560] reward=-111797689.5 actor_loss=0.3332 critic_loss=128384760338.9630 entropy=17.6853 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 50560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481029.4 mean_steps=14.9
|
|
[Episode 50570] reward=-122884570.8 actor_loss=0.2669 critic_loss=181106196844.0889 entropy=17.6629 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 50580] reward=-123619482.2 actor_loss=0.2030 critic_loss=154875412844.0889 entropy=17.6682 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 50580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-391547.2 mean_steps=14.7
|
|
[Episode 50590] reward=-118014092.4 actor_loss=0.2469 critic_loss=139177723904.0000 entropy=17.6647 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 50600] reward=-123417664.7 actor_loss=0.2497 critic_loss=159345215674.1818 entropy=17.6564 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 50600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473358.2 mean_steps=14.4
|
|
[Episode 50610] reward=-118201573.4 actor_loss=0.1342 critic_loss=143770564835.5555 entropy=17.6692 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 50620] reward=-123285983.0 actor_loss=0.3398 critic_loss=148464895537.5484 entropy=17.6570 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 50620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-406173.0 mean_steps=14.1
|
|
[Episode 50630] reward=-118463928.0 actor_loss=0.3384 critic_loss=140299996811.6364 entropy=17.6688 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 50640] reward=-118295791.8 actor_loss=0.3794 critic_loss=158916352097.5238 entropy=17.6456 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 50640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-514990.8 mean_steps=15.6
|
|
[Episode 50650] reward=-122474845.0 actor_loss=0.3447 critic_loss=161511849515.8857 entropy=17.6349 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 50660] reward=-124532609.0 actor_loss=0.2749 critic_loss=148909575736.8889 entropy=17.6229 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 50660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-514807.3 mean_steps=14.6
|
|
[Episode 50670] reward=-119078866.5 actor_loss=0.3392 critic_loss=139137222974.5778 entropy=17.6172 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 50680] reward=-120491383.6 actor_loss=0.3063 critic_loss=151943435605.3333 entropy=17.6175 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 50680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504453.8 mean_steps=13.7
|
|
[Episode 50690] reward=-117129466.3 actor_loss=0.3906 critic_loss=135507926584.8889 entropy=17.6117 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 50700] reward=-119529593.0 actor_loss=0.3100 critic_loss=134329752143.6444 entropy=17.5987 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 50700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426060.8 mean_steps=15.0
|
|
[Episode 50710] reward=-113596077.2 actor_loss=0.3494 critic_loss=131888460823.8139 entropy=17.5909 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 50720] reward=-115195901.4 actor_loss=0.3254 critic_loss=137524437772.1905 entropy=17.6121 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 50720] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-682988.8 mean_steps=11.3
|
|
[Episode 50730] reward=-121424698.7 actor_loss=0.2199 critic_loss=141191342938.8387 entropy=17.6139 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 50740] reward=-121248200.9 actor_loss=0.3332 critic_loss=139986845354.6667 entropy=17.6126 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 50740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-444297.2 mean_steps=15.8
|
|
[Episode 50750] reward=-123450455.9 actor_loss=0.1870 critic_loss=150450667910.0952 entropy=17.6195 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 50760] reward=-120533000.2 actor_loss=0.2625 critic_loss=142724093621.6774 entropy=17.6216 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 50760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452058.4 mean_steps=14.1
|
|
[Episode 50770] reward=-119170468.8 actor_loss=0.3800 critic_loss=140331125485.2683 entropy=17.6248 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 50780] reward=-118137560.7 actor_loss=0.2631 critic_loss=135046525383.1111 entropy=17.6369 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 50780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-387384.0 mean_steps=15.2
|
|
[Episode 50790] reward=-117188831.7 actor_loss=0.2380 critic_loss=140743333595.4286 entropy=17.6300 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 50800] reward=-116866444.1 actor_loss=0.2818 critic_loss=136800433493.3333 entropy=17.6291 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 50800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463682.8 mean_steps=14.3
|
|
[Episode 50810] reward=-119291610.0 actor_loss=0.2649 critic_loss=135387200768.0000 entropy=17.6165 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 50820] reward=-120742176.0 actor_loss=0.2040 critic_loss=139191811627.8857 entropy=17.6001 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 50820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417382.1 mean_steps=15.3
|
|
[Episode 50830] reward=-116789840.7 actor_loss=0.2428 critic_loss=132608023483.7333 entropy=17.5770 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 50840] reward=-118632147.8 actor_loss=0.2957 critic_loss=141436746547.2000 entropy=17.5626 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 50840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392054.4 mean_steps=15.8
|
|
[Episode 50850] reward=-123179779.2 actor_loss=0.2402 critic_loss=145410404503.7037 entropy=17.5726 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 50860] reward=-124450809.5 actor_loss=0.2432 critic_loss=193198270366.4762 entropy=17.5902 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 50860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430143.0 mean_steps=14.4
|
|
[Episode 50870] reward=-118460555.0 actor_loss=0.3425 critic_loss=186154123264.0000 entropy=17.5846 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 50880] reward=-121307107.1 actor_loss=0.2704 critic_loss=144854163846.0952 entropy=17.5949 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 50880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-582354.5 mean_steps=13.4
|
|
[Episode 50890] reward=-115869453.6 actor_loss=0.3456 critic_loss=139007763212.1905 entropy=17.5913 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 50900] reward=-120035126.0 actor_loss=0.2633 critic_loss=153125304807.6190 entropy=17.5860 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 50900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483401.6 mean_steps=14.6
|
|
[Episode 50910] reward=-116101886.5 actor_loss=0.2410 critic_loss=138188352625.7778 entropy=17.5868 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 50920] reward=-119718446.0 actor_loss=0.2505 critic_loss=137985186201.6000 entropy=17.5784 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 50920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-642049.1 mean_steps=12.8
|
|
[Episode 50930] reward=-116885334.5 actor_loss=0.3157 critic_loss=133040970200.6154 entropy=17.5773 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 50940] reward=-123987512.3 actor_loss=0.2664 critic_loss=174738411520.0000 entropy=17.5810 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 50940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535764.9 mean_steps=13.2
|
|
[Episode 50950] reward=-115861796.1 actor_loss=0.2310 critic_loss=148520710567.7242 entropy=17.5772 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 50960] reward=-111615673.1 actor_loss=0.3696 critic_loss=133519272960.0000 entropy=17.5892 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 50960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422879.9 mean_steps=15.6
|
|
[Episode 50970] reward=-122895764.0 actor_loss=0.2812 critic_loss=152296414439.2258 entropy=17.5793 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 50980] reward=-111285858.2 actor_loss=0.4292 critic_loss=126395456079.6444 entropy=17.5681 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 50980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-393598.9 mean_steps=15.7
|
|
[Episode 50990] reward=-123012375.9 actor_loss=0.2837 critic_loss=144830617413.8182 entropy=17.5671 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 51000] reward=-111758811.8 actor_loss=0.3080 critic_loss=128121387987.4783 entropy=17.5674 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 51000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-611183.3 mean_steps=12.0
|
|
[Episode 51010] reward=-123723181.8 actor_loss=0.3090 critic_loss=145487436913.7778 entropy=17.5724 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 51020] reward=-116981048.3 actor_loss=0.2966 critic_loss=146672732754.5807 entropy=17.5552 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 51020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504549.2 mean_steps=14.8
|
|
[Episode 51030] reward=-114256258.0 actor_loss=0.2945 critic_loss=135156996029.9355 entropy=17.5584 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 51040] reward=-123213496.9 actor_loss=0.2559 critic_loss=154819617751.0400 entropy=17.5684 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 51040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560420.9 mean_steps=13.3
|
|
[Episode 51050] reward=-123057746.4 actor_loss=0.2683 critic_loss=336654564647.8222 entropy=17.5841 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 51060] reward=-120225607.0 actor_loss=0.2724 critic_loss=141854675945.2444 entropy=17.5943 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 51060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560808.9 mean_steps=13.4
|
|
[Episode 51070] reward=-121586945.5 actor_loss=0.3302 critic_loss=142855537071.1579 entropy=17.5807 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 51080] reward=-121952024.1 actor_loss=0.2393 critic_loss=143716711719.8222 entropy=17.5814 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 51080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-523460.9 mean_steps=14.9
|
|
[Episode 51090] reward=-124042082.3 actor_loss=0.2324 critic_loss=160656018773.3333 entropy=17.5810 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 51100] reward=-115683944.3 actor_loss=0.2536 critic_loss=138019765479.2258 entropy=17.5832 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 51100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-595719.0 mean_steps=13.8
|
|
[Episode 51110] reward=-118444407.3 actor_loss=0.3116 critic_loss=153172141056.0000 entropy=17.5913 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 51120] reward=-117029140.4 actor_loss=0.3120 critic_loss=134276866479.1579 entropy=17.5954 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 51120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-538252.6 mean_steps=12.4
|
|
[Episode 51130] reward=-113466818.8 actor_loss=0.3431 critic_loss=133406961827.8400 entropy=17.6052 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 51140] reward=-115939165.0 actor_loss=0.2971 critic_loss=141231036643.5555 entropy=17.6123 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 51140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473258.4 mean_steps=14.9
|
|
[Episode 51150] reward=-117758844.1 actor_loss=0.2928 critic_loss=137714130944.0000 entropy=17.6196 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 51160] reward=-123575235.7 actor_loss=0.2348 critic_loss=153888946663.6190 entropy=17.6138 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 51160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-585069.6 mean_steps=14.4
|
|
[Episode 51170] reward=-115836612.1 actor_loss=0.3249 critic_loss=140167526520.4706 entropy=17.6142 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 51180] reward=-113366700.8 actor_loss=0.3484 critic_loss=131638864554.6667 entropy=17.6183 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 51180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-568926.2 mean_steps=14.1
|
|
[Episode 51190] reward=-117473229.8 actor_loss=0.2770 critic_loss=135405882208.7111 entropy=17.6123 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 51200] reward=-115539735.4 actor_loss=0.2132 critic_loss=148222207422.3590 entropy=17.5990 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 51200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612459.5 mean_steps=11.8
|
|
[Episode 51210] reward=-113659240.7 actor_loss=0.3103 critic_loss=138846271260.4445 entropy=17.5844 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 51220] reward=-116512203.4 actor_loss=0.2500 critic_loss=142671662102.7556 entropy=17.5608 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 51220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528522.2 mean_steps=14.1
|
|
[Episode 51230] reward=-119251099.1 actor_loss=0.2159 critic_loss=147588304005.5652 entropy=17.5584 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 51240] reward=-116939140.8 actor_loss=0.3361 critic_loss=143354528654.2222 entropy=17.5639 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 51240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462749.2 mean_steps=15.8
|
|
[Episode 51250] reward=-125232566.6 actor_loss=0.2387 critic_loss=155230838039.2727 entropy=17.5633 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 51260] reward=-120906344.8 actor_loss=0.2864 critic_loss=144749770279.3846 entropy=17.5567 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 51260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-367583.7 mean_steps=16.0
|
|
[Episode 51270] reward=-115807815.3 actor_loss=0.2427 critic_loss=134320956652.3077 entropy=17.5502 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 51280] reward=-122240975.1 actor_loss=0.2226 critic_loss=146824009482.2400 entropy=17.5410 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 51280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539202.1 mean_steps=14.1
|
|
[Episode 51290] reward=-122374402.7 actor_loss=0.2569 critic_loss=154804125696.0000 entropy=17.5479 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 51300] reward=-122072516.4 actor_loss=0.2333 critic_loss=156959629942.1538 entropy=17.5429 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 51300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483264.4 mean_steps=15.0
|
|
[Episode 51310] reward=-113521730.0 actor_loss=0.3885 critic_loss=133123395584.0000 entropy=17.5513 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 51320] reward=-117222770.6 actor_loss=0.3029 critic_loss=145869375577.0435 entropy=17.5534 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 51320] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-746693.5 mean_steps=10.8
|
|
[Episode 51330] reward=-118350836.2 actor_loss=0.2304 critic_loss=139256946688.0000 entropy=17.5520 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 51340] reward=-120165779.5 actor_loss=0.2657 critic_loss=143001771121.7778 entropy=17.5435 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 51340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565269.7 mean_steps=13.0
|
|
[Episode 51350] reward=-121425762.2 actor_loss=0.2189 critic_loss=152512445899.0345 entropy=17.5372 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 51360] reward=-118208441.5 actor_loss=0.2971 critic_loss=151568313782.8571 entropy=17.5504 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 51360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-656750.6 mean_steps=11.9
|
|
[Episode 51370] reward=-122135267.9 actor_loss=0.3128 critic_loss=143519101513.1429 entropy=17.5500 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 51380] reward=-118201504.6 actor_loss=0.3154 critic_loss=139978672600.6154 entropy=17.5668 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 51380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-673907.1 mean_steps=13.0
|
|
[Episode 51390] reward=-121596541.3 actor_loss=0.2706 critic_loss=144316592500.3636 entropy=17.5569 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 51400] reward=-117904713.3 actor_loss=0.3024 critic_loss=144539926983.1111 entropy=17.5569 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 51400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459467.8 mean_steps=14.4
|
|
[Episode 51410] reward=-119507449.2 actor_loss=0.2773 critic_loss=138365902103.2727 entropy=17.5554 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 51420] reward=-118353175.7 actor_loss=0.3610 critic_loss=136906124219.7333 entropy=17.5501 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 51420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-633748.8 mean_steps=12.2
|
|
[Episode 51430] reward=-117017708.1 actor_loss=0.2592 critic_loss=135622213391.0588 entropy=17.5662 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 51440] reward=-114117186.8 actor_loss=0.2658 critic_loss=129956087125.3333 entropy=17.5614 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 51440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485681.5 mean_steps=13.6
|
|
[Episode 51450] reward=-121614555.7 actor_loss=0.2864 critic_loss=146946767406.5454 entropy=17.5706 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 51460] reward=-120127211.9 actor_loss=0.3164 critic_loss=145675572662.8571 entropy=17.5660 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 51460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531712.6 mean_steps=13.3
|
|
[Episode 51470] reward=-114730050.3 actor_loss=0.2624 critic_loss=136013010261.3333 entropy=17.5667 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 51480] reward=-117240131.1 actor_loss=0.2696 critic_loss=134235344622.9333 entropy=17.5800 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 51480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444439.2 mean_steps=15.7
|
|
[Episode 51490] reward=-123019905.1 actor_loss=0.1207 critic_loss=145353002097.7778 entropy=17.5666 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 51500] reward=-120585470.3 actor_loss=0.2979 critic_loss=139938952338.2857 entropy=17.5923 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 51500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-370107.5 mean_steps=15.6
|
|
[Episode 51510] reward=-117788799.6 actor_loss=0.3637 critic_loss=140629486955.3548 entropy=17.5853 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 51520] reward=-114261480.7 actor_loss=0.2790 critic_loss=130350980407.6522 entropy=17.5799 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 51520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-569989.5 mean_steps=12.2
|
|
[Episode 51530] reward=-120373744.1 actor_loss=0.3112 critic_loss=139961681596.6316 entropy=17.5786 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 51540] reward=-122328341.8 actor_loss=0.2288 critic_loss=140814622720.0000 entropy=17.5576 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 51540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442444.7 mean_steps=14.7
|
|
[Episode 51550] reward=-114734961.4 actor_loss=0.3986 critic_loss=143166496768.0000 entropy=17.5605 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 51560] reward=-121775151.9 actor_loss=0.1893 critic_loss=140599591594.6667 entropy=17.5536 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 51560] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-283207.8 mean_steps=16.8
|
|
[Episode 51570] reward=-120340578.5 actor_loss=0.2413 critic_loss=161677932953.6000 entropy=17.5479 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 51580] reward=-120180497.6 actor_loss=0.2869 critic_loss=140552463018.6667 entropy=17.5552 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 51580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-429345.6 mean_steps=14.7
|
|
[Episode 51590] reward=-116045783.2 actor_loss=0.2690 critic_loss=134559549253.8182 entropy=17.5682 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 51600] reward=-110303772.6 actor_loss=0.3125 critic_loss=134765616670.1176 entropy=17.5789 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 51600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515506.5 mean_steps=13.6
|
|
[Episode 51610] reward=-119413473.2 actor_loss=0.2616 critic_loss=140019448295.6190 entropy=17.5786 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 51620] reward=-116838735.4 actor_loss=0.3610 critic_loss=141747907840.0000 entropy=17.5782 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 51620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429705.1 mean_steps=15.1
|
|
[Episode 51630] reward=-122748571.6 actor_loss=0.3032 critic_loss=142219749376.0000 entropy=17.5772 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 51640] reward=-123352397.1 actor_loss=0.2932 critic_loss=149387038479.0588 entropy=17.5729 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 51640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540028.6 mean_steps=13.8
|
|
[Episode 51650] reward=-118567429.6 actor_loss=0.3588 critic_loss=141779257088.0000 entropy=17.5822 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 51660] reward=-116473884.7 actor_loss=0.2831 critic_loss=134894680576.0000 entropy=17.5860 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 51660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-583231.2 mean_steps=13.1
|
|
[Episode 51670] reward=-117438771.7 actor_loss=0.3076 critic_loss=130846606429.0909 entropy=17.5812 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 51680] reward=-114312716.8 actor_loss=0.2602 critic_loss=134675682099.2000 entropy=17.6025 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 51680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502453.1 mean_steps=15.1
|
|
[Episode 51690] reward=-118855395.6 actor_loss=0.3876 critic_loss=135542347093.3333 entropy=17.5983 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 51700] reward=-115612007.9 actor_loss=0.2725 critic_loss=134130940313.6000 entropy=17.6012 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 51700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534888.5 mean_steps=13.8
|
|
[Episode 51710] reward=-113201313.3 actor_loss=0.3096 critic_loss=129480346925.1765 entropy=17.5952 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 51720] reward=-114430781.7 actor_loss=0.2345 critic_loss=132036848739.0968 entropy=17.5909 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 51720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498132.4 mean_steps=14.8
|
|
[Episode 51730] reward=-111169382.7 actor_loss=0.3203 critic_loss=131338548077.7143 entropy=17.5843 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 51740] reward=-123973270.2 actor_loss=0.2504 critic_loss=151488339577.9048 entropy=17.5856 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 51740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-387850.1 mean_steps=15.6
|
|
[Episode 51750] reward=-114560990.2 actor_loss=0.3015 critic_loss=137540702966.5185 entropy=17.5930 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 51760] reward=-116355692.4 actor_loss=0.1599 critic_loss=141752601320.7273 entropy=17.5853 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 51760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-474334.6 mean_steps=15.2
|
|
[Episode 51770] reward=-114786494.9 actor_loss=0.3530 critic_loss=135914407582.8965 entropy=17.5760 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 51780] reward=-115463730.0 actor_loss=0.2706 critic_loss=135627945691.4286 entropy=17.6062 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 51780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572611.5 mean_steps=12.8
|
|
[Episode 51790] reward=-118052221.8 actor_loss=0.3482 critic_loss=137618292736.0000 entropy=17.6012 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 51800] reward=-123648793.4 actor_loss=0.2890 critic_loss=155943081301.3333 entropy=17.5896 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 51800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-568857.6 mean_steps=11.3
|
|
[Episode 51810] reward=-120408461.8 actor_loss=0.2999 critic_loss=142540647082.6667 entropy=17.5988 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 51820] reward=-120514016.6 actor_loss=0.3291 critic_loss=148833154389.3333 entropy=17.6050 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 51820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502301.3 mean_steps=14.3
|
|
[Episode 51830] reward=-117778417.6 actor_loss=0.2893 critic_loss=140763573248.0000 entropy=17.6025 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 51840] reward=-126800165.5 actor_loss=0.1932 critic_loss=182224042046.0606 entropy=17.6016 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 51840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463902.7 mean_steps=14.4
|
|
[Episode 51850] reward=-116521088.5 actor_loss=0.2790 critic_loss=134991841012.8696 entropy=17.5993 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 51860] reward=-116750624.8 actor_loss=0.3396 critic_loss=134295141888.0000 entropy=17.5908 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 51860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-480183.5 mean_steps=15.1
|
|
[Episode 51870] reward=-119554356.2 actor_loss=0.2876 critic_loss=139349527210.6667 entropy=17.5810 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 51880] reward=-114362166.6 actor_loss=0.3743 critic_loss=135987062411.6364 entropy=17.5816 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 51880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-601690.1 mean_steps=13.8
|
|
[Episode 51890] reward=-117028936.7 actor_loss=0.3164 critic_loss=140521150464.0000 entropy=17.5726 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 51900] reward=-115836936.9 actor_loss=0.3023 critic_loss=131191983405.1765 entropy=17.5646 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 51900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455494.5 mean_steps=14.2
|
|
[Episode 51910] reward=-117752032.6 actor_loss=0.2732 critic_loss=138984932752.6956 entropy=17.5699 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 51920] reward=-116964735.9 actor_loss=0.3474 critic_loss=140992472557.0370 entropy=17.5710 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 51920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471254.0 mean_steps=14.8
|
|
[Episode 51930] reward=-116101574.0 actor_loss=0.2997 critic_loss=134056375543.1724 entropy=17.5766 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 51940] reward=-116361767.2 actor_loss=0.3388 critic_loss=135789581548.3077 entropy=17.5865 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 51940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575468.1 mean_steps=13.2
|
|
[Episode 51950] reward=-114909179.4 actor_loss=0.3411 critic_loss=139409307940.5714 entropy=17.6006 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 51960] reward=-116016113.9 actor_loss=0.2102 critic_loss=136876872681.2444 entropy=17.6002 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 51960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416077.6 mean_steps=15.2
|
|
[Episode 51970] reward=-118456407.6 actor_loss=0.3103 critic_loss=137869980876.8000 entropy=17.6103 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 51980] reward=-119481477.9 actor_loss=0.2611 critic_loss=137089535277.1765 entropy=17.6108 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 51980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-632269.0 mean_steps=12.1
|
|
[Episode 51990] reward=-117414322.1 actor_loss=0.3155 critic_loss=137220900930.0645 entropy=17.6132 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 52000] reward=-112980240.4 actor_loss=0.3652 critic_loss=129200692875.6364 entropy=17.6249 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 52000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623633.2 mean_steps=11.8
|
|
[Episode 52010] reward=-120367633.6 actor_loss=0.3241 critic_loss=143335360512.0000 entropy=17.6057 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 52020] reward=-120272058.0 actor_loss=0.3416 critic_loss=146020057088.0000 entropy=17.6181 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 52020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-389264.6 mean_steps=15.8
|
|
[Episode 52030] reward=-118321489.1 actor_loss=0.2542 critic_loss=136602902528.0000 entropy=17.6197 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 52040] reward=-118421280.9 actor_loss=0.3965 critic_loss=138782235247.3044 entropy=17.6055 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 52040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-456162.1 mean_steps=13.8
|
|
[Episode 52050] reward=-119432860.4 actor_loss=0.3674 critic_loss=145613026099.2000 entropy=17.6105 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 52060] reward=-109609238.3 actor_loss=0.3014 critic_loss=128962734398.5778 entropy=17.6235 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 52060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571718.4 mean_steps=13.4
|
|
[Episode 52070] reward=-116846762.9 actor_loss=0.2098 critic_loss=141196004104.8276 entropy=17.6141 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 52080] reward=-108692133.1 actor_loss=0.4520 critic_loss=129518668276.6222 entropy=17.6252 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 52080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471242.6 mean_steps=14.1
|
|
[Episode 52090] reward=-117188177.2 actor_loss=0.2552 critic_loss=132932759096.8889 entropy=17.6245 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 52100] reward=-116987151.5 actor_loss=0.2355 critic_loss=132969124486.7368 entropy=17.6327 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 52100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442574.9 mean_steps=14.4
|
|
[Episode 52110] reward=-114435844.4 actor_loss=0.2636 critic_loss=135781704893.6296 entropy=17.6189 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 52120] reward=-121161405.0 actor_loss=0.2991 critic_loss=140295888896.0000 entropy=17.6161 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 52120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485397.8 mean_steps=14.6
|
|
[Episode 52130] reward=-110768241.7 actor_loss=0.2550 critic_loss=133809386057.1429 entropy=17.6086 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 52140] reward=-116194389.5 actor_loss=0.3844 critic_loss=136326562702.2222 entropy=17.6022 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 52140] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-738850.8 mean_steps=10.9
|
|
[Episode 52150] reward=-117748912.2 actor_loss=0.3431 critic_loss=138618670213.5652 entropy=17.6028 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 52160] reward=-115163496.1 actor_loss=0.2144 critic_loss=141121696426.6667 entropy=17.6098 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 52160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481621.8 mean_steps=13.6
|
|
[Episode 52170] reward=-118175665.7 actor_loss=0.3111 critic_loss=146036490581.3333 entropy=17.6237 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 52180] reward=-120622991.2 actor_loss=0.2957 critic_loss=139214986353.7778 entropy=17.6245 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 52180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-602485.8 mean_steps=13.7
|
|
[Episode 52190] reward=-121432829.1 actor_loss=0.2855 critic_loss=146947130919.3846 entropy=17.6432 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 52200] reward=-123293288.4 actor_loss=0.2256 critic_loss=144524478691.5555 entropy=17.6377 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 52200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415050.3 mean_steps=16.2
|
|
[Episode 52210] reward=-119398110.1 actor_loss=0.3893 critic_loss=145254124058.9474 entropy=17.6299 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 52220] reward=-115337571.0 actor_loss=0.2258 critic_loss=137709637416.4211 entropy=17.6379 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 52220] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-661668.4 mean_steps=10.3
|
|
[Episode 52230] reward=-119114709.9 actor_loss=0.2984 critic_loss=426908081872.5926 entropy=17.6403 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 52240] reward=-114780240.9 actor_loss=0.2865 critic_loss=134454527772.4444 entropy=17.6269 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 52240] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-368923.3 mean_steps=16.9
|
|
[Episode 52250] reward=-112023864.0 actor_loss=0.3557 critic_loss=133680321142.1538 entropy=17.6393 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 52260] reward=-116021024.5 actor_loss=0.3669 critic_loss=135600031402.6667 entropy=17.6365 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 52260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455744.0 mean_steps=15.4
|
|
[Episode 52270] reward=-119999511.8 actor_loss=0.2428 critic_loss=138345820918.5185 entropy=17.6425 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 52280] reward=-113445019.8 actor_loss=0.3178 critic_loss=131404446981.9535 entropy=17.6344 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 52280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-620937.6 mean_steps=13.9
|
|
[Episode 52290] reward=-110977569.4 actor_loss=0.3016 critic_loss=126014983093.0732 entropy=17.6371 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 52300] reward=-119438096.0 actor_loss=0.2212 critic_loss=137425653467.4286 entropy=17.6333 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 52300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-499003.5 mean_steps=14.8
|
|
[Episode 52310] reward=-117378910.5 actor_loss=0.2965 critic_loss=136875909120.0000 entropy=17.6271 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 52320] reward=-112226769.3 actor_loss=0.3507 critic_loss=135179629363.2000 entropy=17.6298 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 52320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-489653.4 mean_steps=13.1
|
|
[Episode 52330] reward=-120967976.9 actor_loss=0.2840 critic_loss=141961780385.6842 entropy=17.6227 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 52340] reward=-118351794.4 actor_loss=0.2290 critic_loss=134426946953.8462 entropy=17.6287 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 52340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566957.2 mean_steps=12.3
|
|
[Episode 52350] reward=-116285910.9 actor_loss=0.2502 critic_loss=136061808399.0588 entropy=17.6386 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 52360] reward=-121264074.1 actor_loss=0.2887 critic_loss=141496797866.6667 entropy=17.6433 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 52360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438455.0 mean_steps=15.2
|
|
[Episode 52370] reward=-120032304.1 actor_loss=0.3349 critic_loss=142350366037.3333 entropy=17.6524 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 52380] reward=-120649129.8 actor_loss=0.2813 critic_loss=143700174802.4889 entropy=17.6603 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 52380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493964.0 mean_steps=14.4
|
|
[Episode 52390] reward=-120533262.6 actor_loss=0.2647 critic_loss=142519527742.5778 entropy=17.6631 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 52400] reward=-126798086.9 actor_loss=0.2756 critic_loss=153910926189.7143 entropy=17.6701 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 52400] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-327099.6 mean_steps=17.2
|
|
[Episode 52410] reward=-113599115.3 actor_loss=0.2698 critic_loss=132196288144.4103 entropy=17.6645 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 52420] reward=-117113885.8 actor_loss=0.2436 critic_loss=142919193395.2000 entropy=17.6639 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 52420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451298.4 mean_steps=15.2
|
|
[Episode 52430] reward=-114476561.3 actor_loss=0.2066 critic_loss=131622420662.0444 entropy=17.6531 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 52440] reward=-111855097.5 actor_loss=0.3602 critic_loss=129821157376.0000 entropy=17.6394 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 52440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578513.9 mean_steps=12.6
|
|
[Episode 52450] reward=-117994252.8 actor_loss=0.3005 critic_loss=134621557828.2667 entropy=17.6434 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 52460] reward=-121435536.6 actor_loss=0.2573 critic_loss=143456200068.4138 entropy=17.6413 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 52460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509464.7 mean_steps=13.9
|
|
[Episode 52470] reward=-120005238.1 actor_loss=0.3506 critic_loss=141506098705.6552 entropy=17.6415 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 52480] reward=-112687431.1 actor_loss=0.4535 critic_loss=134820890880.0000 entropy=17.6419 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 52480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451070.2 mean_steps=14.2
|
|
[Episode 52490] reward=-118663698.8 actor_loss=0.3240 critic_loss=138312148157.6296 entropy=17.6300 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 52500] reward=-120470890.4 actor_loss=0.2689 critic_loss=138557260488.3478 entropy=17.6293 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 52500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-406814.9 mean_steps=16.2
|
|
[Episode 52510] reward=-120279288.9 actor_loss=0.3235 critic_loss=144023455334.4000 entropy=17.6342 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 52520] reward=-115981028.8 actor_loss=0.2728 critic_loss=135312582246.4000 entropy=17.6345 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 52520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-489976.1 mean_steps=15.5
|
|
[Episode 52530] reward=-120151916.7 actor_loss=0.3027 critic_loss=141189559182.2222 entropy=17.6106 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 52540] reward=-120236541.0 actor_loss=0.2337 critic_loss=140635844608.0000 entropy=17.6135 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 52540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392231.5 mean_steps=16.2
|
|
[Episode 52550] reward=-122011510.3 actor_loss=0.3537 critic_loss=144286182435.3103 entropy=17.6022 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 52560] reward=-120630482.4 actor_loss=0.2607 critic_loss=141926021839.5676 entropy=17.6037 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 52560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578812.7 mean_steps=12.4
|
|
[Episode 52570] reward=-121336058.1 actor_loss=0.2055 critic_loss=141548240896.0000 entropy=17.6123 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 52580] reward=-120372472.2 actor_loss=0.2741 critic_loss=142646950155.1304 entropy=17.6164 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 52580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525547.9 mean_steps=13.2
|
|
[Episode 52590] reward=-122277137.4 actor_loss=0.2816 critic_loss=139790385152.0000 entropy=17.6285 approx_kl=0.0111 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 52600] reward=-116768449.2 actor_loss=0.2744 critic_loss=137445601006.9333 entropy=17.6273 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 52600] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-638776.6 mean_steps=12.1
|
|
[Episode 52610] reward=-116617135.1 actor_loss=0.3349 critic_loss=137532919971.8400 entropy=17.6289 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 52620] reward=-124024608.0 actor_loss=0.1796 critic_loss=146264519168.0000 entropy=17.6269 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 52620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530188.4 mean_steps=13.6
|
|
[Episode 52630] reward=-112853310.4 actor_loss=0.2998 critic_loss=134398814050.4615 entropy=17.6408 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 52640] reward=-121046694.9 actor_loss=0.2690 critic_loss=140996490854.4000 entropy=17.6392 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 52640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428126.5 mean_steps=15.4
|
|
[Episode 52650] reward=-114405904.9 actor_loss=0.2845 critic_loss=135251084824.3810 entropy=17.6374 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 52660] reward=-121845896.1 actor_loss=0.2542 critic_loss=143708257591.6522 entropy=17.6374 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 52660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580885.6 mean_steps=12.4
|
|
[Episode 52670] reward=-116166033.4 actor_loss=0.3445 critic_loss=136554279731.2000 entropy=17.6291 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 52680] reward=-112917334.1 actor_loss=0.3437 critic_loss=140216845777.4546 entropy=17.6226 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 52680] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-349376.3 mean_steps=17.0
|
|
[Episode 52690] reward=-120885585.5 actor_loss=0.1611 critic_loss=141534232576.0000 entropy=17.6287 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 52700] reward=-114520967.2 actor_loss=0.3505 critic_loss=135859121720.8889 entropy=17.6257 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 52700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449206.7 mean_steps=14.2
|
|
[Episode 52710] reward=-109893205.5 actor_loss=0.3248 critic_loss=130835602145.2800 entropy=17.6214 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 52720] reward=-118220744.7 actor_loss=0.1473 critic_loss=139891860868.4138 entropy=17.6268 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 52720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-653315.2 mean_steps=12.2
|
|
[Episode 52730] reward=-111069372.2 actor_loss=0.3290 critic_loss=128035854456.4706 entropy=17.6219 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 52740] reward=-118567585.8 actor_loss=0.2267 critic_loss=146401271125.3333 entropy=17.6355 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 52740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493522.5 mean_steps=13.4
|
|
[Episode 52750] reward=-120327035.8 actor_loss=0.2895 critic_loss=133087113394.0870 entropy=17.6230 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 52760] reward=-118483182.8 actor_loss=0.3493 critic_loss=138058149983.2558 entropy=17.6435 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 52760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-604325.2 mean_steps=12.7
|
|
[Episode 52770] reward=-118429121.0 actor_loss=0.3175 critic_loss=149102000481.1035 entropy=17.6493 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 52780] reward=-109323491.8 actor_loss=0.3744 critic_loss=130351549644.8000 entropy=17.6486 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 52780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578742.0 mean_steps=12.2
|
|
[Episode 52790] reward=-119877477.2 actor_loss=0.2388 critic_loss=140890018247.1111 entropy=17.6549 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 52800] reward=-122465512.2 actor_loss=0.2603 critic_loss=143027242052.2667 entropy=17.6601 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 52800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-508488.0 mean_steps=15.7
|
|
[Episode 52810] reward=-121609604.5 actor_loss=0.2995 critic_loss=145374614449.2308 entropy=17.6629 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 52820] reward=-121405582.5 actor_loss=0.2719 critic_loss=143006449664.0000 entropy=17.6657 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 52820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-568469.6 mean_steps=12.8
|
|
[Episode 52830] reward=-118767285.6 actor_loss=0.2894 critic_loss=141356344661.3333 entropy=17.6748 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 52840] reward=-117366947.2 actor_loss=0.3435 critic_loss=133118653235.2000 entropy=17.6673 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 52840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-595118.6 mean_steps=13.0
|
|
[Episode 52850] reward=-111359338.9 actor_loss=0.3016 critic_loss=128396416341.3333 entropy=17.6637 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 52860] reward=-121796159.1 actor_loss=0.2632 critic_loss=145391578515.3940 entropy=17.6658 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 52860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-618849.7 mean_steps=12.0
|
|
[Episode 52870] reward=-120129541.5 actor_loss=0.2914 critic_loss=137579154944.0000 entropy=17.6809 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 52880] reward=-116352458.6 actor_loss=0.2715 critic_loss=143344242779.0222 entropy=17.6748 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 52880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591305.7 mean_steps=13.3
|
|
[Episode 52890] reward=-117451214.0 actor_loss=0.3207 critic_loss=137067761664.0000 entropy=17.6830 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 52900] reward=-113528166.8 actor_loss=0.2460 critic_loss=131304805171.2000 entropy=17.6652 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 52900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-664517.5 mean_steps=12.0
|
|
[Episode 52910] reward=-115842660.9 actor_loss=0.2172 critic_loss=138464750909.7931 entropy=17.6718 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 52920] reward=-115746870.5 actor_loss=0.3135 critic_loss=131176997410.1333 entropy=17.6694 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 52920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-337765.1 mean_steps=15.6
|
|
[Episode 52930] reward=-115486530.3 actor_loss=0.2236 critic_loss=137269425493.3333 entropy=17.6629 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 52940] reward=-116754230.9 actor_loss=0.2916 critic_loss=139665765717.3333 entropy=17.6439 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 52940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517472.0 mean_steps=12.8
|
|
[Episode 52950] reward=-113394791.8 actor_loss=0.3483 critic_loss=128782468710.4000 entropy=17.6408 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 52960] reward=-123954234.9 actor_loss=0.3917 critic_loss=145783276544.0000 entropy=17.6251 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 52960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-423477.3 mean_steps=16.2
|
|
[Episode 52970] reward=-112088656.1 actor_loss=0.2472 critic_loss=129473549994.6667 entropy=17.6362 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 52980] reward=-119238331.6 actor_loss=0.3644 critic_loss=134143782626.2326 entropy=17.6381 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 52980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463041.1 mean_steps=15.2
|
|
[Episode 52990] reward=-117771276.3 actor_loss=0.3098 critic_loss=147208301954.8445 entropy=17.6382 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 53000] reward=-117696887.0 actor_loss=0.2766 critic_loss=137546738346.6667 entropy=17.6247 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 53000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559499.8 mean_steps=12.2
|
|
[Episode 53010] reward=-115822980.8 actor_loss=0.3155 critic_loss=133216357841.4545 entropy=17.6270 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 53020] reward=-123061423.4 actor_loss=0.2405 critic_loss=141125262118.7879 entropy=17.6246 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 53020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520294.6 mean_steps=14.2
|
|
[Episode 53030] reward=-118050276.0 actor_loss=0.1665 critic_loss=142624632740.9778 entropy=17.6266 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 53040] reward=-114308098.0 actor_loss=0.2092 critic_loss=131189148206.5455 entropy=17.6242 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 53040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479560.5 mean_steps=14.4
|
|
[Episode 53050] reward=-113847643.5 actor_loss=0.2859 critic_loss=139212219824.3556 entropy=17.6276 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 53060] reward=-118067594.4 actor_loss=0.2599 critic_loss=136217629354.6667 entropy=17.6169 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 53060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-566767.5 mean_steps=14.4
|
|
[Episode 53070] reward=-113767746.0 actor_loss=0.3708 critic_loss=136488206336.0000 entropy=17.6120 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 53080] reward=-116770224.5 actor_loss=0.2396 critic_loss=129969841865.6970 entropy=17.6142 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 53080] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-308810.9 mean_steps=17.9
|
|
[Episode 53090] reward=-115487989.6 actor_loss=0.2365 critic_loss=140811555538.8235 entropy=17.6312 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 53100] reward=-113454723.2 actor_loss=0.3423 critic_loss=138184826880.0000 entropy=17.6306 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 53100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-545412.6 mean_steps=12.3
|
|
[Episode 53110] reward=-115445249.8 actor_loss=0.2234 critic_loss=136159686356.2927 entropy=17.6186 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 53120] reward=-119855935.6 actor_loss=0.2445 critic_loss=138791274023.3846 entropy=17.6147 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 53120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419484.1 mean_steps=15.1
|
|
[Episode 53130] reward=-113014876.1 actor_loss=0.3043 critic_loss=133427322880.0000 entropy=17.6005 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 53140] reward=-114932558.2 actor_loss=0.2989 critic_loss=136749187816.7273 entropy=17.5950 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 53140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-632726.0 mean_steps=12.8
|
|
[Episode 53150] reward=-116256850.0 actor_loss=0.3401 critic_loss=129749110988.8000 entropy=17.5987 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 53160] reward=-113807337.0 actor_loss=0.3134 critic_loss=143554857984.0000 entropy=17.5999 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 53160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-432179.2 mean_steps=14.6
|
|
[Episode 53170] reward=-120569223.7 actor_loss=0.3853 critic_loss=141480558033.4546 entropy=17.5940 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 53180] reward=-126424864.5 actor_loss=0.2861 critic_loss=150223543149.7143 entropy=17.5986 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 53180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567548.2 mean_steps=12.3
|
|
[Episode 53190] reward=-126098851.9 actor_loss=0.2798 critic_loss=146812889195.7895 entropy=17.6053 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 53200] reward=-120659769.6 actor_loss=0.3481 critic_loss=147657497453.7143 entropy=17.6009 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 53200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458895.7 mean_steps=15.4
|
|
[Episode 53210] reward=-123518741.8 actor_loss=0.2480 critic_loss=146881261568.0000 entropy=17.6086 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 53220] reward=-110158396.3 actor_loss=0.3082 critic_loss=125714496275.6923 entropy=17.5985 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 53220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-396383.1 mean_steps=16.1
|
|
[Episode 53230] reward=-116875086.7 actor_loss=0.3627 critic_loss=134180523940.9778 entropy=17.5919 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 53240] reward=-116172710.4 actor_loss=0.3994 critic_loss=136680167014.4000 entropy=17.6098 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 53240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427916.7 mean_steps=14.4
|
|
[Episode 53250] reward=-116304706.8 actor_loss=0.3495 critic_loss=132165699216.4103 entropy=17.6109 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 53260] reward=-109382635.5 actor_loss=0.3547 critic_loss=128376568399.6444 entropy=17.6022 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 53260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478249.1 mean_steps=13.8
|
|
[Episode 53270] reward=-116056339.8 actor_loss=0.2294 critic_loss=139439884580.5714 entropy=17.6073 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 53280] reward=-116710921.0 actor_loss=0.2704 critic_loss=139490595011.0476 entropy=17.6094 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 53280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-638219.1 mean_steps=12.8
|
|
[Episode 53290] reward=-117914682.4 actor_loss=0.1698 critic_loss=134326158131.2000 entropy=17.6105 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 53300] reward=-115951530.2 actor_loss=0.3116 critic_loss=137528397965.2414 entropy=17.6005 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 53300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-603662.6 mean_steps=14.6
|
|
[Episode 53310] reward=-117991364.8 actor_loss=0.2533 critic_loss=131885130118.0952 entropy=17.6014 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 53320] reward=-122199876.6 actor_loss=0.2480 critic_loss=140431776881.7778 entropy=17.5980 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 53320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-595239.4 mean_steps=12.3
|
|
[Episode 53330] reward=-119652115.8 actor_loss=0.2819 critic_loss=137557308620.8000 entropy=17.6001 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 53340] reward=-118482332.3 actor_loss=0.2344 critic_loss=140454828441.6000 entropy=17.6103 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 53340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502508.9 mean_steps=14.1
|
|
[Episode 53350] reward=-120811866.0 actor_loss=0.2913 critic_loss=143421817378.1333 entropy=17.6248 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 53360] reward=-113577620.7 actor_loss=0.3563 critic_loss=132869462528.0000 entropy=17.6322 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 53360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460776.9 mean_steps=14.6
|
|
[Episode 53370] reward=-124103171.2 actor_loss=0.2392 critic_loss=148267322112.0000 entropy=17.6458 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 53380] reward=-113865975.8 actor_loss=0.2741 critic_loss=136940098901.3333 entropy=17.6503 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 53380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-650703.4 mean_steps=13.1
|
|
[Episode 53390] reward=-124731261.5 actor_loss=0.3545 critic_loss=339487105024.0000 entropy=17.6459 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 53400] reward=-121520918.8 actor_loss=0.3283 critic_loss=141206247936.0000 entropy=17.6437 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 53400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474688.4 mean_steps=14.6
|
|
[Episode 53410] reward=-119932298.8 actor_loss=0.3107 critic_loss=142051801770.6667 entropy=17.6389 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 53420] reward=-113489472.2 actor_loss=0.3582 critic_loss=131441398285.1282 entropy=17.6491 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 53420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498284.5 mean_steps=13.8
|
|
[Episode 53430] reward=-118082492.1 actor_loss=0.2619 critic_loss=137869465005.4193 entropy=17.6443 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 53440] reward=-110706438.9 actor_loss=0.4475 critic_loss=144260846738.2857 entropy=17.6505 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 53440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489518.3 mean_steps=13.8
|
|
[Episode 53450] reward=-117295338.9 actor_loss=0.3152 critic_loss=136787308009.7391 entropy=17.6508 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 53460] reward=-115166611.1 actor_loss=0.3390 critic_loss=136675424768.0000 entropy=17.6444 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 53460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492865.9 mean_steps=14.0
|
|
[Episode 53470] reward=-119434393.4 actor_loss=0.2044 critic_loss=136268559042.2069 entropy=17.6362 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 53480] reward=-114694224.8 actor_loss=0.2885 critic_loss=134137621942.8571 entropy=17.6261 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 53480] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-342885.1 mean_steps=16.5
|
|
[Episode 53490] reward=-117449710.1 actor_loss=0.2984 critic_loss=135085880567.1724 entropy=17.6161 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 53500] reward=-116112630.3 actor_loss=0.3283 critic_loss=134889075598.2222 entropy=17.6039 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 53500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524466.8 mean_steps=13.3
|
|
[Episode 53510] reward=-114906939.4 actor_loss=0.2736 critic_loss=133333429452.8000 entropy=17.5970 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 53520] reward=-121497764.9 actor_loss=0.2903 critic_loss=136591879281.7778 entropy=17.5928 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 53520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550984.6 mean_steps=14.2
|
|
[Episode 53530] reward=-122019100.5 actor_loss=0.2255 critic_loss=144162867200.0000 entropy=17.5921 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 53540] reward=-116908321.8 actor_loss=0.2700 critic_loss=133753511025.7778 entropy=17.5946 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 53540] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-711486.8 mean_steps=10.7
|
|
[Episode 53550] reward=-112783973.4 actor_loss=0.2873 critic_loss=131184253883.7333 entropy=17.5802 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 53560] reward=-121098395.4 actor_loss=0.3821 critic_loss=143378839051.3778 entropy=17.5737 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 53560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570417.8 mean_steps=13.5
|
|
[Episode 53570] reward=-124617342.5 actor_loss=0.2670 critic_loss=147940472695.4667 entropy=17.5834 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 53580] reward=-127294886.9 actor_loss=0.2431 critic_loss=150267781120.0000 entropy=17.5694 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 53580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420763.5 mean_steps=15.4
|
|
[Episode 53590] reward=-117473095.0 actor_loss=0.4015 critic_loss=136632684654.7027 entropy=17.5828 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 53600] reward=-117623680.5 actor_loss=0.1759 critic_loss=132897419264.0000 entropy=17.5842 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 53600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-387202.2 mean_steps=15.8
|
|
[Episode 53610] reward=-114819864.1 actor_loss=0.2851 critic_loss=134267263151.5429 entropy=17.5745 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 53620] reward=-118735143.0 actor_loss=0.2421 critic_loss=139339680816.7619 entropy=17.5920 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 53620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464754.5 mean_steps=14.5
|
|
[Episode 53630] reward=-112574414.1 actor_loss=0.3700 critic_loss=133745325397.3333 entropy=17.6087 approx_kl=0.0109 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 53640] reward=-118928228.4 actor_loss=0.2667 critic_loss=141338834613.6774 entropy=17.6019 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 53640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577153.8 mean_steps=12.5
|
|
[Episode 53650] reward=-121819160.1 actor_loss=0.3042 critic_loss=140044959197.8667 entropy=17.5994 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 53660] reward=-123676680.9 actor_loss=0.1882 critic_loss=146668323560.7273 entropy=17.6100 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 53660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549420.2 mean_steps=14.3
|
|
[Episode 53670] reward=-121513184.9 actor_loss=0.3263 critic_loss=149237677169.7778 entropy=17.6232 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 53680] reward=-120636654.8 actor_loss=0.3082 critic_loss=141599942745.0435 entropy=17.6272 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 53680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-576379.2 mean_steps=12.7
|
|
[Episode 53690] reward=-116922235.1 actor_loss=0.2492 critic_loss=138958394163.2000 entropy=17.6303 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 53700] reward=-116808297.9 actor_loss=0.2447 critic_loss=134721873799.5294 entropy=17.6233 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 53700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-617052.9 mean_steps=13.4
|
|
[Episode 53710] reward=-115134988.4 actor_loss=0.2747 critic_loss=136698174668.8000 entropy=17.6324 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 53720] reward=-119752861.3 actor_loss=0.2384 critic_loss=137688313379.7209 entropy=17.6367 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 53720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407344.6 mean_steps=15.8
|
|
[Episode 53730] reward=-124160507.6 actor_loss=0.2288 critic_loss=151385163776.0000 entropy=17.6563 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 53740] reward=-112006298.2 actor_loss=0.3006 critic_loss=133754794224.9412 entropy=17.6515 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 53740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452740.3 mean_steps=14.5
|
|
[Episode 53750] reward=-114844498.3 actor_loss=0.3256 critic_loss=138464366182.4000 entropy=17.6516 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 53760] reward=-117951955.0 actor_loss=0.3479 critic_loss=139575262439.2258 entropy=17.6655 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 53760] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-667627.2 mean_steps=10.8
|
|
[Episode 53770] reward=-118414160.7 actor_loss=0.2628 critic_loss=139841456128.0000 entropy=17.6659 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 53780] reward=-119599984.6 actor_loss=0.2100 critic_loss=139570071079.3846 entropy=17.6680 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 53780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-527715.8 mean_steps=16.1
|
|
[Episode 53790] reward=-119335245.3 actor_loss=0.3316 critic_loss=136877471971.5556 entropy=17.6745 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 53800] reward=-121503790.2 actor_loss=0.3183 critic_loss=140932419584.0000 entropy=17.6795 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 53800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-444112.4 mean_steps=14.3
|
|
[Episode 53810] reward=-122328374.2 actor_loss=0.3095 critic_loss=141492361728.0000 entropy=17.6782 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 53820] reward=-117705222.7 actor_loss=0.3431 critic_loss=136645707912.5333 entropy=17.6705 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 53820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-352464.2 mean_steps=15.2
|
|
[Episode 53830] reward=-119016329.7 actor_loss=0.3044 critic_loss=135298340636.4444 entropy=17.6648 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 53840] reward=-117756600.0 actor_loss=0.3022 critic_loss=138357832947.8095 entropy=17.6619 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 53840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538836.9 mean_steps=12.9
|
|
[Episode 53850] reward=-117177030.6 actor_loss=0.2386 critic_loss=130191170721.6842 entropy=17.6598 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 53860] reward=-122587243.8 actor_loss=0.3222 critic_loss=145676503722.6667 entropy=17.6541 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 53860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-523921.3 mean_steps=12.3
|
|
[Episode 53870] reward=-113393246.7 actor_loss=0.3251 critic_loss=126564163993.6000 entropy=17.6598 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 53880] reward=-119203186.6 actor_loss=0.2699 critic_loss=143495149860.5714 entropy=17.6544 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 53880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-332845.8 mean_steps=15.6
|
|
[Episode 53890] reward=-119035825.1 actor_loss=0.2896 critic_loss=136467109205.3333 entropy=17.6463 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 53900] reward=-113304931.0 actor_loss=0.4569 critic_loss=135049622596.2667 entropy=17.6425 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 53900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-465651.8 mean_steps=13.6
|
|
[Episode 53910] reward=-116417227.0 actor_loss=0.3286 critic_loss=137338417874.8235 entropy=17.6330 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 53920] reward=-119876539.0 actor_loss=0.3192 critic_loss=139135929548.8000 entropy=17.6408 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 53920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-479033.0 mean_steps=13.2
|
|
[Episode 53930] reward=-118692755.9 actor_loss=0.3889 critic_loss=137543124929.9394 entropy=17.6273 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 53940] reward=-118600294.8 actor_loss=0.2805 critic_loss=139742417957.9259 entropy=17.6227 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 53940] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-333818.1 mean_steps=17.3
|
|
[Episode 53950] reward=-120864636.3 actor_loss=0.3197 critic_loss=141769600860.1600 entropy=17.6187 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 53960] reward=-119360530.4 actor_loss=0.3720 critic_loss=142779061301.8947 entropy=17.6075 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 53960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-485284.7 mean_steps=12.7
|
|
[Episode 53970] reward=-118605577.5 actor_loss=0.2784 critic_loss=141132518377.2444 entropy=17.6093 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 53980] reward=-119338561.6 actor_loss=0.3465 critic_loss=138063897486.2222 entropy=17.6027 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 53980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432480.4 mean_steps=15.2
|
|
[Episode 53990] reward=-117448960.9 actor_loss=0.2795 critic_loss=136760576986.0741 entropy=17.6121 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 54000] reward=-116494592.5 actor_loss=0.2651 critic_loss=131698082536.7273 entropy=17.6195 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 54000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-466075.9 mean_steps=16.1
|
|
[Episode 54010] reward=-116818587.1 actor_loss=0.2918 critic_loss=135670872551.6190 entropy=17.6206 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 54020] reward=-119575353.2 actor_loss=0.2896 critic_loss=138707753697.2800 entropy=17.6286 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 54020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-573656.3 mean_steps=12.3
|
|
[Episode 54030] reward=-121007536.5 actor_loss=0.2945 critic_loss=144795461632.0000 entropy=17.6204 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 54040] reward=-118048881.5 actor_loss=0.2775 critic_loss=136140376382.5778 entropy=17.6131 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 54040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-404896.1 mean_steps=15.7
|
|
[Episode 54050] reward=-120200650.8 actor_loss=0.2529 critic_loss=137717072112.9412 entropy=17.5959 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 54060] reward=-116206679.2 actor_loss=0.3715 critic_loss=133288759536.9412 entropy=17.5981 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 54060] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-225503.4 mean_steps=17.4
|
|
[Episode 54070] reward=-114496893.5 actor_loss=0.3112 critic_loss=133916704768.0000 entropy=17.5939 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 54080] reward=-118862307.3 actor_loss=0.3036 critic_loss=134624115049.4118 entropy=17.5930 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 54080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439914.1 mean_steps=15.2
|
|
[Episode 54090] reward=-117651361.4 actor_loss=0.2815 critic_loss=131805378470.9565 entropy=17.5933 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 54100] reward=-119850296.2 actor_loss=0.2735 critic_loss=141068763400.2581 entropy=17.5915 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 54100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451529.2 mean_steps=14.4
|
|
[Episode 54110] reward=-118996933.4 actor_loss=0.2484 critic_loss=139985793141.0286 entropy=17.5995 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 54120] reward=-115052560.7 actor_loss=0.3299 critic_loss=127772568234.6667 entropy=17.5904 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 54120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548068.4 mean_steps=13.4
|
|
[Episode 54130] reward=-116705302.9 actor_loss=0.3179 critic_loss=166733089060.5714 entropy=17.5759 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 54140] reward=-116887184.1 actor_loss=0.2866 critic_loss=135969321828.8485 entropy=17.5699 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 54140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531176.3 mean_steps=12.9
|
|
[Episode 54150] reward=-116763471.7 actor_loss=0.3102 critic_loss=135356559360.0000 entropy=17.5623 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 54160] reward=-117606050.1 actor_loss=0.2679 critic_loss=137525821124.9231 entropy=17.5680 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 54160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400927.6 mean_steps=14.9
|
|
[Episode 54170] reward=-119674752.5 actor_loss=0.3099 critic_loss=145367262003.2000 entropy=17.5749 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 54180] reward=-120246256.1 actor_loss=0.3633 critic_loss=136308921685.3333 entropy=17.5697 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 54180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537934.3 mean_steps=14.2
|
|
[Episode 54190] reward=-113192496.6 actor_loss=0.3131 critic_loss=131480793810.8235 entropy=17.5818 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 54200] reward=-121320071.3 actor_loss=0.2712 critic_loss=146771864328.8276 entropy=17.5778 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 54200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532906.4 mean_steps=13.1
|
|
[Episode 54210] reward=-118979212.1 actor_loss=0.2592 critic_loss=138776369076.1482 entropy=17.5881 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 54220] reward=-118791325.0 actor_loss=0.3253 critic_loss=140046324675.7647 entropy=17.5914 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 54220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447313.2 mean_steps=15.1
|
|
[Episode 54230] reward=-120327501.6 actor_loss=0.1563 critic_loss=141645765927.8222 entropy=17.5798 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 54240] reward=-115068635.3 actor_loss=0.3776 critic_loss=130481018260.8372 entropy=17.5806 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 54240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456038.6 mean_steps=14.1
|
|
[Episode 54250] reward=-121617345.1 actor_loss=0.1800 critic_loss=142396536452.7408 entropy=17.5779 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 54260] reward=-112332897.8 actor_loss=0.2338 critic_loss=127027240550.4000 entropy=17.5849 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 54260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458324.7 mean_steps=15.5
|
|
[Episode 54270] reward=-116212507.7 actor_loss=0.3456 critic_loss=144751464903.1111 entropy=17.5809 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 54280] reward=-119515815.4 actor_loss=0.2780 critic_loss=140868605765.8182 entropy=17.5787 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 54280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-585257.3 mean_steps=13.6
|
|
[Episode 54290] reward=-114495096.5 actor_loss=0.3051 critic_loss=130787161781.6774 entropy=17.5744 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 54300] reward=-116204562.9 actor_loss=0.3548 critic_loss=135545334561.3913 entropy=17.5766 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 54300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-397516.1 mean_steps=16.0
|
|
[Episode 54310] reward=-115137818.4 actor_loss=0.3261 critic_loss=138139406870.2609 entropy=17.5781 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 54320] reward=-121714950.5 actor_loss=0.3385 critic_loss=140771294720.0000 entropy=17.5792 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 54320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459582.8 mean_steps=14.6
|
|
[Episode 54330] reward=-125237781.6 actor_loss=0.3044 critic_loss=148541077094.4000 entropy=17.5771 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 54340] reward=-119930958.8 actor_loss=0.2177 critic_loss=147029402893.4737 entropy=17.5867 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 54340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518080.8 mean_steps=14.8
|
|
[Episode 54350] reward=-118608735.3 actor_loss=0.3221 critic_loss=138878394709.3333 entropy=17.5885 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 54360] reward=-115712473.9 actor_loss=0.3810 critic_loss=140120863890.2857 entropy=17.5972 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 54360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-389387.9 mean_steps=15.2
|
|
[Episode 54370] reward=-124306324.1 actor_loss=0.4082 critic_loss=153963419175.3846 entropy=17.6012 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 54380] reward=-117892204.0 actor_loss=0.2362 critic_loss=139539013241.9048 entropy=17.6056 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 54380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506094.7 mean_steps=14.1
|
|
[Episode 54390] reward=-120608114.6 actor_loss=0.2190 critic_loss=141707232300.5217 entropy=17.6072 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 54400] reward=-123450406.3 actor_loss=0.3412 critic_loss=145070631813.1200 entropy=17.6148 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 54400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-582427.0 mean_steps=12.5
|
|
[Episode 54410] reward=-122496535.3 actor_loss=0.2613 critic_loss=146341399853.1765 entropy=17.6165 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 54420] reward=-118703801.0 actor_loss=0.4275 critic_loss=142140882124.8000 entropy=17.6159 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 54420] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-716455.2 mean_steps=11.5
|
|
[Episode 54430] reward=-116408193.6 actor_loss=0.3015 critic_loss=133652720753.7778 entropy=17.5995 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 54440] reward=-119119384.0 actor_loss=0.3250 critic_loss=138125570503.1111 entropy=17.6028 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 54440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501997.1 mean_steps=13.7
|
|
[Episode 54450] reward=-118067993.9 actor_loss=0.3454 critic_loss=139285304805.0526 entropy=17.6019 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 54460] reward=-114143209.0 actor_loss=0.2903 critic_loss=132275833514.6667 entropy=17.5969 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 54460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544480.6 mean_steps=13.9
|
|
[Episode 54470] reward=-110035322.0 actor_loss=0.4081 critic_loss=122870072537.2121 entropy=17.6102 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 54480] reward=-120573879.2 actor_loss=0.2777 critic_loss=144882597888.0000 entropy=17.6071 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 54480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-413186.7 mean_steps=15.8
|
|
[Episode 54490] reward=-114057824.9 actor_loss=0.3189 critic_loss=130476975250.2857 entropy=17.6040 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 54500] reward=-113532089.0 actor_loss=0.2492 critic_loss=128237512021.3333 entropy=17.5954 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 54500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546017.7 mean_steps=13.0
|
|
[Episode 54510] reward=-120231386.1 actor_loss=0.2753 critic_loss=144441583908.5714 entropy=17.5843 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 54520] reward=-123619634.7 actor_loss=0.1306 critic_loss=144490587557.6471 entropy=17.5883 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 54520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459090.5 mean_steps=15.4
|
|
[Episode 54530] reward=-116824590.0 actor_loss=0.2148 critic_loss=135167421878.8571 entropy=17.6044 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 54540] reward=-117994047.9 actor_loss=0.3805 critic_loss=136698920049.7778 entropy=17.5960 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 54540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-403330.8 mean_steps=16.2
|
|
[Episode 54550] reward=-117276187.4 actor_loss=0.3296 critic_loss=139324896741.0526 entropy=17.6064 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 54560] reward=-112361518.7 actor_loss=0.3830 critic_loss=137247981568.0000 entropy=17.6033 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 54560] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-636403.6 mean_steps=11.8
|
|
[Episode 54570] reward=-115560781.7 actor_loss=0.3173 critic_loss=138897752328.2581 entropy=17.6009 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 54580] reward=-115806181.1 actor_loss=0.2871 critic_loss=135486871096.8889 entropy=17.5925 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 54580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558840.7 mean_steps=13.2
|
|
[Episode 54590] reward=-122163921.1 actor_loss=0.3895 critic_loss=155143197617.2308 entropy=17.6139 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 54600] reward=-117643855.8 actor_loss=0.2717 critic_loss=139234157090.1333 entropy=17.6016 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 54600] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-365368.5 mean_steps=16.6
|
|
[Episode 54610] reward=-114798056.5 actor_loss=0.3349 critic_loss=143380447232.0000 entropy=17.6079 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 54620] reward=-111832024.8 actor_loss=0.3438 critic_loss=144447419970.7826 entropy=17.6178 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 54620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385699.2 mean_steps=15.4
|
|
[Episode 54630] reward=-117394494.0 actor_loss=0.2698 critic_loss=143070479198.3158 entropy=17.6083 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 54640] reward=-110566484.2 actor_loss=0.3204 critic_loss=129245920162.9091 entropy=17.5946 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 54640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-459981.0 mean_steps=13.8
|
|
[Episode 54650] reward=-118249243.3 actor_loss=0.1908 critic_loss=132868842057.1429 entropy=17.5943 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 54660] reward=-118935939.1 actor_loss=0.3662 critic_loss=141298993227.8518 entropy=17.5937 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 54660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-514881.3 mean_steps=13.1
|
|
[Episode 54670] reward=-117197744.2 actor_loss=0.3169 critic_loss=131969824505.4359 entropy=17.5788 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 54680] reward=-120235310.4 actor_loss=0.2141 critic_loss=140383037895.1111 entropy=17.5726 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 54680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479290.7 mean_steps=13.9
|
|
[Episode 54690] reward=-116198526.3 actor_loss=0.3445 critic_loss=128939458311.7576 entropy=17.5638 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 54700] reward=-121455345.6 actor_loss=0.3363 critic_loss=139885547395.8788 entropy=17.5675 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 54700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477983.7 mean_steps=13.8
|
|
[Episode 54710] reward=-116339633.2 actor_loss=0.2917 critic_loss=129501580366.7692 entropy=17.5651 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 54720] reward=-118786663.7 actor_loss=0.2688 critic_loss=138045884006.4000 entropy=17.5616 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 54720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420989.5 mean_steps=15.2
|
|
[Episode 54730] reward=-119693690.4 actor_loss=0.2700 critic_loss=136553182354.2857 entropy=17.5714 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 54740] reward=-121200797.1 actor_loss=0.2863 critic_loss=142709188608.0000 entropy=17.5676 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 54740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-381972.9 mean_steps=14.8
|
|
[Episode 54750] reward=-122487058.1 actor_loss=0.3173 critic_loss=139764458917.6471 entropy=17.5643 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 54760] reward=-119059097.2 actor_loss=0.1707 critic_loss=134250317004.8000 entropy=17.5824 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 54760] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-394986.1 mean_steps=16.6
|
|
[Episode 54770] reward=-124677084.1 actor_loss=0.1913 critic_loss=191421929542.6207 entropy=17.6021 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 54780] reward=-118056138.3 actor_loss=0.3151 critic_loss=137047734857.1429 entropy=17.6072 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 54780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575036.0 mean_steps=12.8
|
|
[Episode 54790] reward=-123514513.3 actor_loss=0.2350 critic_loss=151529999360.0000 entropy=17.6109 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 54800] reward=-120532852.4 actor_loss=0.2596 critic_loss=141127596441.6000 entropy=17.6080 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 54800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-423142.8 mean_steps=15.9
|
|
[Episode 54810] reward=-116625119.1 actor_loss=0.4054 critic_loss=136693868664.4706 entropy=17.6234 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 54820] reward=-117606145.1 actor_loss=0.2772 critic_loss=129145030842.1818 entropy=17.6278 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 54820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483948.8 mean_steps=14.7
|
|
[Episode 54830] reward=-115115264.5 actor_loss=0.2898 critic_loss=127165327262.4762 entropy=17.6193 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 54840] reward=-123040377.1 actor_loss=0.2857 critic_loss=138482419302.4000 entropy=17.6179 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 54840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-470101.5 mean_steps=15.4
|
|
[Episode 54850] reward=-120454536.5 actor_loss=0.2496 critic_loss=136461624856.3810 entropy=17.6175 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 54860] reward=-116490619.1 actor_loss=0.2907 critic_loss=134056743253.3333 entropy=17.6092 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 54860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-346709.6 mean_steps=15.6
|
|
[Episode 54870] reward=-114912953.2 actor_loss=0.1977 critic_loss=127488519008.7111 entropy=17.5981 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 54880] reward=-116793591.5 actor_loss=0.2792 critic_loss=134730073429.3333 entropy=17.6074 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 54880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493101.2 mean_steps=13.8
|
|
[Episode 54890] reward=-117681567.6 actor_loss=0.2665 critic_loss=141935742780.9524 entropy=17.6035 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 54900] reward=-117388276.4 actor_loss=0.2940 critic_loss=137976049436.4445 entropy=17.6002 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 54900] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-248185.4 mean_steps=17.1
|
|
[Episode 54910] reward=-118460853.6 actor_loss=0.2819 critic_loss=135348904391.1111 entropy=17.6069 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 54920] reward=-119661363.1 actor_loss=0.3958 critic_loss=137939296886.1538 entropy=17.5921 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 54920] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-226518.1 mean_steps=18.4
|
|
[Episode 54930] reward=-114775176.5 actor_loss=0.3324 critic_loss=127373357547.5200 entropy=17.5868 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 54940] reward=-116479484.6 actor_loss=0.3057 critic_loss=182480844261.0526 entropy=17.5906 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 54940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385792.1 mean_steps=15.8
|
|
[Episode 54950] reward=-121345660.4 actor_loss=0.2774 critic_loss=142469579744.9697 entropy=17.5813 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 54960] reward=-115228081.7 actor_loss=0.3618 critic_loss=137343938068.4800 entropy=17.5703 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 54960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-468357.0 mean_steps=16.6
|
|
[Episode 54970] reward=-119201870.8 actor_loss=0.2298 critic_loss=135818050413.7143 entropy=17.5763 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 54980] reward=-120065665.2 actor_loss=0.2417 critic_loss=136678730956.8000 entropy=17.5717 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 54980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-584106.0 mean_steps=11.8
|
|
[Episode 54990] reward=-117054971.7 actor_loss=0.2094 critic_loss=134259643369.2444 entropy=17.5822 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 55000] reward=-114859874.2 actor_loss=0.3362 critic_loss=139000841557.3333 entropy=17.5653 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 55000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-484098.7 mean_steps=15.7
|
|
[Episode 55010] reward=-119046172.8 actor_loss=0.3211 critic_loss=137641595303.7242 entropy=17.5687 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 55020] reward=-117736761.8 actor_loss=0.2740 critic_loss=135252101120.0000 entropy=17.5531 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 55020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471164.6 mean_steps=14.7
|
|
[Episode 55030] reward=-121992055.7 actor_loss=0.3529 critic_loss=139827774885.6471 entropy=17.5545 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 55040] reward=-117949724.2 actor_loss=0.3346 critic_loss=139408405299.2000 entropy=17.5389 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 55040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-376108.3 mean_steps=15.8
|
|
[Episode 55050] reward=-113394480.0 actor_loss=0.2845 critic_loss=134066658017.2800 entropy=17.5485 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 55060] reward=-118976170.9 actor_loss=0.3161 critic_loss=135761150225.0667 entropy=17.5533 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 55060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-331316.9 mean_steps=15.6
|
|
[Episode 55070] reward=-114468718.2 actor_loss=0.2836 critic_loss=130844126697.7391 entropy=17.5327 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 55080] reward=-119983024.6 actor_loss=0.3396 critic_loss=161943189991.6190 entropy=17.5341 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 55080] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-257480.9 mean_steps=18.1
|
|
[Episode 55090] reward=-125201417.3 actor_loss=0.2425 critic_loss=144204502903.4667 entropy=17.5466 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 55100] reward=-121400200.6 actor_loss=0.3276 critic_loss=139139693298.5263 entropy=17.5447 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 55100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-396922.0 mean_steps=15.1
|
|
[Episode 55110] reward=-120303207.4 actor_loss=0.2922 critic_loss=137108267008.0000 entropy=17.5418 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 55120] reward=-112164990.4 actor_loss=0.4677 critic_loss=130329820066.9091 entropy=17.5449 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Eval 55120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526589.9 mean_steps=12.9
|
|
[Episode 55130] reward=-112001618.3 actor_loss=0.3141 critic_loss=130329645946.4348 entropy=17.5439 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 55140] reward=-120615759.6 actor_loss=0.3021 critic_loss=146050210722.9091 entropy=17.5556 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 55140] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-372615.9 mean_steps=16.6
|
|
[Episode 55150] reward=-113101016.0 actor_loss=0.2509 critic_loss=129241622055.3846 entropy=17.5697 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 55160] reward=-115290720.4 actor_loss=0.4067 critic_loss=135821974807.2727 entropy=17.5719 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 55160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555740.7 mean_steps=13.3
|
|
[Episode 55170] reward=-116215089.1 actor_loss=0.4317 critic_loss=133684226234.1818 entropy=17.5767 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 55180] reward=-115700270.8 actor_loss=0.3897 critic_loss=135165535153.2308 entropy=17.5772 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 55180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501223.6 mean_steps=13.8
|
|
[Episode 55190] reward=-115594833.5 actor_loss=0.3158 critic_loss=136431783794.7586 entropy=17.5726 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 55200] reward=-118350803.1 actor_loss=0.2579 critic_loss=142100722346.6667 entropy=17.5776 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 55200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433525.6 mean_steps=15.4
|
|
[Episode 55210] reward=-119359742.7 actor_loss=0.2285 critic_loss=141722811553.6842 entropy=17.5661 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 55220] reward=-124526430.8 actor_loss=0.3481 critic_loss=167412394345.4118 entropy=17.5635 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 55220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408025.2 mean_steps=15.1
|
|
[Episode 55230] reward=-115224557.0 actor_loss=0.2676 critic_loss=142088766025.1429 entropy=17.5677 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 55240] reward=-118519584.7 actor_loss=0.2570 critic_loss=142681204248.3810 entropy=17.5705 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 55240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588587.5 mean_steps=13.5
|
|
[Episode 55250] reward=-112546610.7 actor_loss=0.3940 critic_loss=190541949220.5714 entropy=17.5620 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 55260] reward=-123802754.3 actor_loss=0.2827 critic_loss=143367561697.8824 entropy=17.5699 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 55260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535185.4 mean_steps=14.1
|
|
[Episode 55270] reward=-118453208.2 actor_loss=0.2351 critic_loss=137466159616.0000 entropy=17.5747 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 55280] reward=-114762472.1 actor_loss=0.3386 critic_loss=133344767759.0588 entropy=17.5748 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 55280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-532390.7 mean_steps=15.2
|
|
[Episode 55290] reward=-120412490.7 actor_loss=0.2285 critic_loss=138082517356.0889 entropy=17.5890 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 55300] reward=-114286211.3 actor_loss=2.7686 critic_loss=136162713088.0000 entropy=17.5969 approx_kl=0.0043 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 55300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-424903.4 mean_steps=16.5
|
|
[Episode 55310] reward=-118063858.9 actor_loss=0.2121 critic_loss=135154817647.3044 entropy=17.5851 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 55320] reward=-112896178.2 actor_loss=0.2827 critic_loss=127566593417.8462 entropy=17.5824 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 55320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-426605.6 mean_steps=16.1
|
|
[Episode 55330] reward=-116965577.2 actor_loss=0.3769 critic_loss=138344012378.3529 entropy=17.5928 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 55340] reward=-119157766.3 actor_loss=0.2627 critic_loss=140191905430.5882 entropy=17.5932 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 55340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-629859.5 mean_steps=11.8
|
|
[Episode 55350] reward=-118818223.0 actor_loss=0.3067 critic_loss=135426907340.8000 entropy=17.5897 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 55360] reward=-116471387.5 actor_loss=0.3281 critic_loss=133967646915.0476 entropy=17.5907 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 55360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553266.2 mean_steps=13.2
|
|
[Episode 55370] reward=-115514290.1 actor_loss=0.3269 critic_loss=131355220526.5455 entropy=17.6091 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 55380] reward=-116324545.6 actor_loss=0.2959 critic_loss=134547566219.6364 entropy=17.6096 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 55380] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-348116.3 mean_steps=16.3
|
|
[Episode 55390] reward=-118767834.5 actor_loss=0.3144 critic_loss=140699985042.2857 entropy=17.6145 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 55400] reward=-116705166.4 actor_loss=0.3498 critic_loss=130223597795.5556 entropy=17.6254 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 55400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524618.5 mean_steps=12.9
|
|
[Episode 55410] reward=-119639169.6 actor_loss=0.3172 critic_loss=135510515370.6667 entropy=17.6361 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 55420] reward=-115598026.0 actor_loss=0.2795 critic_loss=132727209719.7419 entropy=17.6438 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 55420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515148.3 mean_steps=13.8
|
|
[Episode 55430] reward=-117775106.0 actor_loss=0.2231 critic_loss=132408587301.9259 entropy=17.6478 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 55440] reward=-115688073.2 actor_loss=0.3314 critic_loss=139801075081.8462 entropy=17.6449 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 55440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-549354.0 mean_steps=12.7
|
|
[Episode 55450] reward=-117264709.2 actor_loss=0.3258 critic_loss=133606457344.0000 entropy=17.6444 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 55460] reward=-113866003.5 actor_loss=0.3244 critic_loss=133678705085.2174 entropy=17.6486 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 55460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-601139.1 mean_steps=12.3
|
|
[Episode 55470] reward=-115201747.7 actor_loss=0.4606 critic_loss=132738240316.9524 entropy=17.6318 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 55480] reward=-110374400.1 actor_loss=0.3953 critic_loss=124928294912.0000 entropy=17.6326 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 55480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526806.8 mean_steps=13.8
|
|
[Episode 55490] reward=-118793986.0 actor_loss=0.2600 critic_loss=148121819818.6667 entropy=17.6476 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 55500] reward=-116495987.5 actor_loss=0.2597 critic_loss=138600502886.4000 entropy=17.6552 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 55500] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-315522.9 mean_steps=17.2
|
|
[Episode 55510] reward=-118676475.3 actor_loss=0.3625 critic_loss=141158233750.5882 entropy=17.6499 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 55520] reward=-122268339.3 actor_loss=0.2492 critic_loss=136944106955.0345 entropy=17.6516 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 55520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488469.0 mean_steps=14.4
|
|
[Episode 55530] reward=-122641499.0 actor_loss=0.2919 critic_loss=140354981546.6667 entropy=17.6613 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 55540] reward=-118733518.1 actor_loss=0.3810 critic_loss=137579567662.5454 entropy=17.6707 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 55540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-536433.7 mean_steps=14.9
|
|
[Episode 55550] reward=-119761533.1 actor_loss=0.2782 critic_loss=142907765813.8947 entropy=17.6893 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 55560] reward=-115264956.0 actor_loss=0.3595 critic_loss=135623081038.7692 entropy=17.6929 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 55560] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-351492.7 mean_steps=16.6
|
|
[Episode 55570] reward=-119296188.4 actor_loss=0.3134 critic_loss=148901703188.4800 entropy=17.6841 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 55580] reward=-107560391.8 actor_loss=0.3667 critic_loss=124700242505.1429 entropy=17.6869 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 55580] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-318663.1 mean_steps=17.4
|
|
[Episode 55590] reward=-117852854.3 actor_loss=0.2983 critic_loss=132737686042.9474 entropy=17.6937 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 55600] reward=-114280792.6 actor_loss=0.3009 critic_loss=137611310421.3333 entropy=17.6820 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 55600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-601865.5 mean_steps=12.9
|
|
[Episode 55610] reward=-116222422.3 actor_loss=0.3679 critic_loss=138555348884.2105 entropy=17.6800 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 55620] reward=-124329080.5 actor_loss=0.3568 critic_loss=486944435404.8000 entropy=17.6773 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 55620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447400.7 mean_steps=15.5
|
|
[Episode 55630] reward=-120779349.6 actor_loss=0.2962 critic_loss=140645505750.7097 entropy=17.6814 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 55640] reward=-115879786.7 actor_loss=0.2076 critic_loss=138765698194.2857 entropy=17.6692 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 55640] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-686423.5 mean_steps=12.3
|
|
[Episode 55650] reward=-115391971.9 actor_loss=0.3589 critic_loss=136022355456.0000 entropy=17.6809 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 55660] reward=-120145201.4 actor_loss=0.2963 critic_loss=140107568049.2308 entropy=17.6759 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 55660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418650.5 mean_steps=15.2
|
|
[Episode 55670] reward=-117700653.6 actor_loss=0.3259 critic_loss=136163238833.2308 entropy=17.6811 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 55680] reward=-121141404.4 actor_loss=0.2679 critic_loss=140555846178.1333 entropy=17.6987 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 55680] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-264117.9 mean_steps=18.2
|
|
[Episode 55690] reward=-120603695.2 actor_loss=0.3215 critic_loss=143803629568.0000 entropy=17.7001 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 55700] reward=-114013886.3 actor_loss=0.1964 critic_loss=136299501795.5556 entropy=17.6963 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 55700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-655640.0 mean_steps=12.6
|
|
[Episode 55710] reward=-117117761.4 actor_loss=0.3081 critic_loss=141497281837.1765 entropy=17.6897 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 55720] reward=-115268499.2 actor_loss=0.3002 critic_loss=151202517916.9032 entropy=17.6967 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 55720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-600377.1 mean_steps=13.6
|
|
[Episode 55730] reward=-115746202.3 actor_loss=0.3178 critic_loss=135400011414.5882 entropy=17.6938 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 55740] reward=-115676536.9 actor_loss=0.3130 critic_loss=130886726815.2889 entropy=17.6964 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 55740] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-719275.9 mean_steps=10.7
|
|
[Episode 55750] reward=-118439006.0 actor_loss=0.3040 critic_loss=139373669580.8000 entropy=17.6992 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 55760] reward=-116377585.5 actor_loss=0.3203 critic_loss=135732474675.2000 entropy=17.7121 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 55760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-397742.5 mean_steps=15.7
|
|
[Episode 55770] reward=-119659844.0 actor_loss=0.2336 critic_loss=142988174872.3810 entropy=17.7246 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 55780] reward=-121377453.2 actor_loss=0.2958 critic_loss=144540781977.6000 entropy=17.7272 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 55780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-582748.0 mean_steps=12.2
|
|
[Episode 55790] reward=-114983474.9 actor_loss=0.2858 critic_loss=130282159737.9048 entropy=17.7293 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 55800] reward=-119898411.8 actor_loss=0.2190 critic_loss=143413073664.0000 entropy=17.7308 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 55800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392997.5 mean_steps=15.8
|
|
[Episode 55810] reward=-119471807.4 actor_loss=0.3078 critic_loss=139703222089.9556 entropy=17.7331 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 55820] reward=-119370549.3 actor_loss=0.3425 critic_loss=139189073627.4286 entropy=17.7399 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 55820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459953.8 mean_steps=14.6
|
|
[Episode 55830] reward=-116971995.9 actor_loss=0.2178 critic_loss=140404456586.3784 entropy=17.7441 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 55840] reward=-120454475.8 actor_loss=0.3277 critic_loss=181229263827.4783 entropy=17.7461 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 55840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-475631.0 mean_steps=15.3
|
|
[Episode 55850] reward=-115503563.2 actor_loss=0.3181 critic_loss=132971220536.8889 entropy=17.7311 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 55860] reward=-118532765.5 actor_loss=0.1849 critic_loss=139467163306.6667 entropy=17.7214 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 55860] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-378053.8 mean_steps=16.9
|
|
[Episode 55870] reward=-116181242.7 actor_loss=0.2561 critic_loss=136391632141.4737 entropy=17.7097 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 55880] reward=-115431552.4 actor_loss=0.3714 critic_loss=136450998587.0769 entropy=17.7122 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 55880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538162.8 mean_steps=13.6
|
|
[Episode 55890] reward=-119404896.4 actor_loss=0.2728 critic_loss=137323142204.2353 entropy=17.6976 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 55900] reward=-115798015.3 actor_loss=0.2993 critic_loss=139611787479.5789 entropy=17.7017 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 55900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-447173.7 mean_steps=13.4
|
|
[Episode 55910] reward=-121145214.4 actor_loss=0.1999 critic_loss=141718607345.3714 entropy=17.6933 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 55920] reward=-120323501.4 actor_loss=0.2203 critic_loss=136993220705.5238 entropy=17.6893 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 55920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497361.9 mean_steps=13.9
|
|
[Episode 55930] reward=-123509454.3 actor_loss=0.2738 critic_loss=139443270509.7143 entropy=17.6980 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 55940] reward=-121822730.6 actor_loss=0.2162 critic_loss=143753165027.5555 entropy=17.6748 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 55940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482871.6 mean_steps=14.8
|
|
[Episode 55950] reward=-118350236.7 actor_loss=0.1753 critic_loss=135841147723.2941 entropy=17.6759 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 55960] reward=-123841595.6 actor_loss=0.1943 critic_loss=148871478814.1176 entropy=17.6817 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 55960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517806.6 mean_steps=14.1
|
|
[Episode 55970] reward=-119390758.8 actor_loss=0.2906 critic_loss=137762343470.5454 entropy=17.6672 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 55980] reward=-115676666.9 actor_loss=0.2840 critic_loss=135148624827.7333 entropy=17.6556 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 55980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534331.2 mean_steps=14.4
|
|
[Episode 55990] reward=-118575335.8 actor_loss=0.3633 critic_loss=138073945268.7059 entropy=17.6579 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 56000] reward=-123478185.7 actor_loss=0.2195 critic_loss=147552118253.0370 entropy=17.6496 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 56000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-651320.8 mean_steps=12.2
|
|
[Episode 56010] reward=-121418536.1 actor_loss=0.0930 critic_loss=139537917074.2857 entropy=17.6535 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 56020] reward=-117424882.6 actor_loss=0.2793 critic_loss=142270101317.8182 entropy=17.6600 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 56020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502905.6 mean_steps=14.0
|
|
[Episode 56030] reward=-115830316.9 actor_loss=0.2575 critic_loss=137244940288.0000 entropy=17.6729 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 56040] reward=-120121486.3 actor_loss=0.3240 critic_loss=144630159360.0000 entropy=17.6757 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 56040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511913.0 mean_steps=14.2
|
|
[Episode 56050] reward=-117518890.4 actor_loss=0.3986 critic_loss=138853544027.0222 entropy=17.6747 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 56060] reward=-117460651.8 actor_loss=0.2607 critic_loss=138260647563.6364 entropy=17.6844 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 56060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-378558.0 mean_steps=16.9
|
|
[Episode 56070] reward=-121264649.6 actor_loss=0.2180 critic_loss=139377136150.2609 entropy=17.6710 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 56080] reward=-113552309.6 actor_loss=0.1859 critic_loss=145353094758.4000 entropy=17.6750 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 56080] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-341147.7 mean_steps=16.6
|
|
[Episode 56090] reward=-122030556.6 actor_loss=0.3227 critic_loss=144360189756.9524 entropy=17.6833 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 56100] reward=-113545431.6 actor_loss=0.3202 critic_loss=140123234304.0000 entropy=17.6754 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 56100] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-416701.3 mean_steps=17.2
|
|
[Episode 56110] reward=-120154855.7 actor_loss=0.2174 critic_loss=142037501197.4737 entropy=17.6654 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 56120] reward=-122551016.9 actor_loss=0.1436 critic_loss=144254844135.2258 entropy=17.6639 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 56120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562577.7 mean_steps=13.4
|
|
[Episode 56130] reward=-110448639.9 actor_loss=0.3811 critic_loss=128543399389.8667 entropy=17.6609 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 56140] reward=-115173815.6 actor_loss=0.2358 critic_loss=136750824015.6444 entropy=17.6528 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 56140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503327.0 mean_steps=14.8
|
|
[Episode 56150] reward=-119009745.0 actor_loss=0.2689 critic_loss=139172232825.9048 entropy=17.6364 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 56160] reward=-121706652.4 actor_loss=0.2277 critic_loss=142016043008.0000 entropy=17.6526 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 56160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521147.8 mean_steps=14.2
|
|
[Episode 56170] reward=-119171485.7 actor_loss=0.2631 critic_loss=137015215148.5217 entropy=17.6550 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 56180] reward=-116589484.1 actor_loss=0.2940 critic_loss=132013915553.1852 entropy=17.6454 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 56180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-522223.4 mean_steps=14.9
|
|
[Episode 56190] reward=-122594144.0 actor_loss=0.3157 critic_loss=142355841380.1739 entropy=17.6403 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 56200] reward=-114777585.4 actor_loss=0.3165 critic_loss=130383331737.6000 entropy=17.6477 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 56200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550008.7 mean_steps=13.5
|
|
[Episode 56210] reward=-119865659.7 actor_loss=0.3577 critic_loss=138088426574.7692 entropy=17.6555 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 56220] reward=-118516803.4 actor_loss=0.2500 critic_loss=141448803550.6087 entropy=17.6610 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 56220] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-631852.8 mean_steps=12.2
|
|
[Episode 56230] reward=-119449272.9 actor_loss=0.2763 critic_loss=139386986040.8889 entropy=17.6815 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 56240] reward=-114657604.9 actor_loss=0.4376 critic_loss=133420883148.8000 entropy=17.6793 approx_kl=0.0118 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 56240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-608421.9 mean_steps=12.1
|
|
[Episode 56250] reward=-120133288.0 actor_loss=0.2283 critic_loss=137151719014.4000 entropy=17.6801 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 56260] reward=-124033008.1 actor_loss=0.2125 critic_loss=151782230308.5714 entropy=17.6954 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 56260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462904.9 mean_steps=15.8
|
|
[Episode 56270] reward=-122730021.4 actor_loss=0.2132 critic_loss=147761005909.3333 entropy=17.7112 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 56280] reward=-120764720.0 actor_loss=0.2165 critic_loss=146667205973.3333 entropy=17.7191 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 56280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564375.9 mean_steps=13.7
|
|
[Episode 56290] reward=-121457664.8 actor_loss=0.2960 critic_loss=148413701895.7576 entropy=17.7161 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 56300] reward=-122121018.2 actor_loss=0.3132 critic_loss=136894864444.2353 entropy=17.7201 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 56300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-393706.1 mean_steps=15.2
|
|
[Episode 56310] reward=-115297585.2 actor_loss=0.3137 critic_loss=130808883411.8621 entropy=17.7028 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 56320] reward=-115111344.3 actor_loss=0.3852 critic_loss=133844250445.9130 entropy=17.6907 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 56320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489317.6 mean_steps=13.8
|
|
[Episode 56330] reward=-119633579.8 actor_loss=0.2862 critic_loss=140467087360.0000 entropy=17.6995 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 56340] reward=-118242362.9 actor_loss=0.3240 critic_loss=138138358169.6000 entropy=17.6927 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 56340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-379196.7 mean_steps=16.9
|
|
[Episode 56350] reward=-116512093.4 actor_loss=0.2547 critic_loss=140822047305.1429 entropy=17.6800 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 56360] reward=-122102397.5 actor_loss=0.1844 critic_loss=160569441043.6923 entropy=17.6840 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 56360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484581.9 mean_steps=14.2
|
|
[Episode 56370] reward=-117265858.6 actor_loss=0.2633 critic_loss=141105038677.3333 entropy=17.6779 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 56380] reward=-119751563.7 actor_loss=0.2410 critic_loss=134206511104.0000 entropy=17.6716 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 56380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545367.1 mean_steps=13.1
|
|
[Episode 56390] reward=-111503894.5 actor_loss=0.3361 critic_loss=137439902651.7333 entropy=17.6642 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 56400] reward=-122167561.9 actor_loss=0.2722 critic_loss=166147052758.3256 entropy=17.6559 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 56400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592246.0 mean_steps=13.7
|
|
[Episode 56410] reward=-120587153.2 actor_loss=0.2127 critic_loss=142822000453.8182 entropy=17.6447 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 56420] reward=-116955332.7 actor_loss=0.2345 critic_loss=137440280327.7576 entropy=17.6308 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 56420] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-339016.8 mean_steps=17.7
|
|
[Episode 56430] reward=-123974463.6 actor_loss=0.3236 critic_loss=153054064955.0769 entropy=17.6206 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 56440] reward=-122676059.6 actor_loss=0.3356 critic_loss=143530013044.3636 entropy=17.6122 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 56440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-484251.2 mean_steps=15.8
|
|
[Episode 56450] reward=-123463552.8 actor_loss=0.2393 critic_loss=143414228036.2667 entropy=17.6092 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 56460] reward=-119430160.7 actor_loss=0.2228 critic_loss=133895828821.3333 entropy=17.6005 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 56460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386749.5 mean_steps=15.9
|
|
[Episode 56470] reward=-119439258.7 actor_loss=0.2068 critic_loss=134575850216.7273 entropy=17.5924 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 56480] reward=-119961175.4 actor_loss=0.3199 critic_loss=139205018344.7273 entropy=17.5878 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 56480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529244.7 mean_steps=13.3
|
|
[Episode 56490] reward=-123287676.2 actor_loss=0.1370 critic_loss=138384949729.8824 entropy=17.5932 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 56500] reward=-122428012.7 actor_loss=0.3469 critic_loss=151804494686.3158 entropy=17.5891 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 56500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-596042.4 mean_steps=11.8
|
|
[Episode 56510] reward=-117015085.9 actor_loss=0.4307 critic_loss=132987064873.5135 entropy=17.5830 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 56520] reward=-121587999.8 actor_loss=0.3571 critic_loss=139218045501.4400 entropy=17.5800 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 56520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526165.2 mean_steps=14.4
|
|
[Episode 56530] reward=-124761778.4 actor_loss=0.2156 critic_loss=142645911161.9048 entropy=17.5843 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 56540] reward=-117275152.8 actor_loss=0.3322 critic_loss=133929821558.6341 entropy=17.5928 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 56540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-429195.8 mean_steps=14.4
|
|
[Episode 56550] reward=-123608490.5 actor_loss=0.3602 critic_loss=142267087725.7143 entropy=17.6054 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 56560] reward=-118087790.0 actor_loss=0.3002 critic_loss=134529255911.6190 entropy=17.6108 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 56560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-409257.7 mean_steps=14.0
|
|
[Episode 56570] reward=-127257826.3 actor_loss=0.2587 critic_loss=151969964942.2222 entropy=17.6175 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 56580] reward=-120476649.8 actor_loss=0.2790 critic_loss=149899035066.8108 entropy=17.6343 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 56580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554389.1 mean_steps=12.3
|
|
[Episode 56590] reward=-115109343.4 actor_loss=0.1949 critic_loss=131650625179.8261 entropy=17.6359 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 56600] reward=-117506106.5 actor_loss=0.2778 critic_loss=139554828288.0000 entropy=17.6426 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 56600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575152.5 mean_steps=12.6
|
|
[Episode 56610] reward=-116900607.6 actor_loss=0.2249 critic_loss=142819981357.5111 entropy=17.6338 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 56620] reward=-118750918.7 actor_loss=0.2186 critic_loss=138303934122.6667 entropy=17.6244 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 56620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452331.3 mean_steps=14.4
|
|
[Episode 56630] reward=-122164325.4 actor_loss=0.3065 critic_loss=148549180711.8222 entropy=17.6059 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 56640] reward=-123809465.2 actor_loss=0.2839 critic_loss=153243634892.8000 entropy=17.6160 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 56640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495915.6 mean_steps=13.8
|
|
[Episode 56650] reward=-114092024.8 actor_loss=0.3228 critic_loss=126017606997.3333 entropy=17.6078 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 56660] reward=-116455508.1 actor_loss=0.3205 critic_loss=142089531483.0222 entropy=17.6030 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 56660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489955.7 mean_steps=14.0
|
|
[Episode 56670] reward=-116553180.6 actor_loss=0.2362 critic_loss=133299305813.3333 entropy=17.6000 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 56680] reward=-120562120.9 actor_loss=0.2242 critic_loss=139288087210.6667 entropy=17.6009 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 56680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555771.5 mean_steps=13.6
|
|
[Episode 56690] reward=-116914504.2 actor_loss=0.3657 critic_loss=141696852388.1026 entropy=17.5951 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 56700] reward=-123166458.1 actor_loss=0.3138 critic_loss=145532032415.1351 entropy=17.5871 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 56700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560905.1 mean_steps=13.5
|
|
[Episode 56710] reward=-116565769.2 actor_loss=0.3760 critic_loss=130931023689.9556 entropy=17.5882 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 56720] reward=-118711879.3 actor_loss=0.2456 critic_loss=136509171302.4000 entropy=17.6209 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 56720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521207.6 mean_steps=14.1
|
|
[Episode 56730] reward=-117926686.4 actor_loss=0.3312 critic_loss=134317894678.7556 entropy=17.6293 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 56740] reward=-115718389.7 actor_loss=0.3550 critic_loss=135466421816.8889 entropy=17.6274 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 56740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-560090.7 mean_steps=14.3
|
|
[Episode 56750] reward=-115920265.3 actor_loss=0.2877 critic_loss=130374978437.1200 entropy=17.6373 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 56760] reward=-124179098.4 actor_loss=0.2695 critic_loss=144561779986.7317 entropy=17.6421 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 56760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462763.4 mean_steps=15.5
|
|
[Episode 56770] reward=-117292067.7 actor_loss=0.3800 critic_loss=140999701578.9268 entropy=17.6426 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 56780] reward=-119975086.6 actor_loss=0.3088 critic_loss=141717233195.8857 entropy=17.6506 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 56780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531097.0 mean_steps=14.0
|
|
[Episode 56790] reward=-116435974.2 actor_loss=0.2321 critic_loss=132553135269.1613 entropy=17.6587 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 56800] reward=-124783549.9 actor_loss=0.2786 critic_loss=153660115033.0435 entropy=17.6628 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 56800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512736.1 mean_steps=13.4
|
|
[Episode 56810] reward=-120639972.7 actor_loss=0.2636 critic_loss=146945349339.4286 entropy=17.6579 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 56820] reward=-122203895.3 actor_loss=0.3124 critic_loss=153062623056.4572 entropy=17.6533 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 56820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533109.9 mean_steps=14.2
|
|
[Episode 56830] reward=-118521225.2 actor_loss=0.2522 critic_loss=141133724603.7333 entropy=17.6520 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 56840] reward=-117974905.3 actor_loss=0.3509 critic_loss=138344456756.9655 entropy=17.6509 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 56840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-515642.7 mean_steps=15.2
|
|
[Episode 56850] reward=-120661613.6 actor_loss=0.2833 critic_loss=138226648945.1163 entropy=17.6587 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 56860] reward=-117454815.4 actor_loss=0.2604 critic_loss=135668199150.9333 entropy=17.6448 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 56860] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-359411.8 mean_steps=17.9
|
|
[Episode 56870] reward=-118752948.6 actor_loss=0.3625 critic_loss=154626802974.7200 entropy=17.6587 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 56880] reward=-119138884.1 actor_loss=0.2309 critic_loss=150122477688.4706 entropy=17.6513 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 56880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-382391.7 mean_steps=15.1
|
|
[Episode 56890] reward=-121077693.5 actor_loss=0.3597 critic_loss=144322104097.3913 entropy=17.6563 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 56900] reward=-119669305.6 actor_loss=0.2982 critic_loss=138331514125.4737 entropy=17.6411 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 56900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-370618.7 mean_steps=16.1
|
|
[Episode 56910] reward=-117829708.3 actor_loss=0.3115 critic_loss=141478653952.0000 entropy=17.6254 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 56920] reward=-117979135.8 actor_loss=0.2776 critic_loss=134923788053.9429 entropy=17.6274 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 56920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514683.6 mean_steps=13.8
|
|
[Episode 56930] reward=-115708841.2 actor_loss=0.3276 critic_loss=129206390374.4000 entropy=17.6274 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 56940] reward=-115564012.9 actor_loss=0.2504 critic_loss=143118399380.2105 entropy=17.6340 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 56940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-570105.3 mean_steps=14.2
|
|
[Episode 56950] reward=-127193746.0 actor_loss=0.2625 critic_loss=280900540308.2105 entropy=17.6312 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 56960] reward=-120419879.6 actor_loss=0.3718 critic_loss=137911628361.1429 entropy=17.6443 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 56960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492832.9 mean_steps=13.9
|
|
[Episode 56970] reward=-119158501.9 actor_loss=0.3636 critic_loss=194264834958.2222 entropy=17.6489 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 56980] reward=-119761074.3 actor_loss=0.2751 critic_loss=147457444886.7556 entropy=17.6532 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 56980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521100.2 mean_steps=14.3
|
|
[Episode 56990] reward=-120275646.0 actor_loss=0.2747 critic_loss=143081050567.1111 entropy=17.6441 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 57000] reward=-122896677.9 actor_loss=0.3191 critic_loss=140566165679.5428 entropy=17.6391 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 57000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-541668.4 mean_steps=12.7
|
|
[Episode 57010] reward=-125143837.4 actor_loss=0.1855 critic_loss=162695034341.0526 entropy=17.6211 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 57020] reward=-113266288.2 actor_loss=0.3351 critic_loss=130908710320.3556 entropy=17.6358 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 57020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-439874.5 mean_steps=13.8
|
|
[Episode 57030] reward=-120551607.1 actor_loss=0.2624 critic_loss=134210780391.2258 entropy=17.6221 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 57040] reward=-119114619.5 actor_loss=0.2631 critic_loss=140467382744.6154 entropy=17.6167 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 57040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-513731.1 mean_steps=14.8
|
|
[Episode 57050] reward=-119398464.7 actor_loss=0.2915 critic_loss=136514740788.9655 entropy=17.6093 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 57060] reward=-115836913.4 actor_loss=0.3145 critic_loss=134738424530.8235 entropy=17.6224 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 57060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-634478.9 mean_steps=12.2
|
|
[Episode 57070] reward=-120363601.8 actor_loss=0.2370 critic_loss=140066163825.7778 entropy=17.6214 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 57080] reward=-121334122.6 actor_loss=0.1919 critic_loss=145187210386.2857 entropy=17.6152 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 57080] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-337331.7 mean_steps=17.8
|
|
[Episode 57090] reward=-119848798.1 actor_loss=0.1841 critic_loss=195280673905.7778 entropy=17.6172 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 57100] reward=-113369632.8 actor_loss=0.2780 critic_loss=155340619776.0000 entropy=17.6202 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 57100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-572943.4 mean_steps=14.3
|
|
[Episode 57110] reward=-112370771.2 actor_loss=0.2949 critic_loss=134567638220.8000 entropy=17.6235 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 57120] reward=-118093076.0 actor_loss=0.2373 critic_loss=141591386714.3529 entropy=17.6211 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 57120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-569712.9 mean_steps=13.1
|
|
[Episode 57130] reward=-118711941.7 actor_loss=0.2294 critic_loss=134508081896.7273 entropy=17.6131 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 57140] reward=-115284451.1 actor_loss=0.2599 critic_loss=131065217024.0000 entropy=17.6065 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 57140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-442621.2 mean_steps=16.7
|
|
[Episode 57150] reward=-119626898.0 actor_loss=0.3741 critic_loss=136238671644.4444 entropy=17.5946 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 57160] reward=-123821182.2 actor_loss=0.2649 critic_loss=145194656225.8824 entropy=17.5979 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 57160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491637.0 mean_steps=14.4
|
|
[Episode 57170] reward=-121085321.7 actor_loss=0.2767 critic_loss=143355657377.6842 entropy=17.5862 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 57180] reward=-120872558.6 actor_loss=0.2893 critic_loss=136577352424.7273 entropy=17.5898 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 57180] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-327853.4 mean_steps=17.1
|
|
[Episode 57190] reward=-118791680.9 actor_loss=0.2965 critic_loss=139214508754.8235 entropy=17.5782 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 57200] reward=-122814775.7 actor_loss=0.2813 critic_loss=138694453930.6667 entropy=17.5836 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 57200] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-406384.6 mean_steps=17.6
|
|
[Episode 57210] reward=-115443349.2 actor_loss=0.3129 critic_loss=134213768533.3333 entropy=17.5929 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 57220] reward=-122236152.1 actor_loss=0.3433 critic_loss=141701814272.0000 entropy=17.6003 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 57220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577889.0 mean_steps=13.8
|
|
[Episode 57230] reward=-122686519.7 actor_loss=0.4129 critic_loss=154983556365.4737 entropy=17.6040 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 57240] reward=-121953966.2 actor_loss=0.2066 critic_loss=168960699050.6667 entropy=17.6167 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 57240] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-734647.5 mean_steps=10.3
|
|
[Episode 57250] reward=-115271362.6 actor_loss=0.3158 critic_loss=128934005230.3448 entropy=17.6317 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 57260] reward=-116090720.0 actor_loss=0.2553 critic_loss=131916129348.2667 entropy=17.6283 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 57260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-477273.8 mean_steps=16.6
|
|
[Episode 57270] reward=-122475044.8 actor_loss=0.3224 critic_loss=140316284791.4667 entropy=17.6445 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 57280] reward=-122112842.5 actor_loss=0.1984 critic_loss=147791289275.7333 entropy=17.6486 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 57280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479526.1 mean_steps=15.4
|
|
[Episode 57290] reward=-119721296.9 actor_loss=0.2952 critic_loss=138848698974.8148 entropy=17.6615 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 57300] reward=-120514252.6 actor_loss=0.3383 critic_loss=174221781254.5641 entropy=17.6741 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 57300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547200.6 mean_steps=13.7
|
|
[Episode 57310] reward=-117460899.5 actor_loss=0.2902 critic_loss=134857117891.0476 entropy=17.6768 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 57320] reward=-114250789.9 actor_loss=0.3389 critic_loss=142190729984.0000 entropy=17.6673 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 57320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495928.6 mean_steps=14.3
|
|
[Episode 57330] reward=-119829783.1 actor_loss=0.2928 critic_loss=134101058355.2000 entropy=17.6738 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 57340] reward=-119083987.4 actor_loss=0.3142 critic_loss=136241133795.5556 entropy=17.6709 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 57340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449235.6 mean_steps=15.6
|
|
[Episode 57350] reward=-119943179.2 actor_loss=0.2110 critic_loss=139716620379.0222 entropy=17.6785 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 57360] reward=-115351209.7 actor_loss=0.3177 critic_loss=131944985941.3333 entropy=17.6825 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 57360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-554193.3 mean_steps=14.7
|
|
[Episode 57370] reward=-121487514.5 actor_loss=0.4275 critic_loss=143594824176.4849 entropy=17.6872 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 57380] reward=-123638407.1 actor_loss=0.2483 critic_loss=197399853283.5555 entropy=17.6839 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 57380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498202.6 mean_steps=14.2
|
|
[Episode 57390] reward=-118121957.4 actor_loss=0.2420 critic_loss=139849276146.5263 entropy=17.6927 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 57400] reward=-112418383.8 actor_loss=0.3585 critic_loss=130488441390.5455 entropy=17.6920 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 57400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498761.8 mean_steps=15.2
|
|
[Episode 57410] reward=-116404454.6 actor_loss=0.3427 critic_loss=133713518110.1176 entropy=17.6964 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 57420] reward=-111822833.8 actor_loss=0.2886 critic_loss=127264359262.3158 entropy=17.6965 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 57420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-583486.6 mean_steps=13.7
|
|
[Episode 57430] reward=-117057551.2 actor_loss=0.2819 critic_loss=140265925765.5652 entropy=17.6987 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 57440] reward=-114900652.1 actor_loss=0.2038 critic_loss=130473903860.8696 entropy=17.6952 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 57440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-515916.0 mean_steps=15.4
|
|
[Episode 57450] reward=-119020632.4 actor_loss=0.1730 critic_loss=137122473398.8571 entropy=17.7073 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 57460] reward=-119391395.5 actor_loss=0.2998 critic_loss=146296241664.0000 entropy=17.7097 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 57460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-628937.8 mean_steps=13.2
|
|
[Episode 57470] reward=-119614012.4 actor_loss=0.2046 critic_loss=135283577651.2000 entropy=17.7075 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 57480] reward=-119472841.8 actor_loss=0.2114 critic_loss=143433308842.6667 entropy=17.7279 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 57480] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-631933.0 mean_steps=12.1
|
|
[Episode 57490] reward=-120001515.3 actor_loss=0.1979 critic_loss=141742275409.1707 entropy=17.7171 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 57500] reward=-120205809.0 actor_loss=0.1839 critic_loss=139432677961.1429 entropy=17.7000 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 57500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570395.7 mean_steps=13.8
|
|
[Episode 57510] reward=-113952443.1 actor_loss=0.3450 critic_loss=129883111706.4828 entropy=17.6904 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 57520] reward=-119281283.1 actor_loss=0.3008 critic_loss=133482882480.3556 entropy=17.6925 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 57520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489154.0 mean_steps=15.1
|
|
[Episode 57530] reward=-115069260.8 actor_loss=0.2938 critic_loss=133494171784.5333 entropy=17.7035 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 57540] reward=-115942092.8 actor_loss=0.2511 critic_loss=145622452390.0540 entropy=17.6789 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 57540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513474.0 mean_steps=13.6
|
|
[Episode 57550] reward=-120813019.9 actor_loss=0.2347 critic_loss=136907860286.5778 entropy=17.6627 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 57560] reward=-116512047.3 actor_loss=0.2930 critic_loss=134926400807.8222 entropy=17.6676 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 57560] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-718085.0 mean_steps=10.9
|
|
[Episode 57570] reward=-116970014.9 actor_loss=0.2915 critic_loss=133731358134.8571 entropy=17.6603 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 57580] reward=-114411564.6 actor_loss=0.3086 critic_loss=129794214570.6667 entropy=17.6688 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 57580] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-313910.6 mean_steps=19.1
|
|
[Episode 57590] reward=-117486461.7 actor_loss=0.2311 critic_loss=132935525912.3810 entropy=17.6644 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 57600] reward=-118963505.7 actor_loss=0.3915 critic_loss=138541051725.9131 entropy=17.6764 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 57600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-667605.3 mean_steps=13.3
|
|
[Episode 57610] reward=-119146441.9 actor_loss=0.2835 critic_loss=143279677755.0769 entropy=17.6722 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 57620] reward=-116252157.6 actor_loss=0.3514 critic_loss=140943109903.0588 entropy=17.6713 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 57620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394915.4 mean_steps=16.6
|
|
[Episode 57630] reward=-113868764.8 actor_loss=0.2889 critic_loss=127241502537.9556 entropy=17.6703 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 57640] reward=-114194307.7 actor_loss=0.2949 critic_loss=133906511579.4286 entropy=17.6525 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 57640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482303.3 mean_steps=14.2
|
|
[Episode 57650] reward=-112741649.0 actor_loss=0.3289 critic_loss=127689434726.4000 entropy=17.6392 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 57660] reward=-116547739.9 actor_loss=0.2640 critic_loss=128090123132.7179 entropy=17.6385 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 57660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426099.8 mean_steps=15.9
|
|
[Episode 57670] reward=-116506751.0 actor_loss=0.2728 critic_loss=129543408218.3529 entropy=17.6394 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 57680] reward=-115634945.5 actor_loss=0.3769 critic_loss=128806320384.0000 entropy=17.6363 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 57680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-366211.1 mean_steps=16.4
|
|
[Episode 57690] reward=-114248065.1 actor_loss=0.2978 critic_loss=131661901163.3548 entropy=17.6294 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 57700] reward=-114704543.4 actor_loss=0.3766 critic_loss=129951513959.7838 entropy=17.6277 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 57700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-644859.1 mean_steps=12.4
|
|
[Episode 57710] reward=-116143999.1 actor_loss=0.3877 critic_loss=132973323934.8965 entropy=17.6321 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 57720] reward=-110553511.2 actor_loss=0.4546 critic_loss=123476040448.0000 entropy=17.6293 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 57720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481901.1 mean_steps=14.2
|
|
[Episode 57730] reward=-124767644.5 actor_loss=0.2918 critic_loss=223438804805.8182 entropy=17.6415 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 57740] reward=-118267502.3 actor_loss=0.3670 critic_loss=136603551646.4762 entropy=17.6322 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 57740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-382136.6 mean_steps=16.6
|
|
[Episode 57750] reward=-118505676.6 actor_loss=0.3611 critic_loss=134925563648.0000 entropy=17.6361 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 57760] reward=-119648448.2 actor_loss=0.2753 critic_loss=143159625728.0000 entropy=17.6465 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 57760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-668468.7 mean_steps=11.9
|
|
[Episode 57770] reward=-117903486.3 actor_loss=0.4038 critic_loss=140834216448.0000 entropy=17.6454 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 57780] reward=-119467306.3 actor_loss=0.2025 critic_loss=138893238272.0000 entropy=17.6446 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 57780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-422205.2 mean_steps=14.7
|
|
[Episode 57790] reward=-117217189.9 actor_loss=0.2961 critic_loss=169482349869.1765 entropy=17.6376 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 57800] reward=-113508323.8 actor_loss=0.4230 critic_loss=145100424078.2222 entropy=17.6465 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 57800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519059.7 mean_steps=14.7
|
|
[Episode 57810] reward=-116296417.7 actor_loss=0.3957 critic_loss=136154925843.6923 entropy=17.6525 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 57820] reward=-113547608.9 actor_loss=0.2412 critic_loss=128823478044.4444 entropy=17.6610 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 57820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590796.2 mean_steps=13.1
|
|
[Episode 57830] reward=-115644661.1 actor_loss=0.3051 critic_loss=145872006106.0741 entropy=17.6663 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 57840] reward=-116348488.9 actor_loss=0.2391 critic_loss=130325185457.2308 entropy=17.6619 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 57840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523467.3 mean_steps=14.6
|
|
[Episode 57850] reward=-124988295.6 actor_loss=0.1727 critic_loss=142533859425.5238 entropy=17.6584 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 57860] reward=-121853350.5 actor_loss=0.2254 critic_loss=140456183632.4572 entropy=17.6518 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 57860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-446836.8 mean_steps=14.1
|
|
[Episode 57870] reward=-116809203.0 actor_loss=0.2734 critic_loss=132804151637.3333 entropy=17.6504 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 57880] reward=-125147688.9 actor_loss=0.2883 critic_loss=145972895744.0000 entropy=17.6598 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 57880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430108.6 mean_steps=15.8
|
|
[Episode 57890] reward=-117167939.7 actor_loss=0.2975 critic_loss=139934102089.1429 entropy=17.6698 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 57900] reward=-119055813.0 actor_loss=0.1919 critic_loss=146087038390.8571 entropy=17.6721 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 57900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488336.4 mean_steps=14.2
|
|
[Episode 57910] reward=-117470070.8 actor_loss=0.3021 critic_loss=136210656304.7619 entropy=17.6765 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 57920] reward=-118994245.5 actor_loss=0.2566 critic_loss=142233898387.3940 entropy=17.6650 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 57920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456022.4 mean_steps=14.7
|
|
[Episode 57930] reward=-120330171.6 actor_loss=0.3040 critic_loss=139238403549.8667 entropy=17.6687 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 57940] reward=-115533731.9 actor_loss=0.2784 critic_loss=134973101959.5294 entropy=17.6847 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 57940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623328.4 mean_steps=12.2
|
|
[Episode 57950] reward=-117299953.1 actor_loss=0.2699 critic_loss=136083362514.8235 entropy=17.6841 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 57960] reward=-119951498.1 actor_loss=0.2101 critic_loss=141186728448.0000 entropy=17.6885 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 57960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505404.2 mean_steps=14.4
|
|
[Episode 57970] reward=-117917483.8 actor_loss=0.2743 critic_loss=133196882154.0571 entropy=17.6959 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 57980] reward=-118349207.5 actor_loss=0.3235 critic_loss=135828950505.7391 entropy=17.7114 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 57980] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-361311.0 mean_steps=16.8
|
|
[Episode 57990] reward=-116348533.5 actor_loss=0.2980 critic_loss=134100550424.7742 entropy=17.7114 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 58000] reward=-116324941.0 actor_loss=0.2772 critic_loss=129586745148.9524 entropy=17.6973 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 58000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529622.4 mean_steps=13.5
|
|
[Episode 58010] reward=-116839341.0 actor_loss=0.3228 critic_loss=132991649382.4000 entropy=17.6894 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 58020] reward=-119760398.7 actor_loss=0.3562 critic_loss=132057880985.6000 entropy=17.6731 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 58020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491275.1 mean_steps=15.1
|
|
[Episode 58030] reward=-115162042.1 actor_loss=0.3366 critic_loss=128574503891.4783 entropy=17.6690 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 58040] reward=-115280556.5 actor_loss=0.2405 critic_loss=134822208118.1538 entropy=17.6620 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 58040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-463433.0 mean_steps=17.1
|
|
[Episode 58050] reward=-114315619.0 actor_loss=0.2380 critic_loss=131607248622.9333 entropy=17.6495 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 58060] reward=-122677046.6 actor_loss=0.1764 critic_loss=139019268649.5135 entropy=17.6437 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 58060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-605507.7 mean_steps=12.7
|
|
[Episode 58070] reward=-118644513.0 actor_loss=0.4163 critic_loss=139014632168.7273 entropy=17.6510 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 58080] reward=-117313416.5 actor_loss=0.3489 critic_loss=136247039414.8571 entropy=17.6507 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 58080] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-293214.6 mean_steps=17.6
|
|
[Episode 58090] reward=-139390304.0 actor_loss=0.1949 critic_loss=2747722416492.0889 entropy=17.6472 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 58100] reward=-111934135.1 actor_loss=0.2771 critic_loss=135179957826.7826 entropy=17.6610 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 58100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-618196.7 mean_steps=13.1
|
|
[Episode 58110] reward=-116637963.6 actor_loss=0.2658 critic_loss=134738355244.5217 entropy=17.6569 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 58120] reward=-119653965.2 actor_loss=0.3554 critic_loss=142554716569.6000 entropy=17.6453 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 58120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468875.7 mean_steps=14.9
|
|
[Episode 58130] reward=-113134078.3 actor_loss=0.1697 critic_loss=126758794581.3333 entropy=17.6427 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 58140] reward=-116315681.8 actor_loss=0.3855 critic_loss=137881154166.1538 entropy=17.6235 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 58140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556926.9 mean_steps=13.6
|
|
[Episode 58150] reward=-114618741.4 actor_loss=0.3080 critic_loss=127965071360.0000 entropy=17.6153 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 58160] reward=-117004284.2 actor_loss=0.2227 critic_loss=129339514697.9556 entropy=17.6049 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 58160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507031.8 mean_steps=14.3
|
|
[Episode 58170] reward=-116001432.4 actor_loss=0.3197 critic_loss=137734881280.0000 entropy=17.5907 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 58180] reward=-114112867.6 actor_loss=0.3436 critic_loss=129694541141.3333 entropy=17.5744 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 58180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-356561.6 mean_steps=16.4
|
|
[Episode 58190] reward=-116834956.7 actor_loss=0.3151 critic_loss=135551144755.2000 entropy=17.5684 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 58200] reward=-121142880.2 actor_loss=0.1937 critic_loss=145814173013.3333 entropy=17.5657 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 58200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619609.8 mean_steps=13.0
|
|
[Episode 58210] reward=-115423452.8 actor_loss=0.2904 critic_loss=134027481251.8400 entropy=17.5564 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 58220] reward=-117824678.8 actor_loss=0.2358 critic_loss=138227471701.3333 entropy=17.5563 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 58220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-615575.9 mean_steps=12.9
|
|
[Episode 58230] reward=-115655708.0 actor_loss=0.1928 critic_loss=127282136405.3333 entropy=17.5527 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 58240] reward=-121242893.9 actor_loss=0.2242 critic_loss=141516331998.9677 entropy=17.5561 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 58240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441988.0 mean_steps=14.9
|
|
[Episode 58250] reward=-114663331.8 actor_loss=0.2929 critic_loss=137375486589.1555 entropy=17.5466 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 58260] reward=-117055450.3 actor_loss=0.2817 critic_loss=134097993296.8421 entropy=17.5485 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 58260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466202.0 mean_steps=14.8
|
|
[Episode 58270] reward=-108723492.3 actor_loss=0.3477 critic_loss=125897773986.9091 entropy=17.5263 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 58280] reward=-116885087.7 actor_loss=0.1904 critic_loss=131975509858.4615 entropy=17.5327 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 58280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527741.9 mean_steps=14.3
|
|
[Episode 58290] reward=-115541961.6 actor_loss=0.3664 critic_loss=141734948352.0000 entropy=17.5226 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 58300] reward=-119946021.2 actor_loss=0.2885 critic_loss=142450744551.2258 entropy=17.5160 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 58300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507418.0 mean_steps=14.0
|
|
[Episode 58310] reward=-117909592.5 actor_loss=0.2536 critic_loss=135089604608.0000 entropy=17.5244 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 58320] reward=-111393449.8 actor_loss=0.4112 critic_loss=127298868317.0909 entropy=17.5271 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 58320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-647992.3 mean_steps=12.2
|
|
[Episode 58330] reward=-116050443.2 actor_loss=0.3019 critic_loss=139633232029.5385 entropy=17.5359 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 58340] reward=-117505447.4 actor_loss=0.1624 critic_loss=136551767153.7778 entropy=17.5244 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 58340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-458976.7 mean_steps=14.0
|
|
[Episode 58350] reward=-111278545.9 actor_loss=0.2214 critic_loss=122241940957.8667 entropy=17.5372 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 58360] reward=-121170786.7 actor_loss=0.2730 critic_loss=141135260027.2592 entropy=17.5498 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 58360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542801.5 mean_steps=13.3
|
|
[Episode 58370] reward=-118941035.3 actor_loss=0.2359 critic_loss=133262535884.8000 entropy=17.5309 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 58380] reward=-118817971.8 actor_loss=0.2847 critic_loss=135287687668.6222 entropy=17.5449 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 58380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597746.6 mean_steps=12.6
|
|
[Episode 58390] reward=-112989796.5 actor_loss=0.3679 critic_loss=127904766976.0000 entropy=17.5339 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 58400] reward=-127750817.9 actor_loss=0.2667 critic_loss=452675189294.5455 entropy=17.5313 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 58400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504725.1 mean_steps=15.0
|
|
[Episode 58410] reward=-831248343.8 actor_loss=81.7690 critic_loss=837100868550291.8750 entropy=17.5341 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 58420] reward=-169795672.2 actor_loss=0.3289 critic_loss=5342719377408.0000 entropy=17.5548 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 58420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-413975.8 mean_steps=16.8
|
|
[Episode 58430] reward=-122230761.1 actor_loss=0.3019 critic_loss=139347395470.2222 entropy=17.5585 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 58440] reward=-116635413.8 actor_loss=0.2987 critic_loss=135296934109.4054 entropy=17.5684 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 58440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486957.9 mean_steps=13.9
|
|
[Episode 58450] reward=-118821137.7 actor_loss=0.2602 critic_loss=134527343346.5263 entropy=17.5681 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 58460] reward=-118501355.1 actor_loss=0.2771 critic_loss=152929508465.7778 entropy=17.5677 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 58460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-466708.1 mean_steps=16.0
|
|
[Episode 58470] reward=-122501342.9 actor_loss=0.3792 critic_loss=177782426072.6154 entropy=17.5752 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 58480] reward=-119613185.5 actor_loss=0.2950 critic_loss=141565067792.5161 entropy=17.5790 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 58480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536453.9 mean_steps=14.3
|
|
[Episode 58490] reward=-119550195.5 actor_loss=0.3637 critic_loss=138663225662.5778 entropy=17.5802 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 58500] reward=-125743172.9 actor_loss=0.3225 critic_loss=262067482491.8710 entropy=17.5735 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 58500] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-334801.7 mean_steps=18.7
|
|
[Episode 58510] reward=-116778905.1 actor_loss=0.1878 critic_loss=132169759363.6572 entropy=17.5718 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 58520] reward=-121506030.1 actor_loss=0.3079 critic_loss=143564471500.8000 entropy=17.5684 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 58520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-576184.0 mean_steps=12.7
|
|
[Episode 58530] reward=-116740516.8 actor_loss=0.3382 critic_loss=181763315029.3333 entropy=17.5717 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 58540] reward=-110043508.3 actor_loss=0.3747 critic_loss=124645245911.0400 entropy=17.5720 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 58540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563425.1 mean_steps=13.0
|
|
[Episode 58550] reward=-116823319.0 actor_loss=0.3165 critic_loss=132181930316.1081 entropy=17.5829 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 58560] reward=-117582623.0 actor_loss=0.3518 critic_loss=131061901312.0000 entropy=17.5917 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 58560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441625.6 mean_steps=15.1
|
|
[Episode 58570] reward=-120082103.6 actor_loss=0.1573 critic_loss=135649871117.4737 entropy=17.6051 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 58580] reward=-116867336.5 actor_loss=0.3144 critic_loss=133349640704.0000 entropy=17.6014 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 58580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-486076.5 mean_steps=13.2
|
|
[Episode 58590] reward=-118439798.1 actor_loss=0.3274 critic_loss=140064489289.9556 entropy=17.5937 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 58600] reward=-117266739.8 actor_loss=0.3181 critic_loss=135129294028.8000 entropy=17.5960 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 58600] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-301353.1 mean_steps=16.8
|
|
[Episode 58610] reward=-117195051.8 actor_loss=0.2869 critic_loss=135688352745.2444 entropy=17.5877 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 58620] reward=-116409261.7 actor_loss=0.2818 critic_loss=151173277104.3556 entropy=17.5758 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 58620] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-342678.3 mean_steps=17.9
|
|
[Episode 58630] reward=-114609826.0 actor_loss=0.2563 critic_loss=126823142453.8947 entropy=17.5701 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 58640] reward=-118920014.2 actor_loss=0.2593 critic_loss=130133177503.2889 entropy=17.5692 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 58640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-318502.0 mean_steps=16.9
|
|
[Episode 58650] reward=-119118024.7 actor_loss=0.3461 critic_loss=138020975252.6452 entropy=17.5667 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 58660] reward=-119509796.4 actor_loss=0.2617 critic_loss=155306350268.6316 entropy=17.5661 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 58660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-453517.2 mean_steps=14.1
|
|
[Episode 58670] reward=-118716725.2 actor_loss=0.3335 critic_loss=139279063147.7895 entropy=17.5658 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 58680] reward=-114667378.3 actor_loss=0.3025 critic_loss=136892134111.1795 entropy=17.5685 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 58680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480493.0 mean_steps=15.0
|
|
[Episode 58690] reward=-113981486.4 actor_loss=0.3395 critic_loss=136580524441.6000 entropy=17.5726 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 58700] reward=-109507686.1 actor_loss=0.3190 critic_loss=122548277106.7586 entropy=17.5672 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 58700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437339.6 mean_steps=15.8
|
|
[Episode 58710] reward=-123983444.3 actor_loss=0.1781 critic_loss=203482873359.5151 entropy=17.5655 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 58720] reward=-119768912.0 actor_loss=0.2799 critic_loss=168703044266.6667 entropy=17.5681 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 58720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458950.7 mean_steps=15.2
|
|
[Episode 58730] reward=-115911716.0 actor_loss=0.3094 critic_loss=131583641246.8965 entropy=17.5679 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 58740] reward=-112641512.6 actor_loss=0.2710 critic_loss=139419457243.4286 entropy=17.5772 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 58740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490499.1 mean_steps=14.0
|
|
[Episode 58750] reward=-115422289.3 actor_loss=0.3354 critic_loss=134932656850.8235 entropy=17.5692 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 58760] reward=-113419963.6 actor_loss=0.4114 critic_loss=123920865882.3529 entropy=17.5693 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 58760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-609468.0 mean_steps=12.2
|
|
[Episode 58770] reward=-116975318.6 actor_loss=0.2308 critic_loss=133572432406.2609 entropy=17.5564 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 58780] reward=-111598645.9 actor_loss=0.3888 critic_loss=120730035121.2308 entropy=17.5552 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 58780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-494698.3 mean_steps=15.6
|
|
[Episode 58790] reward=-112299184.0 actor_loss=0.3828 critic_loss=141568558545.4546 entropy=17.5670 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 58800] reward=-115607433.7 actor_loss=0.2789 critic_loss=142626242560.0000 entropy=17.5688 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 58800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-672862.0 mean_steps=12.3
|
|
[Episode 58810] reward=-114161920.4 actor_loss=0.2812 critic_loss=132124495165.7931 entropy=17.5858 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 58820] reward=-116404710.6 actor_loss=0.3301 critic_loss=129761965149.0909 entropy=17.5847 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 58820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456818.3 mean_steps=14.9
|
|
[Episode 58830] reward=-111760722.9 actor_loss=0.3042 critic_loss=130461798578.0870 entropy=17.5795 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 58840] reward=-118845938.5 actor_loss=0.3746 critic_loss=140763675761.7778 entropy=17.5832 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 58840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528544.3 mean_steps=14.3
|
|
[Episode 58850] reward=-118211746.7 actor_loss=0.2277 critic_loss=131888564516.5714 entropy=17.5900 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 58860] reward=-114023795.9 actor_loss=0.4185 critic_loss=128265994240.0000 entropy=17.5910 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 58860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541506.3 mean_steps=13.8
|
|
[Episode 58870] reward=-117670188.9 actor_loss=0.3442 critic_loss=139506781146.0741 entropy=17.5883 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 58880] reward=-116968538.5 actor_loss=0.2008 critic_loss=134116648839.5294 entropy=17.5922 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 58880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517802.4 mean_steps=14.2
|
|
[Episode 58890] reward=-119261519.1 actor_loss=0.3419 critic_loss=138130497877.3333 entropy=17.6032 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 58900] reward=-117908177.5 actor_loss=0.2614 critic_loss=143590796234.1053 entropy=17.6082 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 58900] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-371828.9 mean_steps=17.1
|
|
[Episode 58910] reward=-118743588.8 actor_loss=0.3442 critic_loss=142197698659.9024 entropy=17.6024 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 58920] reward=-121066464.6 actor_loss=0.1719 critic_loss=141025963622.4000 entropy=17.6085 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 58920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480797.2 mean_steps=13.9
|
|
[Episode 58930] reward=-115644850.4 actor_loss=0.2825 critic_loss=126024016262.0952 entropy=17.6272 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 58940] reward=-115584381.1 actor_loss=0.3481 critic_loss=129335928422.4000 entropy=17.6316 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 58940] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-285458.5 mean_steps=18.4
|
|
[Episode 58950] reward=-112119538.5 actor_loss=0.2945 critic_loss=125547959296.0000 entropy=17.6349 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 58960] reward=-119689443.9 actor_loss=0.2319 critic_loss=137776482411.7895 entropy=17.6379 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 58960] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-333008.0 mean_steps=17.9
|
|
[Episode 58970] reward=-118592197.2 actor_loss=0.2748 critic_loss=138156348136.7273 entropy=17.6507 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 58980] reward=-113613617.2 actor_loss=0.4158 critic_loss=128825425920.0000 entropy=17.6519 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 58980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-630642.6 mean_steps=12.5
|
|
[Episode 58990] reward=-114077431.7 actor_loss=0.2557 critic_loss=129205787579.7333 entropy=17.6467 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 59000] reward=-113059525.8 actor_loss=0.2995 critic_loss=126594511088.9412 entropy=17.6459 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 59000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-500210.3 mean_steps=13.2
|
|
[Episode 59010] reward=-120103908.9 actor_loss=0.3662 critic_loss=133199237654.2609 entropy=17.6601 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 59020] reward=-121117331.8 actor_loss=0.1812 critic_loss=138411352436.3636 entropy=17.6412 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 59020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471645.4 mean_steps=15.0
|
|
[Episode 59030] reward=-120885883.0 actor_loss=0.4580 critic_loss=138204746605.7143 entropy=17.6405 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 59040] reward=-119800688.0 actor_loss=0.2894 critic_loss=137591283321.9048 entropy=17.6488 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 59040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493995.5 mean_steps=14.8
|
|
[Episode 59050] reward=-120877312.2 actor_loss=0.2402 critic_loss=137425502863.3600 entropy=17.6393 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 59060] reward=-119060216.2 actor_loss=0.3246 critic_loss=145617072713.1429 entropy=17.6514 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 59060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-444181.0 mean_steps=14.8
|
|
[Episode 59070] reward=-118149605.5 actor_loss=0.2724 critic_loss=131899007853.7143 entropy=17.6550 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 59080] reward=-124650518.7 actor_loss=0.2201 critic_loss=148103709557.6216 entropy=17.6517 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 59080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479496.3 mean_steps=14.9
|
|
[Episode 59090] reward=-119368787.0 actor_loss=0.3085 critic_loss=135212926708.8696 entropy=17.6569 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 59100] reward=-117287236.8 actor_loss=0.4329 critic_loss=130952589312.0000 entropy=17.6449 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 59100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-614760.1 mean_steps=13.9
|
|
[Episode 59110] reward=-117553464.1 actor_loss=0.3060 critic_loss=133919761901.0370 entropy=17.6341 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 59120] reward=-120996329.1 actor_loss=0.1791 critic_loss=134574461383.1111 entropy=17.6386 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 59120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465522.0 mean_steps=14.9
|
|
[Episode 59130] reward=-116739626.2 actor_loss=0.2541 critic_loss=135846607406.5455 entropy=17.6299 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 59140] reward=-116823485.2 actor_loss=0.2613 critic_loss=134608479027.2000 entropy=17.6216 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 59140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-401073.9 mean_steps=15.8
|
|
[Episode 59150] reward=-122992317.0 actor_loss=0.3517 critic_loss=139165432832.0000 entropy=17.6314 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 59160] reward=-119909799.4 actor_loss=0.3148 critic_loss=186306822963.2000 entropy=17.6332 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 59160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-538654.1 mean_steps=15.4
|
|
[Episode 59170] reward=-119453011.9 actor_loss=0.2428 critic_loss=135872323219.9111 entropy=17.6424 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 59180] reward=-118005435.0 actor_loss=0.2886 critic_loss=128372338414.9333 entropy=17.6328 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 59180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442890.2 mean_steps=14.8
|
|
[Episode 59190] reward=-115015923.8 actor_loss=0.3081 critic_loss=181236916224.0000 entropy=17.6331 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 59200] reward=-117955676.9 actor_loss=0.3798 critic_loss=134487261184.0000 entropy=17.6201 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 59200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541657.3 mean_steps=14.7
|
|
[Episode 59210] reward=-119620651.6 actor_loss=0.2516 critic_loss=156525475653.8182 entropy=17.6078 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 59220] reward=-120748198.0 actor_loss=0.2863 critic_loss=139024947336.5333 entropy=17.6154 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 59220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479460.6 mean_steps=13.8
|
|
[Episode 59230] reward=-114683248.0 actor_loss=0.2677 critic_loss=134239139682.4615 entropy=17.6240 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 59240] reward=-119701257.7 actor_loss=0.3409 critic_loss=132872630272.0000 entropy=17.6205 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 59240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460200.3 mean_steps=15.2
|
|
[Episode 59250] reward=-119506587.9 actor_loss=0.3220 critic_loss=132002327315.6923 entropy=17.6134 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 59260] reward=-118984376.6 actor_loss=0.2834 critic_loss=137430889192.7273 entropy=17.6058 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 59260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383625.9 mean_steps=16.5
|
|
[Episode 59270] reward=-119833996.5 actor_loss=0.3382 critic_loss=137772336924.4445 entropy=17.6157 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 59280] reward=-120024042.5 actor_loss=0.3191 critic_loss=137142409079.4667 entropy=17.6297 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 59280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-599530.9 mean_steps=12.3
|
|
[Episode 59290] reward=-116914346.1 actor_loss=0.3426 critic_loss=131041179420.4444 entropy=17.6424 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 59300] reward=-125495943.3 actor_loss=0.1906 critic_loss=170710729068.0889 entropy=17.6395 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 59300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440633.0 mean_steps=14.9
|
|
[Episode 59310] reward=-115762902.0 actor_loss=0.2670 critic_loss=129929240824.2424 entropy=17.6394 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 59320] reward=-115345612.4 actor_loss=0.2385 critic_loss=130329922218.6667 entropy=17.6346 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 59320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533947.4 mean_steps=13.6
|
|
[Episode 59330] reward=-124805381.1 actor_loss=0.1360 critic_loss=143755794477.5111 entropy=17.6406 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 59340] reward=-118100720.8 actor_loss=0.3098 critic_loss=133135144277.3333 entropy=17.6437 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 59340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-554600.2 mean_steps=14.2
|
|
[Episode 59350] reward=-119502507.1 actor_loss=0.2226 critic_loss=132370204113.4545 entropy=17.6281 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 59360] reward=-114687863.2 actor_loss=0.3749 critic_loss=127180292551.1111 entropy=17.6448 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 59360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557860.2 mean_steps=13.3
|
|
[Episode 59370] reward=-118659363.9 actor_loss=0.3096 critic_loss=141167625102.2222 entropy=17.6545 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 59380] reward=-116654749.4 actor_loss=0.2372 critic_loss=130330814382.0800 entropy=17.6584 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 59380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-404465.1 mean_steps=15.2
|
|
[Episode 59390] reward=-115015718.1 actor_loss=0.3481 critic_loss=133178299460.2667 entropy=17.6471 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 59400] reward=-120113010.8 actor_loss=0.2474 critic_loss=136757398062.5455 entropy=17.6396 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 59400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-299734.9 mean_steps=15.8
|
|
[Episode 59410] reward=-120971759.4 actor_loss=0.3161 critic_loss=137549713167.0588 entropy=17.6347 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 59420] reward=-118079331.8 actor_loss=0.2889 critic_loss=148522497117.0909 entropy=17.6342 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 59420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533941.8 mean_steps=14.4
|
|
[Episode 59430] reward=-114328914.1 actor_loss=0.2216 critic_loss=127768459673.6000 entropy=17.6345 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 59440] reward=-118184082.2 actor_loss=0.2502 critic_loss=139299723855.6444 entropy=17.6251 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 59440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413140.3 mean_steps=15.5
|
|
[Episode 59450] reward=-121420664.7 actor_loss=0.1967 critic_loss=142303286613.3333 entropy=17.6308 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 59460] reward=-122413305.9 actor_loss=0.2207 critic_loss=141451013828.9231 entropy=17.6345 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 59460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490916.8 mean_steps=14.1
|
|
[Episode 59470] reward=-118045723.2 actor_loss=0.2987 critic_loss=131636874444.8000 entropy=17.6298 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 59480] reward=-114477685.8 actor_loss=0.3306 critic_loss=126535249464.8889 entropy=17.6246 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 59480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-598848.6 mean_steps=13.6
|
|
[Episode 59490] reward=-115406107.9 actor_loss=0.3729 critic_loss=130714462297.0435 entropy=17.6148 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 59500] reward=-114459802.1 actor_loss=0.3817 critic_loss=129578342507.7895 entropy=17.6191 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 59500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-581585.5 mean_steps=11.8
|
|
[Episode 59510] reward=-119991867.5 actor_loss=0.2303 critic_loss=144359738936.8889 entropy=17.6226 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 59520] reward=-119466246.9 actor_loss=0.2874 critic_loss=131170013696.0000 entropy=17.6421 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 59520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-616080.3 mean_steps=14.0
|
|
[Episode 59530] reward=-115548663.0 actor_loss=0.4017 critic_loss=150336507084.8000 entropy=17.6454 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 59540] reward=-119582614.6 actor_loss=0.2990 critic_loss=149817672499.2000 entropy=17.6497 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 59540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525812.9 mean_steps=13.6
|
|
[Episode 59550] reward=-119829669.3 actor_loss=0.3077 critic_loss=143523029854.3158 entropy=17.6508 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 59560] reward=-116318503.3 actor_loss=0.3308 critic_loss=131742516838.4000 entropy=17.6471 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 59560] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-579292.3 mean_steps=12.2
|
|
[Episode 59570] reward=-120988812.4 actor_loss=0.2829 critic_loss=143732770257.4546 entropy=17.6534 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 59580] reward=-121580304.2 actor_loss=0.2264 critic_loss=141078051498.6667 entropy=17.6537 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 59580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474084.7 mean_steps=14.9
|
|
[Episode 59590] reward=-119652208.7 actor_loss=0.2594 critic_loss=137149601382.4000 entropy=17.6516 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 59600] reward=-122754007.7 actor_loss=0.2866 critic_loss=142739508480.0000 entropy=17.6635 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 59600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516678.4 mean_steps=14.4
|
|
[Episode 59610] reward=-116527916.5 actor_loss=0.1630 critic_loss=128449894107.4286 entropy=17.6578 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 59620] reward=-117312310.4 actor_loss=0.2921 critic_loss=135459296870.4000 entropy=17.6566 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 59620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545526.1 mean_steps=13.3
|
|
[Episode 59630] reward=-118082282.9 actor_loss=0.2626 critic_loss=131871854787.0476 entropy=17.6626 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 59640] reward=-121601318.2 actor_loss=0.2310 critic_loss=143167558778.8800 entropy=17.6671 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 59640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-401045.3 mean_steps=15.2
|
|
[Episode 59650] reward=-121143203.6 actor_loss=0.3567 critic_loss=140163199171.0476 entropy=17.6821 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 59660] reward=-115055712.3 actor_loss=0.3215 critic_loss=131878690304.0000 entropy=17.6791 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 59660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397760.2 mean_steps=15.5
|
|
[Episode 59670] reward=-117206459.9 actor_loss=0.3070 critic_loss=137689002803.2000 entropy=17.6777 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 59680] reward=-118009401.6 actor_loss=0.3667 critic_loss=139175691059.2000 entropy=17.6727 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 59680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578733.3 mean_steps=12.7
|
|
[Episode 59690] reward=-120460409.4 actor_loss=0.3281 critic_loss=140066122698.1053 entropy=17.6783 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 59700] reward=-118825529.9 actor_loss=0.2591 critic_loss=138314708650.6667 entropy=17.6966 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 59700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385348.7 mean_steps=16.6
|
|
[Episode 59710] reward=-122458925.5 actor_loss=0.2076 critic_loss=138635276146.7586 entropy=17.7026 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 59720] reward=-117100698.7 actor_loss=0.3270 critic_loss=130755350660.1290 entropy=17.6900 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 59720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489093.6 mean_steps=14.4
|
|
[Episode 59730] reward=-117594456.4 actor_loss=0.3179 critic_loss=130417841796.7407 entropy=17.6991 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 59740] reward=-118405963.1 actor_loss=0.3072 critic_loss=134472709213.0909 entropy=17.7056 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 59740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-362320.0 mean_steps=17.1
|
|
[Episode 59750] reward=-119488970.3 actor_loss=0.2538 critic_loss=139929640329.8462 entropy=17.7003 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 59760] reward=-118618584.7 actor_loss=0.2985 critic_loss=134815365650.9630 entropy=17.6950 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 59760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614496.6 mean_steps=13.3
|
|
[Episode 59770] reward=-113128254.3 actor_loss=0.2639 critic_loss=129184335872.0000 entropy=17.6964 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 59780] reward=-113454552.2 actor_loss=0.3087 critic_loss=129335753491.6923 entropy=17.6914 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 59780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525246.1 mean_steps=13.5
|
|
[Episode 59790] reward=-116321195.8 actor_loss=0.2872 critic_loss=130130434315.1304 entropy=17.7178 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 59800] reward=-111794059.9 actor_loss=0.3592 critic_loss=127970540800.0000 entropy=17.7112 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 59800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-554058.4 mean_steps=14.2
|
|
[Episode 59810] reward=-119204728.9 actor_loss=0.3239 critic_loss=137559525320.6487 entropy=17.7165 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 59820] reward=-121336629.7 actor_loss=0.2744 critic_loss=183680898194.2857 entropy=17.7322 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 59820] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-165714.2 mean_steps=19.1
|
|
[Episode 59830] reward=-118546107.3 actor_loss=0.3797 critic_loss=135688295219.2000 entropy=17.7319 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 59840] reward=-122222991.4 actor_loss=0.2659 critic_loss=142309735725.1765 entropy=17.7199 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 59840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515988.0 mean_steps=14.5
|
|
[Episode 59850] reward=-116587230.4 actor_loss=0.3543 critic_loss=135813929005.5111 entropy=17.7224 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 59860] reward=-113502462.0 actor_loss=0.3416 critic_loss=134090893044.8696 entropy=17.7221 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 59860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-677193.9 mean_steps=13.3
|
|
[Episode 59870] reward=-118666243.5 actor_loss=0.2869 critic_loss=132124711204.5714 entropy=17.7302 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 59880] reward=-119492543.0 actor_loss=0.2557 critic_loss=177412243876.1026 entropy=17.7249 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 59880] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-650180.3 mean_steps=12.4
|
|
[Episode 59890] reward=-125228708.5 actor_loss=0.2322 critic_loss=144064276070.4000 entropy=17.7076 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 59900] reward=-125037554.5 actor_loss=0.2926 critic_loss=228799501276.6897 entropy=17.7116 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 59900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442268.9 mean_steps=15.0
|
|
[Episode 59910] reward=-124372128.4 actor_loss=0.2032 critic_loss=167129121938.2857 entropy=17.7223 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 59920] reward=-123230351.4 actor_loss=0.2526 critic_loss=197038838813.2571 entropy=17.7163 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 59920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518503.8 mean_steps=14.8
|
|
[Episode 59930] reward=-118579942.8 actor_loss=0.3351 critic_loss=139088652760.6154 entropy=17.7121 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 59940] reward=-118525644.3 actor_loss=0.4364 critic_loss=137520081826.9091 entropy=17.7049 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 59940] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-237719.0 mean_steps=19.6
|
|
[Episode 59950] reward=-119981285.8 actor_loss=0.3895 critic_loss=139766616064.0000 entropy=17.7008 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 59960] reward=-118801886.9 actor_loss=0.2986 critic_loss=139607576908.1081 entropy=17.6911 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 59960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515533.1 mean_steps=13.7
|
|
[Episode 59970] reward=-115990080.6 actor_loss=0.2415 critic_loss=137384458285.5111 entropy=17.6852 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 59980] reward=-110841602.7 actor_loss=0.3840 critic_loss=134829186715.8261 entropy=17.6869 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 59980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-620260.3 mean_steps=13.2
|
|
[Episode 59990] reward=-121414215.4 actor_loss=0.3668 critic_loss=142942524038.7368 entropy=17.6843 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 60000] reward=-116205870.3 actor_loss=0.2762 critic_loss=133668310016.0000 entropy=17.6663 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 60000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600600.5 mean_steps=13.0
|
|
[Episode 60010] reward=-118295277.4 actor_loss=0.3982 critic_loss=150744385438.4762 entropy=17.6635 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 60020] reward=-124510794.1 actor_loss=0.2281 critic_loss=155906869540.5714 entropy=17.6723 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 60020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450415.3 mean_steps=14.9
|
|
[Episode 60030] reward=-115159211.8 actor_loss=0.2923 critic_loss=132461176508.6316 entropy=17.6716 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 60040] reward=-117954248.8 actor_loss=0.2713 critic_loss=135029066714.0741 entropy=17.6818 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 60040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471226.8 mean_steps=15.0
|
|
[Episode 60050] reward=-121269665.5 actor_loss=0.2780 critic_loss=163226230393.9048 entropy=17.6931 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 60060] reward=-108820460.8 actor_loss=0.4758 critic_loss=138484636402.5263 entropy=17.6998 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 60060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463522.1 mean_steps=13.9
|
|
[Episode 60070] reward=-118099559.7 actor_loss=0.2716 critic_loss=137942368737.8824 entropy=17.6961 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 60080] reward=-117733620.9 actor_loss=0.2382 critic_loss=137193278013.4400 entropy=17.7053 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 60080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458812.3 mean_steps=14.8
|
|
[Episode 60090] reward=-115183582.9 actor_loss=0.3489 critic_loss=131766889910.8571 entropy=17.7005 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 60100] reward=-116950088.7 actor_loss=0.3058 critic_loss=136199051866.3529 entropy=17.7025 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 60100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535395.0 mean_steps=14.4
|
|
[Episode 60110] reward=-122780454.2 actor_loss=0.2879 critic_loss=142809644714.6667 entropy=17.7101 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 60120] reward=-124510845.2 actor_loss=0.2722 critic_loss=144322730251.1304 entropy=17.7057 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 60120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-461425.9 mean_steps=13.9
|
|
[Episode 60130] reward=-112508657.2 actor_loss=0.3164 critic_loss=131086187763.8095 entropy=17.7014 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 60140] reward=-120207481.6 actor_loss=0.3252 critic_loss=151434746880.0000 entropy=17.7014 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 60140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524698.4 mean_steps=14.1
|
|
[Episode 60150] reward=-120463550.1 actor_loss=0.2222 critic_loss=132673933019.4286 entropy=17.7087 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 60160] reward=-125042206.6 actor_loss=0.2197 critic_loss=154725303485.6296 entropy=17.7101 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 60160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-408296.4 mean_steps=17.6
|
|
[Episode 60170] reward=-119378244.2 actor_loss=0.3175 critic_loss=137646376667.4286 entropy=17.7064 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 60180] reward=-116302638.1 actor_loss=0.3579 critic_loss=134975613190.5641 entropy=17.6990 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 60180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575936.7 mean_steps=12.8
|
|
[Episode 60190] reward=-119577906.0 actor_loss=0.2603 critic_loss=138768080183.6522 entropy=17.7100 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 60200] reward=-118110436.3 actor_loss=0.4130 critic_loss=132380718694.4000 entropy=17.7119 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 60200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579373.9 mean_steps=12.8
|
|
[Episode 60210] reward=-112914132.3 actor_loss=0.2949 critic_loss=139336043958.8571 entropy=17.7026 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 60220] reward=-116961203.4 actor_loss=0.1653 critic_loss=131377474608.7619 entropy=17.7029 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 60220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-486468.1 mean_steps=16.1
|
|
[Episode 60230] reward=-117321148.1 actor_loss=0.3470 critic_loss=127936192993.8824 entropy=17.7060 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 60240] reward=-126562430.4 actor_loss=0.2639 critic_loss=146179629641.1429 entropy=17.6946 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 60240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-564138.3 mean_steps=12.8
|
|
[Episode 60250] reward=-116113010.0 actor_loss=0.4338 critic_loss=129615303826.2857 entropy=17.7056 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Episode 60260] reward=-121845093.2 actor_loss=0.2887 critic_loss=146196367360.0000 entropy=17.7056 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 60260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-506561.7 mean_steps=13.2
|
|
[Episode 60270] reward=-119112410.5 actor_loss=0.3640 critic_loss=134984600877.1765 entropy=17.7030 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 60280] reward=-121021628.2 actor_loss=0.3272 critic_loss=139755080523.2941 entropy=17.7038 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 60280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-673551.2 mean_steps=12.0
|
|
[Episode 60290] reward=-118525983.5 actor_loss=0.2964 critic_loss=139481261707.6364 entropy=17.7122 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 60300] reward=-113803505.3 actor_loss=0.3160 critic_loss=131043503542.8571 entropy=17.7116 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 60300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-545001.6 mean_steps=15.6
|
|
[Episode 60310] reward=-124117460.4 actor_loss=0.3553 critic_loss=145031293771.2941 entropy=17.7131 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 60320] reward=-121055925.6 actor_loss=0.3178 critic_loss=140486353481.1429 entropy=17.7081 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 60320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477973.8 mean_steps=14.1
|
|
[Episode 60330] reward=-121133052.9 actor_loss=0.3634 critic_loss=140715045104.9412 entropy=17.7124 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 60340] reward=-117179515.4 actor_loss=0.3436 critic_loss=134870294016.0000 entropy=17.7146 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 60340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-623810.6 mean_steps=13.2
|
|
[Episode 60350] reward=-120223144.7 actor_loss=0.3381 critic_loss=137897202483.2000 entropy=17.7117 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 60360] reward=-125004526.5 actor_loss=0.2053 critic_loss=204336552813.7143 entropy=17.7129 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 60360] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-334441.8 mean_steps=17.1
|
|
[Episode 60370] reward=-110397634.5 actor_loss=0.3909 critic_loss=130555171962.8800 entropy=17.7127 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 60380] reward=-123031467.9 actor_loss=0.2586 critic_loss=396638318413.9130 entropy=17.6980 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 60380] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-351453.8 mean_steps=17.1
|
|
[Episode 60390] reward=-120841712.9 actor_loss=0.2917 critic_loss=144985158836.7059 entropy=17.6952 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 60400] reward=-116085387.0 actor_loss=0.3413 critic_loss=131990901009.0667 entropy=17.6942 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 60400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-629856.0 mean_steps=13.7
|
|
[Episode 60410] reward=-121099070.0 actor_loss=0.2622 critic_loss=148487535001.6000 entropy=17.7003 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 60420] reward=-117840831.5 actor_loss=0.2080 critic_loss=132720425642.6667 entropy=17.6981 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 60420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573027.0 mean_steps=13.8
|
|
[Episode 60430] reward=-117764792.8 actor_loss=0.3274 critic_loss=129976795136.0000 entropy=17.7034 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 60440] reward=-122670498.5 actor_loss=0.2507 critic_loss=140818091520.0000 entropy=17.7055 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 60440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528067.2 mean_steps=13.4
|
|
[Episode 60450] reward=-116667295.5 actor_loss=0.3209 critic_loss=135488932274.4242 entropy=17.7048 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 60460] reward=-118267441.4 actor_loss=0.2487 critic_loss=137477310600.5333 entropy=17.7021 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 60460] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-317627.8 mean_steps=17.8
|
|
[Episode 60470] reward=-120892143.4 actor_loss=0.3059 critic_loss=136194506187.0345 entropy=17.6978 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 60480] reward=-116854834.6 actor_loss=0.2980 critic_loss=140896387072.0000 entropy=17.7052 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 60480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553070.5 mean_steps=13.7
|
|
[Episode 60490] reward=-112616395.8 actor_loss=0.3108 critic_loss=136529126195.2000 entropy=17.6993 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 60500] reward=-118203625.4 actor_loss=0.3378 critic_loss=131710384713.1429 entropy=17.6963 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 60500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543834.3 mean_steps=13.5
|
|
[Episode 60510] reward=-116699732.2 actor_loss=0.3068 critic_loss=135919044900.5714 entropy=17.6911 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 60520] reward=-115742775.9 actor_loss=0.3457 critic_loss=127847958714.1818 entropy=17.6837 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 60520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511647.1 mean_steps=14.4
|
|
[Episode 60530] reward=-123064849.5 actor_loss=0.3058 critic_loss=138911031296.0000 entropy=17.6783 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 60540] reward=-122257587.7 actor_loss=0.2381 critic_loss=156733629484.5217 entropy=17.6830 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 60540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475069.4 mean_steps=15.2
|
|
[Episode 60550] reward=-111628376.8 actor_loss=0.2605 critic_loss=127022746464.7111 entropy=17.6867 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 60560] reward=-119704387.1 actor_loss=0.2371 critic_loss=139641730513.4546 entropy=17.6734 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 60560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510371.9 mean_steps=13.3
|
|
[Episode 60570] reward=-118569408.9 actor_loss=0.2333 critic_loss=134648448614.4000 entropy=17.6728 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 60580] reward=-115921748.8 actor_loss=0.3832 critic_loss=150015723110.4000 entropy=17.6735 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 60580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423560.0 mean_steps=15.6
|
|
[Episode 60590] reward=-115669975.4 actor_loss=0.2967 critic_loss=126335848106.6667 entropy=17.6793 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 60600] reward=-119532607.2 actor_loss=0.3331 critic_loss=134209768561.7778 entropy=17.6806 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 60600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-470342.9 mean_steps=16.1
|
|
[Episode 60610] reward=-116708123.3 actor_loss=0.2794 critic_loss=137864725913.6000 entropy=17.6778 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 60620] reward=-118188980.0 actor_loss=0.2756 critic_loss=129870305962.6667 entropy=17.6767 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 60620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537931.4 mean_steps=14.5
|
|
[Episode 60630] reward=-117168335.5 actor_loss=0.2114 critic_loss=135751493950.5778 entropy=17.6727 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 60640] reward=-116444738.3 actor_loss=0.3332 critic_loss=156900464113.3714 entropy=17.6834 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 60640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-555091.4 mean_steps=14.4
|
|
[Episode 60650] reward=-117684142.2 actor_loss=0.4105 critic_loss=144906415672.8889 entropy=17.7039 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 60660] reward=-120799985.7 actor_loss=0.2591 critic_loss=179217338859.5200 entropy=17.7115 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 60660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-565590.6 mean_steps=14.4
|
|
[Episode 60670] reward=-117569162.9 actor_loss=0.2675 critic_loss=135057463873.6410 entropy=17.7011 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 60680] reward=-115564802.3 actor_loss=0.4245 critic_loss=139366693546.6667 entropy=17.7123 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 60680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-549777.3 mean_steps=15.2
|
|
[Episode 60690] reward=-122180096.3 actor_loss=0.2257 critic_loss=147002225911.1724 entropy=17.7119 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 60700] reward=-120811220.4 actor_loss=0.2695 critic_loss=137579849318.4000 entropy=17.7174 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 60700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555220.5 mean_steps=13.4
|
|
[Episode 60710] reward=-112692398.7 actor_loss=0.3906 critic_loss=130296668615.1111 entropy=17.7187 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 60720] reward=-114028261.3 actor_loss=0.2220 critic_loss=130844345958.4000 entropy=17.7265 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 60720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538895.8 mean_steps=13.4
|
|
[Episode 60730] reward=-117552669.9 actor_loss=0.3144 critic_loss=135393768501.8947 entropy=17.7306 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 60740] reward=-116713819.7 actor_loss=0.3916 critic_loss=131929780224.0000 entropy=17.7462 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 60740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429311.6 mean_steps=16.1
|
|
[Episode 60750] reward=-114396516.8 actor_loss=0.3411 critic_loss=133333399040.0000 entropy=17.7515 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 60760] reward=-121221958.9 actor_loss=0.2512 critic_loss=140564579287.0400 entropy=17.7487 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 60760] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-618189.8 mean_steps=11.3
|
|
[Episode 60770] reward=-120849164.9 actor_loss=0.3092 critic_loss=139729644046.6286 entropy=17.7502 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 60780] reward=-114571151.4 actor_loss=0.3626 critic_loss=135165683957.7600 entropy=17.7467 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 60780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-611365.8 mean_steps=12.8
|
|
[Episode 60790] reward=-118266293.7 actor_loss=0.2748 critic_loss=136298255226.4348 entropy=17.7418 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 60800] reward=-114454053.7 actor_loss=0.3890 critic_loss=125489822675.4783 entropy=17.7386 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 60800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568445.5 mean_steps=14.0
|
|
[Episode 60810] reward=-116836070.7 actor_loss=0.2924 critic_loss=133850493337.6000 entropy=17.7381 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 60820] reward=-115373078.3 actor_loss=0.3798 critic_loss=131583648591.4483 entropy=17.7397 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 60820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529547.5 mean_steps=13.4
|
|
[Episode 60830] reward=-113981068.2 actor_loss=0.3520 critic_loss=132941105408.0000 entropy=17.7465 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 60840] reward=-116489820.0 actor_loss=0.2191 critic_loss=132880614238.3158 entropy=17.7487 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 60840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548480.4 mean_steps=14.5
|
|
[Episode 60850] reward=-115835919.9 actor_loss=0.2900 critic_loss=133096770379.2941 entropy=17.7316 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 60860] reward=-118305993.4 actor_loss=0.3308 critic_loss=133042466360.8889 entropy=17.7241 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 60860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462681.6 mean_steps=16.1
|
|
[Episode 60870] reward=-122279871.1 actor_loss=0.1966 critic_loss=139656216029.8667 entropy=17.7191 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 60880] reward=-125292159.5 actor_loss=0.1477 critic_loss=143420329164.8000 entropy=17.7147 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 60880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-500424.1 mean_steps=13.4
|
|
[Episode 60890] reward=-112732309.6 actor_loss=0.2318 critic_loss=124401959367.1111 entropy=17.7007 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 60900] reward=-121847840.4 actor_loss=0.2868 critic_loss=152120954424.8889 entropy=17.6862 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 60900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493514.1 mean_steps=14.3
|
|
[Episode 60910] reward=-123066900.9 actor_loss=0.3947 critic_loss=364318733956.7407 entropy=17.6817 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 60920] reward=-120344089.7 actor_loss=0.2333 critic_loss=135915875474.2857 entropy=17.6799 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 60920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-502986.8 mean_steps=12.2
|
|
[Episode 60930] reward=-116472960.0 actor_loss=0.3503 critic_loss=131191854080.0000 entropy=17.6824 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 60940] reward=-119224392.2 actor_loss=0.2933 critic_loss=134006621866.6667 entropy=17.6817 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 60940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394291.3 mean_steps=16.6
|
|
[Episode 60950] reward=-115686593.8 actor_loss=0.4324 critic_loss=133210444310.2609 entropy=17.6945 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 60960] reward=-116729161.9 actor_loss=0.3656 critic_loss=136023723401.8462 entropy=17.6907 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 60960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-490164.6 mean_steps=13.4
|
|
[Episode 60970] reward=-118979562.3 actor_loss=0.2411 critic_loss=135727025307.1515 entropy=17.7020 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 60980] reward=-116008804.4 actor_loss=0.1849 critic_loss=130063786894.2222 entropy=17.7004 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 60980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592406.1 mean_steps=14.0
|
|
[Episode 60990] reward=-120954480.4 actor_loss=0.2303 critic_loss=138465819614.9677 entropy=17.6931 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 61000] reward=-118646677.8 actor_loss=0.2269 critic_loss=131998886934.7556 entropy=17.6943 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 61000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-396313.4 mean_steps=15.6
|
|
[Episode 61010] reward=-115990916.8 actor_loss=0.3565 critic_loss=166697781127.5294 entropy=17.6798 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 61020] reward=-120242035.1 actor_loss=0.2642 critic_loss=148209756754.5807 entropy=17.6765 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 61020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580288.5 mean_steps=12.8
|
|
[Episode 61030] reward=-117698511.6 actor_loss=0.2562 critic_loss=144600821225.7391 entropy=17.6853 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 61040] reward=-115858136.4 actor_loss=0.2985 critic_loss=131223978530.1333 entropy=17.6849 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 61040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538782.1 mean_steps=14.7
|
|
[Episode 61050] reward=-114340541.1 actor_loss=0.3390 critic_loss=133001796219.5862 entropy=17.7004 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 61060] reward=-120746438.7 actor_loss=0.2751 critic_loss=139149523778.3704 entropy=17.6978 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 61060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-392883.0 mean_steps=17.4
|
|
[Episode 61070] reward=-124491787.3 actor_loss=0.2236 critic_loss=145127852526.3448 entropy=17.6875 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 61080] reward=-120011843.2 actor_loss=0.2797 critic_loss=133785759470.9333 entropy=17.6950 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 61080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-600705.4 mean_steps=13.8
|
|
[Episode 61090] reward=-116245226.8 actor_loss=0.3263 critic_loss=134148516522.6667 entropy=17.7133 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 61100] reward=-114901521.9 actor_loss=0.2800 critic_loss=127922654776.8889 entropy=17.7148 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 61100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-604035.0 mean_steps=12.9
|
|
[Episode 61110] reward=-119059668.2 actor_loss=0.3218 critic_loss=136479553693.5385 entropy=17.7238 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 61120] reward=-124492400.9 actor_loss=0.3629 critic_loss=148027491728.6956 entropy=17.7201 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 61120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506608.9 mean_steps=14.3
|
|
[Episode 61130] reward=-122767880.9 actor_loss=0.2881 critic_loss=140231148944.6956 entropy=17.7119 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 61140] reward=-116586726.5 actor_loss=0.2974 critic_loss=130974628378.9474 entropy=17.7113 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 61140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399903.9 mean_steps=15.2
|
|
[Episode 61150] reward=-118437758.2 actor_loss=0.2979 critic_loss=138281425498.3529 entropy=17.6996 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 61160] reward=-120702001.7 actor_loss=0.3542 critic_loss=171698760557.7143 entropy=17.6986 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 61160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491389.6 mean_steps=14.0
|
|
[Episode 61170] reward=-115890515.2 actor_loss=0.4157 critic_loss=130125592985.6000 entropy=17.7069 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 61180] reward=-118934462.0 actor_loss=0.2702 critic_loss=134414740684.8000 entropy=17.7109 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 61180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560797.8 mean_steps=12.4
|
|
[Episode 61190] reward=-119555852.5 actor_loss=0.2827 critic_loss=134777038661.8182 entropy=17.7158 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 61200] reward=-117272889.2 actor_loss=0.2897 critic_loss=151285738154.6667 entropy=17.7164 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 61200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-516909.5 mean_steps=13.5
|
|
[Episode 61210] reward=-122508936.2 actor_loss=0.2762 critic_loss=142567485952.0000 entropy=17.7278 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 61220] reward=-113178711.2 actor_loss=0.3085 critic_loss=130115118836.8696 entropy=17.7284 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 61220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554308.0 mean_steps=13.6
|
|
[Episode 61230] reward=-122850893.9 actor_loss=0.2053 critic_loss=138512074484.8696 entropy=17.7291 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 61240] reward=-120231544.8 actor_loss=0.2842 critic_loss=137480739439.3044 entropy=17.7279 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 61240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485191.0 mean_steps=14.9
|
|
[Episode 61250] reward=-123337361.4 actor_loss=0.2452 critic_loss=146897833728.0000 entropy=17.7403 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 61260] reward=-115206679.3 actor_loss=0.4047 critic_loss=129119073484.8000 entropy=17.7444 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 61260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570233.9 mean_steps=13.7
|
|
[Episode 61270] reward=-123944568.2 actor_loss=0.1689 critic_loss=141522393770.6667 entropy=17.7520 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 61280] reward=-118841055.5 actor_loss=0.3206 critic_loss=132113609728.0000 entropy=17.7486 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 61280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445028.0 mean_steps=15.6
|
|
[Episode 61290] reward=-118336658.7 actor_loss=0.3054 critic_loss=134728905765.9259 entropy=17.7496 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 61300] reward=-117258935.0 actor_loss=0.3745 critic_loss=132976403602.2857 entropy=17.7562 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 61300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-594231.5 mean_steps=12.7
|
|
[Episode 61310] reward=-121194725.0 actor_loss=0.2120 critic_loss=140758482703.0588 entropy=17.7566 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 61320] reward=-114300687.8 actor_loss=0.2738 critic_loss=130667790020.9231 entropy=17.7536 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 61320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554179.5 mean_steps=13.1
|
|
[Episode 61330] reward=-124954595.4 actor_loss=0.2170 critic_loss=140955221625.9048 entropy=17.7683 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 61340] reward=-120781579.0 actor_loss=0.2386 critic_loss=152597171404.8000 entropy=17.7533 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 61340] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-299394.6 mean_steps=17.9
|
|
[Episode 61350] reward=-116843921.4 actor_loss=0.3184 critic_loss=141013793792.0000 entropy=17.7530 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 61360] reward=-120670856.0 actor_loss=0.2759 critic_loss=181718653458.9630 entropy=17.7517 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 61360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423398.8 mean_steps=15.2
|
|
[Episode 61370] reward=-117297766.4 actor_loss=0.3131 critic_loss=135672245899.6364 entropy=17.7464 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 61380] reward=-119764730.3 actor_loss=0.2981 critic_loss=138677984870.4000 entropy=17.7339 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 61380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391062.8 mean_steps=16.1
|
|
[Episode 61390] reward=-115161428.8 actor_loss=0.2051 critic_loss=131114910302.8148 entropy=17.7426 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 61400] reward=-116186034.9 actor_loss=0.2402 critic_loss=166837449679.2381 entropy=17.7434 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 61400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-486135.4 mean_steps=12.2
|
|
[Episode 61410] reward=-121607779.3 actor_loss=0.2778 critic_loss=190456432851.8621 entropy=17.7270 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 61420] reward=-115568498.0 actor_loss=0.3097 critic_loss=132120067549.8667 entropy=17.7047 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 61420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577905.7 mean_steps=12.7
|
|
[Episode 61430] reward=-122143092.2 actor_loss=0.2085 critic_loss=137132604809.8462 entropy=17.6992 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 61440] reward=-115285914.5 actor_loss=0.2864 critic_loss=150784131387.0769 entropy=17.7213 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 61440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-627808.6 mean_steps=12.8
|
|
[Episode 61450] reward=-124650695.5 actor_loss=0.2491 critic_loss=141900749141.3333 entropy=17.7423 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 61460] reward=-121491674.0 actor_loss=0.2059 critic_loss=138447210086.4000 entropy=17.7526 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 61460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532844.9 mean_steps=14.2
|
|
[Episode 61470] reward=-115769725.9 actor_loss=0.2973 critic_loss=131529263786.6667 entropy=17.7223 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 61480] reward=-119456678.2 actor_loss=0.3087 critic_loss=135555648534.7556 entropy=17.7176 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 61480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415330.9 mean_steps=15.8
|
|
[Episode 61490] reward=-118237062.5 actor_loss=0.2591 critic_loss=129766941302.1538 entropy=17.7192 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 61500] reward=-118424730.0 actor_loss=0.3096 critic_loss=135357006994.2857 entropy=17.7226 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 61500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479386.3 mean_steps=14.9
|
|
[Episode 61510] reward=-116201148.1 actor_loss=0.3035 critic_loss=130394904868.5714 entropy=17.7210 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 61520] reward=-121569242.4 actor_loss=0.3125 critic_loss=138764822766.1395 entropy=17.7252 approx_kl=0.0115 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 61520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465206.7 mean_steps=15.2
|
|
[Episode 61530] reward=-124380229.7 actor_loss=0.2941 critic_loss=141591883403.6364 entropy=17.7259 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 61540] reward=-120708052.1 actor_loss=0.3398 critic_loss=134477972120.2162 entropy=17.7218 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 61540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-577031.8 mean_steps=11.8
|
|
[Episode 61550] reward=-116076295.4 actor_loss=0.2397 critic_loss=131921544260.2667 entropy=17.7246 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 61560] reward=-119378129.3 actor_loss=0.2718 critic_loss=137223183391.0303 entropy=17.7245 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 61560] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-647530.2 mean_steps=11.4
|
|
[Episode 61570] reward=-124631328.3 actor_loss=0.2692 critic_loss=159117898898.2857 entropy=17.7272 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 61580] reward=-150572556.1 actor_loss=0.2531 critic_loss=2290616412842.6665 entropy=17.7305 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 61580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-567446.2 mean_steps=14.7
|
|
[Episode 61590] reward=-128053644.2 actor_loss=0.2888 critic_loss=254471130489.2632 entropy=17.7221 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 61600] reward=-122628545.2 actor_loss=0.3598 critic_loss=145462379479.0400 entropy=17.7127 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 61600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467331.8 mean_steps=14.8
|
|
[Episode 61610] reward=-118333061.5 actor_loss=0.3579 critic_loss=139723670216.3478 entropy=17.7241 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 61620] reward=-131094536.9 actor_loss=0.2902 critic_loss=306091104326.6207 entropy=17.7507 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 61620] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-338564.5 mean_steps=16.8
|
|
[Episode 61630] reward=-2752214961.5 actor_loss=7.3433 critic_loss=14256240802893368.0000 entropy=17.7628 approx_kl=0.0044 kl_stop=1 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 61640] reward=-17673759781.5 actor_loss=0.1550 critic_loss=276612710663681920.0000 entropy=17.7772 approx_kl=0.0042 kl_stop=0 intervention_rate=0.0951 front_blocked=0
|
|
[Eval 61640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523930.4 mean_steps=14.1
|
|
[Episode 61650] reward=-125844067.3 actor_loss=0.2711 critic_loss=165307891712.0000 entropy=17.7810 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 61660] reward=-122380555.1 actor_loss=0.2292 critic_loss=142758861391.6444 entropy=17.7758 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 61660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580627.9 mean_steps=12.7
|
|
[Episode 61670] reward=-109858673.0 actor_loss=0.3047 critic_loss=129378319109.6889 entropy=17.7975 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 61680] reward=-116921070.4 actor_loss=0.3482 critic_loss=133785526072.1951 entropy=17.7895 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 61680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-671030.8 mean_steps=13.3
|
|
[Episode 61690] reward=-120168295.3 actor_loss=0.2751 critic_loss=139042907750.4000 entropy=17.8021 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 61700] reward=-124487254.5 actor_loss=0.3021 critic_loss=726111217254.4000 entropy=17.7858 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 61700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-532493.5 mean_steps=14.9
|
|
[Episode 61710] reward=-117026244.0 actor_loss=0.3669 critic_loss=145394198869.3333 entropy=17.7719 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 61720] reward=-5643877896.6 actor_loss=34.4141 critic_loss=25688776094348380.0000 entropy=17.7892 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 61720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471149.2 mean_steps=14.8
|
|
[Episode 61730] reward=-144176439.9 actor_loss=5.8535 critic_loss=2608169418752.0000 entropy=17.8019 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 61740] reward=-11842682335.5 actor_loss=94.4212 critic_loss=101849713427699024.0000 entropy=17.8141 approx_kl=0.0334 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 61740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-511168.6 mean_steps=15.2
|
|
[Episode 61750] reward=-120968802.2 actor_loss=0.3199 critic_loss=148573396523.8857 entropy=17.8171 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 61760] reward=-118720717.8 actor_loss=0.3343 critic_loss=135731297211.7333 entropy=17.8050 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 61760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-313573.7 mean_steps=15.9
|
|
[Episode 61770] reward=-169876739.3 actor_loss=0.3651 critic_loss=9493463602153.2441 entropy=17.8032 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 61780] reward=-368894095.1 actor_loss=0.2259 critic_loss=166860149233345.4375 entropy=17.8236 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 61780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449367.3 mean_steps=15.1
|
|
[Episode 61790] reward=-133258575.8 actor_loss=0.1794 critic_loss=450554004255.2195 entropy=17.8468 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 61800] reward=-130666279.4 actor_loss=0.3847 critic_loss=1461475192923.0222 entropy=17.8472 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 61800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467185.6 mean_steps=14.8
|
|
[Episode 61810] reward=-131913964.9 actor_loss=0.3137 critic_loss=754533756928.0000 entropy=17.8489 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 61820] reward=-124680888.4 actor_loss=0.3762 critic_loss=143356984433.7778 entropy=17.8404 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 61820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-654908.0 mean_steps=11.9
|
|
[Episode 61830] reward=-120905948.7 actor_loss=0.3155 critic_loss=285904956341.0732 entropy=17.8394 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 61840] reward=-115403856.9 actor_loss=0.3591 critic_loss=131049519149.5111 entropy=17.8378 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 61840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507348.5 mean_steps=13.2
|
|
[Episode 61850] reward=-121827613.7 actor_loss=0.2402 critic_loss=137619750183.8222 entropy=17.8250 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 61860] reward=-120271492.6 actor_loss=0.2537 critic_loss=135138629524.2105 entropy=17.8190 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 61860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512129.6 mean_steps=14.2
|
|
[Episode 61870] reward=-120735440.1 actor_loss=0.2917 critic_loss=137863017995.3778 entropy=17.8276 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 61880] reward=-119728444.0 actor_loss=0.3003 critic_loss=138207321019.7333 entropy=17.8087 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 61880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-446805.1 mean_steps=16.8
|
|
[Episode 61890] reward=-136136536.7 actor_loss=0.2780 critic_loss=836977861591.0400 entropy=17.8133 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 61900] reward=-122993780.3 actor_loss=0.4024 critic_loss=145640015579.4286 entropy=17.8063 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 61900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-636192.8 mean_steps=12.2
|
|
[Episode 61910] reward=-119054429.3 actor_loss=0.2387 critic_loss=133987182182.4000 entropy=17.7961 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 61920] reward=-121572272.9 actor_loss=0.2169 critic_loss=141604447653.6471 entropy=17.7950 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 61920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552546.0 mean_steps=13.7
|
|
[Episode 61930] reward=-126187236.7 actor_loss=0.3505 critic_loss=329885075456.0000 entropy=17.7973 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 61940] reward=-120270296.1 actor_loss=0.2833 critic_loss=135355926260.8696 entropy=17.7914 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 61940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-479591.1 mean_steps=15.6
|
|
[Episode 61950] reward=-115422414.7 actor_loss=0.2828 critic_loss=131099326936.6154 entropy=17.7941 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 61960] reward=-115768583.6 actor_loss=0.3411 critic_loss=136084255744.0000 entropy=17.8003 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 61960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-499995.7 mean_steps=14.6
|
|
[Episode 61970] reward=-158286199.5 actor_loss=0.2945 critic_loss=6516727611392.0000 entropy=17.7967 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 61980] reward=-141126027.2 actor_loss=0.3228 critic_loss=2235334912318.5776 entropy=17.8020 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 61980] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-263383.1 mean_steps=17.7
|
|
[Episode 61990] reward=-215323655.3 actor_loss=0.2111 critic_loss=31386719480126.5781 entropy=17.8263 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 62000] reward=-118849861.6 actor_loss=0.2765 critic_loss=139030182297.6000 entropy=17.8427 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 62000] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-679342.0 mean_steps=11.7
|
|
[Episode 62010] reward=-273168823.6 actor_loss=0.2725 critic_loss=73551807262082.8438 entropy=17.8386 approx_kl=-0.0006 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 62020] reward=-134866896.3 actor_loss=0.2922 critic_loss=1169569076090.4348 entropy=17.8703 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 62020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520011.7 mean_steps=14.2
|
|
[Episode 62030] reward=-115252672.1 actor_loss=0.3595 critic_loss=128734712003.0476 entropy=17.8744 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 62040] reward=-113166576.3 actor_loss=0.3310 critic_loss=151848092818.2857 entropy=17.8776 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 62040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498117.7 mean_steps=14.9
|
|
[Episode 62050] reward=-116054481.5 actor_loss=0.2654 critic_loss=132667352064.0000 entropy=17.8636 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 62060] reward=-122858659.6 actor_loss=0.3439 critic_loss=138522818883.3684 entropy=17.8565 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 62060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613074.4 mean_steps=13.2
|
|
[Episode 62070] reward=-122607192.3 actor_loss=0.2050 critic_loss=141229930359.4667 entropy=17.8554 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 62080] reward=-117104474.2 actor_loss=0.2656 critic_loss=135516233728.0000 entropy=17.8413 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 62080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471436.7 mean_steps=14.2
|
|
[Episode 62090] reward=-122411720.6 actor_loss=0.2920 critic_loss=158031842816.0000 entropy=17.8315 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 62100] reward=-121435826.3 actor_loss=0.2698 critic_loss=142827684750.2222 entropy=17.8326 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 62100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590587.1 mean_steps=13.3
|
|
[Episode 62110] reward=-120738517.0 actor_loss=0.2622 critic_loss=139728876690.2857 entropy=17.8249 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 62120] reward=-120867132.6 actor_loss=0.2141 critic_loss=146034729704.7273 entropy=17.8258 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 62120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492361.0 mean_steps=14.2
|
|
[Episode 62130] reward=-120532567.5 actor_loss=0.2949 critic_loss=136666039149.7143 entropy=17.8127 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 62140] reward=-120462702.4 actor_loss=0.3002 critic_loss=135836914781.0909 entropy=17.8045 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 62140] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-661388.1 mean_steps=11.3
|
|
[Episode 62150] reward=-120282001.3 actor_loss=0.3391 critic_loss=143784866019.5555 entropy=17.7909 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 62160] reward=-122598132.5 actor_loss=0.2820 critic_loss=141884663001.2121 entropy=17.7895 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 62160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589836.5 mean_steps=12.7
|
|
[Episode 62170] reward=-118976068.5 actor_loss=0.2323 critic_loss=132822396446.1176 entropy=17.7664 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 62180] reward=-122435125.2 actor_loss=0.2342 critic_loss=136197972178.0513 entropy=17.7614 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 62180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541793.2 mean_steps=14.2
|
|
[Episode 62190] reward=-120626510.5 actor_loss=0.2789 critic_loss=169737928704.0000 entropy=17.7564 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 62200] reward=-111116015.1 actor_loss=0.3677 critic_loss=128829764767.2889 entropy=17.7588 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 62200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465217.8 mean_steps=14.8
|
|
[Episode 62210] reward=-123161659.6 actor_loss=0.1721 critic_loss=140370896169.2903 entropy=17.7503 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 62220] reward=-114796713.2 actor_loss=0.1546 critic_loss=130609323667.9111 entropy=17.7531 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 62220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471842.6 mean_steps=15.3
|
|
[Episode 62230] reward=-116035937.2 actor_loss=0.3002 critic_loss=132421764710.4000 entropy=17.7429 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 62240] reward=-118111995.1 actor_loss=0.2005 critic_loss=135934588973.5111 entropy=17.7404 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 62240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-642169.9 mean_steps=13.0
|
|
[Episode 62250] reward=-121001238.6 actor_loss=0.3110 critic_loss=141272298030.5454 entropy=17.7499 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 62260] reward=-114930447.6 actor_loss=0.3146 critic_loss=127677904119.1724 entropy=17.7430 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 62260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-388927.8 mean_steps=16.2
|
|
[Episode 62270] reward=-120077168.7 actor_loss=0.3938 critic_loss=146540335718.4000 entropy=17.7484 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 62280] reward=-119601692.1 actor_loss=0.2816 critic_loss=137836946152.7273 entropy=17.7371 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 62280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514918.5 mean_steps=14.2
|
|
[Episode 62290] reward=-121551582.5 actor_loss=0.2801 critic_loss=132815487332.1739 entropy=17.7264 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 62300] reward=-118788917.3 actor_loss=0.4325 critic_loss=138921383377.4546 entropy=17.7276 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 62300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550316.8 mean_steps=14.3
|
|
[Episode 62310] reward=-118223417.8 actor_loss=0.3341 critic_loss=136902270429.8667 entropy=17.7281 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 62320] reward=-117866220.0 actor_loss=0.3070 critic_loss=133532891818.6667 entropy=17.7346 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 62320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-626567.2 mean_steps=11.8
|
|
[Episode 62330] reward=-121974213.4 actor_loss=0.2573 critic_loss=143561343795.2000 entropy=17.7369 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 62340] reward=-116772476.0 actor_loss=0.3703 critic_loss=137723485115.7333 entropy=17.7323 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 62340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584782.5 mean_steps=12.8
|
|
[Episode 62350] reward=-187792932.2 actor_loss=0.2520 critic_loss=13761464447795.1992 entropy=17.7131 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 62360] reward=-125212062.6 actor_loss=0.1764 critic_loss=145004174230.0690 entropy=17.7165 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 62360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378341.7 mean_steps=15.9
|
|
[Episode 62370] reward=-117790567.1 actor_loss=0.2571 critic_loss=131984805165.1765 entropy=17.7219 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 62380] reward=-112712847.6 actor_loss=0.4013 critic_loss=124270325668.9778 entropy=17.7304 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 62380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-451392.2 mean_steps=16.6
|
|
[Episode 62390] reward=-118479245.6 actor_loss=0.2977 critic_loss=137714037145.6000 entropy=17.7268 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 62400] reward=-129634290.1 actor_loss=0.3282 critic_loss=786940318418.8235 entropy=17.7193 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 62400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461577.0 mean_steps=14.8
|
|
[Episode 62410] reward=-118228075.2 actor_loss=0.2308 critic_loss=135626708309.3333 entropy=17.7163 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 62420] reward=-118759716.9 actor_loss=0.2211 critic_loss=133775747915.2941 entropy=17.7088 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 62420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571962.7 mean_steps=13.6
|
|
[Episode 62430] reward=-118891268.4 actor_loss=0.2694 critic_loss=137395226214.4000 entropy=17.7090 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 62440] reward=-119620499.5 actor_loss=0.2894 critic_loss=140227026124.8000 entropy=17.7004 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 62440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503646.3 mean_steps=15.1
|
|
[Episode 62450] reward=-120063829.0 actor_loss=0.2935 critic_loss=141101092119.2727 entropy=17.7058 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 62460] reward=-122516329.6 actor_loss=0.3282 critic_loss=150470875428.5714 entropy=17.7161 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 62460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-599699.8 mean_steps=11.9
|
|
[Episode 62470] reward=-120919035.6 actor_loss=0.2961 critic_loss=140821271645.0909 entropy=17.7051 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 62480] reward=-121554548.9 actor_loss=0.2463 critic_loss=134726838272.0000 entropy=17.7086 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 62480] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-673765.1 mean_steps=11.6
|
|
[Episode 62490] reward=-113222170.1 actor_loss=0.3790 critic_loss=139301965965.2414 entropy=17.7085 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 62500] reward=-117431208.4 actor_loss=0.1944 critic_loss=130810176545.0323 entropy=17.7082 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 62500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-397514.4 mean_steps=14.6
|
|
[Episode 62510] reward=-118733283.3 actor_loss=0.2264 critic_loss=131006377369.6000 entropy=17.7060 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 62520] reward=-116602292.5 actor_loss=0.3223 critic_loss=129959436852.9655 entropy=17.7051 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 62520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537668.4 mean_steps=14.2
|
|
[Episode 62530] reward=-122616153.2 actor_loss=0.3004 critic_loss=140983684042.1053 entropy=17.7031 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 62540] reward=-121605082.7 actor_loss=0.1787 critic_loss=139151117516.8000 entropy=17.6995 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 62540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507907.0 mean_steps=14.3
|
|
[Episode 62550] reward=-115277209.2 actor_loss=0.2472 critic_loss=130451975372.8000 entropy=17.6817 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 62560] reward=-117782581.1 actor_loss=0.4066 critic_loss=132514344313.2632 entropy=17.6889 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 62560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-383845.0 mean_steps=14.2
|
|
[Episode 62570] reward=-126201634.3 actor_loss=0.2321 critic_loss=169446100718.9333 entropy=17.6869 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 62580] reward=-117801117.8 actor_loss=0.3916 critic_loss=160212514726.9565 entropy=17.6722 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 62580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498031.0 mean_steps=14.2
|
|
[Episode 62590] reward=-121205127.2 actor_loss=0.4062 critic_loss=141834396818.2857 entropy=17.6780 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 62600] reward=-119280196.8 actor_loss=0.2883 critic_loss=136002285275.4286 entropy=17.6778 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 62600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558576.8 mean_steps=13.7
|
|
[Episode 62610] reward=-125205213.0 actor_loss=0.2326 critic_loss=173735332522.6667 entropy=17.6894 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 62620] reward=-118340553.4 actor_loss=0.2050 critic_loss=132491887047.1111 entropy=17.6975 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 62620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498877.1 mean_steps=15.1
|
|
[Episode 62630] reward=-121432233.9 actor_loss=0.2357 critic_loss=141785798097.4546 entropy=17.6831 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 62640] reward=-114433459.8 actor_loss=0.2619 critic_loss=128870359040.0000 entropy=17.6721 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 62640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439661.7 mean_steps=15.3
|
|
[Episode 62650] reward=-120332332.1 actor_loss=0.1715 critic_loss=136666625024.0000 entropy=17.6720 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 62660] reward=-121151947.3 actor_loss=0.2963 critic_loss=143937677498.1818 entropy=17.6891 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 62660] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-251637.1 mean_steps=17.9
|
|
[Episode 62670] reward=-116857321.9 actor_loss=0.3369 critic_loss=133931051495.6190 entropy=17.6842 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 62680] reward=-113002401.4 actor_loss=0.3701 critic_loss=126874331709.4400 entropy=17.6601 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 62680] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-690702.0 mean_steps=11.2
|
|
[Episode 62690] reward=-120541615.0 actor_loss=0.1865 critic_loss=135547839311.4483 entropy=17.6604 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 62700] reward=-118763911.7 actor_loss=0.2483 critic_loss=151374562918.4000 entropy=17.6718 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 62700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419377.2 mean_steps=15.1
|
|
[Episode 62710] reward=-120596160.9 actor_loss=0.2356 critic_loss=136883952128.0000 entropy=17.6930 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 62720] reward=-116051565.0 actor_loss=0.3847 critic_loss=130638292894.4762 entropy=17.6820 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 62720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-426965.5 mean_steps=13.4
|
|
[Episode 62730] reward=-121931231.5 actor_loss=0.2827 critic_loss=139069770020.5714 entropy=17.6735 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 62740] reward=-119774766.1 actor_loss=0.2320 critic_loss=134582765410.4615 entropy=17.6740 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 62740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516225.5 mean_steps=14.2
|
|
[Episode 62750] reward=-125007925.9 actor_loss=0.2526 critic_loss=179642281038.7692 entropy=17.6756 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 62760] reward=-121825566.6 actor_loss=0.2860 critic_loss=134584086528.0000 entropy=17.6840 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 62760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433392.4 mean_steps=15.7
|
|
[Episode 62770] reward=-138812657.4 actor_loss=0.2719 critic_loss=1345836748800.0000 entropy=17.6706 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 62780] reward=-124919760.2 actor_loss=0.2801 critic_loss=146390979470.2222 entropy=17.6769 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 62780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-601002.9 mean_steps=12.1
|
|
[Episode 62790] reward=-114749289.5 actor_loss=0.3989 critic_loss=126778493758.2703 entropy=17.6891 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 62800] reward=-119586767.1 actor_loss=0.1326 critic_loss=134074306969.6000 entropy=17.6877 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 62800] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-648536.6 mean_steps=10.4
|
|
[Episode 62810] reward=-122160156.1 actor_loss=0.2879 critic_loss=190908259669.3333 entropy=17.6839 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 62820] reward=-121446668.6 actor_loss=0.2639 critic_loss=133470505026.0645 entropy=17.6773 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 62820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524468.9 mean_steps=13.7
|
|
[Episode 62830] reward=-118431369.9 actor_loss=0.3190 critic_loss=134429883105.2800 entropy=17.6776 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 62840] reward=-117293228.0 actor_loss=0.2107 critic_loss=134984633150.2703 entropy=17.6780 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 62840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-432696.0 mean_steps=16.6
|
|
[Episode 62850] reward=-119799655.8 actor_loss=0.2375 critic_loss=134841545350.7368 entropy=17.6831 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 62860] reward=-123925214.7 actor_loss=0.1918 critic_loss=138347553560.7742 entropy=17.6924 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 62860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453319.8 mean_steps=14.6
|
|
[Episode 62870] reward=-122149682.0 actor_loss=0.2527 critic_loss=152153844121.6000 entropy=17.6844 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 62880] reward=-119551634.4 actor_loss=0.2870 critic_loss=139074395769.9048 entropy=17.6724 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 62880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-639807.0 mean_steps=13.3
|
|
[Episode 62890] reward=-116420614.8 actor_loss=0.2266 critic_loss=136983123533.5758 entropy=17.6638 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 62900] reward=-115990137.9 actor_loss=0.2489 critic_loss=130384787298.4615 entropy=17.6550 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 62900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-625305.1 mean_steps=14.6
|
|
[Episode 62910] reward=-117996855.9 actor_loss=0.2228 critic_loss=133652093155.5556 entropy=17.6459 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 62920] reward=-112159985.0 actor_loss=0.2607 critic_loss=125943137385.9310 entropy=17.6401 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 62920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-386707.5 mean_steps=15.3
|
|
[Episode 62930] reward=-118614177.7 actor_loss=0.2210 critic_loss=131043640206.2222 entropy=17.6155 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 62940] reward=-117380089.1 actor_loss=0.3155 critic_loss=135617615028.7059 entropy=17.6157 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 62940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427196.8 mean_steps=15.4
|
|
[Episode 62950] reward=-117778779.2 actor_loss=0.2535 critic_loss=127418638700.0889 entropy=17.6069 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 62960] reward=-117221960.2 actor_loss=0.1754 critic_loss=128991933053.1555 entropy=17.5999 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 62960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418371.4 mean_steps=15.3
|
|
[Episode 62970] reward=-117822662.7 actor_loss=0.2404 critic_loss=132436371817.4118 entropy=17.6012 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 62980] reward=-115065485.8 actor_loss=0.3425 critic_loss=129401134375.8222 entropy=17.6085 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 62980] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-615435.9 mean_steps=10.8
|
|
[Episode 62990] reward=-122258525.5 actor_loss=0.2650 critic_loss=138296978809.2632 entropy=17.5975 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 63000] reward=-115695989.3 actor_loss=0.2285 critic_loss=131092214761.2444 entropy=17.6059 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 63000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592158.7 mean_steps=12.8
|
|
[Episode 63010] reward=-120134020.7 actor_loss=0.2915 critic_loss=133653135360.0000 entropy=17.6027 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 63020] reward=-115198255.4 actor_loss=0.2807 critic_loss=131785414519.4667 entropy=17.5956 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 63020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536011.6 mean_steps=14.2
|
|
[Episode 63030] reward=-120261527.3 actor_loss=0.3253 critic_loss=143255997402.0741 entropy=17.5852 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 63040] reward=-117665850.2 actor_loss=0.2201 critic_loss=132907609916.9524 entropy=17.5947 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 63040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459990.3 mean_steps=14.7
|
|
[Episode 63050] reward=-117694792.4 actor_loss=0.2999 critic_loss=135262362062.4516 entropy=17.5881 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 63060] reward=-123403803.5 actor_loss=0.3176 critic_loss=138988156245.3333 entropy=17.5902 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 63060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-642787.3 mean_steps=11.8
|
|
[Episode 63070] reward=-121613737.9 actor_loss=0.2467 critic_loss=133610366020.2667 entropy=17.5907 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 63080] reward=-116187087.2 actor_loss=0.3379 critic_loss=128484353092.2667 entropy=17.5887 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 63080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-411227.7 mean_steps=14.4
|
|
[Episode 63090] reward=-118671936.9 actor_loss=0.2350 critic_loss=128079102186.0571 entropy=17.5806 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 63100] reward=-119246649.2 actor_loss=0.3480 critic_loss=129776175968.7111 entropy=17.5860 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 63100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-499424.3 mean_steps=16.3
|
|
[Episode 63110] reward=-118453246.3 actor_loss=0.2857 critic_loss=134624074051.3684 entropy=17.5831 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 63120] reward=-115932943.3 actor_loss=0.2979 critic_loss=127422957410.4615 entropy=17.5731 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 63120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561948.0 mean_steps=13.4
|
|
[Episode 63130] reward=-120865876.3 actor_loss=0.2790 critic_loss=135579952014.2222 entropy=17.5720 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 63140] reward=-115510012.0 actor_loss=0.2937 critic_loss=129708484421.8182 entropy=17.5696 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 63140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477607.7 mean_steps=14.7
|
|
[Episode 63150] reward=-115601337.7 actor_loss=0.3183 critic_loss=126361981650.8235 entropy=17.5682 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 63160] reward=-120846651.8 actor_loss=0.3314 critic_loss=136364195840.0000 entropy=17.5644 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 63160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457297.1 mean_steps=15.9
|
|
[Episode 63170] reward=-115946798.5 actor_loss=0.3180 critic_loss=134888874639.3600 entropy=17.5735 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 63180] reward=-117201338.0 actor_loss=0.3436 critic_loss=129192423765.3333 entropy=17.5692 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 63180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572750.8 mean_steps=12.6
|
|
[Episode 63190] reward=-122953333.8 actor_loss=0.1960 critic_loss=137713077248.0000 entropy=17.5624 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 63200] reward=-121048916.8 actor_loss=0.2800 critic_loss=135260434913.8824 entropy=17.5532 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 63200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580344.6 mean_steps=12.8
|
|
[Episode 63210] reward=-123563979.8 actor_loss=0.2345 critic_loss=136845347802.0741 entropy=17.5542 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 63220] reward=-116832674.1 actor_loss=0.2637 critic_loss=130230030576.9412 entropy=17.5566 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 63220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-363530.5 mean_steps=15.9
|
|
[Episode 63230] reward=-122140203.8 actor_loss=0.2472 critic_loss=136009620138.6667 entropy=17.5562 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 63240] reward=-114559722.5 actor_loss=0.3269 critic_loss=126954576851.4783 entropy=17.5599 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 63240] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-355990.2 mean_steps=16.7
|
|
[Episode 63250] reward=-116895096.8 actor_loss=0.3116 critic_loss=128972051154.8235 entropy=17.5556 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 63260] reward=-117154590.0 actor_loss=0.2619 critic_loss=129548329740.1905 entropy=17.5601 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 63260] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-757412.7 mean_steps=11.0
|
|
[Episode 63270] reward=-118642700.2 actor_loss=0.3312 critic_loss=130963361336.8889 entropy=17.5473 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 63280] reward=-117621521.6 actor_loss=0.2914 critic_loss=126984398740.2105 entropy=17.5519 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 63280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439555.0 mean_steps=15.9
|
|
[Episode 63290] reward=-116382015.5 actor_loss=0.3347 critic_loss=126482693051.7333 entropy=17.5575 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 63300] reward=-117986914.0 actor_loss=0.2625 critic_loss=134987593652.1481 entropy=17.5514 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 63300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516055.8 mean_steps=14.2
|
|
[Episode 63310] reward=-125045389.9 actor_loss=0.2185 critic_loss=138738613840.8421 entropy=17.5525 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 63320] reward=-118139530.7 actor_loss=0.3186 critic_loss=127201634798.3448 entropy=17.5577 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 63320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482655.9 mean_steps=14.8
|
|
[Episode 63330] reward=-111282069.0 actor_loss=0.2314 critic_loss=122632187793.2973 entropy=17.5570 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 63340] reward=-122038036.6 actor_loss=0.3499 critic_loss=135098945945.6000 entropy=17.5613 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 63340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-336033.9 mean_steps=16.9
|
|
[Episode 63350] reward=-114807849.2 actor_loss=0.2467 critic_loss=129665548509.4054 entropy=17.5669 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 63360] reward=-121579412.1 actor_loss=0.2331 critic_loss=139407969848.8889 entropy=17.5656 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 63360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398398.8 mean_steps=16.2
|
|
[Episode 63370] reward=-120018038.5 actor_loss=0.2267 critic_loss=133515621717.3333 entropy=17.5641 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 63380] reward=-117217586.2 actor_loss=0.3634 critic_loss=131899538432.0000 entropy=17.5666 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 63380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-555824.8 mean_steps=14.3
|
|
[Episode 63390] reward=-117219522.0 actor_loss=0.2270 critic_loss=135352872667.4286 entropy=17.5595 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 63400] reward=-116088868.0 actor_loss=0.3118 critic_loss=126537338060.8000 entropy=17.5515 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 63400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385360.2 mean_steps=16.4
|
|
[Episode 63410] reward=-117482300.0 actor_loss=0.2747 critic_loss=129814363249.7778 entropy=17.5531 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 63420] reward=-116865147.0 actor_loss=0.3295 critic_loss=131327849881.6000 entropy=17.5667 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 63420] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-361017.7 mean_steps=16.7
|
|
[Episode 63430] reward=-119067061.2 actor_loss=0.3530 critic_loss=135837682005.3333 entropy=17.5607 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 63440] reward=-119781309.3 actor_loss=0.2261 critic_loss=136454162311.5294 entropy=17.5639 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 63440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455215.0 mean_steps=15.8
|
|
[Episode 63450] reward=-115726839.5 actor_loss=0.3369 critic_loss=125393306770.2857 entropy=17.5768 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 63460] reward=-119733302.9 actor_loss=0.2453 critic_loss=133020378908.4444 entropy=17.5731 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 63460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485283.0 mean_steps=14.4
|
|
[Episode 63470] reward=-113529944.6 actor_loss=0.3318 critic_loss=128722934547.6923 entropy=17.5787 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 63480] reward=-122217827.3 actor_loss=0.2279 critic_loss=136704775509.3333 entropy=17.5827 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 63480] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-659955.6 mean_steps=11.5
|
|
[Episode 63490] reward=-114003750.0 actor_loss=0.2528 critic_loss=129821987328.0000 entropy=17.5879 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 63500] reward=-115902150.2 actor_loss=0.3085 critic_loss=127087088253.1555 entropy=17.5722 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 63500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-683253.4 mean_steps=12.7
|
|
[Episode 63510] reward=-122217216.5 actor_loss=0.2777 critic_loss=135638871667.6129 entropy=17.5711 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 63520] reward=-122225495.7 actor_loss=0.2725 critic_loss=137182897664.0000 entropy=17.5624 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 63520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-351956.9 mean_steps=16.1
|
|
[Episode 63530] reward=-120420745.2 actor_loss=0.2948 critic_loss=134888267093.3333 entropy=17.5626 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 63540] reward=-123324090.1 actor_loss=0.3741 critic_loss=140756823341.1765 entropy=17.5735 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 63540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503789.1 mean_steps=14.2
|
|
[Episode 63550] reward=-119063706.0 actor_loss=0.3029 critic_loss=137280294539.6364 entropy=17.5737 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 63560] reward=-120347982.3 actor_loss=0.3446 critic_loss=139528871389.8667 entropy=17.5919 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 63560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-352074.7 mean_steps=16.2
|
|
[Episode 63570] reward=-108668013.0 actor_loss=0.3944 critic_loss=123645503806.5778 entropy=17.5970 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 63580] reward=-117646742.0 actor_loss=0.3037 critic_loss=129560887113.9556 entropy=17.6078 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 63580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570471.2 mean_steps=13.7
|
|
[Episode 63590] reward=-118558519.7 actor_loss=0.2591 critic_loss=134116664064.0000 entropy=17.6092 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 63600] reward=-113346611.5 actor_loss=0.3683 critic_loss=124934550141.1555 entropy=17.6099 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 63600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-422104.1 mean_steps=16.8
|
|
[Episode 63610] reward=-120122609.1 actor_loss=0.2296 critic_loss=137302960030.4762 entropy=17.5960 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 63620] reward=-114386875.8 actor_loss=0.3028 critic_loss=129843948690.2857 entropy=17.6064 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 63620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515085.4 mean_steps=13.3
|
|
[Episode 63630] reward=-116796047.4 actor_loss=0.2633 critic_loss=133327001061.0526 entropy=17.6148 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 63640] reward=-119009163.9 actor_loss=0.3269 critic_loss=135183017808.4571 entropy=17.6146 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 63640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466639.8 mean_steps=14.7
|
|
[Episode 63650] reward=-119916934.2 actor_loss=0.2898 critic_loss=134105831287.4667 entropy=17.6240 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 63660] reward=-117403820.6 actor_loss=0.3675 critic_loss=134065046134.1538 entropy=17.6183 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 63660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-367332.9 mean_steps=16.1
|
|
[Episode 63670] reward=-120545066.4 actor_loss=0.2432 critic_loss=135793990769.7778 entropy=17.6286 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 63680] reward=-120777765.3 actor_loss=0.2622 critic_loss=134945201927.7576 entropy=17.6228 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 63680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641775.3 mean_steps=12.2
|
|
[Episode 63690] reward=-124756954.9 actor_loss=0.4271 critic_loss=148751933796.1739 entropy=17.6096 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 63700] reward=-125283036.1 actor_loss=0.3236 critic_loss=172630680917.3333 entropy=17.6033 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 63700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573261.2 mean_steps=13.6
|
|
[Episode 63710] reward=-124396972.5 actor_loss=0.2660 critic_loss=142730215424.0000 entropy=17.6015 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 63720] reward=-124262777.6 actor_loss=0.2313 critic_loss=142130172723.2000 entropy=17.6103 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 63720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540864.5 mean_steps=13.9
|
|
[Episode 63730] reward=-124131677.3 actor_loss=0.2964 critic_loss=139733590016.0000 entropy=17.5935 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 63740] reward=-113160200.1 actor_loss=0.2610 critic_loss=121941626880.0000 entropy=17.5832 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 63740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-605296.3 mean_steps=12.8
|
|
[Episode 63750] reward=-122930804.0 actor_loss=0.3589 critic_loss=180515334826.6667 entropy=17.5846 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 63760] reward=-121104404.1 actor_loss=0.2419 critic_loss=139434563253.6774 entropy=17.5953 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 63760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548251.6 mean_steps=14.5
|
|
[Episode 63770] reward=-126912107.4 actor_loss=0.1839 critic_loss=287656396312.3810 entropy=17.6027 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 63780] reward=-120903004.7 actor_loss=0.2328 critic_loss=138568838795.6364 entropy=17.5938 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 63780] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-339132.9 mean_steps=17.1
|
|
[Episode 63790] reward=-118170102.4 actor_loss=0.2785 critic_loss=228815771260.5405 entropy=17.5970 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 63800] reward=-119404331.8 actor_loss=0.3523 critic_loss=190415963340.8000 entropy=17.6027 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 63800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474493.0 mean_steps=14.8
|
|
[Episode 63810] reward=-118495485.7 actor_loss=0.3235 critic_loss=127836768548.5714 entropy=17.6048 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 63820] reward=-116076574.8 actor_loss=0.2586 critic_loss=130550351280.3556 entropy=17.6153 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 63820] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-340056.7 mean_steps=17.6
|
|
[Episode 63830] reward=-121683436.9 actor_loss=0.3179 critic_loss=136802898739.2000 entropy=17.6131 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 63840] reward=-116147470.3 actor_loss=0.2894 critic_loss=131423831440.6956 entropy=17.6148 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 63840] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-650655.0 mean_steps=11.4
|
|
[Episode 63850] reward=-116613099.1 actor_loss=0.2591 critic_loss=144859635712.0000 entropy=17.6243 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 63860] reward=-122331782.8 actor_loss=0.2189 critic_loss=134084762130.9630 entropy=17.6249 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 63860] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-680460.2 mean_steps=11.3
|
|
[Episode 63870] reward=-118475977.3 actor_loss=0.3476 critic_loss=134478746246.7368 entropy=17.6263 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 63880] reward=-122170714.7 actor_loss=0.3357 critic_loss=138406025352.5333 entropy=17.6301 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 63880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536188.1 mean_steps=13.3
|
|
[Episode 63890] reward=-116920247.4 actor_loss=0.4455 critic_loss=133474830745.6000 entropy=17.6397 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 63900] reward=-115611627.7 actor_loss=0.3045 critic_loss=128462691222.0690 entropy=17.6232 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 63900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-587157.1 mean_steps=12.4
|
|
[Episode 63910] reward=-113795820.9 actor_loss=0.2495 critic_loss=134663707209.1429 entropy=17.6158 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 63920] reward=-114162798.8 actor_loss=0.3094 critic_loss=125891094430.4762 entropy=17.6166 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 63920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528053.1 mean_steps=14.2
|
|
[Episode 63930] reward=-121246706.4 actor_loss=0.2371 critic_loss=140714758144.0000 entropy=17.6250 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 63940] reward=-119294464.9 actor_loss=0.2787 critic_loss=139864020582.4000 entropy=17.6300 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 63940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-343704.4 mean_steps=15.7
|
|
[Episode 63950] reward=-118252670.9 actor_loss=0.4025 critic_loss=132026929737.1429 entropy=17.6364 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 63960] reward=-120708245.1 actor_loss=0.1951 critic_loss=139999282412.3077 entropy=17.6375 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 63960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-506551.9 mean_steps=13.3
|
|
[Episode 63970] reward=-113660561.2 actor_loss=0.2887 critic_loss=129195490742.8571 entropy=17.6351 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 63980] reward=-120027560.8 actor_loss=0.3384 critic_loss=139802554709.3333 entropy=17.6432 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 63980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-572953.4 mean_steps=13.5
|
|
[Episode 63990] reward=-119834881.6 actor_loss=0.2949 critic_loss=139287563650.8445 entropy=17.6522 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 64000] reward=-116316410.4 actor_loss=0.3593 critic_loss=127116179828.3636 entropy=17.6547 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 64000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470631.0 mean_steps=14.1
|
|
[Episode 64010] reward=-117407000.5 actor_loss=0.3280 critic_loss=130393826725.6471 entropy=17.6569 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 64020] reward=-120848552.2 actor_loss=0.2996 critic_loss=142755586048.0000 entropy=17.6550 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 64020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500541.4 mean_steps=14.2
|
|
[Episode 64030] reward=-117552710.6 actor_loss=0.3191 critic_loss=126468064324.2667 entropy=17.6679 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 64040] reward=-119708618.5 actor_loss=0.2942 critic_loss=137428455716.5714 entropy=17.6681 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 64040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444447.6 mean_steps=15.9
|
|
[Episode 64050] reward=-116939300.1 actor_loss=0.2617 critic_loss=130354395233.5238 entropy=17.6778 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 64060] reward=-123284582.8 actor_loss=0.3506 critic_loss=139661919118.2222 entropy=17.6692 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 64060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-686559.9 mean_steps=13.1
|
|
[Episode 64070] reward=-116064057.4 actor_loss=0.2701 critic_loss=135790164650.6667 entropy=17.6563 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 64080] reward=-128261129.9 actor_loss=0.2514 critic_loss=148504658550.1538 entropy=17.6507 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 64080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-689856.1 mean_steps=12.3
|
|
[Episode 64090] reward=-120104438.5 actor_loss=0.3272 critic_loss=135831588141.1765 entropy=17.6457 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 64100] reward=-121292826.9 actor_loss=0.2628 critic_loss=139192344576.0000 entropy=17.6378 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 64100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-417744.5 mean_steps=16.5
|
|
[Episode 64110] reward=-121084519.6 actor_loss=0.2289 critic_loss=131376651657.8462 entropy=17.6248 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 64120] reward=-120353857.2 actor_loss=0.1651 critic_loss=135819152261.1200 entropy=17.6265 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 64120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-496263.1 mean_steps=15.2
|
|
[Episode 64130] reward=-115860090.7 actor_loss=0.2992 critic_loss=129129201664.0000 entropy=17.6384 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 64140] reward=-117225729.8 actor_loss=0.2954 critic_loss=128757254233.0435 entropy=17.6388 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 64140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-583706.3 mean_steps=13.6
|
|
[Episode 64150] reward=-113247246.3 actor_loss=0.3291 critic_loss=122560747520.0000 entropy=17.6411 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 64160] reward=-124049920.6 actor_loss=0.3456 critic_loss=146633400950.1538 entropy=17.6221 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 64160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513483.4 mean_steps=14.1
|
|
[Episode 64170] reward=-118714858.2 actor_loss=0.2042 critic_loss=132727814144.0000 entropy=17.6156 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 64180] reward=-117627767.4 actor_loss=0.2526 critic_loss=132222619122.8718 entropy=17.6210 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 64180] success_rate=0.750 qp_infeasible_rate=0.250 mean_return=-197821.3 mean_steps=20.1
|
|
[Episode 64190] reward=-123145893.9 actor_loss=0.1442 critic_loss=152613921792.0000 entropy=17.6124 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 64200] reward=-115276882.5 actor_loss=0.2012 critic_loss=123124423653.7436 entropy=17.6216 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 64200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566432.3 mean_steps=13.7
|
|
[Episode 64210] reward=-115244317.8 actor_loss=0.3571 critic_loss=141582211364.5714 entropy=17.6302 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 64220] reward=-115186455.8 actor_loss=0.3233 critic_loss=130242463024.4324 entropy=17.6307 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 64220] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-693762.8 mean_steps=10.9
|
|
[Episode 64230] reward=-121944937.6 actor_loss=0.2794 critic_loss=147084560922.9474 entropy=17.6318 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 64240] reward=-117396269.1 actor_loss=0.4113 critic_loss=137906471253.3333 entropy=17.6136 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 64240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418680.4 mean_steps=15.2
|
|
[Episode 64250] reward=-120819827.4 actor_loss=0.2766 critic_loss=132422128201.1429 entropy=17.6252 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 64260] reward=-120962726.4 actor_loss=0.3222 critic_loss=135426863104.0000 entropy=17.6255 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 64260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458966.4 mean_steps=14.6
|
|
[Episode 64270] reward=-123283193.9 actor_loss=0.2704 critic_loss=148126784443.7333 entropy=17.6209 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 64280] reward=-119565970.8 actor_loss=0.3474 critic_loss=139990546022.4000 entropy=17.6217 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 64280] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-290029.9 mean_steps=17.5
|
|
[Episode 64290] reward=-117954143.9 actor_loss=0.3448 critic_loss=130470100336.6400 entropy=17.6409 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 64300] reward=-120570373.5 actor_loss=0.2535 critic_loss=135527431314.2857 entropy=17.6452 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 64300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480131.3 mean_steps=15.1
|
|
[Episode 64310] reward=-119920683.9 actor_loss=0.3139 critic_loss=138134825984.0000 entropy=17.6535 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 64320] reward=-123914019.0 actor_loss=0.2280 critic_loss=138349783197.5385 entropy=17.6558 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 64320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-664528.0 mean_steps=12.5
|
|
[Episode 64330] reward=-119464474.0 actor_loss=0.2355 critic_loss=130172245560.8889 entropy=17.6540 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 64340] reward=-117739361.1 actor_loss=0.4082 critic_loss=129688858142.1176 entropy=17.6597 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 64340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-367792.2 mean_steps=16.9
|
|
[Episode 64350] reward=-119607883.9 actor_loss=0.3403 critic_loss=139074019328.0000 entropy=17.6701 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 64360] reward=-123781430.1 actor_loss=0.3059 critic_loss=147486754588.4445 entropy=17.6686 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 64360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-550627.6 mean_steps=15.6
|
|
[Episode 64370] reward=-119136316.4 actor_loss=0.3050 critic_loss=140243895296.0000 entropy=17.6610 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 64380] reward=-125285945.2 actor_loss=0.2858 critic_loss=162287093602.4615 entropy=17.6698 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 64380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-543071.8 mean_steps=14.2
|
|
[Episode 64390] reward=-124442951.6 actor_loss=0.2405 critic_loss=152162944068.2667 entropy=17.6862 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 64400] reward=-116941669.1 actor_loss=0.3917 critic_loss=130279365808.5517 entropy=17.6902 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 64400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-453837.5 mean_steps=16.4
|
|
[Episode 64410] reward=-118973424.5 actor_loss=0.2505 critic_loss=131248197632.0000 entropy=17.6972 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 64420] reward=-121928421.0 actor_loss=0.3893 critic_loss=144732120333.4737 entropy=17.6924 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 64420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465546.1 mean_steps=15.9
|
|
[Episode 64430] reward=-119876281.1 actor_loss=0.3179 critic_loss=151166163211.1304 entropy=17.6880 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 64440] reward=-114660084.7 actor_loss=0.3082 critic_loss=129082773699.0476 entropy=17.6869 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 64440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-417388.5 mean_steps=16.2
|
|
[Episode 64450] reward=-116941621.5 actor_loss=0.3051 critic_loss=132034813321.8462 entropy=17.6905 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 64460] reward=-117805618.9 actor_loss=0.2819 critic_loss=132566958436.1739 entropy=17.6933 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 64460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475960.1 mean_steps=15.2
|
|
[Episode 64470] reward=-117449638.8 actor_loss=0.2669 critic_loss=129322382677.3333 entropy=17.6966 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 64480] reward=-146712005.8 actor_loss=209.5544 critic_loss=4258911873469.2173 entropy=17.6722 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 64480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-469574.3 mean_steps=16.1
|
|
[Episode 64490] reward=-117527504.3 actor_loss=0.3943 critic_loss=127627535360.0000 entropy=17.6728 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 64500] reward=-122740184.8 actor_loss=0.2597 critic_loss=139300682683.7333 entropy=17.6803 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 64500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445705.3 mean_steps=15.5
|
|
[Episode 64510] reward=-117702093.4 actor_loss=0.3628 critic_loss=133261493938.6046 entropy=17.6746 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 64520] reward=-121093285.3 actor_loss=0.2692 critic_loss=134964047747.8788 entropy=17.6670 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 64520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436842.4 mean_steps=15.3
|
|
[Episode 64530] reward=-115426836.6 actor_loss=0.3501 critic_loss=127343196023.4667 entropy=17.6661 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 64540] reward=-114515072.2 actor_loss=0.3542 critic_loss=128107553423.3600 entropy=17.6820 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 64540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551501.5 mean_steps=13.6
|
|
[Episode 64550] reward=-118978870.3 actor_loss=0.3264 critic_loss=129335584540.4444 entropy=17.6846 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 64560] reward=-125017553.0 actor_loss=0.3163 critic_loss=881977796077.0370 entropy=17.6690 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 64560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534352.1 mean_steps=14.4
|
|
[Episode 64570] reward=-115429417.0 actor_loss=0.2603 critic_loss=126481442973.5385 entropy=17.6831 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 64580] reward=-119778308.8 actor_loss=0.2586 critic_loss=131173936956.9524 entropy=17.6850 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 64580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-323431.6 mean_steps=16.2
|
|
[Episode 64590] reward=-118648856.4 actor_loss=0.3736 critic_loss=174037816817.3714 entropy=17.6799 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 64600] reward=-126263879.6 actor_loss=0.2437 critic_loss=142782433689.6000 entropy=17.6938 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 64600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476104.6 mean_steps=14.2
|
|
[Episode 64610] reward=-121448133.2 actor_loss=0.2297 critic_loss=132752311091.2000 entropy=17.6951 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 64620] reward=-123420799.0 actor_loss=0.2652 critic_loss=138563838267.0769 entropy=17.6971 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 64620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-352337.9 mean_steps=15.9
|
|
[Episode 64630] reward=-119111774.6 actor_loss=0.3355 critic_loss=136018185294.7692 entropy=17.6961 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 64640] reward=-122832175.2 actor_loss=0.2146 critic_loss=136072592952.8889 entropy=17.6877 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 64640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437663.4 mean_steps=15.4
|
|
[Episode 64650] reward=-121183858.4 actor_loss=0.2575 critic_loss=134733563580.6316 entropy=17.6843 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 64660] reward=-118243094.0 actor_loss=0.2993 critic_loss=131342070723.7647 entropy=17.6751 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 64660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448397.2 mean_steps=14.8
|
|
[Episode 64670] reward=-121251693.0 actor_loss=0.3638 critic_loss=139528527689.9556 entropy=17.6885 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 64680] reward=-121794229.6 actor_loss=0.2665 critic_loss=140816060006.4000 entropy=17.6876 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 64680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379055.5 mean_steps=16.4
|
|
[Episode 64690] reward=-121584774.4 actor_loss=0.3406 critic_loss=134100899009.7297 entropy=17.6716 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 64700] reward=-118519991.7 actor_loss=0.2079 critic_loss=128889584571.7333 entropy=17.6616 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 64700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-597586.6 mean_steps=12.0
|
|
[Episode 64710] reward=-120299052.7 actor_loss=0.3285 critic_loss=136850839961.6000 entropy=17.6602 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 64720] reward=-119663655.5 actor_loss=0.3878 critic_loss=134908553688.6154 entropy=17.6587 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 64720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-359513.1 mean_steps=15.5
|
|
[Episode 64730] reward=-113810509.7 actor_loss=0.3077 critic_loss=127945358987.6364 entropy=17.6453 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 64740] reward=-119528519.7 actor_loss=0.3067 critic_loss=139142202929.5484 entropy=17.6400 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 64740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-349095.5 mean_steps=17.1
|
|
[Episode 64750] reward=-116603829.8 actor_loss=0.3410 critic_loss=130369438671.2381 entropy=17.6457 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 64760] reward=-122904492.5 actor_loss=0.3250 critic_loss=142668203874.4615 entropy=17.6474 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 64760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474834.1 mean_steps=13.7
|
|
[Episode 64770] reward=-120754808.8 actor_loss=0.2246 critic_loss=140100162706.2857 entropy=17.6461 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 64780] reward=-125281731.9 actor_loss=0.2574 critic_loss=144260485575.1111 entropy=17.6500 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 64780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457100.4 mean_steps=15.6
|
|
[Episode 64790] reward=-119397636.3 actor_loss=0.2923 critic_loss=137446623726.3448 entropy=17.6601 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 64800] reward=-121458063.2 actor_loss=0.3465 critic_loss=145958727680.0000 entropy=17.6592 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 64800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446232.8 mean_steps=15.4
|
|
[Episode 64810] reward=-119055254.1 actor_loss=0.2963 critic_loss=130956057088.0000 entropy=17.6458 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 64820] reward=-119530006.7 actor_loss=0.1984 critic_loss=150307865789.6296 entropy=17.6483 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 64820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-428800.3 mean_steps=16.6
|
|
[Episode 64830] reward=-118330748.7 actor_loss=0.3619 critic_loss=133006318985.8462 entropy=17.6551 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 64840] reward=-116552737.4 actor_loss=0.2757 critic_loss=131063128776.3478 entropy=17.6487 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 64840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-580552.6 mean_steps=13.7
|
|
[Episode 64850] reward=-121617257.2 actor_loss=0.2347 critic_loss=133961798451.2000 entropy=17.6571 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 64860] reward=-118184643.5 actor_loss=0.3021 critic_loss=128285579520.0000 entropy=17.6497 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 64860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-575871.8 mean_steps=11.8
|
|
[Episode 64870] reward=-117502397.9 actor_loss=0.2932 critic_loss=132077957120.0000 entropy=17.6552 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 64880] reward=-122698658.2 actor_loss=0.2865 critic_loss=136938111348.3636 entropy=17.6484 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 64880] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-264828.9 mean_steps=18.4
|
|
[Episode 64890] reward=-119017704.2 actor_loss=0.2900 critic_loss=131089485289.7391 entropy=17.6615 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 64900] reward=-120902998.6 actor_loss=0.3970 critic_loss=132487340662.1538 entropy=17.6630 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 64900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432548.9 mean_steps=15.4
|
|
[Episode 64910] reward=-121005808.8 actor_loss=0.1712 critic_loss=130614146048.0000 entropy=17.6600 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 64920] reward=-120902879.1 actor_loss=0.3026 critic_loss=138136775065.6000 entropy=17.6561 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 64920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446919.4 mean_steps=14.4
|
|
[Episode 64930] reward=-115083027.9 actor_loss=0.2598 critic_loss=125052502016.0000 entropy=17.6488 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 64940] reward=-120678509.2 actor_loss=0.2909 critic_loss=133587422976.0000 entropy=17.6451 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 64940] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-278818.3 mean_steps=17.6
|
|
[Episode 64950] reward=-122470950.6 actor_loss=0.2204 critic_loss=136213172968.7273 entropy=17.6460 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 64960] reward=-124401164.3 actor_loss=0.3543 critic_loss=133522761045.3333 entropy=17.6543 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 64960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597509.6 mean_steps=13.0
|
|
[Episode 64970] reward=-120984394.1 actor_loss=0.2820 critic_loss=132890831725.7143 entropy=17.6554 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 64980] reward=-117445629.5 actor_loss=0.4255 critic_loss=123663234730.6667 entropy=17.6533 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 64980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563875.6 mean_steps=13.3
|
|
[Episode 64990] reward=-111073673.2 actor_loss=0.4301 critic_loss=128045968976.8421 entropy=17.6433 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 65000] reward=-122990093.2 actor_loss=0.2409 critic_loss=143975859504.4324 entropy=17.6498 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 65000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-343498.1 mean_steps=15.8
|
|
[Episode 65010] reward=-121509981.6 actor_loss=0.3424 critic_loss=135679964137.2444 entropy=17.6501 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 65020] reward=-119781873.0 actor_loss=0.3492 critic_loss=128773850533.6471 entropy=17.6600 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 65020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641115.5 mean_steps=12.4
|
|
[Episode 65030] reward=-118586114.2 actor_loss=0.3289 critic_loss=129740227689.9310 entropy=17.6615 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 65040] reward=-120559325.3 actor_loss=0.3433 critic_loss=137885039616.0000 entropy=17.6622 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 65040] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-332715.9 mean_steps=16.5
|
|
[Episode 65050] reward=-116938945.9 actor_loss=0.3558 critic_loss=133078834176.0000 entropy=17.6600 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 65060] reward=-119425412.6 actor_loss=0.2244 critic_loss=130313770449.4545 entropy=17.6688 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 65060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-393465.6 mean_steps=17.3
|
|
[Episode 65070] reward=-116843065.9 actor_loss=0.2744 critic_loss=128464066048.0000 entropy=17.6738 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 65080] reward=-121580546.1 actor_loss=0.2809 critic_loss=134371566023.1111 entropy=17.6735 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 65080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447533.1 mean_steps=15.4
|
|
[Episode 65090] reward=-116405870.5 actor_loss=0.3642 critic_loss=129458433462.8571 entropy=17.6766 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 65100] reward=-120761202.1 actor_loss=0.2788 critic_loss=142239081813.3333 entropy=17.6871 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 65100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-446916.0 mean_steps=16.4
|
|
[Episode 65110] reward=-119583417.5 actor_loss=0.2800 critic_loss=129412513476.9231 entropy=17.6985 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 65120] reward=-117883731.4 actor_loss=0.2487 critic_loss=131812376576.0000 entropy=17.6962 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 65120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-531417.6 mean_steps=15.4
|
|
[Episode 65130] reward=-119524267.8 actor_loss=0.2688 critic_loss=143630311814.0952 entropy=17.6960 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 65140] reward=-118671026.4 actor_loss=0.3078 critic_loss=134405702997.3333 entropy=17.6971 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 65140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383340.8 mean_steps=15.8
|
|
[Episode 65150] reward=-119004920.9 actor_loss=0.2419 critic_loss=132851070293.3333 entropy=17.7072 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 65160] reward=-117503429.5 actor_loss=0.1868 critic_loss=142313406976.0000 entropy=17.7067 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 65160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525506.6 mean_steps=14.1
|
|
[Episode 65170] reward=-113119379.0 actor_loss=0.2587 critic_loss=132127529030.6207 entropy=17.7034 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 65180] reward=-116535869.7 actor_loss=0.3045 critic_loss=130303065526.8571 entropy=17.7022 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 65180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-453628.3 mean_steps=16.6
|
|
[Episode 65190] reward=-120228761.1 actor_loss=0.2853 critic_loss=135045363622.9565 entropy=17.7042 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 65200] reward=-115458632.5 actor_loss=0.2844 critic_loss=131112227157.3333 entropy=17.6919 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 65200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-615518.2 mean_steps=13.7
|
|
[Episode 65210] reward=-112617471.6 actor_loss=0.3199 critic_loss=121335897019.7333 entropy=17.7087 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 65220] reward=-117099388.8 actor_loss=0.3614 critic_loss=127718714314.1053 entropy=17.6977 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 65220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398286.7 mean_steps=16.4
|
|
[Episode 65230] reward=-111725215.9 actor_loss=0.3300 critic_loss=120300637811.6129 entropy=17.6914 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 65240] reward=-117934080.0 actor_loss=0.1975 critic_loss=131642450767.4483 entropy=17.7002 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 65240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476752.3 mean_steps=13.4
|
|
[Episode 65250] reward=-115138891.5 actor_loss=0.3354 critic_loss=123268030464.0000 entropy=17.7079 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 65260] reward=-116896967.0 actor_loss=0.3142 critic_loss=126039641249.6842 entropy=17.7116 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 65260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562327.2 mean_steps=13.4
|
|
[Episode 65270] reward=-122586316.0 actor_loss=0.1765 critic_loss=133923898289.2308 entropy=17.7149 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 65280] reward=-117703447.9 actor_loss=0.2899 critic_loss=128132189184.0000 entropy=17.7167 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 65280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-507227.6 mean_steps=16.1
|
|
[Episode 65290] reward=-118368198.8 actor_loss=0.2236 critic_loss=136247696695.6522 entropy=17.7239 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 65300] reward=-122185797.1 actor_loss=0.2346 critic_loss=135280549432.8889 entropy=17.7322 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 65300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514037.5 mean_steps=14.2
|
|
[Episode 65310] reward=-149319528.8 actor_loss=0.3115 critic_loss=5456997226905.5996 entropy=17.7269 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 65320] reward=-116483996.1 actor_loss=0.3139 critic_loss=137438146136.2759 entropy=17.7309 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 65320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415569.2 mean_steps=16.1
|
|
[Episode 65330] reward=-121312173.2 actor_loss=0.2203 critic_loss=134683228387.5556 entropy=17.7184 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 65340] reward=-122899037.1 actor_loss=0.2898 critic_loss=141635484057.6000 entropy=17.6993 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 65340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-562006.2 mean_steps=14.6
|
|
[Episode 65350] reward=-116715439.2 actor_loss=0.2100 critic_loss=128201559244.8000 entropy=17.6784 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 65360] reward=-117921008.4 actor_loss=0.2582 critic_loss=125896460238.0488 entropy=17.6798 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 65360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419069.4 mean_steps=15.6
|
|
[Episode 65370] reward=-125778488.1 actor_loss=0.3033 critic_loss=140558404864.0000 entropy=17.6768 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 65380] reward=-118144370.4 actor_loss=0.4012 critic_loss=133319511244.8000 entropy=17.6823 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 65380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-622671.8 mean_steps=12.8
|
|
[Episode 65390] reward=-116748017.3 actor_loss=0.2827 critic_loss=128152424174.9333 entropy=17.6854 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 65400] reward=-116510707.6 actor_loss=0.3253 critic_loss=126984743742.2703 entropy=17.6868 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 65400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-531407.2 mean_steps=12.5
|
|
[Episode 65410] reward=-119077410.7 actor_loss=0.3518 critic_loss=133342554521.6000 entropy=17.6890 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 65420] reward=-121758425.6 actor_loss=0.1679 critic_loss=129319052477.6296 entropy=17.6971 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 65420] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-345394.5 mean_steps=17.3
|
|
[Episode 65430] reward=-118768316.6 actor_loss=0.2741 critic_loss=130233007786.6667 entropy=17.6826 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 65440] reward=-119056725.0 actor_loss=0.3611 critic_loss=130586242349.1765 entropy=17.6724 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 65440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542375.4 mean_steps=14.1
|
|
[Episode 65450] reward=-116198119.9 actor_loss=0.3831 critic_loss=134876965050.1818 entropy=17.6733 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 65460] reward=-117424856.3 actor_loss=0.2484 critic_loss=127405555712.0000 entropy=17.6775 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 65460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-512982.7 mean_steps=13.2
|
|
[Episode 65470] reward=-129011393.4 actor_loss=0.2284 critic_loss=151864123392.0000 entropy=17.6823 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 65480] reward=-120215482.6 actor_loss=0.1974 critic_loss=130441555968.0000 entropy=17.6769 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 65480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486634.5 mean_steps=13.7
|
|
[Episode 65490] reward=-122646723.4 actor_loss=0.2523 critic_loss=137582618313.6970 entropy=17.6635 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 65500] reward=-122434624.1 actor_loss=0.2499 critic_loss=132866801057.1852 entropy=17.6646 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 65500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-653502.1 mean_steps=12.9
|
|
[Episode 65510] reward=-125630985.6 actor_loss=0.2841 critic_loss=138698238138.1818 entropy=17.6654 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 65520] reward=-115916135.9 actor_loss=0.2522 critic_loss=126816732842.6667 entropy=17.6560 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 65520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538524.7 mean_steps=14.0
|
|
[Episode 65530] reward=-121435826.8 actor_loss=0.2765 critic_loss=129023259136.0000 entropy=17.6554 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 65540] reward=-121561724.2 actor_loss=0.3019 critic_loss=138751512985.6000 entropy=17.6512 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 65540] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-411979.3 mean_steps=17.1
|
|
[Episode 65550] reward=-121903580.0 actor_loss=0.2281 critic_loss=133489508352.0000 entropy=17.6509 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 65560] reward=-120795623.0 actor_loss=0.2937 critic_loss=129563693875.2000 entropy=17.6524 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 65560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-488443.7 mean_steps=16.1
|
|
[Episode 65570] reward=-121600450.6 actor_loss=0.2215 critic_loss=133915390464.0000 entropy=17.6479 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 65580] reward=-124294796.4 actor_loss=0.2465 critic_loss=139235232846.7692 entropy=17.6484 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 65580] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-704228.1 mean_steps=11.4
|
|
[Episode 65590] reward=-121911775.1 actor_loss=0.2383 critic_loss=134212250586.0741 entropy=17.6512 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 65600] reward=-120862718.5 actor_loss=0.2504 critic_loss=133275256208.6956 entropy=17.6546 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 65600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481431.5 mean_steps=14.9
|
|
[Episode 65610] reward=-122015910.0 actor_loss=0.2615 critic_loss=134039874522.0741 entropy=17.6601 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 65620] reward=-122260745.5 actor_loss=0.2533 critic_loss=137426109533.0909 entropy=17.6664 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 65620] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-382356.3 mean_steps=16.7
|
|
[Episode 65630] reward=-118943455.5 actor_loss=0.3957 critic_loss=131462987776.0000 entropy=17.6530 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 65640] reward=-124427173.2 actor_loss=0.2656 critic_loss=143242312003.3684 entropy=17.6491 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 65640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-591527.5 mean_steps=12.9
|
|
[Episode 65650] reward=-116678018.2 actor_loss=0.3511 critic_loss=129000078049.2800 entropy=17.6609 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 65660] reward=-121103470.2 actor_loss=0.2789 critic_loss=139654744064.0000 entropy=17.6555 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 65660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475386.7 mean_steps=14.5
|
|
[Episode 65670] reward=-121355104.2 actor_loss=0.3360 critic_loss=138400331700.1482 entropy=17.6614 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 65680] reward=-124833722.2 actor_loss=0.2468 critic_loss=146261261243.7333 entropy=17.6591 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 65680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-603779.0 mean_steps=13.3
|
|
[Episode 65690] reward=-117456876.3 actor_loss=0.3452 critic_loss=129072002964.2105 entropy=17.6617 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 65700] reward=-118398025.8 actor_loss=0.3511 critic_loss=129557391968.8649 entropy=17.6725 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 65700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612277.9 mean_steps=11.8
|
|
[Episode 65710] reward=-119026559.8 actor_loss=0.3252 critic_loss=130240519099.7333 entropy=17.6818 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 65720] reward=-120419404.4 actor_loss=0.2901 critic_loss=137391616819.2000 entropy=17.6814 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 65720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-626326.5 mean_steps=13.7
|
|
[Episode 65730] reward=-120129107.5 actor_loss=0.2801 critic_loss=135162700868.2667 entropy=17.6858 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 65740] reward=-119240997.9 actor_loss=0.2308 critic_loss=131995111628.8000 entropy=17.6698 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 65740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498993.9 mean_steps=13.7
|
|
[Episode 65750] reward=-121036650.4 actor_loss=0.3115 critic_loss=131975696860.2791 entropy=17.6877 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 65760] reward=-120772703.4 actor_loss=0.2293 critic_loss=137311697669.6889 entropy=17.6812 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 65760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548466.9 mean_steps=13.8
|
|
[Episode 65770] reward=-117438389.6 actor_loss=0.2180 critic_loss=129227171352.3810 entropy=17.6769 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 65780] reward=-115049413.1 actor_loss=0.3376 critic_loss=127934082389.3333 entropy=17.6691 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 65780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602887.1 mean_steps=13.1
|
|
[Episode 65790] reward=-116564628.1 actor_loss=0.4004 critic_loss=345972512692.1481 entropy=17.6647 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 65800] reward=-118331593.0 actor_loss=0.2953 critic_loss=136003131255.4667 entropy=17.6789 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 65800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-668386.8 mean_steps=13.2
|
|
[Episode 65810] reward=-116111552.5 actor_loss=0.2172 critic_loss=128311491671.7714 entropy=17.6691 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 65820] reward=-121457455.9 actor_loss=0.2082 critic_loss=139245454829.0370 entropy=17.6681 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 65820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409136.0 mean_steps=16.4
|
|
[Episode 65830] reward=-117387141.0 actor_loss=0.3126 critic_loss=136770450090.6667 entropy=17.6811 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 65840] reward=-120640014.8 actor_loss=0.2784 critic_loss=132086708633.6000 entropy=17.6885 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 65840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531630.3 mean_steps=14.3
|
|
[Episode 65850] reward=-118364637.7 actor_loss=0.2118 critic_loss=133505334840.8889 entropy=17.6971 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 65860] reward=-114343282.7 actor_loss=0.3050 critic_loss=125225779655.1111 entropy=17.6937 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 65860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624466.0 mean_steps=13.0
|
|
[Episode 65870] reward=-122960973.1 actor_loss=0.2621 critic_loss=260017355310.5454 entropy=17.6824 approx_kl=0.0034 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 65880] reward=-113936926.6 actor_loss=0.3185 critic_loss=126194775927.4667 entropy=17.6831 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 65880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559153.7 mean_steps=13.3
|
|
[Episode 65890] reward=-112019480.3 actor_loss=0.3338 critic_loss=123643947627.1628 entropy=17.6791 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 65900] reward=-116522467.9 actor_loss=0.2727 critic_loss=124861157500.1212 entropy=17.6688 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 65900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600934.0 mean_steps=12.7
|
|
[Episode 65910] reward=-118597344.3 actor_loss=0.3197 critic_loss=131104711475.2000 entropy=17.6732 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 65920] reward=-119328711.8 actor_loss=0.2482 critic_loss=129113974101.3333 entropy=17.6685 approx_kl=0.0045 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 65920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468611.7 mean_steps=14.6
|
|
[Episode 65930] reward=-118697742.5 actor_loss=0.2496 critic_loss=133553043613.5385 entropy=17.6705 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 65940] reward=-111727556.9 actor_loss=0.2823 critic_loss=121381951488.0000 entropy=17.6634 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 65940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416234.2 mean_steps=15.1
|
|
[Episode 65950] reward=-117984103.7 actor_loss=0.3809 critic_loss=131918183082.6667 entropy=17.6585 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 65960] reward=-115129581.5 actor_loss=0.2796 critic_loss=125981964408.4706 entropy=17.6576 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 65960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497057.8 mean_steps=14.0
|
|
[Episode 65970] reward=-118711889.6 actor_loss=0.2724 critic_loss=130647232238.9333 entropy=17.6657 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 65980] reward=-114976200.2 actor_loss=0.2736 critic_loss=125182407168.0000 entropy=17.6442 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 65980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-375473.4 mean_steps=15.2
|
|
[Episode 65990] reward=-119528820.6 actor_loss=0.3201 critic_loss=131947327214.9333 entropy=17.6443 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 66000] reward=-119787900.3 actor_loss=0.2977 critic_loss=136157970432.0000 entropy=17.6593 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 66000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556876.5 mean_steps=13.6
|
|
[Episode 66010] reward=-115493575.5 actor_loss=0.3636 critic_loss=122261530466.4615 entropy=17.6711 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 66020] reward=-116954320.2 actor_loss=0.3200 critic_loss=124860322702.2222 entropy=17.6869 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 66020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-348212.9 mean_steps=15.2
|
|
[Episode 66030] reward=-114035600.4 actor_loss=0.2827 critic_loss=123815317162.6667 entropy=17.6871 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 66040] reward=-123607728.9 actor_loss=0.2768 critic_loss=163817891237.6471 entropy=17.6877 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 66040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567078.1 mean_steps=12.4
|
|
[Episode 66050] reward=-113460349.7 actor_loss=0.3596 critic_loss=138431647232.0000 entropy=17.6931 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 66060] reward=-120777053.6 actor_loss=0.3620 critic_loss=136993672098.9091 entropy=17.6959 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 66060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-395843.2 mean_steps=15.2
|
|
[Episode 66070] reward=-118684028.8 actor_loss=0.2903 critic_loss=127535225514.6667 entropy=17.7091 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 66080] reward=-121396668.8 actor_loss=0.2398 critic_loss=136063535360.0000 entropy=17.7069 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 66080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-582939.0 mean_steps=13.0
|
|
[Episode 66090] reward=-120345213.3 actor_loss=0.2623 critic_loss=132100142421.3333 entropy=17.7028 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 66100] reward=-118325627.2 actor_loss=0.1996 critic_loss=129997879471.5429 entropy=17.7066 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 66100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535842.9 mean_steps=13.6
|
|
[Episode 66110] reward=-119862390.1 actor_loss=0.2441 critic_loss=131450242513.4545 entropy=17.7142 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 66120] reward=-117089806.7 actor_loss=0.2364 critic_loss=131850928443.0769 entropy=17.7042 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 66120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-633511.0 mean_steps=12.2
|
|
[Episode 66130] reward=-113686429.3 actor_loss=0.3050 critic_loss=124625185587.2000 entropy=17.7071 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 66140] reward=-121861271.4 actor_loss=0.2579 critic_loss=134810011940.5714 entropy=17.7173 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 66140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501688.0 mean_steps=14.3
|
|
[Episode 66150] reward=-121566531.5 actor_loss=0.2757 critic_loss=139968682861.7143 entropy=17.7196 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 66160] reward=-117888996.9 actor_loss=0.3108 critic_loss=127451881953.8824 entropy=17.7309 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 66160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493944.5 mean_steps=14.7
|
|
[Episode 66170] reward=-114528262.6 actor_loss=0.2964 critic_loss=130214224262.0952 entropy=17.7286 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 66180] reward=-121187134.8 actor_loss=0.2410 critic_loss=131945808271.6098 entropy=17.7258 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 66180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-441992.2 mean_steps=16.1
|
|
[Episode 66190] reward=-116756668.0 actor_loss=0.3234 critic_loss=129971846885.5172 entropy=17.7218 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 66200] reward=-114740975.1 actor_loss=0.3302 critic_loss=129403213141.3333 entropy=17.7149 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 66200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493126.4 mean_steps=13.8
|
|
[Episode 66210] reward=-119999504.6 actor_loss=0.2023 critic_loss=134080630588.9524 entropy=17.7051 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 66220] reward=-121245691.0 actor_loss=0.1860 critic_loss=131531254188.6512 entropy=17.6875 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 66220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-508577.1 mean_steps=15.7
|
|
[Episode 66230] reward=-117779262.5 actor_loss=0.3006 critic_loss=133859395121.5484 entropy=17.6896 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 66240] reward=-119889287.4 actor_loss=0.2769 critic_loss=134218222796.8000 entropy=17.6945 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 66240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540196.0 mean_steps=13.2
|
|
[Episode 66250] reward=-124551502.6 actor_loss=0.2650 critic_loss=143421791759.5151 entropy=17.6860 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 66260] reward=-123018804.7 actor_loss=0.2607 critic_loss=146219263512.3810 entropy=17.6851 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 66260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473737.6 mean_steps=13.9
|
|
[Episode 66270] reward=-119297594.5 actor_loss=0.2448 critic_loss=134174722949.1200 entropy=17.6833 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 66280] reward=-119615306.3 actor_loss=0.1940 critic_loss=130857634838.7556 entropy=17.6940 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 66280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538779.3 mean_steps=13.9
|
|
[Episode 66290] reward=-119466718.6 actor_loss=0.2987 critic_loss=136518851072.0000 entropy=17.6886 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 66300] reward=-116702870.6 actor_loss=0.2564 critic_loss=140366436165.8182 entropy=17.6971 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 66300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548806.4 mean_steps=13.3
|
|
[Episode 66310] reward=-116976388.8 actor_loss=0.2761 critic_loss=127794208533.9429 entropy=17.7147 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 66320] reward=-121077551.7 actor_loss=0.2880 critic_loss=145726057494.7556 entropy=17.7234 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 66320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391780.1 mean_steps=16.3
|
|
[Episode 66330] reward=-113437087.3 actor_loss=0.2491 critic_loss=121513740060.4444 entropy=17.7314 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 66340] reward=-117465122.5 actor_loss=0.2963 critic_loss=137121606190.5455 entropy=17.7242 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 66340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-566839.6 mean_steps=14.3
|
|
[Episode 66350] reward=-121526040.8 actor_loss=0.3008 critic_loss=175189941452.8000 entropy=17.7042 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 66360] reward=-113088754.5 actor_loss=0.2342 critic_loss=120283797504.0000 entropy=17.6863 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 66360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514105.4 mean_steps=14.1
|
|
[Episode 66370] reward=-120375399.2 actor_loss=0.1942 critic_loss=141194135997.2174 entropy=17.6707 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 66380] reward=-120751013.0 actor_loss=0.3506 critic_loss=178060143979.3548 entropy=17.6740 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 66380] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-425824.3 mean_steps=17.4
|
|
[Episode 66390] reward=-117226809.2 actor_loss=0.3129 critic_loss=184463062357.3333 entropy=17.6738 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 66400] reward=-114596560.7 actor_loss=0.2821 critic_loss=122716780544.0000 entropy=17.6742 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 66400] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-633271.5 mean_steps=11.3
|
|
[Episode 66410] reward=-114226054.9 actor_loss=0.2614 critic_loss=125143197372.6316 entropy=17.6864 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 66420] reward=-121565188.8 actor_loss=0.2448 critic_loss=137083056713.1429 entropy=17.6893 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 66420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-382157.5 mean_steps=16.4
|
|
[Episode 66430] reward=-118231695.8 actor_loss=0.2206 critic_loss=127449574435.3103 entropy=17.6858 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 66440] reward=-120092528.0 actor_loss=0.3457 critic_loss=137753363797.3333 entropy=17.6763 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 66440] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-558335.4 mean_steps=11.6
|
|
[Episode 66450] reward=-115824414.4 actor_loss=0.3554 critic_loss=133727680804.5714 entropy=17.6707 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 66460] reward=-116535688.6 actor_loss=0.1881 critic_loss=135603048448.0000 entropy=17.6565 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 66460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-609431.7 mean_steps=12.7
|
|
[Episode 66470] reward=-118795584.2 actor_loss=0.2456 critic_loss=134538897853.2174 entropy=17.6576 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 66480] reward=-120717911.6 actor_loss=0.1268 critic_loss=132322534031.3600 entropy=17.6595 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 66480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-566987.3 mean_steps=14.3
|
|
[Episode 66490] reward=-119454451.4 actor_loss=0.2738 critic_loss=131893815416.4706 entropy=17.6614 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 66500] reward=-122489403.2 actor_loss=0.4135 critic_loss=139546038727.1111 entropy=17.6587 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 66500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542659.2 mean_steps=13.5
|
|
[Episode 66510] reward=-122829020.1 actor_loss=0.2315 critic_loss=192053579535.0588 entropy=17.6417 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 66520] reward=-121037852.0 actor_loss=0.3400 critic_loss=133023801636.5714 entropy=17.6443 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 66520] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-666940.7 mean_steps=11.1
|
|
[Episode 66530] reward=-116880426.1 actor_loss=0.2932 critic_loss=125703944533.3333 entropy=17.6574 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 66540] reward=-116712171.1 actor_loss=0.3324 critic_loss=127760084850.7586 entropy=17.6613 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 66540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458564.5 mean_steps=14.8
|
|
[Episode 66550] reward=-119129290.9 actor_loss=0.1971 critic_loss=146470353219.3684 entropy=17.6565 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 66560] reward=-120089104.5 actor_loss=0.2972 critic_loss=138030757958.6207 entropy=17.6474 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 66560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461505.5 mean_steps=14.3
|
|
[Episode 66570] reward=-187531537.6 actor_loss=1.0193 critic_loss=18579447839343.3047 entropy=17.6506 approx_kl=0.0032 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 66580] reward=-116857545.8 actor_loss=0.2812 critic_loss=120945319936.0000 entropy=17.6525 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 66580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-443034.6 mean_steps=14.2
|
|
[Episode 66590] reward=-114693004.9 actor_loss=0.3216 critic_loss=125446110976.0000 entropy=17.6453 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 66600] reward=-112411471.6 actor_loss=0.2712 critic_loss=127986208919.7037 entropy=17.6504 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 66600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-629104.8 mean_steps=12.9
|
|
[Episode 66610] reward=-117640619.0 actor_loss=0.3118 critic_loss=130059246445.7143 entropy=17.6478 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 66620] reward=-113987321.9 actor_loss=0.3125 critic_loss=126319524044.8000 entropy=17.6413 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 66620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-407555.0 mean_steps=13.3
|
|
[Episode 66630] reward=-119460224.6 actor_loss=0.1994 critic_loss=134906171240.2963 entropy=17.6402 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 66640] reward=-117155105.3 actor_loss=0.3525 critic_loss=143903554127.6444 entropy=17.6387 approx_kl=0.0106 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 66640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531410.7 mean_steps=14.1
|
|
[Episode 66650] reward=-115367978.3 actor_loss=0.3145 critic_loss=125493923644.9524 entropy=17.6364 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 66660] reward=-116159129.2 actor_loss=0.3129 critic_loss=130813602474.6667 entropy=17.6334 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 66660] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-338263.7 mean_steps=17.4
|
|
[Episode 66670] reward=-114525642.8 actor_loss=0.2822 critic_loss=158435274893.2414 entropy=17.6370 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 66680] reward=-124855091.2 actor_loss=0.3205 critic_loss=173410544298.6667 entropy=17.6269 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 66680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-401276.4 mean_steps=14.9
|
|
[Episode 66690] reward=-119981108.8 actor_loss=0.1864 critic_loss=137930741467.4286 entropy=17.6254 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 66700] reward=-121073224.5 actor_loss=0.1797 critic_loss=134054695731.2000 entropy=17.6291 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 66700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487425.1 mean_steps=14.7
|
|
[Episode 66710] reward=-119937642.8 actor_loss=0.4201 critic_loss=133077138031.3044 entropy=17.6293 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 66720] reward=-122452385.7 actor_loss=0.3175 critic_loss=137680060074.6667 entropy=17.6388 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 66720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549097.6 mean_steps=13.1
|
|
[Episode 66730] reward=-120719405.7 actor_loss=0.2669 critic_loss=137549447168.0000 entropy=17.6391 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 66740] reward=-111920686.0 actor_loss=0.2865 critic_loss=120147422139.7333 entropy=17.6573 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 66740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-548908.9 mean_steps=12.7
|
|
[Episode 66750] reward=-116104446.3 actor_loss=0.3194 critic_loss=130501918310.4000 entropy=17.6701 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 66760] reward=-121128334.3 actor_loss=0.3015 critic_loss=133045927545.9048 entropy=17.6765 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 66760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486446.3 mean_steps=14.5
|
|
[Episode 66770] reward=-117255467.0 actor_loss=0.3145 critic_loss=127154252800.0000 entropy=17.6704 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 66780] reward=-117328801.6 actor_loss=0.1984 critic_loss=131424374510.9333 entropy=17.6690 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 66780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-423590.7 mean_steps=14.4
|
|
[Episode 66790] reward=-115155370.8 actor_loss=0.3647 critic_loss=128586514432.0000 entropy=17.6624 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 66800] reward=-118646469.5 actor_loss=0.3978 critic_loss=128558069380.7407 entropy=17.6684 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 66800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399446.9 mean_steps=15.0
|
|
[Episode 66810] reward=-120631720.1 actor_loss=0.1936 critic_loss=135213880115.2000 entropy=17.6685 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 66820] reward=-111297619.6 actor_loss=0.3402 critic_loss=121966763849.9556 entropy=17.6675 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 66820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504482.7 mean_steps=13.8
|
|
[Episode 66830] reward=-121209074.5 actor_loss=0.2656 critic_loss=133790754816.0000 entropy=17.6553 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 66840] reward=-119088344.0 actor_loss=0.3311 critic_loss=128771034391.2727 entropy=17.6418 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 66840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526641.6 mean_steps=12.7
|
|
[Episode 66850] reward=-112003603.7 actor_loss=0.3712 critic_loss=123844039717.9259 entropy=17.6364 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 66860] reward=-127992231.7 actor_loss=2.3088 critic_loss=505363252460.3077 entropy=17.6299 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 66860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421750.3 mean_steps=15.7
|
|
[Episode 66870] reward=-112607933.9 actor_loss=0.3054 critic_loss=125730356891.8261 entropy=17.6333 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 66880] reward=-122648902.1 actor_loss=0.2006 critic_loss=220850342570.6667 entropy=17.6253 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 66880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588031.0 mean_steps=13.5
|
|
[Episode 66890] reward=-123689275.5 actor_loss=0.3098 critic_loss=188949765840.5926 entropy=17.6231 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 66900] reward=-121769047.2 actor_loss=0.3333 critic_loss=134901245794.4615 entropy=17.6148 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 66900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473744.6 mean_steps=15.0
|
|
[Episode 66910] reward=-121743090.8 actor_loss=0.3232 critic_loss=133105393664.0000 entropy=17.6113 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 66920] reward=-116864361.6 actor_loss=0.2237 critic_loss=128755859812.1739 entropy=17.6143 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 66920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539499.2 mean_steps=13.4
|
|
[Episode 66930] reward=-117991862.1 actor_loss=0.3097 critic_loss=130175503902.1176 entropy=17.6109 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 66940] reward=-121558549.6 actor_loss=0.2656 critic_loss=137075965952.0000 entropy=17.6044 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 66940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-615309.2 mean_steps=12.1
|
|
[Episode 66950] reward=-117965359.6 actor_loss=0.2984 critic_loss=131123680177.2308 entropy=17.6110 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 66960] reward=-115063560.3 actor_loss=0.3229 critic_loss=125419030889.4118 entropy=17.6074 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 66960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518258.8 mean_steps=15.3
|
|
[Episode 66970] reward=-112324600.4 actor_loss=0.2575 critic_loss=151974818201.6000 entropy=17.6077 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 66980] reward=-120930833.1 actor_loss=0.2754 critic_loss=139035010389.3333 entropy=17.6040 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 66980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554330.7 mean_steps=12.7
|
|
[Episode 66990] reward=-116593182.8 actor_loss=0.3453 critic_loss=133004359621.4857 entropy=17.6064 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 67000] reward=-120248174.4 actor_loss=0.2755 critic_loss=141450505495.2727 entropy=17.6038 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 67000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492947.9 mean_steps=15.0
|
|
[Episode 67010] reward=-120867168.8 actor_loss=0.2370 critic_loss=132014213802.6667 entropy=17.6168 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 67020] reward=-112058780.9 actor_loss=0.3996 critic_loss=124643237497.9048 entropy=17.6253 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 67020] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-314890.5 mean_steps=17.4
|
|
[Episode 67030] reward=-115444165.9 actor_loss=0.2225 critic_loss=121376018747.0769 entropy=17.6201 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 67040] reward=-116334316.0 actor_loss=0.3725 critic_loss=138049197718.5882 entropy=17.6251 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 67040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524258.4 mean_steps=13.3
|
|
[Episode 67050] reward=-121739625.0 actor_loss=0.2912 critic_loss=135983893018.9474 entropy=17.6337 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 67060] reward=-119280253.5 actor_loss=0.3085 critic_loss=135409338026.6667 entropy=17.6331 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 67060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-403400.7 mean_steps=15.4
|
|
[Episode 67070] reward=-123295724.0 actor_loss=0.3235 critic_loss=140555954537.4118 entropy=17.6313 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 67080] reward=-112319619.3 actor_loss=0.3323 critic_loss=120695718616.1778 entropy=17.6212 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 67080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-496477.5 mean_steps=14.5
|
|
[Episode 67090] reward=-120832649.7 actor_loss=0.3303 critic_loss=131502749696.0000 entropy=17.6192 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 67100] reward=-117312243.1 actor_loss=0.2748 critic_loss=139540509378.2069 entropy=17.6148 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 67100] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-391287.1 mean_steps=16.9
|
|
[Episode 67110] reward=-117902515.7 actor_loss=0.4214 critic_loss=136434110919.1111 entropy=17.6157 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 67120] reward=-114303044.5 actor_loss=0.2602 critic_loss=125046245099.2432 entropy=17.6138 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 67120] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-289867.9 mean_steps=17.6
|
|
[Episode 67130] reward=-121604609.5 actor_loss=0.2422 critic_loss=141991478231.0400 entropy=17.6063 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 67140] reward=-120026128.4 actor_loss=0.2828 critic_loss=131187468089.8065 entropy=17.5963 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 67140] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-648716.7 mean_steps=11.4
|
|
[Episode 67150] reward=-116141898.6 actor_loss=0.2919 critic_loss=126709935308.8000 entropy=17.6155 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 67160] reward=-116744134.1 actor_loss=0.2912 critic_loss=126621347384.8889 entropy=17.6195 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 67160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-437181.5 mean_steps=17.2
|
|
[Episode 67170] reward=-112227767.2 actor_loss=0.4857 critic_loss=133663426236.6316 entropy=17.6309 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 67180] reward=-121395608.7 actor_loss=0.3862 critic_loss=136450750388.1481 entropy=17.6253 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 67180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431015.3 mean_steps=14.7
|
|
[Episode 67190] reward=-118993786.5 actor_loss=0.2208 critic_loss=129440076413.1555 entropy=17.6247 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 67200] reward=-123226182.4 actor_loss=0.2498 critic_loss=134157380096.0000 entropy=17.6472 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 67200] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-324437.7 mean_steps=18.1
|
|
[Episode 67210] reward=-113751340.1 actor_loss=0.3715 critic_loss=123946954226.8718 entropy=17.6405 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 67220] reward=-118178202.2 actor_loss=0.3274 critic_loss=129683072926.4762 entropy=17.6227 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 67220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479493.0 mean_steps=14.8
|
|
[Episode 67230] reward=-118370249.3 actor_loss=0.3567 critic_loss=128356718119.3846 entropy=17.6130 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 67240] reward=-116353046.6 actor_loss=0.2058 critic_loss=127087715896.8889 entropy=17.6053 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 67240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452000.5 mean_steps=14.7
|
|
[Episode 67250] reward=-121319746.4 actor_loss=0.2619 critic_loss=136669344890.8800 entropy=17.6121 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 67260] reward=-122481909.6 actor_loss=0.2431 critic_loss=133696075202.5600 entropy=17.6145 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 67260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-601586.5 mean_steps=12.6
|
|
[Episode 67270] reward=-122170859.2 actor_loss=0.2809 critic_loss=136305650005.3333 entropy=17.6056 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 67280] reward=-119719635.5 actor_loss=0.1452 critic_loss=129922770066.2857 entropy=17.5962 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 67280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455610.1 mean_steps=14.9
|
|
[Episode 67290] reward=-113678285.0 actor_loss=0.2575 critic_loss=129751729038.2222 entropy=17.5896 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 67300] reward=-120222715.6 actor_loss=0.2899 critic_loss=129436228721.7778 entropy=17.5936 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 67300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-577892.4 mean_steps=14.6
|
|
[Episode 67310] reward=-117205594.0 actor_loss=0.3280 critic_loss=122781594828.8000 entropy=17.5839 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 67320] reward=-115917607.7 actor_loss=0.3860 critic_loss=124645477210.8387 entropy=17.5803 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 67320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-439936.8 mean_steps=13.8
|
|
[Episode 67330] reward=-118576965.3 actor_loss=0.4926 critic_loss=132682174464.0000 entropy=17.5844 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1543 front_blocked=0
|
|
[Episode 67340] reward=-119616665.8 actor_loss=0.2422 critic_loss=133148696948.3636 entropy=17.5835 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 67340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448461.3 mean_steps=14.6
|
|
[Episode 67350] reward=-117679680.7 actor_loss=0.3357 critic_loss=129257097352.5333 entropy=17.5712 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 67360] reward=-117102432.6 actor_loss=0.3905 critic_loss=125776042170.1818 entropy=17.5628 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 67360] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-694118.8 mean_steps=11.2
|
|
[Episode 67370] reward=-115748398.1 actor_loss=0.4471 critic_loss=129137843867.8261 entropy=17.5646 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 67380] reward=-117801165.8 actor_loss=0.2690 critic_loss=127976476672.0000 entropy=17.5644 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 67380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-609298.0 mean_steps=13.8
|
|
[Episode 67390] reward=-113913540.2 actor_loss=0.2876 critic_loss=127262804744.8276 entropy=17.5576 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 67400] reward=-115513436.3 actor_loss=0.2790 critic_loss=125557825104.8421 entropy=17.5619 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 67400] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-647962.2 mean_steps=10.2
|
|
[Episode 67410] reward=-113848958.1 actor_loss=0.2788 critic_loss=130451881528.8889 entropy=17.5601 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 67420] reward=-113345085.8 actor_loss=0.2513 critic_loss=119600466721.3913 entropy=17.5537 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 67420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-509440.5 mean_steps=12.9
|
|
[Episode 67430] reward=-118398434.4 actor_loss=0.2519 critic_loss=126427996501.3333 entropy=17.5468 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 67440] reward=-114698880.9 actor_loss=0.3302 critic_loss=129583099688.4211 entropy=17.5467 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 67440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491015.7 mean_steps=15.0
|
|
[Episode 67450] reward=-116185597.3 actor_loss=0.3715 critic_loss=125474826740.6222 entropy=17.5495 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 67460] reward=-114821027.0 actor_loss=0.3758 critic_loss=124103428568.6154 entropy=17.5554 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 67460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501517.5 mean_steps=15.1
|
|
[Episode 67470] reward=-115649211.6 actor_loss=0.2991 critic_loss=124916628775.8222 entropy=17.5527 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 67480] reward=-116351221.5 actor_loss=0.2512 critic_loss=125096310491.4286 entropy=17.5475 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 67480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-508048.4 mean_steps=13.5
|
|
[Episode 67490] reward=-120239711.0 actor_loss=0.2984 critic_loss=127496400700.9524 entropy=17.5430 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 67500] reward=-115967890.2 actor_loss=0.2860 critic_loss=124870616795.4286 entropy=17.5352 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 67500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445460.4 mean_steps=15.6
|
|
[Episode 67510] reward=-116518362.4 actor_loss=0.2267 critic_loss=123473225984.0000 entropy=17.5394 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 67520] reward=-115232443.6 actor_loss=0.2573 critic_loss=122298577526.1538 entropy=17.5330 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 67520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469856.0 mean_steps=14.8
|
|
[Episode 67530] reward=-117413703.2 actor_loss=0.3287 critic_loss=131465574887.6190 entropy=17.5451 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 67540] reward=-118089404.0 actor_loss=0.3172 critic_loss=133306858609.7778 entropy=17.5502 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 67540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-366485.8 mean_steps=16.1
|
|
[Episode 67550] reward=-115437022.0 actor_loss=0.2683 critic_loss=136601888610.4615 entropy=17.5499 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 67560] reward=-118521616.8 actor_loss=0.4068 critic_loss=139228462882.5946 entropy=17.5450 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 67560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-604106.2 mean_steps=13.6
|
|
[Episode 67570] reward=-118955515.9 actor_loss=0.3940 critic_loss=131292776693.7600 entropy=17.5530 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 67580] reward=-118503408.6 actor_loss=0.2592 critic_loss=129323717154.1333 entropy=17.5525 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 67580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470543.7 mean_steps=15.1
|
|
[Episode 67590] reward=-110574054.3 actor_loss=0.4025 critic_loss=121107418712.2759 entropy=17.5676 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 67600] reward=-117562886.3 actor_loss=0.3123 critic_loss=128560956509.0909 entropy=17.5739 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 67600] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-603172.1 mean_steps=11.8
|
|
[Episode 67610] reward=-120231389.2 actor_loss=0.2772 critic_loss=134558716045.2414 entropy=17.5685 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 67620] reward=-118475638.0 actor_loss=0.2550 critic_loss=133607101781.3333 entropy=17.5666 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 67620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554720.4 mean_steps=12.1
|
|
[Episode 67630] reward=-114468945.9 actor_loss=0.3005 critic_loss=119780116666.1818 entropy=17.5626 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 67640] reward=-116363484.2 actor_loss=0.3225 critic_loss=144857911628.1081 entropy=17.5519 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 67640] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-637459.1 mean_steps=11.3
|
|
[Episode 67650] reward=-121248741.9 actor_loss=0.2203 critic_loss=135174569984.0000 entropy=17.5521 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 67660] reward=-118262176.8 actor_loss=0.2412 critic_loss=130734037522.9630 entropy=17.5599 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 67660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-422954.9 mean_steps=16.2
|
|
[Episode 67670] reward=-118068120.8 actor_loss=0.2022 critic_loss=132792629368.4706 entropy=17.5628 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 67680] reward=-119693564.7 actor_loss=0.3640 critic_loss=135356077007.2381 entropy=17.5560 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 67680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525015.7 mean_steps=14.3
|
|
[Episode 67690] reward=-117528321.2 actor_loss=0.3887 critic_loss=127143773720.3810 entropy=17.5548 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 67700] reward=-113451960.1 actor_loss=0.2722 critic_loss=123231424238.9333 entropy=17.5537 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 67700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505693.4 mean_steps=13.3
|
|
[Episode 67710] reward=-116806897.6 actor_loss=0.3742 critic_loss=131083615533.1765 entropy=17.5441 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 67720] reward=-110472203.6 actor_loss=0.2549 critic_loss=116125950944.9697 entropy=17.5473 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 67720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470983.7 mean_steps=13.8
|
|
[Episode 67730] reward=-113970379.2 actor_loss=0.3445 critic_loss=120393128960.0000 entropy=17.5417 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 67740] reward=-120646816.5 actor_loss=0.2184 critic_loss=131863119751.5294 entropy=17.5383 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 67740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-466372.2 mean_steps=13.9
|
|
[Episode 67750] reward=-113855767.5 actor_loss=0.2730 critic_loss=126143775980.3077 entropy=17.5389 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 67760] reward=-114616181.4 actor_loss=0.3624 critic_loss=126669803246.9333 entropy=17.5461 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 67760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474919.4 mean_steps=14.0
|
|
[Episode 67770] reward=-112044283.4 actor_loss=0.2278 critic_loss=123352771242.6667 entropy=17.5450 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 67780] reward=-118183822.3 actor_loss=0.2550 critic_loss=130917183488.0000 entropy=17.5445 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 67780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590700.8 mean_steps=12.6
|
|
[Episode 67790] reward=-123096932.3 actor_loss=0.2410 critic_loss=136305726146.2069 entropy=17.5351 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 67800] reward=-108047386.4 actor_loss=0.2554 critic_loss=116652607750.5641 entropy=17.5503 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 67800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539734.1 mean_steps=14.4
|
|
[Episode 67810] reward=-112438899.3 actor_loss=0.2887 critic_loss=121938414126.5455 entropy=17.5602 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 67820] reward=-120439363.3 actor_loss=0.3188 critic_loss=131059961628.4444 entropy=17.5664 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 67820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616741.0 mean_steps=12.9
|
|
[Episode 67830] reward=-118188049.1 actor_loss=0.2221 critic_loss=131697769767.8222 entropy=17.5709 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 67840] reward=-121408683.7 actor_loss=0.1864 critic_loss=128415581649.4545 entropy=17.5793 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 67840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-503664.0 mean_steps=13.1
|
|
[Episode 67850] reward=-118827781.7 actor_loss=0.2940 critic_loss=127243906816.0000 entropy=17.5870 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 67860] reward=-118078404.0 actor_loss=0.2406 critic_loss=127619250949.6889 entropy=17.5894 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 67860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540651.4 mean_steps=13.4
|
|
[Episode 67870] reward=-112888463.4 actor_loss=0.2549 critic_loss=123538780492.1081 entropy=17.5895 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 67880] reward=-113095151.2 actor_loss=0.3331 critic_loss=128057005312.0000 entropy=17.5912 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 67880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438767.9 mean_steps=15.7
|
|
[Episode 67890] reward=-119134734.8 actor_loss=0.2726 critic_loss=129444284757.3333 entropy=17.6029 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 67900] reward=-117092852.7 actor_loss=0.2971 critic_loss=130191868616.3478 entropy=17.6033 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 67900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555079.3 mean_steps=13.2
|
|
[Episode 67910] reward=-115066807.7 actor_loss=0.4315 critic_loss=127631838549.3333 entropy=17.6042 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 67920] reward=-118793008.9 actor_loss=0.3347 critic_loss=126590896537.6000 entropy=17.5852 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 67920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572323.8 mean_steps=12.5
|
|
[Episode 67930] reward=-117338726.0 actor_loss=0.2479 critic_loss=123489359492.7407 entropy=17.5645 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 67940] reward=-113457206.9 actor_loss=0.2595 critic_loss=120424393932.8000 entropy=17.5697 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 67940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400748.2 mean_steps=16.6
|
|
[Episode 67950] reward=-121943265.0 actor_loss=0.3027 critic_loss=162289836578.1333 entropy=17.5865 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 67960] reward=-118917044.2 actor_loss=0.2397 critic_loss=125464433823.2889 entropy=17.6066 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 67960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-384653.1 mean_steps=15.3
|
|
[Episode 67970] reward=-106341021.1 actor_loss=0.3558 critic_loss=122035398064.3556 entropy=17.5941 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 67980] reward=-112901509.2 actor_loss=0.2523 critic_loss=125362369149.1555 entropy=17.6014 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 67980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441444.1 mean_steps=15.6
|
|
[Episode 67990] reward=-117390992.7 actor_loss=0.3180 critic_loss=124658131375.1579 entropy=17.5875 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 68000] reward=-135367116.1 actor_loss=0.2615 critic_loss=1540422769580.9729 entropy=17.6056 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 68000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-527166.0 mean_steps=13.6
|
|
[Episode 68010] reward=-122863573.3 actor_loss=0.2234 critic_loss=294180586291.2000 entropy=17.6194 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 68020] reward=-115912098.7 actor_loss=0.3261 critic_loss=126833491090.2857 entropy=17.6219 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 68020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-409496.7 mean_steps=17.1
|
|
[Episode 68030] reward=-124730604.9 actor_loss=0.2343 critic_loss=136833477108.6222 entropy=17.6438 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 68040] reward=-115075882.0 actor_loss=0.2064 critic_loss=127875931787.6364 entropy=17.6422 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 68040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503975.9 mean_steps=14.3
|
|
[Episode 68050] reward=-118477730.1 actor_loss=0.3649 critic_loss=133787571268.2667 entropy=17.6551 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 68060] reward=-140627810.5 actor_loss=0.2476 critic_loss=3472893690948.2666 entropy=17.6439 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 68060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495655.3 mean_steps=15.4
|
|
[Episode 68070] reward=-116411707.6 actor_loss=0.1664 critic_loss=124857534530.0645 entropy=17.6454 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 68080] reward=-119300358.6 actor_loss=0.3794 critic_loss=129760997831.1111 entropy=17.6355 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 68080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577109.1 mean_steps=12.9
|
|
[Episode 68090] reward=-115533314.0 actor_loss=0.3559 critic_loss=128822308278.8571 entropy=17.6216 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 68100] reward=-116843416.0 actor_loss=0.1959 critic_loss=131316160739.5556 entropy=17.6276 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 68100] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-337454.4 mean_steps=16.9
|
|
[Episode 68110] reward=-664134188.1 actor_loss=5.1866 critic_loss=772385395208647.1250 entropy=17.6370 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 68120] reward=-124187997.7 actor_loss=0.2341 critic_loss=139362677813.8947 entropy=17.6423 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 68120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509227.2 mean_steps=14.3
|
|
[Episode 68130] reward=-114451975.7 actor_loss=0.3695 critic_loss=122226689706.6667 entropy=17.6444 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 68140] reward=-118042629.8 actor_loss=0.2968 critic_loss=166640656384.0000 entropy=17.6522 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 68140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588165.0 mean_steps=13.9
|
|
[Episode 68150] reward=-117536322.9 actor_loss=0.2409 critic_loss=135037867597.5758 entropy=17.6479 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 68160] reward=-107451721.4 actor_loss=0.3853 critic_loss=120305775229.1555 entropy=17.6408 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 68160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-309067.5 mean_steps=16.8
|
|
[Episode 68170] reward=-117877333.9 actor_loss=0.2599 critic_loss=128354814331.2593 entropy=17.6437 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 68180] reward=-115316117.0 actor_loss=0.3496 critic_loss=127997184409.6000 entropy=17.6510 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 68180] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-648684.9 mean_steps=12.0
|
|
[Episode 68190] reward=-113472376.6 actor_loss=0.3151 critic_loss=127209877836.1081 entropy=17.6549 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 68200] reward=-119154531.9 actor_loss=0.2942 critic_loss=127783185248.7111 entropy=17.6476 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 68200] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-671128.9 mean_steps=11.3
|
|
[Episode 68210] reward=-118823471.3 actor_loss=0.1541 critic_loss=134038079744.0000 entropy=17.6348 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 68220] reward=-115622682.2 actor_loss=0.2741 critic_loss=142256146589.5385 entropy=17.6319 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 68220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526367.8 mean_steps=14.4
|
|
[Episode 68230] reward=-114852236.5 actor_loss=0.3780 critic_loss=127786318592.0000 entropy=17.6349 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 68240] reward=-47656084153.8 actor_loss=529.5467 critic_loss=1686348654562683648.0000 entropy=17.6420 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1029 front_blocked=0
|
|
[Eval 68240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-597796.1 mean_steps=14.8
|
|
[Episode 68250] reward=-2520489295.5 actor_loss=0.2466 critic_loss=12968369650115834.0000 entropy=17.6363 approx_kl=-0.0008 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 68260] reward=-20248618780.2 actor_loss=949.1123 critic_loss=283821697755849888.0000 entropy=17.6475 approx_kl=0.1027 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 68260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453885.1 mean_steps=15.5
|
|
[Episode 68270] reward=-1431988660.7 actor_loss=1.9254 critic_loss=2722956535992593.0000 entropy=17.6478 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 68280] reward=-123027621.6 actor_loss=0.3379 critic_loss=148927398980.2667 entropy=17.6526 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 68280] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-640953.1 mean_steps=11.1
|
|
[Episode 68290] reward=-30220206291.8 actor_loss=3.3425 critic_loss=528322087930753664.0000 entropy=17.6507 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 68300] reward=-117795610.8 actor_loss=0.3122 critic_loss=137792543311.6444 entropy=17.6567 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 68300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-375892.3 mean_steps=16.4
|
|
[Episode 68310] reward=-3433209424.2 actor_loss=0.1688 critic_loss=13373651723945392.0000 entropy=17.6614 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1003 front_blocked=0
|
|
[Episode 68320] reward=-115854337.5 actor_loss=0.3132 critic_loss=129434449334.8571 entropy=17.6569 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 68320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475149.1 mean_steps=13.8
|
|
[Episode 68330] reward=-121936890.3 actor_loss=0.3068 critic_loss=205590520600.7742 entropy=17.6767 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 68340] reward=-164502096.9 actor_loss=4.3741 critic_loss=8525857324327.8223 entropy=17.6658 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 68340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552418.5 mean_steps=13.2
|
|
[Episode 68350] reward=-113533297.4 actor_loss=0.2609 critic_loss=126646561870.7692 entropy=17.6708 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 68360] reward=-112679910.9 actor_loss=0.2833 critic_loss=139214464000.0000 entropy=17.6754 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 68360] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-302642.6 mean_steps=17.5
|
|
[Episode 68370] reward=-116041165.4 actor_loss=0.2318 critic_loss=124243900308.2105 entropy=17.6834 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 68380] reward=-116473711.7 actor_loss=0.3351 critic_loss=145458466048.0000 entropy=17.6755 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 68380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487104.7 mean_steps=14.7
|
|
[Episode 68390] reward=-123443943.9 actor_loss=0.2031 critic_loss=156951526604.8000 entropy=17.6812 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 68400] reward=-115249467.8 actor_loss=0.2808 critic_loss=139200957917.8667 entropy=17.6761 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 68400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616244.6 mean_steps=12.9
|
|
[Episode 68410] reward=-117317654.9 actor_loss=0.3727 critic_loss=130273306869.7600 entropy=17.6807 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 68420] reward=-121133931.4 actor_loss=0.3059 critic_loss=135079615049.1429 entropy=17.6809 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 68420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-467810.1 mean_steps=15.6
|
|
[Episode 68430] reward=-115803304.2 actor_loss=0.4704 critic_loss=139544248729.6000 entropy=17.6735 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 68440] reward=-112679713.7 actor_loss=0.3762 critic_loss=122674656309.8947 entropy=17.6818 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 68440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392096.4 mean_steps=16.2
|
|
[Episode 68450] reward=-116862703.7 actor_loss=0.3530 critic_loss=125491315370.6667 entropy=17.6910 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 68460] reward=-115164622.8 actor_loss=0.2056 critic_loss=128209248256.0000 entropy=17.6942 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 68460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514920.2 mean_steps=14.1
|
|
[Episode 68470] reward=-111994430.7 actor_loss=0.3394 critic_loss=123531862471.1111 entropy=17.6999 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 68480] reward=-118623662.6 actor_loss=0.2964 critic_loss=130293501952.0000 entropy=17.6989 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 68480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-556950.0 mean_steps=12.7
|
|
[Episode 68490] reward=-117253460.3 actor_loss=0.3096 critic_loss=129832803942.4000 entropy=17.6924 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 68500] reward=-122911493.7 actor_loss=0.2888 critic_loss=160026305331.2000 entropy=17.6936 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 68500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429946.8 mean_steps=15.4
|
|
[Episode 68510] reward=-122613723.5 actor_loss=0.2676 critic_loss=138433279180.8000 entropy=17.6944 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 68520] reward=-115493593.1 actor_loss=0.3515 critic_loss=124825441765.0526 entropy=17.6927 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 68520] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-627372.3 mean_steps=10.9
|
|
[Episode 68530] reward=-124472988.0 actor_loss=0.2476 critic_loss=150415397236.3636 entropy=17.6914 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 68540] reward=-113194291.4 actor_loss=0.4344 critic_loss=128586421522.7317 entropy=17.6874 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 68540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-410916.8 mean_steps=14.1
|
|
[Episode 68550] reward=-120030497.0 actor_loss=0.2291 critic_loss=132086049814.7556 entropy=17.6896 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 68560] reward=-121176639.1 actor_loss=0.2317 critic_loss=130453105322.6667 entropy=17.6881 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 68560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448186.1 mean_steps=15.9
|
|
[Episode 68570] reward=-116389625.5 actor_loss=0.3844 critic_loss=131461207381.3333 entropy=17.6966 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 68580] reward=-118919876.9 actor_loss=0.4045 critic_loss=131020763955.2000 entropy=17.7050 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 68580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544096.3 mean_steps=13.2
|
|
[Episode 68590] reward=-123536892.6 actor_loss=0.2060 critic_loss=398620640721.4545 entropy=17.7034 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 68600] reward=-118806377.4 actor_loss=0.3150 critic_loss=135072581818.1818 entropy=17.7018 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 68600] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-321627.8 mean_steps=16.8
|
|
[Episode 68610] reward=-121161934.6 actor_loss=0.3272 critic_loss=135288059494.4000 entropy=17.6989 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 68620] reward=-117578183.4 actor_loss=0.3218 critic_loss=149926979971.4595 entropy=17.6947 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 68620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567915.8 mean_steps=13.3
|
|
[Episode 68630] reward=-121117913.0 actor_loss=0.2851 critic_loss=130477110431.2889 entropy=17.6899 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 68640] reward=-118196476.8 actor_loss=0.3348 critic_loss=135545012506.4828 entropy=17.6831 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 68640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500499.8 mean_steps=14.9
|
|
[Episode 68650] reward=-116626464.1 actor_loss=0.3520 critic_loss=127829510826.6667 entropy=17.6776 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 68660] reward=-120571872.8 actor_loss=0.1808 critic_loss=133028158577.7778 entropy=17.6791 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 68660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588869.5 mean_steps=13.8
|
|
[Episode 68670] reward=-121872537.3 actor_loss=0.2519 critic_loss=132896163794.4889 entropy=17.6730 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 68680] reward=-116305296.9 actor_loss=0.2608 critic_loss=124566412921.9048 entropy=17.6761 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 68680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579771.9 mean_steps=12.9
|
|
[Episode 68690] reward=-114240039.8 actor_loss=0.2929 critic_loss=125542957966.2222 entropy=17.6607 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 68700] reward=-111423635.0 actor_loss=0.2862 critic_loss=121975173120.0000 entropy=17.6700 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 68700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479612.5 mean_steps=13.9
|
|
[Episode 68710] reward=-122736821.9 actor_loss=0.2297 critic_loss=138653911722.6667 entropy=17.6760 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 68720] reward=-116629848.8 actor_loss=0.2590 critic_loss=125630188069.4634 entropy=17.6804 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 68720] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-349153.6 mean_steps=16.9
|
|
[Episode 68730] reward=-124536792.8 actor_loss=0.2659 critic_loss=137096390703.6279 entropy=17.6801 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 68740] reward=-115054484.5 actor_loss=0.3581 critic_loss=122520520021.3333 entropy=17.6839 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 68740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459110.6 mean_steps=14.7
|
|
[Episode 68750] reward=-112814170.7 actor_loss=0.3850 critic_loss=120841422620.4444 entropy=17.6803 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 68760] reward=-116902726.0 actor_loss=0.2669 critic_loss=133155672242.0870 entropy=17.6857 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 68760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531115.4 mean_steps=14.5
|
|
[Episode 68770] reward=-113462259.9 actor_loss=0.2426 critic_loss=118615686257.7778 entropy=17.6829 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 68780] reward=-117301048.1 actor_loss=0.3673 critic_loss=130864596582.4000 entropy=17.6724 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 68780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488660.3 mean_steps=13.7
|
|
[Episode 68790] reward=-115962296.4 actor_loss=0.3118 critic_loss=125169809408.0000 entropy=17.6737 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 68800] reward=-116843499.3 actor_loss=0.3237 critic_loss=128441928908.8000 entropy=17.6729 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 68800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535336.6 mean_steps=14.2
|
|
[Episode 68810] reward=-125140707.3 actor_loss=0.2471 critic_loss=138898807076.5714 entropy=17.6659 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 68820] reward=-114088342.3 actor_loss=0.3321 critic_loss=124555315200.0000 entropy=17.6611 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 68820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-639422.4 mean_steps=13.2
|
|
[Episode 68830] reward=-117263914.6 actor_loss=0.3083 critic_loss=134301751393.5238 entropy=17.6601 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 68840] reward=-109770124.5 actor_loss=0.3439 critic_loss=124961876104.5333 entropy=17.6523 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 68840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523453.3 mean_steps=14.1
|
|
[Episode 68850] reward=-111958576.9 actor_loss=0.3067 critic_loss=122180449924.7407 entropy=17.6483 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 68860] reward=-117310302.9 actor_loss=0.3133 critic_loss=131981588293.8182 entropy=17.6472 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 68860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-624646.6 mean_steps=11.9
|
|
[Episode 68870] reward=-120706956.0 actor_loss=0.2801 critic_loss=140524768521.4815 entropy=17.6346 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 68880] reward=-116567423.9 actor_loss=0.3372 critic_loss=123619429153.3913 entropy=17.6396 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 68880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569743.0 mean_steps=13.7
|
|
[Episode 68890] reward=-115997189.1 actor_loss=0.2451 critic_loss=122810636101.8182 entropy=17.6488 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 68900] reward=-117678180.7 actor_loss=0.2850 critic_loss=135867964074.6667 entropy=17.6529 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 68900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-633356.7 mean_steps=11.8
|
|
[Episode 68910] reward=-117687673.8 actor_loss=0.2681 critic_loss=131343326163.4783 entropy=17.6533 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 68920] reward=-117125560.0 actor_loss=0.2065 critic_loss=129629336798.6087 entropy=17.6461 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 68920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-416424.7 mean_steps=16.6
|
|
[Episode 68930] reward=-118635384.7 actor_loss=0.2712 critic_loss=132416740966.4000 entropy=17.6451 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 68940] reward=-120347428.3 actor_loss=0.2889 critic_loss=131618743910.4000 entropy=17.6484 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 68940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484118.0 mean_steps=14.2
|
|
[Episode 68950] reward=-115832344.6 actor_loss=0.3647 critic_loss=138446396269.7143 entropy=17.6554 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 68960] reward=-114567220.5 actor_loss=0.3133 critic_loss=125357022549.3333 entropy=17.6730 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 68960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-635274.9 mean_steps=13.6
|
|
[Episode 68970] reward=-112295049.6 actor_loss=0.3277 critic_loss=130071571602.2857 entropy=17.6798 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 68980] reward=-118339317.5 actor_loss=0.2890 critic_loss=132857733120.0000 entropy=17.6690 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 68980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534837.4 mean_steps=13.9
|
|
[Episode 68990] reward=-118160163.0 actor_loss=0.2754 critic_loss=130166449038.2222 entropy=17.6768 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 69000] reward=-121729278.8 actor_loss=0.3790 critic_loss=137051977386.6667 entropy=17.6789 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 69000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-425549.0 mean_steps=16.6
|
|
[Episode 69010] reward=-116446602.6 actor_loss=0.3197 critic_loss=125104375320.3810 entropy=17.6839 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 69020] reward=-123680409.4 actor_loss=0.3476 critic_loss=244103104984.6154 entropy=17.6830 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 69020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524252.4 mean_steps=13.2
|
|
[Episode 69030] reward=-115365114.7 actor_loss=0.2803 critic_loss=126462962892.8000 entropy=17.6785 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 69040] reward=-116595614.2 actor_loss=0.3158 critic_loss=133705613312.0000 entropy=17.6914 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 69040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-580238.7 mean_steps=13.7
|
|
[Episode 69050] reward=-120132373.4 actor_loss=0.2373 critic_loss=139263548371.4783 entropy=17.6818 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 69060] reward=-117618605.2 actor_loss=0.2897 critic_loss=130944730577.4545 entropy=17.6836 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 69060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446916.0 mean_steps=15.8
|
|
[Episode 69070] reward=-123498141.4 actor_loss=0.2658 critic_loss=135574243417.0435 entropy=17.6777 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 69080] reward=-125825616.8 actor_loss=1.2568 critic_loss=322764153222.0952 entropy=17.6829 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 69080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-623235.3 mean_steps=13.3
|
|
[Episode 69090] reward=-117803212.2 actor_loss=0.3353 critic_loss=132172273436.4444 entropy=17.6820 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 69100] reward=-119695865.3 actor_loss=0.3361 critic_loss=130799322329.2121 entropy=17.6830 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 69100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497603.6 mean_steps=14.9
|
|
[Episode 69110] reward=-118282470.3 actor_loss=0.3311 critic_loss=133845245561.9048 entropy=17.6775 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 69120] reward=-115796374.3 actor_loss=0.3145 critic_loss=126905817770.6667 entropy=17.6833 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 69120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-372169.4 mean_steps=15.9
|
|
[Episode 69130] reward=-118689507.6 actor_loss=0.2609 critic_loss=129064615936.0000 entropy=17.6815 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 69140] reward=-117425617.9 actor_loss=0.3124 critic_loss=134878894715.5862 entropy=17.6747 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 69140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-504854.9 mean_steps=13.2
|
|
[Episode 69150] reward=-119427407.5 actor_loss=0.2261 critic_loss=132153111893.3333 entropy=17.6592 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 69160] reward=-120369070.9 actor_loss=0.3829 critic_loss=130078586587.4286 entropy=17.6552 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 69160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-512024.2 mean_steps=15.2
|
|
[Episode 69170] reward=-121352584.1 actor_loss=0.2164 critic_loss=132774258364.6316 entropy=17.6666 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 69180] reward=-109834563.0 actor_loss=0.3646 critic_loss=122314679832.3810 entropy=17.6687 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 69180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-608802.7 mean_steps=12.9
|
|
[Episode 69190] reward=-112586353.5 actor_loss=0.3834 critic_loss=124384475428.5714 entropy=17.6680 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 69200] reward=-119585447.0 actor_loss=0.2821 critic_loss=132512298513.6552 entropy=17.6613 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 69200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-536601.0 mean_steps=12.7
|
|
[Episode 69210] reward=-118952898.2 actor_loss=0.3135 critic_loss=239678380299.1304 entropy=17.6605 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 69220] reward=-116575270.6 actor_loss=0.2351 critic_loss=126769671795.6129 entropy=17.6545 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 69220] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-769319.6 mean_steps=10.1
|
|
[Episode 69230] reward=-115265118.1 actor_loss=0.2676 critic_loss=126751401496.3810 entropy=17.6435 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 69240] reward=-109821731.8 actor_loss=0.3352 critic_loss=114557654903.4667 entropy=17.6434 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 69240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447641.1 mean_steps=15.2
|
|
[Episode 69250] reward=-121023697.1 actor_loss=0.2933 critic_loss=161507658107.2592 entropy=17.6310 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 69260] reward=-116428032.6 actor_loss=0.3463 critic_loss=133332102507.3548 entropy=17.6272 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 69260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-384871.2 mean_steps=15.2
|
|
[Episode 69270] reward=-118828797.1 actor_loss=0.2560 critic_loss=139250467108.5714 entropy=17.6236 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 69280] reward=-120636177.5 actor_loss=0.2782 critic_loss=132244455424.0000 entropy=17.6249 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 69280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-499696.1 mean_steps=14.8
|
|
[Episode 69290] reward=-118697267.4 actor_loss=0.3491 critic_loss=129604395008.0000 entropy=17.6295 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 69300] reward=-120098644.0 actor_loss=0.2648 critic_loss=130902535485.7931 entropy=17.6459 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 69300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480493.6 mean_steps=14.0
|
|
[Episode 69310] reward=-123027903.2 actor_loss=0.2205 critic_loss=145600877597.2571 entropy=17.6457 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 69320] reward=-119445690.5 actor_loss=0.2195 critic_loss=134733217792.0000 entropy=17.6467 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 69320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-504314.2 mean_steps=15.9
|
|
[Episode 69330] reward=-116775278.1 actor_loss=0.3775 critic_loss=131869608866.9091 entropy=17.6468 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 69340] reward=-117211352.2 actor_loss=0.2714 critic_loss=130325036714.6667 entropy=17.6442 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 69340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-625072.4 mean_steps=12.0
|
|
[Episode 69350] reward=-113186569.2 actor_loss=0.3083 critic_loss=126538453187.0476 entropy=17.6418 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 69360] reward=-118012747.0 actor_loss=0.3019 critic_loss=131579465142.8571 entropy=17.6526 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 69360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534682.8 mean_steps=13.9
|
|
[Episode 69370] reward=-116650275.5 actor_loss=0.2803 critic_loss=130442911992.2424 entropy=17.6504 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 69380] reward=-115827251.8 actor_loss=0.2704 critic_loss=120335882532.5714 entropy=17.6503 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 69380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-565763.9 mean_steps=12.7
|
|
[Episode 69390] reward=-109236951.3 actor_loss=0.3071 critic_loss=123562272358.4000 entropy=17.6450 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 69400] reward=-108947293.4 actor_loss=0.3290 critic_loss=116801712748.6061 entropy=17.6454 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 69400] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-705309.0 mean_steps=12.4
|
|
[Episode 69410] reward=-119511049.2 actor_loss=0.3325 critic_loss=133575912448.0000 entropy=17.6413 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 69420] reward=-115724441.1 actor_loss=0.3540 critic_loss=123279285043.2000 entropy=17.6416 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 69420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575436.4 mean_steps=13.5
|
|
[Episode 69430] reward=-112787291.9 actor_loss=0.3043 critic_loss=120421341656.6154 entropy=17.6358 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 69440] reward=-115211659.0 actor_loss=0.3253 critic_loss=125683786752.0000 entropy=17.6393 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 69440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402437.2 mean_steps=15.3
|
|
[Episode 69450] reward=-115882317.1 actor_loss=0.2732 critic_loss=127390616864.8205 entropy=17.6428 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 69460] reward=-120288155.1 actor_loss=0.2541 critic_loss=129960637599.2889 entropy=17.6304 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 69460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-514735.8 mean_steps=12.3
|
|
[Episode 69470] reward=-118651770.6 actor_loss=0.2969 critic_loss=130683568500.3636 entropy=17.6232 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 69480] reward=-117225235.9 actor_loss=0.1933 critic_loss=128596497588.7059 entropy=17.6321 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 69480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519953.0 mean_steps=14.6
|
|
[Episode 69490] reward=-123268790.9 actor_loss=0.3040 critic_loss=135374046629.6471 entropy=17.6327 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 69500] reward=-125470495.1 actor_loss=0.2763 critic_loss=145385715277.5757 entropy=17.6298 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 69500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-522261.3 mean_steps=12.2
|
|
[Episode 69510] reward=-114917686.6 actor_loss=0.2927 critic_loss=127789395968.0000 entropy=17.6367 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 69520] reward=-121523314.8 actor_loss=0.2865 critic_loss=132381933750.0444 entropy=17.6377 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 69520] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-584438.4 mean_steps=11.8
|
|
[Episode 69530] reward=-116634929.8 actor_loss=0.3150 critic_loss=126002663847.7241 entropy=17.6417 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 69540] reward=-122324735.1 actor_loss=0.1040 critic_loss=134622433495.5789 entropy=17.6404 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 69540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-630431.9 mean_steps=12.8
|
|
[Episode 69550] reward=-117424139.2 actor_loss=0.3033 critic_loss=127072013458.2857 entropy=17.6366 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 69560] reward=-125501718.0 actor_loss=0.3398 critic_loss=925316090657.3914 entropy=17.6416 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 69560] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-594978.9 mean_steps=11.8
|
|
[Episode 69570] reward=-113202926.2 actor_loss=0.3297 critic_loss=125178653021.6585 entropy=17.6396 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 69580] reward=-117132379.6 actor_loss=0.2595 critic_loss=125276110241.1852 entropy=17.6416 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 69580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440674.3 mean_steps=15.8
|
|
[Episode 69590] reward=-122483294.9 actor_loss=0.2744 critic_loss=135603915044.5714 entropy=17.6359 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 69600] reward=-119222510.7 actor_loss=0.2936 critic_loss=125763053779.8621 entropy=17.6485 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 69600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430957.9 mean_steps=15.7
|
|
[Episode 69610] reward=-115909042.1 actor_loss=0.1991 critic_loss=127800173636.2667 entropy=17.6317 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 69620] reward=-117752801.5 actor_loss=0.2345 critic_loss=128279022411.2941 entropy=17.6316 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 69620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-561238.2 mean_steps=14.6
|
|
[Episode 69630] reward=-116714417.4 actor_loss=0.3488 critic_loss=124917855153.2308 entropy=17.6326 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 69640] reward=-118045564.6 actor_loss=0.2224 critic_loss=131140867754.6667 entropy=17.6277 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 69640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-582027.9 mean_steps=12.7
|
|
[Episode 69650] reward=-120113783.8 actor_loss=0.3114 critic_loss=125154213546.6667 entropy=17.6267 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 69660] reward=-119280220.9 actor_loss=0.2791 critic_loss=125261552864.7805 entropy=17.6324 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 69660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-416552.6 mean_steps=16.6
|
|
[Episode 69670] reward=-117108521.6 actor_loss=0.2793 critic_loss=129007550464.0000 entropy=17.6178 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 69680] reward=-119463000.4 actor_loss=0.3403 critic_loss=126819241323.3548 entropy=17.6253 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 69680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431046.7 mean_steps=14.6
|
|
[Episode 69690] reward=-112357482.1 actor_loss=0.4264 critic_loss=116207607451.8261 entropy=17.6515 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 69700] reward=-111796368.0 actor_loss=0.3455 critic_loss=124245029410.1333 entropy=17.6505 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 69700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493518.4 mean_steps=14.2
|
|
[Episode 69710] reward=-114307005.8 actor_loss=0.3436 critic_loss=122190081675.6364 entropy=17.6457 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 69720] reward=-118056699.5 actor_loss=0.1755 critic_loss=134910732424.5333 entropy=17.6497 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 69720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-500097.3 mean_steps=12.9
|
|
[Episode 69730] reward=-116829565.2 actor_loss=0.3088 critic_loss=129419823872.0000 entropy=17.6365 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 69740] reward=-116736269.4 actor_loss=0.2513 critic_loss=125273593593.4359 entropy=17.6573 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 69740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409991.0 mean_steps=15.3
|
|
[Episode 69750] reward=-106567147.0 actor_loss=0.3617 critic_loss=120547866965.3333 entropy=17.6478 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 69760] reward=-116679794.0 actor_loss=0.2275 critic_loss=123532461670.4000 entropy=17.6425 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 69760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-362664.6 mean_steps=15.8
|
|
[Episode 69770] reward=-116555739.4 actor_loss=0.2781 critic_loss=125041499553.1852 entropy=17.6396 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 69780] reward=-114149884.0 actor_loss=0.2844 critic_loss=128753788928.0000 entropy=17.6496 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 69780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445937.8 mean_steps=15.3
|
|
[Episode 69790] reward=-118638562.5 actor_loss=0.3573 critic_loss=129507824006.0952 entropy=17.6476 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 69800] reward=-116710366.0 actor_loss=0.3160 critic_loss=125642840622.5455 entropy=17.6445 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 69800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570631.2 mean_steps=13.4
|
|
[Episode 69810] reward=-116946941.5 actor_loss=0.3751 critic_loss=123679519357.1555 entropy=17.6454 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 69820] reward=-118517803.6 actor_loss=0.3057 critic_loss=126917053098.6667 entropy=17.6462 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 69820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-572565.8 mean_steps=13.8
|
|
[Episode 69830] reward=-116921820.4 actor_loss=0.3486 critic_loss=126829970612.7059 entropy=17.6451 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 69840] reward=-117603593.9 actor_loss=0.3239 critic_loss=124385988980.3636 entropy=17.6428 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 69840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523723.6 mean_steps=13.9
|
|
[Episode 69850] reward=-114555610.3 actor_loss=0.3106 critic_loss=122548518663.7576 entropy=17.6518 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 69860] reward=-118954804.3 actor_loss=0.2879 critic_loss=130490319447.4146 entropy=17.6568 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 69860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560070.1 mean_steps=13.3
|
|
[Episode 69870] reward=-115405698.3 actor_loss=0.2933 critic_loss=124997734934.2609 entropy=17.6689 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 69880] reward=-113831417.0 actor_loss=0.3201 critic_loss=124459255808.0000 entropy=17.6628 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 69880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400943.7 mean_steps=15.4
|
|
[Episode 69890] reward=-118175326.9 actor_loss=0.3214 critic_loss=123499319144.2963 entropy=17.6508 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 69900] reward=-114477536.3 actor_loss=0.3554 critic_loss=124921338343.6190 entropy=17.6553 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 69900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-600923.1 mean_steps=14.1
|
|
[Episode 69910] reward=-116240096.1 actor_loss=0.2828 critic_loss=124541892380.4444 entropy=17.6529 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 69920] reward=-117147015.6 actor_loss=0.3333 critic_loss=132514417699.3103 entropy=17.6500 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 69920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-422615.5 mean_steps=16.6
|
|
[Episode 69930] reward=-119715498.6 actor_loss=0.3089 critic_loss=127460450304.0000 entropy=17.6487 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 69940] reward=-119048224.0 actor_loss=0.3469 critic_loss=134996103623.1111 entropy=17.6599 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 69940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459207.2 mean_steps=15.7
|
|
[Episode 69950] reward=-121117796.0 actor_loss=0.2329 critic_loss=131233083240.2963 entropy=17.6622 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 69960] reward=-120295453.3 actor_loss=0.3096 critic_loss=138915900211.2000 entropy=17.6757 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 69960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506758.3 mean_steps=13.9
|
|
[Episode 69970] reward=-118477412.3 actor_loss=0.1691 critic_loss=131704529988.2667 entropy=17.6805 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 69980] reward=-116233228.1 actor_loss=0.2805 critic_loss=127579992215.7037 entropy=17.6861 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 69980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-675665.1 mean_steps=12.4
|
|
[Episode 69990] reward=-116920284.9 actor_loss=0.3180 critic_loss=127158182851.7647 entropy=17.6932 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 70000] reward=-121744045.3 actor_loss=0.2566 critic_loss=132488147399.1111 entropy=17.6931 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 70000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505535.9 mean_steps=14.5
|
|
[Episode 70010] reward=-116395348.9 actor_loss=0.3885 critic_loss=125325717362.7586 entropy=17.6950 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 70020] reward=-116354523.2 actor_loss=0.3291 critic_loss=132662893811.8095 entropy=17.6891 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 70020] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-233438.6 mean_steps=18.4
|
|
[Episode 70030] reward=-118247544.6 actor_loss=0.3284 critic_loss=129171082093.7143 entropy=17.6845 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 70040] reward=-114459304.3 actor_loss=0.3700 critic_loss=127389766087.1111 entropy=17.6831 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 70040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-583821.5 mean_steps=12.6
|
|
[Episode 70050] reward=-115315243.2 actor_loss=0.4084 critic_loss=125609568392.5333 entropy=17.6867 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 70060] reward=-120151375.7 actor_loss=0.2747 critic_loss=131482562875.0769 entropy=17.6712 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 70060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417546.3 mean_steps=15.2
|
|
[Episode 70070] reward=-117062944.6 actor_loss=0.2321 critic_loss=123767045933.9487 entropy=17.6688 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 70080] reward=-118325978.5 actor_loss=0.3393 critic_loss=130574123008.0000 entropy=17.6554 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 70080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563318.5 mean_steps=12.8
|
|
[Episode 70090] reward=-113480040.7 actor_loss=0.3639 critic_loss=122551846684.4444 entropy=17.6632 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 70100] reward=-119223379.7 actor_loss=0.2870 critic_loss=130238614186.6667 entropy=17.6520 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 70100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486347.2 mean_steps=14.8
|
|
[Episode 70110] reward=-116709266.4 actor_loss=0.3281 critic_loss=127798101333.3333 entropy=17.6552 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 70120] reward=-122543001.4 actor_loss=0.2869 critic_loss=142014071265.8824 entropy=17.6623 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 70120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454114.2 mean_steps=15.9
|
|
[Episode 70130] reward=-118669696.5 actor_loss=0.2952 critic_loss=124808209944.3810 entropy=17.6618 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 70140] reward=-120250267.1 actor_loss=0.2164 critic_loss=131711833060.3243 entropy=17.6898 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 70140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534240.6 mean_steps=14.3
|
|
[Episode 70150] reward=-119078520.8 actor_loss=0.2851 critic_loss=124706725632.0000 entropy=17.6842 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 70160] reward=-121811629.6 actor_loss=0.2675 critic_loss=133934263059.6923 entropy=17.6839 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 70160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536166.0 mean_steps=14.2
|
|
[Episode 70170] reward=-113090280.3 actor_loss=0.3227 critic_loss=129045873717.8947 entropy=17.6778 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 70180] reward=-110984976.4 actor_loss=0.3188 critic_loss=126316855668.3636 entropy=17.6778 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 70180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446523.0 mean_steps=14.9
|
|
[Episode 70190] reward=-117864327.8 actor_loss=0.3112 critic_loss=126431855372.1905 entropy=17.6712 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 70200] reward=-119594678.9 actor_loss=0.2336 critic_loss=136217336945.7778 entropy=17.6520 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 70200] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-384406.7 mean_steps=16.9
|
|
[Episode 70210] reward=-118640820.7 actor_loss=0.3035 critic_loss=132221198584.2424 entropy=17.6448 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 70220] reward=-120191531.4 actor_loss=0.1816 critic_loss=133175370898.2857 entropy=17.6433 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 70220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495386.0 mean_steps=14.5
|
|
[Episode 70230] reward=-116795033.3 actor_loss=0.2417 critic_loss=123963695104.0000 entropy=17.6448 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 70240] reward=-115160975.9 actor_loss=0.3311 critic_loss=127405306880.0000 entropy=17.6362 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 70240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378039.7 mean_steps=15.9
|
|
[Episode 70250] reward=-119622549.5 actor_loss=0.2878 critic_loss=133571006976.0000 entropy=17.6407 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 70260] reward=-110342913.2 actor_loss=0.3898 critic_loss=125303085056.0000 entropy=17.6383 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 70260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542103.9 mean_steps=14.7
|
|
[Episode 70270] reward=-110101320.5 actor_loss=0.2879 critic_loss=115984805763.8788 entropy=17.6423 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 70280] reward=-118348262.2 actor_loss=0.3568 critic_loss=130282113979.7333 entropy=17.6440 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 70280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491392.3 mean_steps=13.9
|
|
[Episode 70290] reward=-123027777.7 actor_loss=0.2505 critic_loss=135620819854.2222 entropy=17.6411 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 70300] reward=-128334479.2 actor_loss=0.2856 critic_loss=1469408371078.0952 entropy=17.6477 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 70300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565056.1 mean_steps=13.5
|
|
[Episode 70310] reward=-118196294.8 actor_loss=0.1881 critic_loss=121055049255.3846 entropy=17.6618 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 70320] reward=-118829283.8 actor_loss=0.2229 critic_loss=132114491187.2000 entropy=17.6716 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 70320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-426925.5 mean_steps=14.6
|
|
[Episode 70330] reward=-113260477.7 actor_loss=0.2214 critic_loss=115918122139.1515 entropy=17.6754 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 70340] reward=-118579448.5 actor_loss=0.2668 critic_loss=131216054830.5455 entropy=17.6824 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 70340] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-293900.8 mean_steps=17.6
|
|
[Episode 70350] reward=-116372062.7 actor_loss=0.3157 critic_loss=125678323712.0000 entropy=17.6810 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 70360] reward=-116908401.0 actor_loss=0.2921 critic_loss=125043326533.1892 entropy=17.6775 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 70360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-617095.9 mean_steps=12.9
|
|
[Episode 70370] reward=-113315356.0 actor_loss=0.3901 critic_loss=125433090770.8235 entropy=17.6766 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 70380] reward=-119062225.1 actor_loss=0.2464 critic_loss=126018169304.6154 entropy=17.6639 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 70380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-355642.4 mean_steps=16.2
|
|
[Episode 70390] reward=-115312603.9 actor_loss=0.3817 critic_loss=124957586882.5600 entropy=17.6705 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 70400] reward=-121047677.6 actor_loss=0.4054 critic_loss=131133442234.1818 entropy=17.6686 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 70400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597690.1 mean_steps=13.1
|
|
[Episode 70410] reward=-122197112.3 actor_loss=0.3462 critic_loss=132070872405.3333 entropy=17.6583 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 70420] reward=-119906252.3 actor_loss=0.2493 critic_loss=129421718732.8000 entropy=17.6502 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 70420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578229.3 mean_steps=13.6
|
|
[Episode 70430] reward=-113811880.4 actor_loss=0.3002 critic_loss=121312932132.5714 entropy=17.6479 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 70440] reward=-114639923.9 actor_loss=0.3390 critic_loss=125209270085.8182 entropy=17.6405 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 70440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432535.5 mean_steps=15.8
|
|
[Episode 70450] reward=-119703558.2 actor_loss=0.2821 critic_loss=131718631879.1111 entropy=17.6442 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 70460] reward=-118635146.0 actor_loss=0.3104 critic_loss=137012491468.8000 entropy=17.6494 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 70460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537968.4 mean_steps=13.4
|
|
[Episode 70470] reward=-117961891.5 actor_loss=0.1420 critic_loss=127035468800.0000 entropy=17.6522 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 70480] reward=-115032003.4 actor_loss=0.3714 critic_loss=123866382882.1333 entropy=17.6645 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 70480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-520940.9 mean_steps=13.7
|
|
[Episode 70490] reward=-118155742.9 actor_loss=0.2083 critic_loss=125087634773.3333 entropy=17.6632 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 70500] reward=-120118016.3 actor_loss=0.2831 critic_loss=137266107343.2381 entropy=17.6716 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 70500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-404167.6 mean_steps=14.8
|
|
[Episode 70510] reward=-116327234.1 actor_loss=0.3675 critic_loss=122842207027.2000 entropy=17.6693 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 70520] reward=-114707870.5 actor_loss=0.2987 critic_loss=125132789623.4667 entropy=17.6570 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 70520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-453957.6 mean_steps=13.8
|
|
[Episode 70530] reward=-118060696.4 actor_loss=0.3077 critic_loss=125630503139.5556 entropy=17.6703 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 70540] reward=-116858204.7 actor_loss=0.3857 critic_loss=124713384618.6667 entropy=17.6650 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 70540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587383.6 mean_steps=13.5
|
|
[Episode 70550] reward=-115294031.3 actor_loss=0.3521 critic_loss=121497745817.6000 entropy=17.6520 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 70560] reward=-118144911.6 actor_loss=0.2733 critic_loss=130202404953.0435 entropy=17.6503 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 70560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590750.1 mean_steps=12.9
|
|
[Episode 70570] reward=-117132668.3 actor_loss=0.2045 critic_loss=126847590043.8261 entropy=17.6447 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 70580] reward=-118993797.7 actor_loss=0.3440 critic_loss=134953299558.4000 entropy=17.6328 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 70580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523073.6 mean_steps=13.3
|
|
[Episode 70590] reward=-117729585.4 actor_loss=0.2706 critic_loss=124080346271.2889 entropy=17.6407 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 70600] reward=-117017757.6 actor_loss=0.4154 critic_loss=128871045618.1622 entropy=17.6418 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 70600] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-356474.2 mean_steps=17.1
|
|
[Episode 70610] reward=-114491292.4 actor_loss=0.3242 critic_loss=121828998621.8667 entropy=17.6618 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 70620] reward=-117652592.3 actor_loss=0.2627 critic_loss=131840641994.1053 entropy=17.6701 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 70620] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-392000.4 mean_steps=17.4
|
|
[Episode 70630] reward=-113559951.2 actor_loss=0.3815 critic_loss=124602904576.0000 entropy=17.6650 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 70640] reward=-121623036.8 actor_loss=0.3122 critic_loss=128878409045.3333 entropy=17.6642 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 70640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542180.5 mean_steps=13.7
|
|
[Episode 70650] reward=-117561656.6 actor_loss=0.1906 critic_loss=125644060792.4706 entropy=17.6661 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 70660] reward=-119180822.6 actor_loss=0.3295 critic_loss=132692746811.5349 entropy=17.6578 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 70660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525280.1 mean_steps=13.2
|
|
[Episode 70670] reward=-118166004.1 actor_loss=0.3388 critic_loss=130713648215.7714 entropy=17.6667 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 70680] reward=-111844329.5 actor_loss=0.3436 critic_loss=119579527613.2174 entropy=17.6633 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 70680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449079.7 mean_steps=15.9
|
|
[Episode 70690] reward=-115067233.8 actor_loss=0.3301 critic_loss=123576405040.7619 entropy=17.6642 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 70700] reward=-117649592.1 actor_loss=0.3918 critic_loss=123946585656.8889 entropy=17.6723 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 70700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486613.8 mean_steps=15.1
|
|
[Episode 70710] reward=-118538603.1 actor_loss=0.2916 critic_loss=131253391360.0000 entropy=17.6863 approx_kl=0.0119 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 70720] reward=-119452311.1 actor_loss=0.2786 critic_loss=128799721062.4000 entropy=17.6919 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 70720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-587054.3 mean_steps=12.8
|
|
[Episode 70730] reward=-115100413.3 actor_loss=0.2879 critic_loss=152263006021.8182 entropy=17.7045 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 70740] reward=-118185142.7 actor_loss=0.2426 critic_loss=132919585147.2593 entropy=17.7071 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 70740] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-320747.0 mean_steps=18.1
|
|
[Episode 70750] reward=-116931474.1 actor_loss=0.2416 critic_loss=126977631573.3333 entropy=17.7071 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 70760] reward=-119917985.3 actor_loss=0.3157 critic_loss=131615547782.0952 entropy=17.7066 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 70760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-375781.0 mean_steps=15.3
|
|
[Episode 70770] reward=-116741394.5 actor_loss=0.2721 critic_loss=122597955356.4444 entropy=17.7038 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 70780] reward=-116547831.8 actor_loss=0.3713 critic_loss=124364377770.6667 entropy=17.6974 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 70780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531363.3 mean_steps=13.6
|
|
[Episode 70790] reward=-122880502.4 actor_loss=0.2504 critic_loss=132692726735.2381 entropy=17.7030 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 70800] reward=-113658684.8 actor_loss=0.3111 critic_loss=128158624256.0000 entropy=17.7072 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 70800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-559943.0 mean_steps=14.6
|
|
[Episode 70810] reward=-117408727.2 actor_loss=0.3477 critic_loss=128036326968.8889 entropy=17.7090 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 70820] reward=-118185032.5 actor_loss=0.4104 critic_loss=129201911249.4545 entropy=17.7264 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 70820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476036.8 mean_steps=13.8
|
|
[Episode 70830] reward=-115788374.8 actor_loss=0.2605 critic_loss=126810315161.6000 entropy=17.7198 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 70840] reward=-115852736.4 actor_loss=0.2514 critic_loss=128241081046.7097 entropy=17.7142 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 70840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-568303.8 mean_steps=12.5
|
|
[Episode 70850] reward=-117989405.1 actor_loss=0.2575 critic_loss=130476119276.3077 entropy=17.7084 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 70860] reward=-120183726.7 actor_loss=0.1971 critic_loss=130079997761.4884 entropy=17.7087 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 70860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-506847.5 mean_steps=13.4
|
|
[Episode 70870] reward=-112311334.4 actor_loss=0.3107 critic_loss=124631018609.7778 entropy=17.7018 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 70880] reward=-115182591.6 actor_loss=0.2614 critic_loss=131509332650.6667 entropy=17.7047 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 70880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566231.7 mean_steps=12.6
|
|
[Episode 70890] reward=-119156685.5 actor_loss=0.2684 critic_loss=129826688808.4211 entropy=17.6886 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 70900] reward=-119763329.9 actor_loss=0.2492 critic_loss=127823813138.9630 entropy=17.6946 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 70900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554397.4 mean_steps=13.4
|
|
[Episode 70910] reward=-122479837.7 actor_loss=0.2471 critic_loss=137369752234.6667 entropy=17.6878 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 70920] reward=-116881925.5 actor_loss=0.3069 critic_loss=127033843029.3333 entropy=17.6948 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 70920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488726.6 mean_steps=14.1
|
|
[Episode 70930] reward=-116681463.5 actor_loss=0.2721 critic_loss=127778982045.5385 entropy=17.6980 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 70940] reward=-120338116.1 actor_loss=0.2796 critic_loss=128471576700.1212 entropy=17.6927 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 70940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581734.6 mean_steps=12.8
|
|
[Episode 70950] reward=-115430800.3 actor_loss=0.2569 critic_loss=123910182157.4737 entropy=17.6918 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 70960] reward=-118463492.2 actor_loss=0.2526 critic_loss=129806113522.5263 entropy=17.6715 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 70960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-435450.2 mean_steps=17.9
|
|
[Episode 70970] reward=-121840335.7 actor_loss=0.3264 critic_loss=132155529058.4615 entropy=17.6771 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 70980] reward=-118430201.0 actor_loss=0.2417 critic_loss=128617435591.1111 entropy=17.6791 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 70980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-548508.8 mean_steps=12.6
|
|
[Episode 70990] reward=-121344232.6 actor_loss=0.2617 critic_loss=134361224657.4545 entropy=17.6941 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 71000] reward=-121714587.2 actor_loss=0.3457 critic_loss=135049882828.8000 entropy=17.6910 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 71000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564294.0 mean_steps=13.6
|
|
[Episode 71010] reward=-114227025.8 actor_loss=0.3044 critic_loss=130532487300.1290 entropy=17.6923 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 71020] reward=-119323432.7 actor_loss=0.2168 critic_loss=134572469513.4815 entropy=17.6942 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 71020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-526576.6 mean_steps=15.5
|
|
[Episode 71030] reward=-114680084.5 actor_loss=0.4056 critic_loss=157006024388.9231 entropy=17.7093 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 71040] reward=-115470421.0 actor_loss=0.3344 critic_loss=124172051342.2222 entropy=17.7255 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 71040] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-615660.6 mean_steps=11.9
|
|
[Episode 71050] reward=-117375219.1 actor_loss=0.3165 critic_loss=127570674944.0000 entropy=17.7179 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 71060] reward=-117604325.5 actor_loss=0.3015 critic_loss=132199150387.2000 entropy=17.7199 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 71060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526034.3 mean_steps=13.2
|
|
[Episode 71070] reward=-117216408.9 actor_loss=0.2297 critic_loss=132298402786.7429 entropy=17.7201 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 71080] reward=-116891946.6 actor_loss=0.3710 critic_loss=130726764544.0000 entropy=17.7150 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 71080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-486873.0 mean_steps=15.7
|
|
[Episode 71090] reward=-113392923.6 actor_loss=0.3725 critic_loss=121531002606.9333 entropy=17.7185 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 71100] reward=-120187978.4 actor_loss=0.2890 critic_loss=127817423189.3333 entropy=17.7150 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 71100] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-653961.2 mean_steps=12.1
|
|
[Episode 71110] reward=-119475794.9 actor_loss=0.3218 critic_loss=125452071335.7241 entropy=17.7209 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 71120] reward=-112451211.3 actor_loss=0.2412 critic_loss=124333312558.5455 entropy=17.7132 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 71120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480198.5 mean_steps=15.1
|
|
[Episode 71130] reward=-113134226.2 actor_loss=0.3563 critic_loss=120997649729.8286 entropy=17.7115 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 71140] reward=-117196532.2 actor_loss=0.4059 critic_loss=125850699851.8519 entropy=17.7033 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 71140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432073.3 mean_steps=16.0
|
|
[Episode 71150] reward=-116142872.6 actor_loss=0.2965 critic_loss=132526070988.8000 entropy=17.7091 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 71160] reward=-119173538.9 actor_loss=0.3633 critic_loss=131073922885.8182 entropy=17.7180 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 71160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429630.8 mean_steps=15.7
|
|
[Episode 71170] reward=-121967125.6 actor_loss=0.2917 critic_loss=163319800164.1739 entropy=17.7151 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 71180] reward=-117115885.5 actor_loss=0.3166 critic_loss=128182974102.5882 entropy=17.7115 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 71180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600034.0 mean_steps=12.7
|
|
[Episode 71190] reward=-119137978.5 actor_loss=0.3864 critic_loss=131550550173.5385 entropy=17.7008 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 71200] reward=-122326892.5 actor_loss=0.3032 critic_loss=134633221198.7692 entropy=17.6993 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 71200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443171.1 mean_steps=15.4
|
|
[Episode 71210] reward=-117806487.6 actor_loss=0.2688 critic_loss=123597241856.0000 entropy=17.6969 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 71220] reward=-114915499.4 actor_loss=0.3321 critic_loss=124270419285.3333 entropy=17.6910 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 71220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499107.4 mean_steps=14.1
|
|
[Episode 71230] reward=-119547052.4 actor_loss=0.2773 critic_loss=134152354394.3529 entropy=17.6886 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 71240] reward=-117424254.9 actor_loss=0.2750 critic_loss=125017401246.4762 entropy=17.6699 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 71240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427058.6 mean_steps=14.7
|
|
[Episode 71250] reward=-114042243.4 actor_loss=0.4217 critic_loss=129356611584.0000 entropy=17.6784 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 71260] reward=-120956752.2 actor_loss=0.3597 critic_loss=132271515089.4545 entropy=17.6780 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 71260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-561581.6 mean_steps=13.1
|
|
[Episode 71270] reward=-114514060.8 actor_loss=0.2963 critic_loss=125193668864.0000 entropy=17.6702 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 71280] reward=-114624194.7 actor_loss=0.2911 critic_loss=122888306688.0000 entropy=17.6527 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 71280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-584648.1 mean_steps=14.1
|
|
[Episode 71290] reward=-115296148.8 actor_loss=0.2697 critic_loss=126141082158.5455 entropy=17.6420 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 71300] reward=-117707015.0 actor_loss=0.2487 critic_loss=128851619384.8889 entropy=17.6313 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 71300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508015.8 mean_steps=14.0
|
|
[Episode 71310] reward=-122053915.2 actor_loss=0.1941 critic_loss=131931366058.6667 entropy=17.6323 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 71320] reward=-122228446.1 actor_loss=0.2724 critic_loss=133673358801.4545 entropy=17.6309 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 71320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-481658.4 mean_steps=13.2
|
|
[Episode 71330] reward=-114955192.7 actor_loss=0.3300 critic_loss=144609817334.5185 entropy=17.6244 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 71340] reward=-118070636.9 actor_loss=0.2977 critic_loss=129374051328.0000 entropy=17.6209 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 71340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-628023.6 mean_steps=12.1
|
|
[Episode 71350] reward=-118762134.0 actor_loss=0.1816 critic_loss=125231748534.8571 entropy=17.6106 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 71360] reward=-114809849.7 actor_loss=0.3323 critic_loss=129343174726.6207 entropy=17.6188 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 71360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-626067.8 mean_steps=11.9
|
|
[Episode 71370] reward=-114470148.5 actor_loss=0.3737 critic_loss=117964483885.1765 entropy=17.6170 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 71380] reward=-118638876.9 actor_loss=0.3340 critic_loss=130631648256.0000 entropy=17.6099 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 71380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-490294.9 mean_steps=16.6
|
|
[Episode 71390] reward=-115064860.6 actor_loss=0.3753 critic_loss=126099050203.4286 entropy=17.6115 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 71400] reward=-117388726.8 actor_loss=0.3023 critic_loss=122683890483.2000 entropy=17.6107 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 71400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603208.6 mean_steps=12.8
|
|
[Episode 71410] reward=-120512860.9 actor_loss=0.3346 critic_loss=129961143910.4000 entropy=17.6110 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 71420] reward=-119261234.8 actor_loss=0.2842 critic_loss=128295065041.4545 entropy=17.6056 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 71420] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-428149.5 mean_steps=17.5
|
|
[Episode 71430] reward=-114948419.7 actor_loss=0.3884 critic_loss=131342350522.1818 entropy=17.5996 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 71440] reward=-118561704.4 actor_loss=0.2323 critic_loss=128187934827.7895 entropy=17.5917 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 71440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-510397.9 mean_steps=15.2
|
|
[Episode 71450] reward=-113875081.8 actor_loss=0.2843 critic_loss=131054448054.8571 entropy=17.6057 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 71460] reward=-117097488.9 actor_loss=0.3290 critic_loss=125521762011.4286 entropy=17.5982 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 71460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-550245.8 mean_steps=12.3
|
|
[Episode 71470] reward=-113404819.4 actor_loss=0.2949 critic_loss=119331745336.8889 entropy=17.5943 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 71480] reward=-119814029.2 actor_loss=0.3421 critic_loss=127785279131.8261 entropy=17.5895 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 71480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461517.9 mean_steps=14.9
|
|
[Episode 71490] reward=-119392161.0 actor_loss=0.1898 critic_loss=129255954773.3333 entropy=17.5695 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 71500] reward=-118538350.1 actor_loss=0.3335 critic_loss=130888457584.6400 entropy=17.5563 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 71500] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-302884.0 mean_steps=16.4
|
|
[Episode 71510] reward=-119273710.9 actor_loss=0.3110 critic_loss=134559777938.2857 entropy=17.5611 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 71520] reward=-110401085.6 actor_loss=0.2629 critic_loss=114109106858.6667 entropy=17.5706 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 71520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492128.4 mean_steps=15.0
|
|
[Episode 71530] reward=-114002465.2 actor_loss=0.3658 critic_loss=124100454257.1163 entropy=17.5727 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 71540] reward=-115794949.3 actor_loss=0.2590 critic_loss=123056627463.7576 entropy=17.5789 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 71540] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-676109.5 mean_steps=10.5
|
|
[Episode 71550] reward=-118996914.3 actor_loss=0.3009 critic_loss=130983336056.4706 entropy=17.5717 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 71560] reward=-112670598.2 actor_loss=0.2649 critic_loss=124782162664.7273 entropy=17.5689 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 71560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-672236.1 mean_steps=13.2
|
|
[Episode 71570] reward=-116779161.2 actor_loss=0.2750 critic_loss=123968645178.5143 entropy=17.5816 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 71580] reward=-117749325.8 actor_loss=0.2543 critic_loss=124050773138.2857 entropy=17.5835 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 71580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590658.9 mean_steps=13.8
|
|
[Episode 71590] reward=-116217929.2 actor_loss=0.2809 critic_loss=124424648801.5238 entropy=17.5751 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 71600] reward=-115388841.1 actor_loss=0.2338 critic_loss=122502498906.3529 entropy=17.5623 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 71600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580641.0 mean_steps=12.9
|
|
[Episode 71610] reward=-118827436.4 actor_loss=0.2798 critic_loss=122783756492.8000 entropy=17.5734 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 71620] reward=-113567148.9 actor_loss=0.3818 critic_loss=126586328746.6667 entropy=17.5768 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 71620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532850.0 mean_steps=13.6
|
|
[Episode 71630] reward=-117893671.5 actor_loss=0.4168 critic_loss=131340207941.8182 entropy=17.5761 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 71640] reward=-116835288.0 actor_loss=0.2129 critic_loss=125550689441.6842 entropy=17.5640 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 71640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-356841.6 mean_steps=14.9
|
|
[Episode 71650] reward=-118882460.6 actor_loss=0.3208 critic_loss=246768344784.5926 entropy=17.5674 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 71660] reward=-115579684.3 actor_loss=0.2952 critic_loss=127036458028.5217 entropy=17.5796 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 71660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-433956.5 mean_steps=16.6
|
|
[Episode 71670] reward=-114460727.9 actor_loss=0.2226 critic_loss=122268605170.5263 entropy=17.5730 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 71680] reward=-115572928.5 actor_loss=0.3322 critic_loss=123967715425.5238 entropy=17.5684 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 71680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507983.5 mean_steps=13.2
|
|
[Episode 71690] reward=-110172407.0 actor_loss=0.3054 critic_loss=114647369728.0000 entropy=17.5624 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 71700] reward=-119495737.3 actor_loss=0.2533 critic_loss=131989779322.4348 entropy=17.5712 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 71700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-606600.8 mean_steps=13.2
|
|
[Episode 71710] reward=-116697794.3 actor_loss=0.3391 critic_loss=129809619041.5238 entropy=17.5817 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 71720] reward=-118105762.3 actor_loss=0.3497 critic_loss=138552187634.5263 entropy=17.5801 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 71720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-683446.7 mean_steps=12.4
|
|
[Episode 71730] reward=-118155805.6 actor_loss=0.3293 critic_loss=129487207424.0000 entropy=17.5875 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 71740] reward=-117345482.6 actor_loss=0.2793 critic_loss=123748300920.4706 entropy=17.5891 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 71740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523805.1 mean_steps=14.2
|
|
[Episode 71750] reward=-116083762.2 actor_loss=0.2783 critic_loss=123668253481.6744 entropy=17.5907 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 71760] reward=-117032167.2 actor_loss=0.2485 critic_loss=120167374438.4000 entropy=17.5979 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 71760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-585207.7 mean_steps=11.8
|
|
[Episode 71770] reward=-124767641.2 actor_loss=0.2028 critic_loss=137208381732.5714 entropy=17.5779 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 71780] reward=-111512309.9 actor_loss=0.3261 critic_loss=118265563709.4400 entropy=17.5714 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 71780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473331.0 mean_steps=14.7
|
|
[Episode 71790] reward=-124074438.0 actor_loss=0.2923 critic_loss=131161979438.5455 entropy=17.5550 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 71800] reward=-115736938.2 actor_loss=0.2912 critic_loss=127504878250.6667 entropy=17.5527 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 71800] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-688138.2 mean_steps=10.7
|
|
[Episode 71810] reward=-119399515.5 actor_loss=0.3274 critic_loss=131092423338.6667 entropy=17.5463 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 71820] reward=-120229291.8 actor_loss=0.2297 critic_loss=131607560378.1818 entropy=17.5474 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 71820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-501599.7 mean_steps=12.3
|
|
[Episode 71830] reward=-112861802.1 actor_loss=0.2802 critic_loss=118417163176.2286 entropy=17.5486 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 71840] reward=-113479112.0 actor_loss=0.1829 critic_loss=119610971914.2400 entropy=17.5451 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 71840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-626804.7 mean_steps=11.8
|
|
[Episode 71850] reward=-120381517.8 actor_loss=0.2715 critic_loss=133457786368.0000 entropy=17.5432 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 71860] reward=-112640978.7 actor_loss=0.3096 critic_loss=122399321353.4815 entropy=17.5331 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 71860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503498.2 mean_steps=15.2
|
|
[Episode 71870] reward=-115069124.3 actor_loss=0.2493 critic_loss=119926595584.0000 entropy=17.5367 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 71880] reward=-116028118.8 actor_loss=0.3269 critic_loss=124074905972.3636 entropy=17.5476 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 71880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-480103.6 mean_steps=16.1
|
|
[Episode 71890] reward=-116816300.7 actor_loss=0.1741 critic_loss=120239709851.8261 entropy=17.5508 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 71900] reward=-117246022.3 actor_loss=0.2997 critic_loss=123791430400.0000 entropy=17.5425 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 71900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524918.5 mean_steps=13.3
|
|
[Episode 71910] reward=-110793298.6 actor_loss=0.3832 critic_loss=132459107123.2000 entropy=17.5334 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 71920] reward=-110873796.4 actor_loss=0.2817 critic_loss=119852817066.6667 entropy=17.5504 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 71920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495515.8 mean_steps=15.2
|
|
[Episode 71930] reward=-117679583.0 actor_loss=0.2126 critic_loss=129579078113.8824 entropy=17.5511 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 71940] reward=-113126825.2 actor_loss=0.2255 critic_loss=114345806961.7778 entropy=17.5404 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 71940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-408695.4 mean_steps=16.4
|
|
[Episode 71950] reward=-116605517.4 actor_loss=0.2961 critic_loss=125960552057.9048 entropy=17.5376 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 71960] reward=-116946441.3 actor_loss=0.2721 critic_loss=124180057960.2963 entropy=17.5562 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 71960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-531934.9 mean_steps=12.7
|
|
[Episode 71970] reward=-121085620.7 actor_loss=0.2129 critic_loss=131760377692.1600 entropy=17.5645 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 71980] reward=-114850377.9 actor_loss=0.4111 critic_loss=121499531574.3030 entropy=17.5653 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 71980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-460327.8 mean_steps=16.1
|
|
[Episode 71990] reward=-118650231.6 actor_loss=0.2791 critic_loss=130024299026.9630 entropy=17.5634 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 72000] reward=-117674338.0 actor_loss=0.3742 critic_loss=132386376089.6000 entropy=17.5513 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 72000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-468600.6 mean_steps=16.4
|
|
[Episode 72010] reward=-116343964.6 actor_loss=0.3355 critic_loss=135229147363.5556 entropy=17.5535 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 72020] reward=-115064740.5 actor_loss=0.3206 critic_loss=122208172655.3044 entropy=17.5556 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 72020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-590126.0 mean_steps=11.8
|
|
[Episode 72030] reward=-121105923.7 actor_loss=0.2067 critic_loss=126488488072.5333 entropy=17.5432 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 72040] reward=-113386518.1 actor_loss=0.2982 critic_loss=119908575641.6000 entropy=17.5414 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 72040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-460421.2 mean_steps=16.2
|
|
[Episode 72050] reward=-110640418.3 actor_loss=0.3028 critic_loss=121665219632.7619 entropy=17.5553 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 72060] reward=-111614409.6 actor_loss=0.2799 critic_loss=116856181077.3333 entropy=17.5645 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 72060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-503024.8 mean_steps=16.5
|
|
[Episode 72070] reward=-109791873.1 actor_loss=0.3939 critic_loss=119802724165.8182 entropy=17.5850 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 72080] reward=-118988266.7 actor_loss=0.2473 critic_loss=127839275083.8519 entropy=17.6014 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 72080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-607063.0 mean_steps=12.1
|
|
[Episode 72090] reward=-121176527.2 actor_loss=0.2837 critic_loss=131713691062.8571 entropy=17.5890 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 72100] reward=-121519023.2 actor_loss=0.2857 critic_loss=350826229304.8889 entropy=17.5834 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 72100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-606041.9 mean_steps=14.1
|
|
[Episode 72110] reward=-121313156.3 actor_loss=0.3561 critic_loss=261469055853.7143 entropy=17.5892 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 72120] reward=-116173334.1 actor_loss=0.3029 critic_loss=124982595451.8710 entropy=17.6001 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 72120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440697.8 mean_steps=14.9
|
|
[Episode 72130] reward=-114975600.5 actor_loss=0.4040 critic_loss=130734086826.6667 entropy=17.5993 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 72140] reward=-114855988.1 actor_loss=0.2716 critic_loss=129515220764.4444 entropy=17.6039 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 72140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547301.3 mean_steps=14.3
|
|
[Episode 72150] reward=-114050921.4 actor_loss=0.3123 critic_loss=122836273652.6222 entropy=17.5958 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 72160] reward=-114977681.9 actor_loss=0.2593 critic_loss=130848283989.3333 entropy=17.6121 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 72160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491376.2 mean_steps=15.2
|
|
[Episode 72170] reward=-120204582.5 actor_loss=0.2426 critic_loss=127610653602.9091 entropy=17.6213 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 72180] reward=-115372354.9 actor_loss=0.3131 critic_loss=130082695547.2593 entropy=17.6246 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 72180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421497.0 mean_steps=15.9
|
|
[Episode 72190] reward=-206176080.0 actor_loss=4.4348 critic_loss=29260507571086.2227 entropy=17.6350 approx_kl=0.0044 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 72200] reward=-124785083.5 actor_loss=0.3086 critic_loss=420606497713.2308 entropy=17.6339 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 72200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-426845.0 mean_steps=14.7
|
|
[Episode 72210] reward=-118518519.5 actor_loss=0.2328 critic_loss=124139075646.0606 entropy=17.6296 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 72220] reward=-112097345.8 actor_loss=0.3725 critic_loss=121153910404.7407 entropy=17.6308 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 72220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-471012.4 mean_steps=15.8
|
|
[Episode 72230] reward=-114703798.1 actor_loss=0.2414 critic_loss=129872851968.0000 entropy=17.6229 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 72240] reward=-117867282.2 actor_loss=0.3735 critic_loss=126312511351.4667 entropy=17.6161 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 72240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-464105.1 mean_steps=15.9
|
|
[Episode 72250] reward=-114474717.5 actor_loss=0.2922 critic_loss=117911054581.7600 entropy=17.6320 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 72260] reward=-113676669.8 actor_loss=0.4178 critic_loss=118172670414.4516 entropy=17.6326 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 72260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-541675.4 mean_steps=12.2
|
|
[Episode 72270] reward=-118515660.1 actor_loss=0.2931 critic_loss=130077547568.7619 entropy=17.6275 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 72280] reward=-120560063.5 actor_loss=0.3368 critic_loss=130690439249.9200 entropy=17.6210 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 72280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-639684.6 mean_steps=12.9
|
|
[Episode 72290] reward=-587850967.9 actor_loss=3.7464 critic_loss=453489575661841.0625 entropy=17.6146 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 72300] reward=-114119501.3 actor_loss=0.3032 critic_loss=121097857469.2174 entropy=17.6186 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 72300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-569018.2 mean_steps=14.8
|
|
[Episode 72310] reward=-117451610.8 actor_loss=0.3408 critic_loss=128671980122.3529 entropy=17.6100 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 72320] reward=-112018857.6 actor_loss=0.3166 critic_loss=119276146141.8667 entropy=17.6122 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 72320] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-648184.1 mean_steps=11.3
|
|
[Episode 72330] reward=-122239040.8 actor_loss=0.2487 critic_loss=144577381662.7200 entropy=17.6188 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 72340] reward=-117234507.8 actor_loss=0.1825 critic_loss=130193899975.1111 entropy=17.6177 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 72340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-598103.4 mean_steps=12.9
|
|
[Episode 72350] reward=-118218769.1 actor_loss=0.3318 critic_loss=139929352874.6667 entropy=17.6102 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 72360] reward=-121762070.1 actor_loss=0.3020 critic_loss=129298241080.8889 entropy=17.6192 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 72360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570379.5 mean_steps=13.7
|
|
[Episode 72370] reward=-117695276.7 actor_loss=0.2787 critic_loss=124783254277.6889 entropy=17.6198 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 72380] reward=-117832284.6 actor_loss=0.0898 critic_loss=124243619547.4286 entropy=17.6376 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 72380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501250.6 mean_steps=14.2
|
|
[Episode 72390] reward=-118941014.6 actor_loss=0.1357 critic_loss=130403622383.4839 entropy=17.6443 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 72400] reward=-140917240.0 actor_loss=0.3478 critic_loss=1914651918832.4849 entropy=17.6623 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 72400] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-626317.7 mean_steps=11.8
|
|
[Episode 72410] reward=-117889223.3 actor_loss=0.2474 critic_loss=127461813760.0000 entropy=17.6711 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 72420] reward=-236137200.0 actor_loss=0.3312 critic_loss=51340225293445.5625 entropy=17.6841 approx_kl=0.0042 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 72420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571532.8 mean_steps=12.9
|
|
[Episode 72430] reward=-149153366.8 actor_loss=0.3633 critic_loss=4361032920086.7554 entropy=17.6908 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 72440] reward=-110437815.4 actor_loss=0.3360 critic_loss=119408369117.8667 entropy=17.7003 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 72440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-374830.6 mean_steps=15.4
|
|
[Episode 72450] reward=-117205218.2 actor_loss=0.3525 critic_loss=129483235328.0000 entropy=17.7155 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 72460] reward=-118443641.7 actor_loss=0.2950 critic_loss=131069382283.6364 entropy=17.7329 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 72460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-617832.4 mean_steps=12.1
|
|
[Episode 72470] reward=-92326257395.7 actor_loss=583.2700 critic_loss=2858631734300944896.0000 entropy=17.7367 approx_kl=0.0631 kl_stop=1 intervention_rate=0.0306 front_blocked=0
|
|
[Episode 72480] reward=-118311468.0 actor_loss=0.3552 critic_loss=129790138368.0000 entropy=17.7405 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 72480] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-382409.0 mean_steps=17.9
|
|
[Episode 72490] reward=-121101470.0 actor_loss=0.2386 critic_loss=130266896793.6000 entropy=17.7447 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 72500] reward=-117530087.5 actor_loss=0.3684 critic_loss=128078298654.1176 entropy=17.7505 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 72500] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-375561.4 mean_steps=17.1
|
|
[Episode 72510] reward=-18682953443.7 actor_loss=523.5656 critic_loss=239299597782354784.0000 entropy=17.7578 approx_kl=0.0058 kl_stop=0 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 72520] reward=-8932512204.1 actor_loss=0.2178 critic_loss=79970832098851408.0000 entropy=17.7620 approx_kl=-0.0017 kl_stop=0 intervention_rate=0.0990 front_blocked=0
|
|
[Eval 72520] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-687630.6 mean_steps=11.7
|
|
[Episode 72530] reward=-14868395176.5 actor_loss=3.4081 critic_loss=233094857052756384.0000 entropy=17.7597 approx_kl=0.3869 kl_stop=1 intervention_rate=0.1003 front_blocked=0
|
|
[Episode 72540] reward=-46327853795.7 actor_loss=181.3190 critic_loss=1675330082036864256.0000 entropy=17.7646 approx_kl=0.3769 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 72540] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-375059.7 mean_steps=17.1
|
|
[Episode 72550] reward=-122380409.8 actor_loss=0.3419 critic_loss=138483080487.8222 entropy=17.7700 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 72560] reward=-469106981.0 actor_loss=0.2076 critic_loss=391666585701216.6875 entropy=17.7766 approx_kl=0.0006 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Eval 72560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489974.9 mean_steps=15.3
|
|
[Episode 72570] reward=-620384149.5 actor_loss=8.6500 critic_loss=728698803541333.3750 entropy=17.7734 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 72580] reward=-516749348.7 actor_loss=2.1963 critic_loss=432375000069279.3125 entropy=17.7784 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 72580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574583.0 mean_steps=13.6
|
|
[Episode 72590] reward=-444136247.0 actor_loss=0.2570 critic_loss=331057732663523.5625 entropy=17.7921 approx_kl=0.0011 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 72600] reward=-118106250.3 actor_loss=0.2313 critic_loss=134232175469.7143 entropy=17.8027 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 72600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513565.0 mean_steps=14.0
|
|
[Episode 72610] reward=-118701817.2 actor_loss=0.2178 critic_loss=168714480298.6667 entropy=17.7984 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 72620] reward=-120756600.0 actor_loss=0.2272 critic_loss=135809202778.3529 entropy=17.8001 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 72620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592264.9 mean_steps=12.6
|
|
[Episode 72630] reward=-112468107.7 actor_loss=0.3674 critic_loss=124165871115.3778 entropy=17.7992 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 72640] reward=-112721635.3 actor_loss=0.2708 critic_loss=125056733821.1555 entropy=17.8003 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 72640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545334.5 mean_steps=13.4
|
|
[Episode 72650] reward=-119792146.9 actor_loss=0.3009 critic_loss=135364172109.3954 entropy=17.8099 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 72660] reward=-169542968.6 actor_loss=0.3420 critic_loss=12453129291408.4102 entropy=17.8083 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 72660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548772.6 mean_steps=14.3
|
|
[Episode 72670] reward=-111051507.1 actor_loss=0.3595 critic_loss=120236237970.2857 entropy=17.7731 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 72680] reward=-117915208.2 actor_loss=0.3885 critic_loss=130207268864.0000 entropy=17.7740 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 72680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588159.3 mean_steps=13.5
|
|
[Episode 72690] reward=-121966781.0 actor_loss=0.3091 critic_loss=138761368420.8485 entropy=17.7799 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 72700] reward=-118955183.8 actor_loss=0.2483 critic_loss=130897256083.9111 entropy=17.7847 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 72700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-521373.0 mean_steps=13.1
|
|
[Episode 72710] reward=-883494419.2 actor_loss=1.4471 critic_loss=1645600418109253.7500 entropy=17.7707 approx_kl=0.0017 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 72720] reward=-119759080.7 actor_loss=0.2547 critic_loss=129584221525.3333 entropy=17.7726 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 72720] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-729829.9 mean_steps=10.7
|
|
[Episode 72730] reward=-120470486.4 actor_loss=0.3338 critic_loss=134373423261.5385 entropy=17.7660 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 72740] reward=-119298469.5 actor_loss=0.3758 critic_loss=152084627456.0000 entropy=17.7729 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 72740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520286.6 mean_steps=14.7
|
|
[Episode 72750] reward=-118937142.0 actor_loss=0.2289 critic_loss=128839786496.0000 entropy=17.7776 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 72760] reward=-113950137.6 actor_loss=0.3264 critic_loss=129852621892.2667 entropy=17.7807 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 72760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468026.7 mean_steps=14.9
|
|
[Episode 72770] reward=-110093529.4 actor_loss=0.4168 critic_loss=122459505198.5455 entropy=17.7745 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 72780] reward=-115966861.9 actor_loss=0.3351 critic_loss=127760993952.9143 entropy=17.7673 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 72780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464715.5 mean_steps=14.7
|
|
[Episode 72790] reward=-122252881.5 actor_loss=0.3153 critic_loss=139417274014.8965 entropy=17.7675 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 72800] reward=-118877350.5 actor_loss=0.3072 critic_loss=128312715491.5556 entropy=17.7660 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 72800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521596.4 mean_steps=14.2
|
|
[Episode 72810] reward=-218466465.8 actor_loss=0.2339 critic_loss=38523987383637.3359 entropy=17.7537 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 72820] reward=-233793203.8 actor_loss=0.3421 critic_loss=49474350251303.8203 entropy=17.7685 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 72820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-597805.0 mean_steps=13.9
|
|
[Episode 72830] reward=-121360239.4 actor_loss=0.2855 critic_loss=131867388376.6154 entropy=17.7849 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 72840] reward=-116068647.7 actor_loss=0.3316 critic_loss=126102988642.4615 entropy=17.7850 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 72840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-535680.7 mean_steps=12.6
|
|
[Episode 72850] reward=-118980033.5 actor_loss=0.2889 critic_loss=128969812787.2000 entropy=17.7677 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 72860] reward=-112178089.8 actor_loss=0.3232 critic_loss=122584519826.2857 entropy=17.7575 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 72860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550512.2 mean_steps=13.6
|
|
[Episode 72870] reward=-114939620.3 actor_loss=0.3081 critic_loss=123838536775.4419 entropy=17.7469 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 72880] reward=-116865628.9 actor_loss=0.3874 critic_loss=126464316269.7143 entropy=17.7325 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 72880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426660.5 mean_steps=15.6
|
|
[Episode 72890] reward=-121869269.9 actor_loss=0.2208 critic_loss=141135999510.2609 entropy=17.7291 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 72900] reward=-115872720.2 actor_loss=0.3679 critic_loss=125818550954.6667 entropy=17.7251 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 72900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-435977.6 mean_steps=16.6
|
|
[Episode 72910] reward=-117184768.1 actor_loss=0.2409 critic_loss=131938687534.5455 entropy=17.7037 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 72920] reward=-267063864.3 actor_loss=1.6613 critic_loss=82413343323867.4219 entropy=17.6939 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 72920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553081.9 mean_steps=14.6
|
|
[Episode 72930] reward=-113825867.6 actor_loss=0.3883 critic_loss=127617603149.5758 entropy=17.7048 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 72940] reward=-115355853.2 actor_loss=0.4553 critic_loss=124616770383.4483 entropy=17.7173 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 72940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-544965.8 mean_steps=12.3
|
|
[Episode 72950] reward=-119217384.8 actor_loss=0.2630 critic_loss=128262724656.7619 entropy=17.7140 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 72960] reward=-115702303.6 actor_loss=0.4189 critic_loss=125813222058.6667 entropy=17.7287 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 72960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418750.5 mean_steps=15.3
|
|
[Episode 72970] reward=-115894797.9 actor_loss=0.3502 critic_loss=124126300842.6667 entropy=17.7353 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 72980] reward=-117442207.3 actor_loss=0.3766 critic_loss=126848406505.2444 entropy=17.7281 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 72980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-505609.3 mean_steps=15.3
|
|
[Episode 72990] reward=-116396896.0 actor_loss=0.3595 critic_loss=123932318378.6667 entropy=17.7367 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 73000] reward=-115166570.8 actor_loss=0.4271 critic_loss=131284474880.0000 entropy=17.7370 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 73000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-638194.4 mean_steps=12.0
|
|
[Episode 73010] reward=-201838339.0 actor_loss=11.2113 critic_loss=25556413928789.3320 entropy=17.7283 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 73020] reward=-114534362.6 actor_loss=0.1850 critic_loss=133985693240.8889 entropy=17.7529 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 73020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-508426.7 mean_steps=12.7
|
|
[Episode 73030] reward=-113566865.2 actor_loss=0.2397 critic_loss=126135640670.8148 entropy=17.7489 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 73040] reward=-118901061.6 actor_loss=0.2061 critic_loss=132538910492.4444 entropy=17.7486 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 73040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-465654.6 mean_steps=13.9
|
|
[Episode 73050] reward=-122804007.4 actor_loss=0.2301 critic_loss=130666297344.0000 entropy=17.7459 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 73060] reward=-122028015.7 actor_loss=0.2651 critic_loss=134855464960.0000 entropy=17.7315 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 73060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558467.5 mean_steps=13.7
|
|
[Episode 73070] reward=-113386882.3 actor_loss=0.3274 critic_loss=126392084359.5294 entropy=17.7195 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 73080] reward=-116344902.2 actor_loss=0.3172 critic_loss=128167097437.0909 entropy=17.7138 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 73080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584409.4 mean_steps=12.8
|
|
[Episode 73090] reward=-119692484.4 actor_loss=0.2938 critic_loss=131821910698.6667 entropy=17.7122 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 73100] reward=-117917420.8 actor_loss=0.3215 critic_loss=129935361560.3810 entropy=17.7268 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 73100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480968.6 mean_steps=13.9
|
|
[Episode 73110] reward=-115440634.0 actor_loss=0.2416 critic_loss=128275424072.2051 entropy=17.7352 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 73120] reward=-119198441.1 actor_loss=0.2231 critic_loss=130247256250.1818 entropy=17.7242 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 73120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-561618.8 mean_steps=12.8
|
|
[Episode 73130] reward=-120217261.6 actor_loss=0.3016 critic_loss=132483917637.8182 entropy=17.7237 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 73140] reward=-112433980.0 actor_loss=0.3582 critic_loss=120576207712.7111 entropy=17.7338 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 73140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525510.2 mean_steps=14.1
|
|
[Episode 73150] reward=-126394475.4 actor_loss=0.2773 critic_loss=207395251159.0400 entropy=17.7418 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 73160] reward=-360906342.3 actor_loss=0.2130 critic_loss=193902899573555.1875 entropy=17.7466 approx_kl=0.0041 kl_stop=1 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 73160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526635.9 mean_steps=14.2
|
|
[Episode 73170] reward=-119321716.0 actor_loss=0.2281 critic_loss=125669261019.4286 entropy=17.7465 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 73180] reward=-122360460.1 actor_loss=0.2616 critic_loss=180402550283.3778 entropy=17.7480 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 73180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-446410.6 mean_steps=16.4
|
|
[Episode 73190] reward=-117907418.7 actor_loss=0.3162 critic_loss=135118380032.0000 entropy=17.7603 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 73200] reward=-138995614.4 actor_loss=0.2322 critic_loss=2216936633437.0908 entropy=17.7705 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 73200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463683.8 mean_steps=13.9
|
|
[Episode 73210] reward=-121462973.7 actor_loss=0.3229 critic_loss=194581164851.2000 entropy=17.7777 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 73220] reward=-112261987.8 actor_loss=0.2725 critic_loss=128060293939.2000 entropy=17.7695 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 73220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584666.5 mean_steps=12.9
|
|
[Episode 73230] reward=-113639212.5 actor_loss=0.4945 critic_loss=126496425921.9394 entropy=17.7744 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 73240] reward=-111797170.8 actor_loss=0.3142 critic_loss=124606309565.6296 entropy=17.8034 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 73240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565449.7 mean_steps=13.7
|
|
[Episode 73250] reward=-119262220.5 actor_loss=0.3404 critic_loss=129370599565.2414 entropy=17.8084 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 73260] reward=-113005461.6 actor_loss=0.3498 critic_loss=123938335948.8000 entropy=17.8082 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 73260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497136.4 mean_steps=15.2
|
|
[Episode 73270] reward=-113910550.0 actor_loss=0.3841 critic_loss=125875396923.0769 entropy=17.8062 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 73280] reward=-116061176.2 actor_loss=0.3035 critic_loss=124281567277.5111 entropy=17.8164 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 73280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589930.9 mean_steps=12.8
|
|
[Episode 73290] reward=-116040447.5 actor_loss=0.2909 critic_loss=129448681472.0000 entropy=17.8169 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 73300] reward=-116801265.2 actor_loss=0.4102 critic_loss=133569450985.2444 entropy=17.8248 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 73300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-561282.8 mean_steps=12.2
|
|
[Episode 73310] reward=-115487852.9 actor_loss=0.2524 critic_loss=125794263917.7143 entropy=17.8283 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 73320] reward=-118877034.2 actor_loss=0.1832 critic_loss=125249363626.6667 entropy=17.8249 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 73320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-414571.0 mean_steps=14.8
|
|
[Episode 73330] reward=-119615364.6 actor_loss=0.2819 critic_loss=131300191072.7111 entropy=17.8222 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 73340] reward=-114412943.3 actor_loss=0.2713 critic_loss=124571666944.0000 entropy=17.8134 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 73340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-478254.0 mean_steps=12.8
|
|
[Episode 73350] reward=-118547268.3 actor_loss=0.3479 critic_loss=127187317321.1429 entropy=17.8241 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 73360] reward=-120551930.6 actor_loss=0.3293 critic_loss=135677928360.2286 entropy=17.8098 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 73360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589568.6 mean_steps=12.8
|
|
[Episode 73370] reward=-117936990.4 actor_loss=0.2888 critic_loss=131940936362.6667 entropy=17.8062 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 73380] reward=-113684261.9 actor_loss=0.2692 critic_loss=127515768539.4286 entropy=17.8038 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 73380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591016.4 mean_steps=13.7
|
|
[Episode 73390] reward=-117624595.5 actor_loss=0.4182 critic_loss=134036145766.4000 entropy=17.8034 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 73400] reward=-117297242.2 actor_loss=0.2915 critic_loss=130235497865.8462 entropy=17.7962 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 73400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549180.9 mean_steps=13.6
|
|
[Episode 73410] reward=-109816588.3 actor_loss=0.3327 critic_loss=118357819695.4074 entropy=17.8136 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 73420] reward=-117811471.5 actor_loss=0.2461 critic_loss=125729143724.9730 entropy=17.7971 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 73420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562905.2 mean_steps=12.7
|
|
[Episode 73430] reward=-116134815.5 actor_loss=0.3510 critic_loss=124996015809.4222 entropy=17.7813 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 73440] reward=-116511792.3 actor_loss=0.2464 critic_loss=126440943721.0256 entropy=17.7816 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 73440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478544.8 mean_steps=14.2
|
|
[Episode 73450] reward=-122184959.5 actor_loss=0.2704 critic_loss=136789267420.6897 entropy=17.7766 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 73460] reward=-114768627.5 actor_loss=0.2803 critic_loss=122299128581.6889 entropy=17.7649 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 73460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421309.8 mean_steps=15.8
|
|
[Episode 73470] reward=-119045029.2 actor_loss=0.3417 critic_loss=128929284096.0000 entropy=17.7563 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 73480] reward=-120085953.8 actor_loss=0.2094 critic_loss=130961065229.4737 entropy=17.7641 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 73480] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-742464.7 mean_steps=12.2
|
|
[Episode 73490] reward=-114807931.6 actor_loss=0.3695 critic_loss=127733079781.5172 entropy=17.7510 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 73500] reward=-125586746.4 actor_loss=0.2315 critic_loss=194922155008.0000 entropy=17.7395 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 73500] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-367797.7 mean_steps=17.1
|
|
[Episode 73510] reward=-113735206.7 actor_loss=0.3606 critic_loss=126290444288.0000 entropy=17.7504 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 73520] reward=-119790621.8 actor_loss=0.2843 critic_loss=133138062474.3784 entropy=17.7399 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 73520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530761.1 mean_steps=13.4
|
|
[Episode 73530] reward=-113601905.5 actor_loss=0.2969 critic_loss=128112169123.8400 entropy=17.7327 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 73540] reward=-116773333.6 actor_loss=0.3566 critic_loss=139941818709.3333 entropy=17.7455 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 73540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442058.5 mean_steps=14.8
|
|
[Episode 73550] reward=-115418224.5 actor_loss=0.2981 critic_loss=125826620893.8667 entropy=17.7391 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 73560] reward=-111173364.6 actor_loss=0.2875 critic_loss=118034779340.8000 entropy=17.7459 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 73560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-399963.6 mean_steps=16.2
|
|
[Episode 73570] reward=-123236870.6 actor_loss=0.2025 critic_loss=134733419081.1429 entropy=17.7509 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 73580] reward=-116046489.5 actor_loss=0.2775 critic_loss=129381360937.2903 entropy=17.7545 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 73580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542910.7 mean_steps=13.1
|
|
[Episode 73590] reward=-116018847.8 actor_loss=0.2367 critic_loss=132988877537.2800 entropy=17.7456 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 73600] reward=-119005850.9 actor_loss=0.3239 critic_loss=132247196547.1219 entropy=17.7459 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 73600] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-369015.4 mean_steps=16.9
|
|
[Episode 73610] reward=-115878169.7 actor_loss=0.2401 critic_loss=128801794906.8387 entropy=17.7477 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 73620] reward=-113587514.7 actor_loss=0.2835 critic_loss=122038241659.2593 entropy=17.7461 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 73620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-460880.2 mean_steps=14.2
|
|
[Episode 73630] reward=-118593254.0 actor_loss=0.2821 critic_loss=168463303724.5217 entropy=17.7487 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 73640] reward=-110964915.1 actor_loss=0.3776 critic_loss=121280995328.0000 entropy=17.7486 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 73640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586221.3 mean_steps=12.8
|
|
[Episode 73650] reward=-115445384.5 actor_loss=0.1912 critic_loss=122751370854.4000 entropy=17.7574 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 73660] reward=-110956759.7 actor_loss=0.2427 critic_loss=118222882523.4286 entropy=17.7588 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 73660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-503137.4 mean_steps=13.3
|
|
[Episode 73670] reward=-118208174.1 actor_loss=0.3323 critic_loss=127987104929.6842 entropy=17.7630 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 73680] reward=-117707584.4 actor_loss=0.3568 critic_loss=128954339164.1600 entropy=17.7504 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 73680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426197.3 mean_steps=15.5
|
|
[Episode 73690] reward=-119731856.6 actor_loss=0.3270 critic_loss=189667553676.3871 entropy=17.7468 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 73700] reward=-112918169.6 actor_loss=0.3719 critic_loss=128769849697.1035 entropy=17.7337 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 73700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556733.3 mean_steps=13.4
|
|
[Episode 73710] reward=-116610206.8 actor_loss=0.3130 critic_loss=127057289871.3600 entropy=17.7467 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 73720] reward=-121238163.5 actor_loss=0.2747 critic_loss=129145941837.9130 entropy=17.7410 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 73720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603861.1 mean_steps=12.8
|
|
[Episode 73730] reward=-117623060.9 actor_loss=0.2878 critic_loss=132419715072.0000 entropy=17.7390 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 73740] reward=-117274040.0 actor_loss=0.4514 critic_loss=144928450373.8182 entropy=17.7305 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 73740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-610704.6 mean_steps=14.7
|
|
[Episode 73750] reward=-112148767.8 actor_loss=0.2249 critic_loss=131571396608.0000 entropy=17.7285 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 73760] reward=-121037354.1 actor_loss=0.2756 critic_loss=135026391267.5556 entropy=17.7203 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 73760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-448094.6 mean_steps=13.7
|
|
[Episode 73770] reward=-118938287.1 actor_loss=0.2583 critic_loss=134105857691.8261 entropy=17.7274 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 73780] reward=-119607383.2 actor_loss=0.2595 critic_loss=128887948008.7273 entropy=17.7379 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 73780] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-310627.7 mean_steps=16.8
|
|
[Episode 73790] reward=-117712356.3 actor_loss=0.3520 critic_loss=125545726696.7273 entropy=17.7220 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 73800] reward=-116218611.8 actor_loss=0.3391 critic_loss=128805926619.4286 entropy=17.7252 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 73800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493468.5 mean_steps=14.2
|
|
[Episode 73810] reward=-115544619.4 actor_loss=0.3172 critic_loss=127392922903.2727 entropy=17.7417 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 73820] reward=-115967406.7 actor_loss=0.3769 critic_loss=125820762269.5385 entropy=17.7313 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 73820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-472638.8 mean_steps=13.8
|
|
[Episode 73830] reward=-118128167.3 actor_loss=0.3250 critic_loss=129847872061.4400 entropy=17.7375 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 73840] reward=-109187160.8 actor_loss=0.3298 critic_loss=124746883306.0571 entropy=17.7243 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 73840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-547585.3 mean_steps=12.7
|
|
[Episode 73850] reward=-114785657.5 actor_loss=0.2436 critic_loss=135078103722.6667 entropy=17.7249 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 73860] reward=-117887952.5 actor_loss=0.3079 critic_loss=124172301032.7273 entropy=17.7241 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 73860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532765.3 mean_steps=14.1
|
|
[Episode 73870] reward=-118299211.3 actor_loss=0.5047 critic_loss=220416103765.3333 entropy=17.7394 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 73880] reward=-119901296.7 actor_loss=0.3465 critic_loss=199825877486.3448 entropy=17.7488 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 73880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-567654.9 mean_steps=14.4
|
|
[Episode 73890] reward=-118554550.1 actor_loss=0.2078 critic_loss=129109024137.8462 entropy=17.7509 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 73900] reward=-137478884.3 actor_loss=0.3723 critic_loss=1678353891328.0000 entropy=17.7304 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 73900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443446.0 mean_steps=15.4
|
|
[Episode 73910] reward=-116413457.6 actor_loss=0.2205 critic_loss=123352930154.1463 entropy=17.7352 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 73920] reward=-114022349.7 actor_loss=0.2574 critic_loss=130179681523.8095 entropy=17.7332 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 73920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513765.9 mean_steps=13.1
|
|
[Episode 73930] reward=-120539140.0 actor_loss=0.2367 critic_loss=132088377148.9524 entropy=17.7206 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 73940] reward=-118676655.1 actor_loss=0.3391 critic_loss=134426814756.5714 entropy=17.7269 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 73940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-551155.3 mean_steps=14.2
|
|
[Episode 73950] reward=-118531706.8 actor_loss=0.1680 critic_loss=132693329812.2105 entropy=17.7117 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 73960] reward=-115252203.2 actor_loss=0.2877 critic_loss=126581303978.6667 entropy=17.7203 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 73960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-337704.0 mean_steps=15.8
|
|
[Episode 73970] reward=-119875485.8 actor_loss=0.2934 critic_loss=132606752194.5600 entropy=17.7343 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 73980] reward=-117652816.6 actor_loss=0.2932 critic_loss=128002435754.6667 entropy=17.7337 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 73980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-527259.2 mean_steps=13.2
|
|
[Episode 73990] reward=-123019872.0 actor_loss=0.2864 critic_loss=192789127168.0000 entropy=17.7329 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 74000] reward=-115482577.7 actor_loss=0.2318 critic_loss=129402925924.8485 entropy=17.7274 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 74000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-628018.6 mean_steps=13.8
|
|
[Episode 74010] reward=-116989345.2 actor_loss=0.3049 critic_loss=144796609280.0000 entropy=17.7192 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 74020] reward=-117902114.3 actor_loss=0.2445 critic_loss=123301955513.3793 entropy=17.7168 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 74020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468292.7 mean_steps=13.4
|
|
[Episode 74030] reward=-117596583.4 actor_loss=0.3016 critic_loss=127415491624.9600 entropy=17.7185 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 74040] reward=-117629652.9 actor_loss=0.4274 critic_loss=126362221879.6522 entropy=17.7190 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 74040] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-610054.2 mean_steps=11.8
|
|
[Episode 74050] reward=-129116487.4 actor_loss=0.2767 critic_loss=1586502696960.0000 entropy=17.7074 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 74060] reward=-111630863.5 actor_loss=0.3799 critic_loss=117999864019.8621 entropy=17.6940 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 74060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-560440.1 mean_steps=14.3
|
|
[Episode 74070] reward=-114611527.5 actor_loss=0.4349 critic_loss=123865618990.5455 entropy=17.6859 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 74080] reward=-116332396.9 actor_loss=0.3206 critic_loss=126258953867.6364 entropy=17.6729 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 74080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513910.4 mean_steps=14.0
|
|
[Episode 74090] reward=-121355401.3 actor_loss=0.2443 critic_loss=130549633730.2069 entropy=17.6764 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 74100] reward=-118789705.2 actor_loss=0.2514 critic_loss=128715839715.5556 entropy=17.6773 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 74100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-494245.5 mean_steps=15.0
|
|
[Episode 74110] reward=-116806676.8 actor_loss=0.3054 critic_loss=126510223360.0000 entropy=17.6849 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 74120] reward=-113843052.9 actor_loss=0.3150 critic_loss=126569320721.0667 entropy=17.6823 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 74120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-580214.6 mean_steps=12.2
|
|
[Episode 74130] reward=-118592588.3 actor_loss=0.2720 critic_loss=129505256204.1905 entropy=17.6938 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 74140] reward=-120908058.0 actor_loss=0.2720 critic_loss=140222740847.5898 entropy=17.6946 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 74140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-511288.4 mean_steps=13.4
|
|
[Episode 74150] reward=-115089747.7 actor_loss=0.2963 critic_loss=122362755481.6000 entropy=17.6643 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 74160] reward=-110323395.6 actor_loss=0.3312 critic_loss=115744095709.8667 entropy=17.6610 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 74160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588472.9 mean_steps=13.8
|
|
[Episode 74170] reward=-120236329.5 actor_loss=0.2836 critic_loss=129733196848.7619 entropy=17.6568 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 74180] reward=-119891489.3 actor_loss=0.2685 critic_loss=127505463887.6444 entropy=17.6474 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 74180] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-638182.8 mean_steps=10.4
|
|
[Episode 74190] reward=-117518922.7 actor_loss=0.3377 critic_loss=126952090219.1628 entropy=17.6603 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 74200] reward=-120261700.5 actor_loss=0.2437 critic_loss=129966661822.5116 entropy=17.6620 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 74200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509179.2 mean_steps=14.2
|
|
[Episode 74210] reward=-122070654.6 actor_loss=0.2098 critic_loss=131606067987.6923 entropy=17.6630 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 74220] reward=-118785496.1 actor_loss=0.3300 critic_loss=127909275966.5778 entropy=17.6621 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 74220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-460343.1 mean_steps=13.8
|
|
[Episode 74230] reward=-115630466.3 actor_loss=0.2307 critic_loss=126062311651.5556 entropy=17.6625 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 74240] reward=-119416299.1 actor_loss=0.2481 critic_loss=126282932946.8235 entropy=17.6598 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 74240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-596929.1 mean_steps=12.0
|
|
[Episode 74250] reward=-116444694.3 actor_loss=0.2581 critic_loss=126736753950.7200 entropy=17.6709 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 74260] reward=-122573393.2 actor_loss=0.2998 critic_loss=132385611111.7838 entropy=17.6623 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 74260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486803.2 mean_steps=13.9
|
|
[Episode 74270] reward=-114880614.5 actor_loss=0.2346 critic_loss=121183805713.0667 entropy=17.6558 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 74280] reward=-118903789.9 actor_loss=0.3086 critic_loss=133685381461.3333 entropy=17.6360 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 74280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517862.2 mean_steps=13.9
|
|
[Episode 74290] reward=-121964606.7 actor_loss=0.2182 critic_loss=133264232572.1212 entropy=17.6274 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 74300] reward=-123948250.9 actor_loss=0.2022 critic_loss=133847362218.6667 entropy=17.6286 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 74300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500080.3 mean_steps=15.1
|
|
[Episode 74310] reward=-116111628.5 actor_loss=0.3349 critic_loss=127770545980.9524 entropy=17.6336 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 74320] reward=-115824998.2 actor_loss=0.3949 critic_loss=123001077174.8571 entropy=17.6266 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 74320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530134.2 mean_steps=14.2
|
|
[Episode 74330] reward=-114415369.6 actor_loss=0.2739 critic_loss=125741396898.9091 entropy=17.6379 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 74340] reward=-118327899.1 actor_loss=0.2639 critic_loss=127373464980.8372 entropy=17.6414 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 74340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-383867.6 mean_steps=16.9
|
|
[Episode 74350] reward=-112466621.6 actor_loss=0.4152 critic_loss=118227550936.1778 entropy=17.6105 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 74360] reward=-115204825.7 actor_loss=0.3284 critic_loss=126188957563.8710 entropy=17.5987 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 74360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480375.6 mean_steps=14.1
|
|
[Episode 74370] reward=-116312977.6 actor_loss=0.3799 critic_loss=123390985502.7200 entropy=17.5938 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 74380] reward=-116744084.7 actor_loss=0.2734 critic_loss=125089305941.3333 entropy=17.5941 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 74380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-472821.4 mean_steps=14.0
|
|
[Episode 74390] reward=-115260417.8 actor_loss=0.2408 critic_loss=120886658599.3846 entropy=17.5888 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 74400] reward=-111588833.1 actor_loss=0.3409 critic_loss=117502668700.0976 entropy=17.5776 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 74400] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-681927.9 mean_steps=12.2
|
|
[Episode 74410] reward=-119147898.5 actor_loss=0.2835 critic_loss=130034630109.8667 entropy=17.5635 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 74420] reward=-121343377.7 actor_loss=0.2725 critic_loss=129478207715.5556 entropy=17.5471 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 74420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-516563.0 mean_steps=12.2
|
|
[Episode 74430] reward=-116574100.5 actor_loss=0.3519 critic_loss=126052736311.6522 entropy=17.5469 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 74440] reward=-116483022.4 actor_loss=0.3449 critic_loss=124793147977.1429 entropy=17.5484 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 74440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498000.2 mean_steps=15.2
|
|
[Episode 74450] reward=-119037215.0 actor_loss=0.3252 critic_loss=131760296345.6000 entropy=17.5509 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 74460] reward=-114557820.0 actor_loss=0.3158 critic_loss=124379724653.7143 entropy=17.5582 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 74460] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-372463.6 mean_steps=17.0
|
|
[Episode 74470] reward=-117206250.6 actor_loss=0.3008 critic_loss=124166583296.0000 entropy=17.5597 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 74480] reward=-112069455.2 actor_loss=0.2993 critic_loss=116280336839.1111 entropy=17.5682 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 74480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581943.0 mean_steps=12.8
|
|
[Episode 74490] reward=-119642907.4 actor_loss=0.3140 critic_loss=129781802520.3810 entropy=17.5725 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 74500] reward=-115992921.2 actor_loss=0.2179 critic_loss=123862639023.1579 entropy=17.5646 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 74500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-583733.4 mean_steps=13.8
|
|
[Episode 74510] reward=-111286443.5 actor_loss=0.3407 critic_loss=117593824964.9231 entropy=17.5728 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 74520] reward=-118068247.0 actor_loss=0.2743 critic_loss=132505597706.2400 entropy=17.5720 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 74520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497292.2 mean_steps=13.9
|
|
[Episode 74530] reward=-115420939.4 actor_loss=0.2810 critic_loss=126404746300.2353 entropy=17.5599 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 74540] reward=-115630911.0 actor_loss=0.3249 critic_loss=127407675578.1818 entropy=17.5644 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 74540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541709.6 mean_steps=14.0
|
|
[Episode 74550] reward=-116437168.6 actor_loss=0.2983 critic_loss=127100597511.3143 entropy=17.5662 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 74560] reward=-114936761.9 actor_loss=0.4266 critic_loss=124172623216.6400 entropy=17.5543 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 74560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566599.1 mean_steps=13.3
|
|
[Episode 74570] reward=-121618972.7 actor_loss=0.2825 critic_loss=127287727655.3846 entropy=17.5592 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 74580] reward=-117639323.2 actor_loss=0.2458 critic_loss=131190199761.4545 entropy=17.5567 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 74580] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-652945.3 mean_steps=11.1
|
|
[Episode 74590] reward=-120181369.6 actor_loss=0.3667 critic_loss=130476127436.8000 entropy=17.5626 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 74600] reward=-113052322.3 actor_loss=0.2325 critic_loss=115575307878.4000 entropy=17.5720 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 74600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-615106.4 mean_steps=13.8
|
|
[Episode 74610] reward=-114361439.0 actor_loss=0.2777 critic_loss=121067076539.7333 entropy=17.5683 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 74620] reward=-117626452.1 actor_loss=0.2244 critic_loss=127405735025.7778 entropy=17.5671 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 74620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469394.7 mean_steps=13.9
|
|
[Episode 74630] reward=-123659696.0 actor_loss=0.2626 critic_loss=134370973588.2105 entropy=17.5613 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 74640] reward=-119344209.3 actor_loss=0.3902 critic_loss=129444274532.1739 entropy=17.5476 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 74640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416829.3 mean_steps=15.5
|
|
[Episode 74650] reward=-121529334.1 actor_loss=0.3924 critic_loss=128331250919.2258 entropy=17.5296 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 74660] reward=-116655075.2 actor_loss=0.3165 critic_loss=123152300441.6000 entropy=17.5224 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 74660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-540568.8 mean_steps=12.6
|
|
[Episode 74670] reward=-118991726.6 actor_loss=0.2588 critic_loss=129728272481.5238 entropy=17.5187 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 74680] reward=-112628891.1 actor_loss=0.3046 critic_loss=119483779610.9474 entropy=17.5252 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 74680] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-678900.1 mean_steps=11.1
|
|
[Episode 74690] reward=-118548892.6 actor_loss=0.2395 critic_loss=125080429429.6216 entropy=17.5300 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 74700] reward=-113362401.5 actor_loss=0.2704 critic_loss=116713693184.0000 entropy=17.5217 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 74700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520588.7 mean_steps=14.3
|
|
[Episode 74710] reward=-112358627.2 actor_loss=0.2657 critic_loss=121232988795.5862 entropy=17.5210 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 74720] reward=-116697690.9 actor_loss=0.1996 critic_loss=126077296955.0769 entropy=17.5457 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 74720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515202.0 mean_steps=13.8
|
|
[Episode 74730] reward=-120777690.5 actor_loss=0.2463 critic_loss=126370987535.5152 entropy=17.5457 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 74740] reward=-117091261.3 actor_loss=0.3838 critic_loss=126749878458.1818 entropy=17.5524 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 74740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581251.6 mean_steps=13.3
|
|
[Episode 74750] reward=-111649373.2 actor_loss=0.3653 critic_loss=121355667642.1818 entropy=17.5472 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 74760] reward=-112667819.7 actor_loss=0.3694 critic_loss=120113208173.7143 entropy=17.5588 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 74760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577202.0 mean_steps=13.8
|
|
[Episode 74770] reward=-118692119.2 actor_loss=0.3241 critic_loss=129991450927.4074 entropy=17.5547 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 74780] reward=-115671132.4 actor_loss=0.2765 critic_loss=122433884928.0000 entropy=17.5509 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 74780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451175.0 mean_steps=14.6
|
|
[Episode 74790] reward=-115688974.1 actor_loss=0.2838 critic_loss=131028205568.0000 entropy=17.5550 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 74800] reward=-116582941.1 actor_loss=0.3313 critic_loss=123676520306.7586 entropy=17.5674 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 74800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497260.9 mean_steps=13.9
|
|
[Episode 74810] reward=-117867778.3 actor_loss=0.3039 critic_loss=123179886230.5882 entropy=17.5716 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 74820] reward=-116938051.5 actor_loss=0.2938 critic_loss=126954293493.7600 entropy=17.5599 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 74820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504140.5 mean_steps=13.6
|
|
[Episode 74830] reward=-115108531.0 actor_loss=0.2498 critic_loss=121765574183.3846 entropy=17.5642 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 74840] reward=-113551119.1 actor_loss=0.3860 critic_loss=120432162454.5882 entropy=17.5773 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 74840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-366922.0 mean_steps=14.0
|
|
[Episode 74850] reward=-121791948.9 actor_loss=0.2354 critic_loss=187410953137.2308 entropy=17.5703 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 74860] reward=-120004167.0 actor_loss=0.3545 critic_loss=127943116390.4000 entropy=17.5530 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 74860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445462.3 mean_steps=15.8
|
|
[Episode 74870] reward=-118353470.0 actor_loss=0.3233 critic_loss=138382812540.3429 entropy=17.5589 approx_kl=0.0121 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 74880] reward=-121135232.2 actor_loss=0.2775 critic_loss=149132259028.2927 entropy=17.5631 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 74880] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-650623.5 mean_steps=12.2
|
|
[Episode 74890] reward=-118769004.0 actor_loss=0.3144 critic_loss=121431401904.3556 entropy=17.5506 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 74900] reward=-119900507.6 actor_loss=0.2233 critic_loss=130919342792.3478 entropy=17.5503 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 74900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510798.3 mean_steps=13.8
|
|
[Episode 74910] reward=-118027588.9 actor_loss=0.3416 critic_loss=128031002624.0000 entropy=17.5534 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 74920] reward=-116415723.7 actor_loss=0.2576 critic_loss=127024538282.6667 entropy=17.5429 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 74920] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-318495.8 mean_steps=17.1
|
|
[Episode 74930] reward=-113443836.1 actor_loss=0.4132 critic_loss=119084177635.5556 entropy=17.5463 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 74940] reward=-121824537.6 actor_loss=0.3149 critic_loss=127635817708.3077 entropy=17.5327 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 74940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390144.5 mean_steps=15.8
|
|
[Episode 74950] reward=-110536535.6 actor_loss=0.3195 critic_loss=115945345024.0000 entropy=17.5350 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 74960] reward=-111377865.4 actor_loss=0.3842 critic_loss=119921507805.8667 entropy=17.5436 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 74960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502146.4 mean_steps=14.6
|
|
[Episode 74970] reward=-120250838.7 actor_loss=0.3888 critic_loss=126846886278.0952 entropy=17.5518 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 74980] reward=-117913225.2 actor_loss=0.2443 critic_loss=122118501990.4000 entropy=17.5539 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 74980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-631983.2 mean_steps=12.8
|
|
[Episode 74990] reward=-116528053.6 actor_loss=0.2574 critic_loss=122416699068.6316 entropy=17.5476 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 75000] reward=-112417557.3 actor_loss=0.3907 critic_loss=117539126343.4419 entropy=17.5348 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 75000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530744.0 mean_steps=13.9
|
|
[Episode 75010] reward=-115415897.7 actor_loss=0.2682 critic_loss=118437862942.1176 entropy=17.5324 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 75020] reward=-118165764.8 actor_loss=0.3507 critic_loss=126424891977.1429 entropy=17.5411 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 75020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-400280.5 mean_steps=16.8
|
|
[Episode 75030] reward=-118125274.2 actor_loss=0.3248 critic_loss=125694415468.6061 entropy=17.5401 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 75040] reward=-130340603.4 actor_loss=0.3390 critic_loss=1184694388177.4546 entropy=17.5467 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 75040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565952.4 mean_steps=13.6
|
|
[Episode 75050] reward=-114293738.6 actor_loss=0.2748 critic_loss=126262845440.0000 entropy=17.5419 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 75060] reward=-110756708.7 actor_loss=0.3321 critic_loss=120459816448.0000 entropy=17.5469 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 75060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547127.3 mean_steps=14.3
|
|
[Episode 75070] reward=-121798883.9 actor_loss=0.3232 critic_loss=157872539294.8965 entropy=17.5497 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 75080] reward=-119866113.9 actor_loss=0.2514 critic_loss=123839531885.7143 entropy=17.5445 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 75080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-639541.9 mean_steps=12.2
|
|
[Episode 75090] reward=-119232165.4 actor_loss=0.1976 critic_loss=127015880793.0435 entropy=17.5509 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 75100] reward=-111889699.5 actor_loss=0.2817 critic_loss=117106954076.1600 entropy=17.5522 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 75100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-548925.4 mean_steps=12.6
|
|
[Episode 75110] reward=-109423886.6 actor_loss=0.3582 critic_loss=114550293224.7273 entropy=17.5474 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 75120] reward=-116595779.9 actor_loss=0.2504 critic_loss=124121489125.5172 entropy=17.5431 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 75120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-521370.3 mean_steps=13.2
|
|
[Episode 75130] reward=-115548665.3 actor_loss=0.3697 critic_loss=119732806494.3158 entropy=17.5546 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 75140] reward=-113614181.5 actor_loss=0.2126 critic_loss=124317177901.5111 entropy=17.5696 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 75140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-642670.9 mean_steps=13.2
|
|
[Episode 75150] reward=-116843104.3 actor_loss=0.2499 critic_loss=120474359234.5600 entropy=17.5714 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 75160] reward=-118297474.2 actor_loss=0.2975 critic_loss=127118206855.5294 entropy=17.5534 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 75160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-478713.4 mean_steps=13.0
|
|
[Episode 75170] reward=-117205741.8 actor_loss=0.3189 critic_loss=121683036346.1818 entropy=17.5604 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 75180] reward=-114630987.6 actor_loss=0.4147 critic_loss=257141389312.0000 entropy=17.5591 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 75180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481806.5 mean_steps=13.7
|
|
[Episode 75190] reward=-118633300.3 actor_loss=0.2251 critic_loss=123537299654.1935 entropy=17.5577 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 75200] reward=-117480115.8 actor_loss=0.3523 critic_loss=124643285869.7143 entropy=17.5599 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 75200] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-299088.0 mean_steps=17.1
|
|
[Episode 75210] reward=-112608709.1 actor_loss=0.3730 critic_loss=118390303232.0000 entropy=17.5575 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 75220] reward=-116410668.3 actor_loss=0.2352 critic_loss=123557280231.6190 entropy=17.5516 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 75220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-380968.2 mean_steps=15.8
|
|
[Episode 75230] reward=-110960921.9 actor_loss=0.3270 critic_loss=114971687936.0000 entropy=17.5553 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 75240] reward=-114249633.8 actor_loss=0.2560 critic_loss=119452501333.3333 entropy=17.5608 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 75240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489817.4 mean_steps=14.9
|
|
[Episode 75250] reward=-119762038.6 actor_loss=0.3560 critic_loss=126680180459.2432 entropy=17.5676 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 75260] reward=-118464796.0 actor_loss=0.3035 critic_loss=124427117601.0323 entropy=17.5682 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 75260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-546748.9 mean_steps=14.2
|
|
[Episode 75270] reward=-112965270.6 actor_loss=0.3442 critic_loss=121260467159.0400 entropy=17.5649 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 75280] reward=-115640199.4 actor_loss=0.3173 critic_loss=124821106866.0870 entropy=17.5584 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 75280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537209.8 mean_steps=14.0
|
|
[Episode 75290] reward=-114282151.5 actor_loss=0.3041 critic_loss=123560749242.1818 entropy=17.5643 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 75300] reward=-113551790.4 actor_loss=0.3285 critic_loss=120610855830.0690 entropy=17.5728 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 75300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-531795.9 mean_steps=14.6
|
|
[Episode 75310] reward=-108235506.7 actor_loss=0.3075 critic_loss=121534118198.3030 entropy=17.5658 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 75320] reward=-120402518.8 actor_loss=0.3549 critic_loss=132857630720.0000 entropy=17.5673 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 75320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538245.8 mean_steps=14.1
|
|
[Episode 75330] reward=-112184171.1 actor_loss=0.3109 critic_loss=122703523095.2727 entropy=17.5823 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 75340] reward=-122348183.5 actor_loss=0.2992 critic_loss=136385215419.7333 entropy=17.5899 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 75340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-511982.8 mean_steps=15.1
|
|
[Episode 75350] reward=-119243020.4 actor_loss=0.3148 critic_loss=124111850934.8571 entropy=17.5940 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 75360] reward=-115821319.7 actor_loss=0.3413 critic_loss=120147178447.2381 entropy=17.5922 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 75360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500509.8 mean_steps=14.1
|
|
[Episode 75370] reward=-117181780.9 actor_loss=0.2322 critic_loss=123999989564.9524 entropy=17.5889 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 75380] reward=-112234999.5 actor_loss=0.3325 critic_loss=117178098868.7059 entropy=17.5955 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 75380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504497.1 mean_steps=14.1
|
|
[Episode 75390] reward=-119148015.5 actor_loss=0.3206 critic_loss=124611319870.0606 entropy=17.5819 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 75400] reward=-115946908.8 actor_loss=0.3130 critic_loss=121358414825.2444 entropy=17.5806 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 75400] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-682044.5 mean_steps=11.2
|
|
[Episode 75410] reward=-114897519.5 actor_loss=0.3105 critic_loss=120652115727.0588 entropy=17.5884 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 75420] reward=-119224058.5 actor_loss=0.2270 critic_loss=128092818454.7556 entropy=17.6051 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 75420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588798.4 mean_steps=13.9
|
|
[Episode 75430] reward=-110808993.9 actor_loss=0.2801 critic_loss=116531020322.1333 entropy=17.6057 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 75440] reward=-117562246.9 actor_loss=0.2483 critic_loss=122126820740.4138 entropy=17.5945 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 75440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532354.1 mean_steps=13.9
|
|
[Episode 75450] reward=-121067554.0 actor_loss=0.2113 critic_loss=131711565824.0000 entropy=17.6047 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 75460] reward=-120794258.3 actor_loss=0.2580 critic_loss=126470591146.6667 entropy=17.5985 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 75460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474837.5 mean_steps=14.8
|
|
[Episode 75470] reward=-113575596.0 actor_loss=0.2853 critic_loss=124927865978.8800 entropy=17.5962 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 75480] reward=-116391682.7 actor_loss=0.2796 critic_loss=122915193544.3478 entropy=17.5861 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 75480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-376029.1 mean_steps=15.1
|
|
[Episode 75490] reward=-116076779.0 actor_loss=0.3559 critic_loss=119565330773.3333 entropy=17.5889 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 75500] reward=-110497924.2 actor_loss=0.3363 critic_loss=117899148243.4783 entropy=17.5847 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 75500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-582271.2 mean_steps=12.6
|
|
[Episode 75510] reward=-120683928.0 actor_loss=0.2377 critic_loss=129802411758.9333 entropy=17.5750 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 75520] reward=-116785176.6 actor_loss=0.2798 critic_loss=126696027136.0000 entropy=17.5765 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 75520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-504246.1 mean_steps=12.8
|
|
[Episode 75530] reward=-119716480.3 actor_loss=0.2169 critic_loss=129911456812.5217 entropy=17.5766 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 75540] reward=-116831578.6 actor_loss=0.2671 critic_loss=125750805299.2000 entropy=17.5726 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 75540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490190.0 mean_steps=14.8
|
|
[Episode 75550] reward=-115984344.3 actor_loss=0.2154 critic_loss=118677145479.5294 entropy=17.5868 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 75560] reward=-115161002.1 actor_loss=0.4310 critic_loss=117585046641.7778 entropy=17.5806 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 75560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457849.5 mean_steps=15.3
|
|
[Episode 75570] reward=-117139950.1 actor_loss=0.3231 critic_loss=123452215842.1333 entropy=17.5903 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 75580] reward=-115167406.2 actor_loss=0.2918 critic_loss=119302692119.2727 entropy=17.5929 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 75580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568601.4 mean_steps=13.2
|
|
[Episode 75590] reward=-122410025.8 actor_loss=0.2477 critic_loss=131445536849.9200 entropy=17.5966 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 75600] reward=-111743806.9 actor_loss=0.3703 critic_loss=127202323026.5806 entropy=17.5909 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 75600] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-345895.1 mean_steps=16.6
|
|
[Episode 75610] reward=-115527174.7 actor_loss=0.3982 critic_loss=125610236859.7333 entropy=17.5942 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 75620] reward=-106318989.5 actor_loss=0.3996 critic_loss=115281223364.9231 entropy=17.5915 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 75620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-494925.3 mean_steps=12.8
|
|
[Episode 75630] reward=-116559815.5 actor_loss=0.2578 critic_loss=119979492120.7742 entropy=17.5819 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 75640] reward=-117802522.2 actor_loss=0.3058 critic_loss=122198013765.8182 entropy=17.5969 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 75640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538530.3 mean_steps=13.2
|
|
[Episode 75650] reward=-113255682.3 actor_loss=0.3430 critic_loss=121704875911.5294 entropy=17.5896 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 75660] reward=-114924725.5 actor_loss=0.2617 critic_loss=123186703112.8276 entropy=17.5819 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 75660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503417.1 mean_steps=14.7
|
|
[Episode 75670] reward=-115576681.4 actor_loss=0.3253 critic_loss=122922386864.3556 entropy=17.5810 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 75680] reward=-117595828.0 actor_loss=0.2826 critic_loss=130155222716.6316 entropy=17.5947 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 75680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599502.4 mean_steps=12.7
|
|
[Episode 75690] reward=-122318704.3 actor_loss=0.2979 critic_loss=138411412579.0968 entropy=17.5997 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 75700] reward=-113292561.5 actor_loss=0.3855 critic_loss=117033160612.9778 entropy=17.6026 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 75700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557058.1 mean_steps=13.4
|
|
[Episode 75710] reward=-120137742.5 actor_loss=0.2099 critic_loss=128122254540.8000 entropy=17.5965 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 75720] reward=-113850099.5 actor_loss=0.3253 critic_loss=122789082550.8571 entropy=17.5877 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 75720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-484461.5 mean_steps=15.7
|
|
[Episode 75730] reward=-111505199.5 actor_loss=0.3289 critic_loss=120450595498.6667 entropy=17.5942 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 75740] reward=-114322399.4 actor_loss=0.2803 critic_loss=146925644068.5714 entropy=17.6027 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 75740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529548.4 mean_steps=14.2
|
|
[Episode 75750] reward=-116174951.3 actor_loss=0.3289 critic_loss=123487137300.4800 entropy=17.5924 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 75760] reward=-114440293.7 actor_loss=0.2554 critic_loss=153163067632.9412 entropy=17.5921 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 75760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-612467.5 mean_steps=12.9
|
|
[Episode 75770] reward=-122738805.5 actor_loss=0.2013 critic_loss=152385514442.1053 entropy=17.5926 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 75780] reward=-116967531.8 actor_loss=0.1998 critic_loss=140232257809.0667 entropy=17.5939 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1198 front_blocked=0
|
|
[Eval 75780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616442.9 mean_steps=12.7
|
|
[Episode 75790] reward=-117235232.8 actor_loss=0.2932 critic_loss=122481795462.0952 entropy=17.5914 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 75800] reward=-115199857.4 actor_loss=0.3465 critic_loss=126047037547.7895 entropy=17.5930 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 75800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-528959.1 mean_steps=15.2
|
|
[Episode 75810] reward=-4428770264.3 actor_loss=0.3640 critic_loss=39200578827452416.0000 entropy=17.6151 approx_kl=0.0248 kl_stop=1 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 75820] reward=-117290199.4 actor_loss=0.3236 critic_loss=126283681792.0000 entropy=17.6164 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 75820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-623126.5 mean_steps=12.9
|
|
[Episode 75830] reward=-111033854.4 actor_loss=0.3459 critic_loss=120285715983.5152 entropy=17.6227 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 75840] reward=-114203928.5 actor_loss=0.3759 critic_loss=125875256979.9111 entropy=17.6148 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 75840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-654226.1 mean_steps=13.4
|
|
[Episode 75850] reward=-116018624.9 actor_loss=0.3328 critic_loss=121471844898.1333 entropy=17.5961 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 75860] reward=-115852107.3 actor_loss=0.3049 critic_loss=124307276498.8235 entropy=17.5932 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 75860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-622874.8 mean_steps=14.1
|
|
[Episode 75870] reward=-111890979.4 actor_loss=0.2876 critic_loss=118842530929.7778 entropy=17.5823 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 75880] reward=-115961825.0 actor_loss=0.4082 critic_loss=186111255074.1333 entropy=17.5845 approx_kl=0.0034 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 75880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434440.3 mean_steps=15.3
|
|
[Episode 75890] reward=-115097474.7 actor_loss=0.3898 critic_loss=122298757120.0000 entropy=17.5891 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 75900] reward=-111608744.0 actor_loss=0.4903 critic_loss=119753844508.4444 entropy=17.6053 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1517 front_blocked=0
|
|
[Eval 75900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-582460.0 mean_steps=13.5
|
|
[Episode 75910] reward=-112965877.7 actor_loss=0.3740 critic_loss=123718841829.0526 entropy=17.6144 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 75920] reward=-114326021.8 actor_loss=0.2235 critic_loss=124996464913.0667 entropy=17.6190 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 75920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542786.4 mean_steps=13.0
|
|
[Episode 75930] reward=-107704585.1 actor_loss=0.3382 critic_loss=112995916502.7097 entropy=17.6051 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 75940] reward=-115794049.9 actor_loss=0.3247 critic_loss=160925338282.6667 entropy=17.6044 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 75940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440106.2 mean_steps=15.3
|
|
[Episode 75950] reward=-114955077.6 actor_loss=0.3370 critic_loss=123518705117.8667 entropy=17.6110 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 75960] reward=-119604003.6 actor_loss=0.2802 critic_loss=126071073611.2941 entropy=17.6073 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 75960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558306.3 mean_steps=13.6
|
|
[Episode 75970] reward=-119775429.0 actor_loss=0.2535 critic_loss=243555467264.0000 entropy=17.6219 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 75980] reward=-122935619.3 actor_loss=0.2506 critic_loss=135996940288.0000 entropy=17.6134 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 75980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-618617.6 mean_steps=14.0
|
|
[Episode 75990] reward=-117952128.9 actor_loss=0.3584 critic_loss=120485909663.2889 entropy=17.6030 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 76000] reward=-121048587.3 actor_loss=0.2503 critic_loss=131885709721.6000 entropy=17.5917 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 76000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441331.0 mean_steps=15.2
|
|
[Episode 76010] reward=-119512081.0 actor_loss=0.3101 critic_loss=129220799146.6667 entropy=17.5936 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 76020] reward=-117231313.5 actor_loss=0.2295 critic_loss=123144431206.4000 entropy=17.6213 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 76020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-603116.5 mean_steps=11.9
|
|
[Episode 76030] reward=-116618347.6 actor_loss=0.3184 critic_loss=123816876667.5862 entropy=17.6302 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 76040] reward=-113048279.6 actor_loss=0.2704 critic_loss=121498848883.6129 entropy=17.6280 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 76040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477317.9 mean_steps=14.9
|
|
[Episode 76050] reward=-113159789.8 actor_loss=0.3218 critic_loss=156889038336.0000 entropy=17.6362 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 76060] reward=-120882708.8 actor_loss=0.3699 critic_loss=133791394474.6667 entropy=17.6586 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 76060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548064.7 mean_steps=13.1
|
|
[Episode 76070] reward=-115992676.0 actor_loss=0.3906 critic_loss=124408439148.0889 entropy=17.6656 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 76080] reward=-125076638.5 actor_loss=0.2980 critic_loss=602777758378.6666 entropy=17.6352 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 76080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481735.5 mean_steps=13.9
|
|
[Episode 76090] reward=-117020153.5 actor_loss=0.2801 critic_loss=125660580886.7556 entropy=17.6455 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 76100] reward=-121943157.2 actor_loss=0.1940 critic_loss=130041482444.8000 entropy=17.6418 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 76100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500685.4 mean_steps=13.9
|
|
[Episode 76110] reward=-129782351.0 actor_loss=0.3222 critic_loss=1330090483922.0513 entropy=17.6430 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 76120] reward=-119003741.6 actor_loss=0.2474 critic_loss=133415959639.7714 entropy=17.6512 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 76120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-330089.4 mean_steps=15.3
|
|
[Episode 76130] reward=-123700767.4 actor_loss=0.3053 critic_loss=230952088756.7059 entropy=17.6815 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 76140] reward=-119176683.6 actor_loss=0.3447 critic_loss=130114529962.6667 entropy=17.7064 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 76140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446223.8 mean_steps=14.4
|
|
[Episode 76150] reward=-115146198.5 actor_loss=0.3252 critic_loss=126160599722.6667 entropy=17.7076 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 76160] reward=-113896700.2 actor_loss=0.4091 critic_loss=128744861240.8889 entropy=17.6997 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 76160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505211.8 mean_steps=13.7
|
|
[Episode 76170] reward=-111528554.1 actor_loss=0.2167 critic_loss=134050491525.5652 entropy=17.6873 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 76180] reward=-115549016.1 actor_loss=0.3578 critic_loss=122288766128.5517 entropy=17.6854 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 76180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493775.9 mean_steps=14.8
|
|
[Episode 76190] reward=-116526792.0 actor_loss=0.3081 critic_loss=126439516594.4242 entropy=17.6910 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 76200] reward=-118488109.7 actor_loss=0.3060 critic_loss=130843568736.8649 entropy=17.6842 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 76200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581207.5 mean_steps=13.4
|
|
[Episode 76210] reward=-116124707.4 actor_loss=0.4119 critic_loss=124485428838.4000 entropy=17.7015 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 76220] reward=-120387735.8 actor_loss=0.2704 critic_loss=127952106291.2000 entropy=17.7052 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 76220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547217.4 mean_steps=12.8
|
|
[Episode 76230] reward=-120619418.2 actor_loss=0.2624 critic_loss=123744541013.3333 entropy=17.6986 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 76240] reward=-122149652.5 actor_loss=0.1640 critic_loss=129868844964.9778 entropy=17.7007 approx_kl=0.0113 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 76240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462222.3 mean_steps=14.6
|
|
[Episode 76250] reward=-118319170.2 actor_loss=0.2917 critic_loss=122489982643.8919 entropy=17.6967 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 76260] reward=-113216896.0 actor_loss=0.2636 critic_loss=121913053814.1538 entropy=17.6877 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 76260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439723.8 mean_steps=15.2
|
|
[Episode 76270] reward=-116879281.5 actor_loss=0.3215 critic_loss=124911072233.2444 entropy=17.6820 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 76280] reward=-111834142.2 actor_loss=0.2964 critic_loss=119079065258.6667 entropy=17.6908 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 76280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553235.6 mean_steps=14.3
|
|
[Episode 76290] reward=-116116514.1 actor_loss=0.2569 critic_loss=127409322325.3333 entropy=17.7012 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 76300] reward=-119408417.1 actor_loss=0.3135 critic_loss=128342197713.4545 entropy=17.6836 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 76300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-647397.7 mean_steps=12.8
|
|
[Episode 76310] reward=-116327007.2 actor_loss=0.3487 critic_loss=138982696172.3077 entropy=17.6824 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 76320] reward=-119144119.4 actor_loss=0.3328 critic_loss=125211505857.7297 entropy=17.6801 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 76320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-598686.5 mean_steps=11.9
|
|
[Episode 76330] reward=-120297739.2 actor_loss=0.2582 critic_loss=133129248577.4884 entropy=17.6709 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 76340] reward=-117485695.4 actor_loss=0.3428 critic_loss=127056162967.7037 entropy=17.6654 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 76340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-405822.5 mean_steps=14.0
|
|
[Episode 76350] reward=-115558938.5 actor_loss=0.2837 critic_loss=120207869584.4103 entropy=17.6646 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 76360] reward=-110271537.0 actor_loss=0.3512 critic_loss=113023646924.8000 entropy=17.6693 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 76360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-620822.6 mean_steps=13.4
|
|
[Episode 76370] reward=-127420843.9 actor_loss=0.2094 critic_loss=489135898624.0000 entropy=17.6534 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 76380] reward=-118055669.4 actor_loss=0.3410 critic_loss=124684216765.2174 entropy=17.6512 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 76380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464782.8 mean_steps=14.3
|
|
[Episode 76390] reward=-107043323.7 actor_loss=0.3309 critic_loss=112104787022.7692 entropy=17.6408 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 76400] reward=-118216288.2 actor_loss=0.1840 critic_loss=123759897506.9091 entropy=17.6266 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 76400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520144.4 mean_steps=14.1
|
|
[Episode 76410] reward=-118382880.9 actor_loss=0.2683 critic_loss=123889087170.2069 entropy=17.6245 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 76420] reward=-115700796.6 actor_loss=0.2826 critic_loss=116287509845.3333 entropy=17.6241 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 76420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435499.3 mean_steps=14.7
|
|
[Episode 76430] reward=-116283070.0 actor_loss=0.2271 critic_loss=123589733769.8462 entropy=17.6314 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 76440] reward=-113964728.7 actor_loss=0.3690 critic_loss=115987713303.2727 entropy=17.6254 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 76440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563673.4 mean_steps=12.2
|
|
[Episode 76450] reward=-114699230.8 actor_loss=0.3080 critic_loss=117929398130.7586 entropy=17.6379 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 76460] reward=-117285989.3 actor_loss=0.2437 critic_loss=125461543976.9600 entropy=17.6262 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 76460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-576889.3 mean_steps=12.4
|
|
[Episode 76470] reward=-121225939.5 actor_loss=0.3114 critic_loss=151684913015.4667 entropy=17.6208 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 76480] reward=-115472934.3 actor_loss=0.2647 critic_loss=122433765060.9231 entropy=17.6211 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 76480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492648.2 mean_steps=14.5
|
|
[Episode 76490] reward=-120773897.3 actor_loss=0.2488 critic_loss=124796961860.2667 entropy=17.6202 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 76500] reward=-119385997.8 actor_loss=0.2886 critic_loss=124278994757.8182 entropy=17.6304 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 76500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571855.8 mean_steps=13.4
|
|
[Episode 76510] reward=-112514467.3 actor_loss=0.3157 critic_loss=125429152153.6000 entropy=17.6260 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 76520] reward=-115807977.6 actor_loss=0.2751 critic_loss=129298093178.8800 entropy=17.6226 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 76520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-332778.9 mean_steps=15.9
|
|
[Episode 76530] reward=-117119402.7 actor_loss=0.2894 critic_loss=134328008073.8462 entropy=17.6271 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 76540] reward=-121038133.4 actor_loss=0.2815 critic_loss=125471384462.2222 entropy=17.6237 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 76540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-509768.5 mean_steps=12.9
|
|
[Episode 76550] reward=-111950964.8 actor_loss=0.3355 critic_loss=115140812544.0000 entropy=17.6193 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 76560] reward=-121291843.2 actor_loss=0.3353 critic_loss=129192939520.0000 entropy=17.6175 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 76560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538504.3 mean_steps=14.2
|
|
[Episode 76570] reward=-117122485.2 actor_loss=0.3652 critic_loss=130817721230.2222 entropy=17.6302 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 76580] reward=-120738887.9 actor_loss=0.1714 critic_loss=123720252620.8000 entropy=17.6383 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 76580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400877.7 mean_steps=15.1
|
|
[Episode 76590] reward=-118753218.4 actor_loss=0.2263 critic_loss=123252014694.4000 entropy=17.6405 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 76600] reward=-110185195.4 actor_loss=0.3204 critic_loss=140153625631.0303 entropy=17.6440 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 76600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496973.3 mean_steps=13.9
|
|
[Episode 76610] reward=-114635042.5 actor_loss=0.3989 critic_loss=134733778780.1600 entropy=17.6656 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 76620] reward=-112592421.6 actor_loss=0.3452 critic_loss=185481654467.0476 entropy=17.6498 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 76620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537602.0 mean_steps=13.6
|
|
[Episode 76630] reward=-116173944.6 actor_loss=0.2986 critic_loss=119158072173.7143 entropy=17.6523 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 76640] reward=-119312109.6 actor_loss=0.2905 critic_loss=131236282563.0476 entropy=17.6536 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 76640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-576914.6 mean_steps=12.7
|
|
[Episode 76650] reward=-122625390.4 actor_loss=0.3894 critic_loss=223265706299.0769 entropy=17.6684 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 76660] reward=-114079807.7 actor_loss=0.3444 critic_loss=123035594752.0000 entropy=17.6690 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 76660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-648105.3 mean_steps=13.3
|
|
[Episode 76670] reward=-118768239.6 actor_loss=0.3057 critic_loss=122761744657.0667 entropy=17.6649 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 76680] reward=-115629257.9 actor_loss=0.2788 critic_loss=121875484190.1176 entropy=17.6498 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 76680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-431960.0 mean_steps=16.2
|
|
[Episode 76690] reward=-114710692.4 actor_loss=0.2208 critic_loss=123590248948.6222 entropy=17.6571 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 76700] reward=-110533611.7 actor_loss=0.3079 critic_loss=117472242892.8000 entropy=17.6645 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 76700] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-632438.6 mean_steps=11.6
|
|
[Episode 76710] reward=-115611252.9 actor_loss=0.3868 critic_loss=123700153730.8445 entropy=17.6705 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 76720] reward=-123427581.7 actor_loss=0.2559 critic_loss=135712054272.0000 entropy=17.6619 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 76720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440612.3 mean_steps=14.8
|
|
[Episode 76730] reward=-117586630.6 actor_loss=0.1990 critic_loss=126056336161.3913 entropy=17.6584 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 76740] reward=-106699627.5 actor_loss=0.3870 critic_loss=112289957660.4444 entropy=17.6564 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 76740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-579549.9 mean_steps=13.2
|
|
[Episode 76750] reward=-111745787.6 actor_loss=0.3597 critic_loss=121485039802.1818 entropy=17.6549 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 76760] reward=-118483466.0 actor_loss=0.2821 critic_loss=131487674152.4211 entropy=17.6469 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 76760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540073.3 mean_steps=13.4
|
|
[Episode 76770] reward=-112512631.0 actor_loss=0.2967 critic_loss=118622236493.9130 entropy=17.6273 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 76780] reward=-118729784.9 actor_loss=0.3020 critic_loss=128512086738.8235 entropy=17.6214 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 76780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479108.7 mean_steps=13.8
|
|
[Episode 76790] reward=-112423696.0 actor_loss=0.3393 critic_loss=117071950039.5789 entropy=17.6156 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 76800] reward=-113500399.8 actor_loss=0.3214 critic_loss=118257739776.0000 entropy=17.6087 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 76800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471601.3 mean_steps=13.6
|
|
[Episode 76810] reward=-116939051.9 actor_loss=0.4032 critic_loss=125314021239.4667 entropy=17.6200 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 76820] reward=-117477154.5 actor_loss=0.3826 critic_loss=124376599795.8095 entropy=17.6233 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 76820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-611789.1 mean_steps=11.8
|
|
[Episode 76830] reward=-117716970.0 actor_loss=0.2515 critic_loss=123945888426.6667 entropy=17.6277 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 76840] reward=-119101137.2 actor_loss=0.2375 critic_loss=126694448683.8857 entropy=17.6380 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 76840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-645306.6 mean_steps=12.2
|
|
[Episode 76850] reward=-121778543.3 actor_loss=0.3154 critic_loss=137463792128.0000 entropy=17.6311 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 76860] reward=-111004766.1 actor_loss=0.5608 critic_loss=171872387072.0000 entropy=17.6343 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 76860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-647871.6 mean_steps=11.9
|
|
[Episode 76870] reward=-118802868.5 actor_loss=0.2740 critic_loss=126429343037.7931 entropy=17.6315 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 76880] reward=-117646761.9 actor_loss=0.3089 critic_loss=126079659154.2857 entropy=17.6319 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 76880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430549.3 mean_steps=15.3
|
|
[Episode 76890] reward=-118412815.7 actor_loss=0.2723 critic_loss=150068883456.0000 entropy=17.6335 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 76900] reward=-115834883.8 actor_loss=0.3555 critic_loss=130929115880.7273 entropy=17.6413 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 76900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538622.8 mean_steps=14.6
|
|
[Episode 76910] reward=-117807745.4 actor_loss=0.3315 critic_loss=221844094411.0345 entropy=17.6352 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 76920] reward=-113656746.7 actor_loss=0.2776 critic_loss=118070634603.7895 entropy=17.6414 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 76920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507297.3 mean_steps=14.3
|
|
[Episode 76930] reward=-120930187.2 actor_loss=0.3041 critic_loss=176351593288.2051 entropy=17.6417 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 76940] reward=-117664815.2 actor_loss=0.3263 critic_loss=124861049955.0968 entropy=17.6426 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 76940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491281.1 mean_steps=14.5
|
|
[Episode 76950] reward=-117156693.1 actor_loss=0.2872 critic_loss=120749962399.2889 entropy=17.6417 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 76960] reward=-116162201.1 actor_loss=0.3600 critic_loss=121399640926.3158 entropy=17.6205 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 76960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566255.2 mean_steps=12.7
|
|
[Episode 76970] reward=-115200341.8 actor_loss=0.2183 critic_loss=120354130432.0000 entropy=17.6082 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 76980] reward=-114815293.3 actor_loss=0.2876 critic_loss=132055666883.0476 entropy=17.6114 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 76980] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-625935.6 mean_steps=11.1
|
|
[Episode 76990] reward=-114254948.6 actor_loss=0.3414 critic_loss=121257171663.5676 entropy=17.6094 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 77000] reward=-112827525.4 actor_loss=0.2649 critic_loss=130168739345.6552 entropy=17.6128 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 77000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486635.1 mean_steps=14.4
|
|
[Episode 77010] reward=-112370092.4 actor_loss=0.2639 critic_loss=122422567440.5161 entropy=17.6134 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 77020] reward=-115050059.0 actor_loss=0.2626 critic_loss=122206289920.0000 entropy=17.6126 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 77020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-678408.2 mean_steps=12.5
|
|
[Episode 77030] reward=-115280108.7 actor_loss=0.3285 critic_loss=137145040896.0000 entropy=17.6112 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 77040] reward=-111952105.1 actor_loss=0.3312 critic_loss=126018372364.1905 entropy=17.6167 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 77040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-358650.2 mean_steps=15.8
|
|
[Episode 77050] reward=-121453764.7 actor_loss=0.2596 critic_loss=131438956380.1600 entropy=17.6081 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 77060] reward=-119150250.6 actor_loss=0.3528 critic_loss=130874543542.8571 entropy=17.6071 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 77060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447920.4 mean_steps=14.9
|
|
[Episode 77070] reward=-116950575.6 actor_loss=0.3962 critic_loss=123922128505.9048 entropy=17.6026 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 77080] reward=-120736291.8 actor_loss=0.2763 critic_loss=129622714368.0000 entropy=17.5940 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 77080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482686.6 mean_steps=14.2
|
|
[Episode 77090] reward=-113835712.2 actor_loss=0.2484 critic_loss=122239437824.0000 entropy=17.5938 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 77100] reward=-111970690.6 actor_loss=0.2265 critic_loss=116328370898.8235 entropy=17.6060 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 77100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449399.9 mean_steps=14.7
|
|
[Episode 77110] reward=-111503000.0 actor_loss=0.2733 critic_loss=114368359680.0000 entropy=17.5926 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 77120] reward=-113717846.6 actor_loss=0.3689 critic_loss=142841286851.0476 entropy=17.5916 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 77120] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-641801.3 mean_steps=11.1
|
|
[Episode 77130] reward=-115048931.0 actor_loss=0.3400 critic_loss=121702399717.5172 entropy=17.5986 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 77140] reward=-119763892.5 actor_loss=0.2739 critic_loss=130274124638.3158 entropy=17.6008 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 77140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475859.2 mean_steps=13.8
|
|
[Episode 77150] reward=-120035105.3 actor_loss=0.2095 critic_loss=126838905969.7778 entropy=17.5833 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 77160] reward=-118641254.3 actor_loss=0.4718 critic_loss=130243159906.4615 entropy=17.5759 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 77160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479517.8 mean_steps=14.9
|
|
[Episode 77170] reward=-113920605.1 actor_loss=0.3418 critic_loss=125706039705.6000 entropy=17.5677 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 77180] reward=-162626384.1 actor_loss=0.3707 critic_loss=9063881093211.0215 entropy=17.5761 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 77180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523544.9 mean_steps=13.8
|
|
[Episode 77190] reward=-116518902.7 actor_loss=0.3327 critic_loss=124616633002.6667 entropy=17.5785 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 77200] reward=-115964160.5 actor_loss=0.2448 critic_loss=126360273841.2308 entropy=17.5581 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 77200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535775.5 mean_steps=14.2
|
|
[Episode 77210] reward=-112353616.0 actor_loss=0.4472 critic_loss=116573796033.4222 entropy=17.5614 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 77220] reward=-114000255.2 actor_loss=0.2465 critic_loss=120771891427.5556 entropy=17.5595 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 77220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503435.5 mean_steps=13.8
|
|
[Episode 77230] reward=-119753743.6 actor_loss=0.3278 critic_loss=126555424995.5556 entropy=17.5488 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 77240] reward=-118534107.0 actor_loss=0.2838 critic_loss=123987635040.7111 entropy=17.5420 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 77240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555468.6 mean_steps=13.6
|
|
[Episode 77250] reward=-122621316.6 actor_loss=0.2409 critic_loss=123944646117.0526 entropy=17.5453 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 77260] reward=-115135352.1 actor_loss=0.3365 critic_loss=122812110402.7826 entropy=17.5370 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 77260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566930.0 mean_steps=13.4
|
|
[Episode 77270] reward=-117527166.4 actor_loss=0.2278 critic_loss=121226918343.1111 entropy=17.5386 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 77280] reward=-110330603.2 actor_loss=0.2928 critic_loss=110819475911.1111 entropy=17.5556 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 77280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-471133.4 mean_steps=15.8
|
|
[Episode 77290] reward=-116418689.3 actor_loss=0.3790 critic_loss=127265960671.1795 entropy=17.5514 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 77300] reward=-113792019.2 actor_loss=0.4162 critic_loss=119121160952.6857 entropy=17.5526 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 77300] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-614577.1 mean_steps=11.7
|
|
[Episode 77310] reward=-117995259.8 actor_loss=0.2272 critic_loss=126896436639.1351 entropy=17.5547 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 77320] reward=-137078907.0 actor_loss=0.2966 critic_loss=1228131881332.3635 entropy=17.5712 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 77320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614132.8 mean_steps=12.9
|
|
[Episode 77330] reward=-115066260.6 actor_loss=0.3331 critic_loss=118914676490.2400 entropy=17.5588 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 77340] reward=-117947066.7 actor_loss=0.2892 critic_loss=124509989728.7111 entropy=17.5504 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 77340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457633.9 mean_steps=14.6
|
|
[Episode 77350] reward=-114526822.3 actor_loss=0.3330 critic_loss=121562885851.4286 entropy=17.5550 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 77360] reward=-119738141.4 actor_loss=0.1866 critic_loss=123448178688.0000 entropy=17.5766 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 77360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-566160.3 mean_steps=14.2
|
|
[Episode 77370] reward=-118702819.9 actor_loss=0.2601 critic_loss=128969989597.8667 entropy=17.5760 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 77380] reward=-116565199.5 actor_loss=0.2827 critic_loss=123368689842.0870 entropy=17.5783 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 77380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509020.7 mean_steps=14.4
|
|
[Episode 77390] reward=-119415087.8 actor_loss=0.3178 critic_loss=128778819253.6774 entropy=17.5962 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 77400] reward=-117164582.9 actor_loss=0.2739 critic_loss=123359943533.7143 entropy=17.6016 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 77400] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-579875.9 mean_steps=11.3
|
|
[Episode 77410] reward=-121100630.4 actor_loss=0.2828 critic_loss=128599352466.2857 entropy=17.5997 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 77420] reward=-113995679.5 actor_loss=0.3005 critic_loss=117349085866.6667 entropy=17.6173 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 77420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502063.7 mean_steps=13.7
|
|
[Episode 77430] reward=-123297601.8 actor_loss=0.1917 critic_loss=132430900019.2000 entropy=17.6146 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 77440] reward=-117071416.9 actor_loss=0.3219 critic_loss=119945055232.0000 entropy=17.6154 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 77440] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-337074.7 mean_steps=16.4
|
|
[Episode 77450] reward=-118072931.9 actor_loss=0.2829 critic_loss=126773120048.7619 entropy=17.6093 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 77460] reward=-120807138.9 actor_loss=0.2830 critic_loss=130582087972.5714 entropy=17.5969 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 77460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532775.8 mean_steps=13.3
|
|
[Episode 77470] reward=-116523194.1 actor_loss=0.2303 critic_loss=119119805039.3044 entropy=17.5930 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 77480] reward=-115233237.7 actor_loss=0.2514 critic_loss=121162121862.7368 entropy=17.5857 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 77480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578802.4 mean_steps=12.7
|
|
[Episode 77490] reward=-130777665.2 actor_loss=0.2782 critic_loss=660673362875.7333 entropy=17.5823 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 77500] reward=-165779518.2 actor_loss=2.3754 critic_loss=5012227374140.2354 entropy=17.5862 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 77500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-541511.9 mean_steps=11.4
|
|
[Episode 77510] reward=-844230820.5 actor_loss=82.2143 critic_loss=1445876412489546.0000 entropy=17.5919 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1146 front_blocked=0
|
|
[Episode 77520] reward=-118195065.4 actor_loss=0.3048 critic_loss=137316261068.8000 entropy=17.5964 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 77520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547700.6 mean_steps=13.4
|
|
[Episode 77530] reward=-120385637.5 actor_loss=0.2673 critic_loss=135995459291.4286 entropy=17.5959 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 77540] reward=-111095993.2 actor_loss=0.2145 critic_loss=112716417496.6154 entropy=17.6037 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 77540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481877.4 mean_steps=13.9
|
|
[Episode 77550] reward=-137271298.9 actor_loss=0.2618 critic_loss=1907540514886.6206 entropy=17.6024 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 77560] reward=-118132272.5 actor_loss=0.2877 critic_loss=146831902037.3333 entropy=17.6026 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 77560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-607444.5 mean_steps=12.6
|
|
[Episode 77570] reward=-122286937.3 actor_loss=0.2933 critic_loss=133682132190.6087 entropy=17.6074 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 77580] reward=-116838719.1 actor_loss=0.2881 critic_loss=128814567602.0870 entropy=17.6020 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 77580] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-673442.0 mean_steps=11.5
|
|
[Episode 77590] reward=-117880540.3 actor_loss=0.2478 critic_loss=126511015526.4000 entropy=17.6095 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 77600] reward=-116640028.4 actor_loss=0.3150 critic_loss=119687316697.2121 entropy=17.6096 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 77600] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-624203.0 mean_steps=11.8
|
|
[Episode 77610] reward=-117062935.1 actor_loss=0.3011 critic_loss=123022636646.4000 entropy=17.6145 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 77620] reward=-116677567.0 actor_loss=0.2809 critic_loss=115549634852.5714 entropy=17.6198 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 77620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-627643.2 mean_steps=13.1
|
|
[Episode 77630] reward=-120208553.4 actor_loss=0.2236 critic_loss=121230755498.6667 entropy=17.6215 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 77640] reward=-114643830.3 actor_loss=0.5019 critic_loss=119989979065.3793 entropy=17.6059 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Eval 77640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519911.7 mean_steps=14.3
|
|
[Episode 77650] reward=-116935370.0 actor_loss=0.2571 critic_loss=121481391786.6667 entropy=17.6075 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 77660] reward=-114120066.0 actor_loss=0.2686 critic_loss=115869443772.6316 entropy=17.5907 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 77660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460380.0 mean_steps=14.3
|
|
[Episode 77670] reward=-117481466.9 actor_loss=0.3179 critic_loss=123128424809.4118 entropy=17.5909 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 77680] reward=-113717788.2 actor_loss=0.3716 critic_loss=135890698240.0000 entropy=17.5807 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 77680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-611771.8 mean_steps=11.8
|
|
[Episode 77690] reward=-117618508.7 actor_loss=0.2174 critic_loss=125220101281.6842 entropy=17.5832 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 77700] reward=-115123295.1 actor_loss=0.2932 critic_loss=121686183662.9333 entropy=17.5817 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 77700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-572532.2 mean_steps=13.4
|
|
[Episode 77710] reward=-115873471.6 actor_loss=0.2447 critic_loss=124610305683.9111 entropy=17.5632 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 77720] reward=-117085758.7 actor_loss=0.4038 critic_loss=126622684683.3778 entropy=17.5678 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 77720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-428191.6 mean_steps=14.2
|
|
[Episode 77730] reward=-122329426.7 actor_loss=0.2724 critic_loss=129960529768.2963 entropy=17.5705 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 77740] reward=-124265427.6 actor_loss=0.3538 critic_loss=462618060875.8519 entropy=17.5528 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 77740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467683.6 mean_steps=14.7
|
|
[Episode 77750] reward=-120043516.4 actor_loss=0.1708 critic_loss=227249249393.7778 entropy=17.5595 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 77760] reward=-116908959.2 actor_loss=0.1644 critic_loss=152848228048.5926 entropy=17.5586 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1198 front_blocked=0
|
|
[Eval 77760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-577672.6 mean_steps=14.4
|
|
[Episode 77770] reward=-122361081.0 actor_loss=0.2104 critic_loss=126249981876.1481 entropy=17.5609 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 77780] reward=-114814517.1 actor_loss=0.2034 critic_loss=127149744128.0000 entropy=17.5727 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 77780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-621650.1 mean_steps=11.8
|
|
[Episode 77790] reward=-115561979.9 actor_loss=0.2869 critic_loss=126320460721.2308 entropy=17.5768 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 77800] reward=-115626636.4 actor_loss=0.2167 critic_loss=119306697841.7778 entropy=17.5741 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 77800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581494.9 mean_steps=13.3
|
|
[Episode 77810] reward=-118872865.6 actor_loss=0.3224 critic_loss=131761764705.1035 entropy=17.5723 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 77820] reward=-120304104.1 actor_loss=0.3244 critic_loss=125434196787.2000 entropy=17.5694 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 77820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-593539.5 mean_steps=12.6
|
|
[Episode 77830] reward=-108369081.8 actor_loss=0.2520 critic_loss=108043925771.1304 entropy=17.5784 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 77840] reward=-111805034.0 actor_loss=0.3991 critic_loss=112618109805.7143 entropy=17.5798 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 77840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-420750.0 mean_steps=16.1
|
|
[Episode 77850] reward=-118149944.3 actor_loss=0.3631 critic_loss=124847860736.0000 entropy=17.5807 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 77860] reward=-114635897.7 actor_loss=0.3362 critic_loss=118081827726.2222 entropy=17.5722 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 77860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-622998.4 mean_steps=11.8
|
|
[Episode 77870] reward=-118527673.3 actor_loss=0.2137 critic_loss=123957811835.5862 entropy=17.5738 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 77880] reward=-118690023.7 actor_loss=0.3204 critic_loss=119275384490.6667 entropy=17.5778 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 77880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478702.1 mean_steps=13.8
|
|
[Episode 77890] reward=-111416375.1 actor_loss=0.3099 critic_loss=124192251461.1892 entropy=17.5868 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 77900] reward=-121793739.8 actor_loss=0.2068 critic_loss=120644317424.9412 entropy=17.5841 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 77900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-577173.3 mean_steps=11.4
|
|
[Episode 77910] reward=-116768255.5 actor_loss=0.2738 critic_loss=122524459311.4074 entropy=17.5919 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 77920] reward=-122403229.1 actor_loss=0.2813 critic_loss=127967195136.0000 entropy=17.5906 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 77920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-665165.4 mean_steps=12.8
|
|
[Episode 77930] reward=-117856796.4 actor_loss=0.3258 critic_loss=128562174049.5238 entropy=17.5958 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 77940] reward=-120251471.5 actor_loss=0.3043 critic_loss=124370439203.3103 entropy=17.5903 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 77940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474165.5 mean_steps=13.8
|
|
[Episode 77950] reward=-118209374.7 actor_loss=0.1623 critic_loss=123234488593.0667 entropy=17.5971 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 77960] reward=-119211167.3 actor_loss=0.3958 critic_loss=125074128115.8095 entropy=17.6031 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 77960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-622923.9 mean_steps=13.7
|
|
[Episode 77970] reward=-113006773.4 actor_loss=0.4122 critic_loss=122782233031.1111 entropy=17.6008 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 77980] reward=-123913893.3 actor_loss=0.2854 critic_loss=132242104320.0000 entropy=17.5977 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 77980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502709.4 mean_steps=14.2
|
|
[Episode 77990] reward=-117717974.5 actor_loss=0.3126 critic_loss=124639228489.1429 entropy=17.6012 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 78000] reward=-114459608.2 actor_loss=0.2692 critic_loss=121573069824.0000 entropy=17.6013 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 78000] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-656571.2 mean_steps=11.4
|
|
[Episode 78010] reward=-568536546.8 actor_loss=3.8813 critic_loss=660972124154538.6250 entropy=17.6081 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 78020] reward=-119692212.7 actor_loss=0.3031 critic_loss=126178255710.3158 entropy=17.6107 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 78020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395542.5 mean_steps=16.1
|
|
[Episode 78030] reward=-114462905.2 actor_loss=0.2813 critic_loss=118745509152.8205 entropy=17.6081 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 78040] reward=-117799396.2 actor_loss=0.3050 critic_loss=122598917461.3333 entropy=17.5901 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 78040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-580377.6 mean_steps=13.4
|
|
[Episode 78050] reward=-120276657.5 actor_loss=0.2560 critic_loss=129698861511.1111 entropy=17.5895 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 78060] reward=-121253386.0 actor_loss=0.2410 critic_loss=130687764480.0000 entropy=17.6012 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 78060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541502.8 mean_steps=13.9
|
|
[Episode 78070] reward=-114173877.2 actor_loss=0.2689 critic_loss=121538847175.1111 entropy=17.5968 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 78080] reward=-113901495.5 actor_loss=0.4327 critic_loss=117120676249.6000 entropy=17.5862 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 78080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-475140.6 mean_steps=13.1
|
|
[Episode 78090] reward=-114880455.8 actor_loss=0.2092 critic_loss=116880552163.5556 entropy=17.5933 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 78100] reward=-115056941.5 actor_loss=0.3026 critic_loss=120852187515.2593 entropy=17.5893 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 78100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-692941.8 mean_steps=13.2
|
|
[Episode 78110] reward=-120395958.6 actor_loss=0.2631 critic_loss=126157158263.4667 entropy=17.5943 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 78120] reward=-114224736.7 actor_loss=0.3723 critic_loss=123592303191.4146 entropy=17.5992 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 78120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390373.8 mean_steps=15.2
|
|
[Episode 78130] reward=-118891006.9 actor_loss=0.3002 critic_loss=127086181229.7143 entropy=17.6023 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 78140] reward=-114116301.7 actor_loss=0.2842 critic_loss=117750625803.3778 entropy=17.5981 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 78140] success_rate=0.750 qp_infeasible_rate=0.250 mean_return=-152576.1 mean_steps=19.4
|
|
[Episode 78150] reward=-116525778.6 actor_loss=0.3164 critic_loss=117927029418.6667 entropy=17.6052 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 78160] reward=-121972025.9 actor_loss=0.1841 critic_loss=133078290216.4211 entropy=17.6098 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 78160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445379.9 mean_steps=15.2
|
|
[Episode 78170] reward=-119333909.5 actor_loss=0.3223 critic_loss=126311624169.7391 entropy=17.6111 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 78180] reward=-120530068.6 actor_loss=0.3022 critic_loss=128410911012.5714 entropy=17.6041 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 78180] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-694291.4 mean_steps=11.7
|
|
[Episode 78190] reward=-114383451.0 actor_loss=0.2907 critic_loss=122988957013.3333 entropy=17.6017 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 78200] reward=-122011465.5 actor_loss=0.2910 critic_loss=172743148885.3333 entropy=17.6005 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 78200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394614.8 mean_steps=16.0
|
|
[Episode 78210] reward=-112959314.7 actor_loss=0.2682 critic_loss=116528253610.6667 entropy=17.6098 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 78220] reward=-125086765.1 actor_loss=0.2450 critic_loss=148853857075.2000 entropy=17.6092 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 78220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-403803.5 mean_steps=15.2
|
|
[Episode 78230] reward=-161391957.0 actor_loss=1.9913 critic_loss=5649338323316.3633 entropy=17.6053 approx_kl=0.0040 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 78240] reward=-118213181.7 actor_loss=0.3515 critic_loss=119928295664.9412 entropy=17.6119 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 78240] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-626183.7 mean_steps=10.9
|
|
[Episode 78250] reward=-121879214.8 actor_loss=0.2291 critic_loss=213307145584.6400 entropy=17.6084 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 78260] reward=-119943505.7 actor_loss=0.3156 critic_loss=127556649728.0000 entropy=17.6040 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 78260] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-362254.0 mean_steps=17.9
|
|
[Episode 78270] reward=-117410948.2 actor_loss=0.2377 critic_loss=135632380928.0000 entropy=17.6072 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 78280] reward=-120395426.1 actor_loss=0.2767 critic_loss=128492853248.0000 entropy=17.6266 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 78280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-392032.9 mean_steps=17.1
|
|
[Episode 78290] reward=-114556634.1 actor_loss=0.2836 critic_loss=119054728165.7436 entropy=17.6116 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 78300] reward=-115556798.4 actor_loss=0.4300 critic_loss=124900360704.0000 entropy=17.5828 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 78300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-391738.9 mean_steps=15.3
|
|
[Episode 78310] reward=-115983412.6 actor_loss=0.3963 critic_loss=128053412022.0444 entropy=17.5759 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 78320] reward=-122109585.3 actor_loss=0.2850 critic_loss=126798341529.6000 entropy=17.5773 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 78320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400924.3 mean_steps=16.1
|
|
[Episode 78330] reward=-122078411.8 actor_loss=0.3088 critic_loss=126238421265.0667 entropy=17.5607 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 78340] reward=-119539955.6 actor_loss=0.1862 critic_loss=129052525129.1429 entropy=17.5715 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 78340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410325.6 mean_steps=15.3
|
|
[Episode 78350] reward=-121207290.2 actor_loss=0.2269 critic_loss=129400069363.8095 entropy=17.5711 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 78360] reward=-123454464.1 actor_loss=0.2990 critic_loss=129499692145.7778 entropy=17.5590 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 78360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-576620.5 mean_steps=12.2
|
|
[Episode 78370] reward=-121646348.7 actor_loss=0.2824 critic_loss=130387976874.6667 entropy=17.5473 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 78380] reward=-116193728.8 actor_loss=0.2355 critic_loss=124128895291.0769 entropy=17.5475 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 78380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488417.5 mean_steps=13.9
|
|
[Episode 78390] reward=-119833464.7 actor_loss=0.3629 critic_loss=126042612882.2857 entropy=17.5537 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 78400] reward=-119965722.5 actor_loss=0.3285 critic_loss=125879334353.4545 entropy=17.5479 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 78400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510872.8 mean_steps=13.9
|
|
[Episode 78410] reward=-113846152.3 actor_loss=0.3501 critic_loss=119534867636.7059 entropy=17.5543 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 78420] reward=-122603007.3 actor_loss=0.2950 critic_loss=131062286238.4762 entropy=17.5535 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 78420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-556759.9 mean_steps=12.2
|
|
[Episode 78430] reward=-114530287.8 actor_loss=0.4500 critic_loss=117836669838.2222 entropy=17.5543 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 78440] reward=-116452799.6 actor_loss=0.2527 critic_loss=116316322201.6000 entropy=17.5450 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 78440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-565165.0 mean_steps=14.2
|
|
[Episode 78450] reward=-122642330.6 actor_loss=0.2674 critic_loss=129397604923.5349 entropy=17.5303 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 78460] reward=-121624039.5 actor_loss=0.3437 critic_loss=146134063616.0000 entropy=17.5333 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 78460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508289.7 mean_steps=13.8
|
|
[Episode 78470] reward=-117715037.4 actor_loss=0.3110 critic_loss=121601279590.4000 entropy=17.5313 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 78480] reward=-113433393.5 actor_loss=0.3145 critic_loss=121156170618.4348 entropy=17.5350 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 78480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512434.4 mean_steps=14.2
|
|
[Episode 78490] reward=-124908806.5 actor_loss=0.3090 critic_loss=195182514449.0667 entropy=17.5444 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 78500] reward=-115934647.0 actor_loss=0.3967 critic_loss=153158530228.7059 entropy=17.5509 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 78500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480388.7 mean_steps=15.0
|
|
[Episode 78510] reward=-119559024.2 actor_loss=0.2872 critic_loss=127852104448.0000 entropy=17.5532 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 78520] reward=-119949942.3 actor_loss=0.2412 critic_loss=121635096628.5128 entropy=17.5441 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 78520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-466874.5 mean_steps=15.4
|
|
[Episode 78530] reward=-117918299.5 actor_loss=0.3402 critic_loss=125516449148.3428 entropy=17.5197 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 78540] reward=-119577181.4 actor_loss=0.2909 critic_loss=122914130375.1111 entropy=17.5186 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 78540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-414620.1 mean_steps=16.1
|
|
[Episode 78550] reward=-118338393.2 actor_loss=0.3006 critic_loss=124205061157.9259 entropy=17.4970 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 78560] reward=-121852147.6 actor_loss=0.3242 critic_loss=123138154760.2581 entropy=17.4890 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 78560] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-360807.2 mean_steps=16.9
|
|
[Episode 78570] reward=-116488947.8 actor_loss=0.3550 critic_loss=121020662215.1111 entropy=17.4930 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 78580] reward=-114879086.2 actor_loss=0.2784 critic_loss=115844919808.0000 entropy=17.4879 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 78580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386549.5 mean_steps=16.4
|
|
[Episode 78590] reward=-118161759.5 actor_loss=0.2602 critic_loss=121863650846.1176 entropy=17.5008 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 78600] reward=-122796824.4 actor_loss=0.2726 critic_loss=130756610642.5806 entropy=17.5121 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 78600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-537879.8 mean_steps=15.2
|
|
[Episode 78610] reward=-117121284.8 actor_loss=0.3177 critic_loss=122607439241.8462 entropy=17.5070 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 78620] reward=-117105799.5 actor_loss=0.3418 critic_loss=137284989747.2000 entropy=17.5153 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 78620] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-680409.2 mean_steps=12.2
|
|
[Episode 78630] reward=-119083939.6 actor_loss=0.2217 critic_loss=119097437388.8000 entropy=17.5127 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 78640] reward=-119689104.3 actor_loss=0.2711 critic_loss=131571130777.6000 entropy=17.5142 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 78640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-491355.2 mean_steps=12.8
|
|
[Episode 78650] reward=-120186892.5 actor_loss=0.3251 critic_loss=123694069841.9200 entropy=17.5120 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 78660] reward=-117528179.8 actor_loss=0.2942 critic_loss=121264591667.2000 entropy=17.5063 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 78660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-538978.7 mean_steps=15.1
|
|
[Episode 78670] reward=-117009374.6 actor_loss=0.3595 critic_loss=122824870656.0000 entropy=17.4964 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 78680] reward=-120782530.6 actor_loss=0.2461 critic_loss=128513110388.3636 entropy=17.5146 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 78680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-537678.0 mean_steps=14.8
|
|
[Episode 78690] reward=-125244009.6 actor_loss=0.2715 critic_loss=485863394872.8889 entropy=17.5099 approx_kl=0.0044 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 78700] reward=-117315170.6 actor_loss=0.3018 critic_loss=136897594768.6956 entropy=17.5135 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 78700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610486.8 mean_steps=12.8
|
|
[Episode 78710] reward=-116372711.6 actor_loss=0.3359 critic_loss=139884902536.5333 entropy=17.5340 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 78720] reward=-114831716.7 actor_loss=0.2122 critic_loss=130507175362.5600 entropy=17.5290 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 78720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483673.7 mean_steps=14.2
|
|
[Episode 78730] reward=-114540520.1 actor_loss=0.3458 critic_loss=120412428060.4444 entropy=17.5415 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 78740] reward=-120591804.9 actor_loss=0.3438 critic_loss=271746420053.3333 entropy=17.5463 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 78740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-552547.0 mean_steps=11.6
|
|
[Episode 78750] reward=-118366571.7 actor_loss=0.3281 critic_loss=123297525225.7391 entropy=17.5547 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 78760] reward=-115821320.6 actor_loss=0.2999 critic_loss=117758290992.7619 entropy=17.5627 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 78760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-666907.1 mean_steps=13.1
|
|
[Episode 78770] reward=-122942645.9 actor_loss=0.2523 critic_loss=127930198278.5641 entropy=17.5474 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 78780] reward=-117114835.1 actor_loss=0.2924 critic_loss=118077887017.5135 entropy=17.5421 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 78780] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-334721.1 mean_steps=15.7
|
|
[Episode 78790] reward=-118091139.2 actor_loss=0.2918 critic_loss=121617992635.7333 entropy=17.5284 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 78800] reward=-113499157.0 actor_loss=0.3354 critic_loss=118987202932.3636 entropy=17.5309 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 78800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-584474.7 mean_steps=14.8
|
|
[Episode 78810] reward=-110937020.9 actor_loss=0.3063 critic_loss=114586207232.0000 entropy=17.5131 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 78820] reward=-112858430.4 actor_loss=0.2932 critic_loss=118473682488.8889 entropy=17.5253 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 78820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-631057.1 mean_steps=13.1
|
|
[Episode 78830] reward=-111472586.3 actor_loss=0.3563 critic_loss=116463589239.4667 entropy=17.5218 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 78840] reward=-113118661.1 actor_loss=0.2992 critic_loss=122281068134.4000 entropy=17.5197 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 78840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623321.2 mean_steps=12.2
|
|
[Episode 78850] reward=-113683054.8 actor_loss=0.2371 critic_loss=118031200555.7073 entropy=17.5266 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 78860] reward=-120634802.7 actor_loss=0.2974 critic_loss=127391663640.3810 entropy=17.5210 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 78860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-336634.2 mean_steps=15.7
|
|
[Episode 78870] reward=-119302255.4 actor_loss=0.2461 critic_loss=121482718286.7692 entropy=17.5256 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 78880] reward=-110159726.7 actor_loss=0.3060 critic_loss=115389302460.6316 entropy=17.5385 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 78880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-388185.1 mean_steps=16.4
|
|
[Episode 78890] reward=-119522983.1 actor_loss=0.2982 critic_loss=119158593156.7407 entropy=17.5397 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 78900] reward=-110761019.0 actor_loss=0.2848 critic_loss=118809293619.2000 entropy=17.5370 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 78900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565516.5 mean_steps=13.2
|
|
[Episode 78910] reward=-115150521.2 actor_loss=0.1923 critic_loss=122014550698.6667 entropy=17.5345 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 78920] reward=-114070870.4 actor_loss=0.3244 critic_loss=124573295567.2381 entropy=17.5282 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 78920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-653767.4 mean_steps=12.9
|
|
[Episode 78930] reward=-113874255.1 actor_loss=0.3033 critic_loss=115276063092.3636 entropy=17.5303 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 78940] reward=-115419001.3 actor_loss=0.2250 critic_loss=116273042022.4000 entropy=17.5618 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 78940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-401443.9 mean_steps=14.9
|
|
[Episode 78950] reward=-114800870.9 actor_loss=0.2797 critic_loss=123007429973.3333 entropy=17.5625 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 78960] reward=-120833949.3 actor_loss=0.3231 critic_loss=159657471759.0588 entropy=17.5546 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 78960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555773.0 mean_steps=13.2
|
|
[Episode 78970] reward=-120370349.3 actor_loss=0.2441 critic_loss=129396630702.8293 entropy=17.5653 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 78980] reward=-120706455.5 actor_loss=0.2719 critic_loss=132247433216.0000 entropy=17.5659 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 78980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-544448.9 mean_steps=11.6
|
|
[Episode 78990] reward=-732056492.9 actor_loss=3.8365 critic_loss=496804084842496.0000 entropy=17.5689 approx_kl=0.0031 kl_stop=1 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 79000] reward=-114570067.1 actor_loss=0.3189 critic_loss=123066071040.0000 entropy=17.5695 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 79000] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-666486.7 mean_steps=11.4
|
|
[Episode 79010] reward=-118068334.1 actor_loss=0.1738 critic_loss=115745491148.8000 entropy=17.5785 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 79020] reward=-115634863.8 actor_loss=0.3119 critic_loss=116668838103.5789 entropy=17.5853 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 79020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519261.0 mean_steps=14.1
|
|
[Episode 79030] reward=-119552610.3 actor_loss=0.2235 critic_loss=133396574012.9524 entropy=17.5908 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 79040] reward=-116513195.9 actor_loss=0.2221 critic_loss=121379202914.4615 entropy=17.5948 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 79040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544016.5 mean_steps=13.3
|
|
[Episode 79050] reward=-115179908.8 actor_loss=0.2869 critic_loss=120012740783.5429 entropy=17.6017 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 79060] reward=-112346759.2 actor_loss=0.3809 critic_loss=120230662400.0000 entropy=17.6021 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 79060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-573708.5 mean_steps=11.7
|
|
[Episode 79070] reward=-114867850.2 actor_loss=0.3216 critic_loss=132766055168.0000 entropy=17.5958 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 79080] reward=-1805856307.8 actor_loss=4.1289 critic_loss=6945761303671603.0000 entropy=17.6009 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 79080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462040.8 mean_steps=13.7
|
|
[Episode 79090] reward=-113163711.9 actor_loss=0.3403 critic_loss=114122667349.3333 entropy=17.5912 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 79100] reward=-113693211.1 actor_loss=0.3496 critic_loss=117700044032.0000 entropy=17.6020 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 79100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603081.3 mean_steps=12.8
|
|
[Episode 79110] reward=-112888828.0 actor_loss=0.4333 critic_loss=122072294223.4483 entropy=17.6012 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 79120] reward=-113383151.0 actor_loss=0.3818 critic_loss=122119760802.9091 entropy=17.6061 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 79120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565748.3 mean_steps=13.2
|
|
[Episode 79130] reward=-117572127.4 actor_loss=0.3106 critic_loss=116043109120.0000 entropy=17.6164 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 79140] reward=-120178205.2 actor_loss=0.2138 critic_loss=128479395840.0000 entropy=17.6064 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 79140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487400.1 mean_steps=14.1
|
|
[Episode 79150] reward=-180757889.2 actor_loss=0.5847 critic_loss=16720035708928.0000 entropy=17.5843 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 79160] reward=-133487894.9 actor_loss=0.2199 critic_loss=1204571676344.3201 entropy=17.5812 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 79160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-622070.9 mean_steps=11.7
|
|
[Episode 79170] reward=-115640676.3 actor_loss=0.2936 critic_loss=118890300047.3600 entropy=17.5883 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 79180] reward=-117444766.2 actor_loss=0.3000 critic_loss=126651233621.3333 entropy=17.5975 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 79180] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-579008.5 mean_steps=11.4
|
|
[Episode 79190] reward=-405555027.3 actor_loss=0.2624 critic_loss=208186676828886.7188 entropy=17.6124 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 79200] reward=-112075934.8 actor_loss=0.3794 critic_loss=131478809693.0909 entropy=17.6136 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 79200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-503368.9 mean_steps=13.2
|
|
[Episode 79210] reward=-114880103.7 actor_loss=0.3278 critic_loss=119860557716.2105 entropy=17.6035 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 79220] reward=-123114051.3 actor_loss=0.2489 critic_loss=169343407445.3333 entropy=17.5938 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 79220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-472602.9 mean_steps=12.8
|
|
[Episode 79230] reward=-115827443.6 actor_loss=0.2797 critic_loss=126354123707.7333 entropy=17.6002 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 79240] reward=-115794665.7 actor_loss=0.3286 critic_loss=118308719820.8000 entropy=17.5997 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 79240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517519.5 mean_steps=13.7
|
|
[Episode 79250] reward=-119145914.9 actor_loss=0.2495 critic_loss=129235563642.8800 entropy=17.5915 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 79260] reward=-114863617.1 actor_loss=0.3180 critic_loss=122005253916.4444 entropy=17.5972 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 79260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-394448.5 mean_steps=14.9
|
|
[Episode 79270] reward=-116720198.1 actor_loss=0.3764 critic_loss=115334328320.0000 entropy=17.5828 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 79280] reward=-114550741.4 actor_loss=0.3305 critic_loss=119129342862.2222 entropy=17.5959 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 79280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-473598.1 mean_steps=15.3
|
|
[Episode 79290] reward=-116973915.1 actor_loss=0.3288 critic_loss=120493989341.8667 entropy=17.5972 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 79300] reward=-120342530.3 actor_loss=0.2671 critic_loss=141540959027.2000 entropy=17.5892 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 79300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527244.9 mean_steps=13.7
|
|
[Episode 79310] reward=-116192976.5 actor_loss=0.2990 critic_loss=137669409541.6889 entropy=17.5933 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 79320] reward=-134390203.2 actor_loss=0.2809 critic_loss=1984015710435.5557 entropy=17.5891 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 79320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434845.1 mean_steps=15.5
|
|
[Episode 79330] reward=-121519449.4 actor_loss=0.2680 critic_loss=132593695322.3529 entropy=17.5923 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 79340] reward=-114492583.7 actor_loss=0.2334 critic_loss=121438003590.0952 entropy=17.5980 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 79340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559162.3 mean_steps=13.3
|
|
[Episode 79350] reward=-121549477.2 actor_loss=0.3436 critic_loss=131304328138.1053 entropy=17.6042 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 79360] reward=-112503058.0 actor_loss=0.2700 critic_loss=120017416465.0667 entropy=17.5937 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 79360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-672013.7 mean_steps=12.4
|
|
[Episode 79370] reward=-118916108.0 actor_loss=0.3740 critic_loss=124107090106.1818 entropy=17.5915 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 79380] reward=-116815663.3 actor_loss=0.3312 critic_loss=124707280709.8182 entropy=17.5980 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 79380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-478062.6 mean_steps=12.7
|
|
[Episode 79390] reward=-121105632.8 actor_loss=0.3537 critic_loss=132026816698.1818 entropy=17.5986 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 79400] reward=-114126453.1 actor_loss=0.3589 critic_loss=127704341115.5862 entropy=17.6023 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 79400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475660.3 mean_steps=13.7
|
|
[Episode 79410] reward=-1501310163.0 actor_loss=2.1463 critic_loss=5065273170198528.0000 entropy=17.5942 approx_kl=0.0307 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 79420] reward=-111001801.1 actor_loss=0.3087 critic_loss=123373504284.4444 entropy=17.6091 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 79420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479478.2 mean_steps=13.7
|
|
[Episode 79430] reward=-116604080.6 actor_loss=0.2882 critic_loss=125148447582.3158 entropy=17.6139 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 79440] reward=-115429622.1 actor_loss=0.3633 critic_loss=119221737927.1111 entropy=17.6090 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 79440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478674.7 mean_steps=13.7
|
|
[Episode 79450] reward=-920707564.1 actor_loss=1.9733 critic_loss=1735784664995653.7500 entropy=17.6008 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 79460] reward=-115200630.6 actor_loss=0.4294 critic_loss=121831327607.4667 entropy=17.6129 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 79460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-557324.6 mean_steps=14.2
|
|
[Episode 79470] reward=-113495912.9 actor_loss=0.2591 critic_loss=116889395017.9556 entropy=17.6138 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 79480] reward=-119041168.4 actor_loss=0.3001 critic_loss=121417341048.4706 entropy=17.6227 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 79480] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-648136.8 mean_steps=10.8
|
|
[Episode 79490] reward=-116349941.4 actor_loss=0.2892 critic_loss=127811867841.7297 entropy=17.6178 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 79500] reward=-121124080.4 actor_loss=0.2476 critic_loss=128726289066.6667 entropy=17.6254 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 79500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539353.8 mean_steps=13.8
|
|
[Episode 79510] reward=-115626024.5 actor_loss=0.2914 critic_loss=121019648000.0000 entropy=17.6246 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 79520] reward=-118502481.8 actor_loss=0.2297 critic_loss=129455548229.8182 entropy=17.6383 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 79520] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-547646.4 mean_steps=11.6
|
|
[Episode 79530] reward=-116536040.1 actor_loss=0.3149 critic_loss=127081353588.3636 entropy=17.6449 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 79540] reward=-3756184862.2 actor_loss=0.9521 critic_loss=29969142704804424.0000 entropy=17.6408 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 79540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586811.4 mean_steps=12.6
|
|
[Episode 79550] reward=-115586394.4 actor_loss=0.2787 critic_loss=122208983267.5556 entropy=17.6514 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 79560] reward=-114294029.8 actor_loss=0.3584 critic_loss=125087715228.0976 entropy=17.6645 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 79560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509216.9 mean_steps=13.8
|
|
[Episode 79570] reward=-120493464.2 actor_loss=0.2355 critic_loss=128772564542.4390 entropy=17.6681 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 79580] reward=-7505655003.0 actor_loss=134.1150 critic_loss=44849198482325504.0000 entropy=17.6701 approx_kl=0.0198 kl_stop=1 intervention_rate=0.1074 front_blocked=0
|
|
[Eval 79580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-684517.6 mean_steps=12.5
|
|
[Episode 79590] reward=-230672091.2 actor_loss=1.3937 critic_loss=43537431498296.8906 entropy=17.6618 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 79600] reward=-114390658.4 actor_loss=0.2132 critic_loss=127663932672.0000 entropy=17.6570 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 79600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416632.7 mean_steps=15.3
|
|
[Episode 79610] reward=-1778448184.7 actor_loss=132.7672 critic_loss=6580112269403750.0000 entropy=17.6551 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 79620] reward=-117632978.3 actor_loss=0.3404 critic_loss=124552854291.6923 entropy=17.6602 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 79620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402089.8 mean_steps=16.1
|
|
[Episode 79630] reward=-113959769.3 actor_loss=0.3018 critic_loss=124050455893.3333 entropy=17.6470 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 79640] reward=-146098306.2 actor_loss=0.3609 critic_loss=4820019102260.9658 entropy=17.6461 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 79640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-638430.5 mean_steps=12.9
|
|
[Episode 79650] reward=-110394973.4 actor_loss=0.3417 critic_loss=118049496177.7778 entropy=17.6280 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 79660] reward=-117454350.1 actor_loss=0.2815 critic_loss=130165546825.9556 entropy=17.6318 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 79660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590524.3 mean_steps=13.1
|
|
[Episode 79670] reward=-116680805.2 actor_loss=0.2593 critic_loss=119741784792.1778 entropy=17.6247 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 79680] reward=-120729269.2 actor_loss=0.2246 critic_loss=127430178708.2105 entropy=17.6157 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 79680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510817.4 mean_steps=14.1
|
|
[Episode 79690] reward=-123716317.4 actor_loss=0.2573 critic_loss=132500598784.0000 entropy=17.6132 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 79700] reward=-116392196.3 actor_loss=0.3377 critic_loss=120020349123.0476 entropy=17.6203 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 79700] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-749202.4 mean_steps=10.9
|
|
[Episode 79710] reward=-117099660.2 actor_loss=0.2049 critic_loss=124850340271.1579 entropy=17.6315 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 79720] reward=-117707178.0 actor_loss=0.2710 critic_loss=123311488477.8667 entropy=17.6158 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 79720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552914.4 mean_steps=13.4
|
|
[Episode 79730] reward=-117532941.9 actor_loss=0.3664 critic_loss=125355030795.1304 entropy=17.5921 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 79740] reward=-120332963.3 actor_loss=0.1712 critic_loss=125053539669.3333 entropy=17.5848 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 79740] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-673703.4 mean_steps=10.5
|
|
[Episode 79750] reward=-118109461.7 actor_loss=0.1905 critic_loss=126221063509.3333 entropy=17.5885 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 79760] reward=-117448539.2 actor_loss=0.2628 critic_loss=119824204868.2667 entropy=17.5829 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 79760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584339.3 mean_steps=12.7
|
|
[Episode 79770] reward=-113998680.1 actor_loss=0.3820 critic_loss=123293877134.2222 entropy=17.5828 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 79780] reward=-117079276.8 actor_loss=0.4081 critic_loss=125139568932.5714 entropy=17.5887 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 79780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-570062.5 mean_steps=14.6
|
|
[Episode 79790] reward=-114357787.6 actor_loss=0.3468 critic_loss=118587951033.3793 entropy=17.5850 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 79800] reward=-117798091.4 actor_loss=0.1752 critic_loss=123157781805.1765 entropy=17.5826 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 79800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419993.3 mean_steps=15.3
|
|
[Episode 79810] reward=-112584976.7 actor_loss=0.3422 critic_loss=113283894784.0000 entropy=17.5921 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 79820] reward=-113865066.9 actor_loss=0.2997 critic_loss=113599149124.2667 entropy=17.6029 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 79820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-637777.7 mean_steps=12.0
|
|
[Episode 79830] reward=-121841339.5 actor_loss=0.2670 critic_loss=137405005467.8261 entropy=17.6023 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 79840] reward=-117398906.6 actor_loss=0.1986 critic_loss=119928289572.5714 entropy=17.6024 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 79840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-548763.2 mean_steps=12.3
|
|
[Episode 79850] reward=-117941759.7 actor_loss=0.3084 critic_loss=123978272589.9130 entropy=17.5963 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 79860] reward=-119354373.7 actor_loss=0.3215 critic_loss=175002642750.5778 entropy=17.5971 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 79860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552240.6 mean_steps=13.7
|
|
[Episode 79870] reward=-117400140.6 actor_loss=0.2974 critic_loss=120232966702.5455 entropy=17.6113 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 79880] reward=-113752264.7 actor_loss=0.3729 critic_loss=116792485205.3333 entropy=17.5963 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 79880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-608866.5 mean_steps=13.7
|
|
[Episode 79890] reward=-112753459.8 actor_loss=0.2449 critic_loss=121004246357.3333 entropy=17.5931 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 79900] reward=-290110237.7 actor_loss=16.0825 critic_loss=100050074075136.0000 entropy=17.5952 approx_kl=0.0029 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 79900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515947.2 mean_steps=12.8
|
|
[Episode 79910] reward=-119844219.4 actor_loss=0.2814 critic_loss=126731942479.6444 entropy=17.6028 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 79920] reward=-225207078.0 actor_loss=4.4930 critic_loss=34496944257979.7344 entropy=17.6035 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 79920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449869.0 mean_steps=14.4
|
|
[Episode 79930] reward=-116574535.8 actor_loss=0.2705 critic_loss=120352233179.4286 entropy=17.5940 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 79940] reward=-116406930.5 actor_loss=0.2388 critic_loss=120760217384.4211 entropy=17.6189 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 79940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450801.4 mean_steps=14.3
|
|
[Episode 79950] reward=-111655402.0 actor_loss=0.3526 critic_loss=114412208990.3158 entropy=17.6134 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 79960] reward=-120156696.5 actor_loss=0.2588 critic_loss=126379785420.8000 entropy=17.6091 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 79960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440643.9 mean_steps=14.7
|
|
[Episode 79970] reward=-122003652.0 actor_loss=0.2953 critic_loss=127199510970.8108 entropy=17.6091 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 79980] reward=-124140428.2 actor_loss=0.2544 critic_loss=146590381397.3333 entropy=17.6087 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 79980] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-314199.4 mean_steps=17.8
|
|
[Episode 79990] reward=-120254247.5 actor_loss=0.2648 critic_loss=131417648150.7556 entropy=17.6186 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 80000] reward=-114853376.1 actor_loss=0.2825 critic_loss=120987233657.2632 entropy=17.6437 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 80000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481168.8 mean_steps=15.2
|
|
[Episode 80010] reward=-125080378.0 actor_loss=0.4109 critic_loss=200288723334.0952 entropy=17.6463 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 80020] reward=-110256984.5 actor_loss=0.3410 critic_loss=116753233664.0000 entropy=17.6436 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 80020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610723.0 mean_steps=12.9
|
|
[Episode 80030] reward=-117247526.7 actor_loss=0.2679 critic_loss=120420326149.6889 entropy=17.6396 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 80040] reward=-118766894.2 actor_loss=0.3198 critic_loss=133173145314.2326 entropy=17.6374 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 80040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-561673.0 mean_steps=14.7
|
|
[Episode 80050] reward=-120629549.8 actor_loss=0.2851 critic_loss=139435615300.2667 entropy=17.6366 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 80060] reward=-1813009242.9 actor_loss=1.2939 critic_loss=6034266513211392.0000 entropy=17.6331 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 80060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-632585.8 mean_steps=12.7
|
|
[Episode 80070] reward=-122752285.2 actor_loss=0.2675 critic_loss=184784204893.0909 entropy=17.6275 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 80080] reward=-120631447.4 actor_loss=0.2311 critic_loss=126366096497.7778 entropy=17.6341 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 80080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528959.7 mean_steps=14.2
|
|
[Episode 80090] reward=-115035450.1 actor_loss=0.3045 critic_loss=184472326567.7242 entropy=17.6268 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 80100] reward=-142965189.2 actor_loss=0.3322 critic_loss=2952014078904.5581 entropy=17.6267 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 80100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-584503.9 mean_steps=14.3
|
|
[Episode 80110] reward=-115762145.5 actor_loss=0.3191 critic_loss=118850364302.2222 entropy=17.6238 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 80120] reward=-124763548.5 actor_loss=0.3137 critic_loss=218922749132.8000 entropy=17.6359 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 80120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493102.7 mean_steps=14.0
|
|
[Episode 80130] reward=-115403902.5 actor_loss=0.2838 critic_loss=117285446724.2667 entropy=17.6385 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 80140] reward=-118478971.4 actor_loss=0.3170 critic_loss=123120388050.4889 entropy=17.6486 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 80140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536051.2 mean_steps=14.1
|
|
[Episode 80150] reward=-116434587.7 actor_loss=0.4044 critic_loss=127842410868.3636 entropy=17.6467 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 80160] reward=-116602870.7 actor_loss=0.3630 critic_loss=120676531404.8000 entropy=17.6387 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 80160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551625.3 mean_steps=13.2
|
|
[Episode 80170] reward=-119739313.7 actor_loss=0.2680 critic_loss=126528881049.6000 entropy=17.6422 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 80180] reward=-115209714.3 actor_loss=0.4059 critic_loss=121370502576.3556 entropy=17.6384 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 80180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451335.3 mean_steps=15.8
|
|
[Episode 80190] reward=-118221959.7 actor_loss=0.2694 critic_loss=131378272841.1429 entropy=17.6417 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 80200] reward=-121854232.6 actor_loss=0.3398 critic_loss=131399090380.8000 entropy=17.6491 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 80200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-545709.8 mean_steps=14.2
|
|
[Episode 80210] reward=-119548693.6 actor_loss=0.2672 critic_loss=147084068376.3810 entropy=17.6599 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 80220] reward=-115154766.8 actor_loss=0.3970 critic_loss=120317356259.5556 entropy=17.6662 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 80220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433596.5 mean_steps=14.6
|
|
[Episode 80230] reward=-111028833.2 actor_loss=0.3880 critic_loss=116946989147.0222 entropy=17.6595 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 80240] reward=-116928452.8 actor_loss=0.1969 critic_loss=118952273874.4889 entropy=17.6633 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 80240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536601.3 mean_steps=13.0
|
|
[Episode 80250] reward=-119892506.4 actor_loss=0.3569 critic_loss=185644477488.7619 entropy=17.6495 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 80260] reward=-119333175.6 actor_loss=0.3448 critic_loss=128338918695.8222 entropy=17.6499 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 80260] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-362052.7 mean_steps=16.8
|
|
[Episode 80270] reward=-113834983.7 actor_loss=0.3274 critic_loss=121946342951.3846 entropy=17.6369 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 80280] reward=-120670561.9 actor_loss=0.2668 critic_loss=142310698188.8000 entropy=17.6414 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 80280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558432.5 mean_steps=13.3
|
|
[Episode 80290] reward=-118232318.1 actor_loss=0.3782 critic_loss=121980645251.8788 entropy=17.6206 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 80300] reward=-116087626.1 actor_loss=0.4692 critic_loss=120222494915.0476 entropy=17.6282 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 80300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-447138.3 mean_steps=13.8
|
|
[Episode 80310] reward=-118297274.6 actor_loss=0.3414 critic_loss=123187037758.4390 entropy=17.6258 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 80320] reward=-114879654.7 actor_loss=0.2615 critic_loss=116242887052.3871 entropy=17.6241 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 80320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-568004.2 mean_steps=12.5
|
|
[Episode 80330] reward=-118193754.1 actor_loss=0.3804 critic_loss=130357139319.4667 entropy=17.6202 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 80340] reward=-116193026.2 actor_loss=0.2922 critic_loss=149566511634.9630 entropy=17.6096 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 80340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-656756.9 mean_steps=12.2
|
|
[Episode 80350] reward=-119349018.7 actor_loss=0.3597 critic_loss=147302672185.8065 entropy=17.6092 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 80360] reward=-120953306.8 actor_loss=0.3786 critic_loss=147295765754.3111 entropy=17.6098 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 80360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-620772.6 mean_steps=13.0
|
|
[Episode 80370] reward=-117473277.9 actor_loss=0.2545 critic_loss=145039655692.1905 entropy=17.6347 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 80380] reward=-120555540.0 actor_loss=0.2643 critic_loss=205321789622.0444 entropy=17.6328 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 80380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613007.9 mean_steps=13.2
|
|
[Episode 80390] reward=-126436504.8 actor_loss=0.3771 critic_loss=246288587836.2353 entropy=17.6292 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 80400] reward=-121927405.5 actor_loss=0.2466 critic_loss=128265909953.4222 entropy=17.6308 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 80400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-373810.9 mean_steps=16.2
|
|
[Episode 80410] reward=-122485901.3 actor_loss=0.2917 critic_loss=125723051986.4889 entropy=17.6192 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 80420] reward=-120819921.9 actor_loss=0.2669 critic_loss=130767065460.3636 entropy=17.6078 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 80420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-550787.0 mean_steps=12.8
|
|
[Episode 80430] reward=-116713698.8 actor_loss=0.2969 critic_loss=117292660963.5556 entropy=17.6102 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 80440] reward=-117911993.4 actor_loss=0.3361 critic_loss=125319622469.8182 entropy=17.5975 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 80440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599548.1 mean_steps=12.8
|
|
[Episode 80450] reward=-122664916.7 actor_loss=0.2627 critic_loss=130400866125.9130 entropy=17.5890 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 80460] reward=-114502354.3 actor_loss=0.3631 critic_loss=119741739953.2308 entropy=17.5926 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 80460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-422391.0 mean_steps=16.5
|
|
[Episode 80470] reward=-114905709.6 actor_loss=0.3310 critic_loss=119048161416.5333 entropy=17.5732 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 80480] reward=-113978303.8 actor_loss=0.3542 critic_loss=121023709184.0000 entropy=17.5715 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 80480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540754.0 mean_steps=13.1
|
|
[Episode 80490] reward=-115394660.4 actor_loss=0.3472 critic_loss=123864737746.4889 entropy=17.5762 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 80500] reward=-116633172.9 actor_loss=0.3262 critic_loss=120768543129.6000 entropy=17.5669 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 80500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-403199.5 mean_steps=15.8
|
|
[Episode 80510] reward=-117846951.0 actor_loss=0.3291 critic_loss=119027505015.4667 entropy=17.5605 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 80520] reward=-116326372.3 actor_loss=0.3180 critic_loss=126573340808.5333 entropy=17.5814 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 80520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-583091.5 mean_steps=13.3
|
|
[Episode 80530] reward=-623275871.9 actor_loss=1.2426 critic_loss=698068358636264.7500 entropy=17.5890 approx_kl=0.0044 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 80540] reward=-23842626952.4 actor_loss=225.8155 critic_loss=694409175109939968.0000 entropy=17.5962 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1107 front_blocked=0
|
|
[Eval 80540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-553306.5 mean_steps=12.8
|
|
[Episode 80550] reward=-371731232.3 actor_loss=2.5657 critic_loss=199890540604074.6562 entropy=17.6101 approx_kl=0.0041 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 80560] reward=-11498048579.3 actor_loss=208.0933 critic_loss=217609720688891232.0000 entropy=17.6152 approx_kl=0.0037 kl_stop=1 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 80560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-414094.4 mean_steps=14.2
|
|
[Episode 80570] reward=-144954729.2 actor_loss=1.5601 critic_loss=2591125225107.9111 entropy=17.6206 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 80580] reward=-125274823.9 actor_loss=0.3504 critic_loss=596531347456.0000 entropy=17.6179 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 80580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455316.4 mean_steps=15.5
|
|
[Episode 80590] reward=-114221136.0 actor_loss=0.2975 critic_loss=116784606958.9333 entropy=17.6202 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 80600] reward=-116954966.8 actor_loss=0.2818 critic_loss=126176550619.4286 entropy=17.6424 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 80600] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-245876.9 mean_steps=18.1
|
|
[Episode 80610] reward=-118207724.8 actor_loss=0.3545 critic_loss=128039969132.0889 entropy=17.6530 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 80620] reward=-121696991.9 actor_loss=0.2517 critic_loss=123648638976.0000 entropy=17.6539 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 80620] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-616229.3 mean_steps=11.3
|
|
[Episode 80630] reward=-115067925.9 actor_loss=0.3245 critic_loss=121285850112.0000 entropy=17.6352 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 80640] reward=-111845919.2 actor_loss=0.4816 critic_loss=115546740833.5238 entropy=17.6450 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 80640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-423774.2 mean_steps=14.3
|
|
[Episode 80650] reward=-121315129.3 actor_loss=0.2236 critic_loss=122550880574.5778 entropy=17.6502 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 80660] reward=-118427763.7 actor_loss=0.2723 critic_loss=146179515642.3111 entropy=17.6634 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 80660] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-633700.8 mean_steps=10.9
|
|
[Episode 80670] reward=-123049342.5 actor_loss=0.2790 critic_loss=130505656612.5714 entropy=17.6607 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 80680] reward=-120019566.9 actor_loss=0.2799 critic_loss=129321599249.0667 entropy=17.6769 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 80680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493140.5 mean_steps=14.8
|
|
[Episode 80690] reward=-118921140.2 actor_loss=0.2521 critic_loss=125644120941.7143 entropy=17.6612 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 80700] reward=-116539156.0 actor_loss=0.3434 critic_loss=120315835343.2381 entropy=17.6658 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 80700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-642074.0 mean_steps=12.2
|
|
[Episode 80710] reward=-121622454.6 actor_loss=0.2820 critic_loss=142541361315.8400 entropy=17.6747 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 80720] reward=-121947302.2 actor_loss=0.3125 critic_loss=125714049854.2703 entropy=17.6719 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 80720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450424.8 mean_steps=14.4
|
|
[Episode 80730] reward=-115320207.1 actor_loss=0.3527 critic_loss=180651273830.4000 entropy=17.6854 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 80740] reward=-113966275.2 actor_loss=0.2620 critic_loss=116298953159.1111 entropy=17.6897 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 80740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-432135.6 mean_steps=14.8
|
|
[Episode 80750] reward=-119984186.9 actor_loss=0.2681 critic_loss=147213971955.5122 entropy=17.6742 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 80760] reward=-119115352.5 actor_loss=0.3399 critic_loss=128958322326.5882 entropy=17.6871 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 80760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419747.6 mean_steps=15.5
|
|
[Episode 80770] reward=-118465502.6 actor_loss=0.3574 critic_loss=122920540647.6190 entropy=17.6967 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 80780] reward=-121243937.3 actor_loss=0.2410 critic_loss=129717493975.5789 entropy=17.6992 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 80780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591694.3 mean_steps=13.8
|
|
[Episode 80790] reward=-117718336.7 actor_loss=0.3099 critic_loss=123646946690.8445 entropy=17.6930 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 80800] reward=-120436857.0 actor_loss=0.3129 critic_loss=124725801451.5200 entropy=17.6947 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 80800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-471008.3 mean_steps=15.5
|
|
[Episode 80810] reward=-117417192.4 actor_loss=0.3038 critic_loss=131070481359.2381 entropy=17.7076 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 80820] reward=-119704206.8 actor_loss=0.3092 critic_loss=218753215692.8000 entropy=17.7009 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 80820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515902.9 mean_steps=13.1
|
|
[Episode 80830] reward=-116474529.5 actor_loss=0.2398 critic_loss=179332830159.2381 entropy=17.7000 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 80840] reward=-122646590.9 actor_loss=0.2910 critic_loss=131345947209.1429 entropy=17.7049 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 80840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-626722.1 mean_steps=12.7
|
|
[Episode 80850] reward=-117939246.0 actor_loss=0.2738 critic_loss=125013683814.4000 entropy=17.7198 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 80860] reward=-119916105.3 actor_loss=0.3455 critic_loss=124596521803.2941 entropy=17.7086 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 80860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385151.4 mean_steps=15.7
|
|
[Episode 80870] reward=-118747426.2 actor_loss=0.3220 critic_loss=129048127577.0435 entropy=17.7171 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 80880] reward=-121648030.8 actor_loss=0.2813 critic_loss=127853037410.4615 entropy=17.7263 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 80880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591538.0 mean_steps=13.8
|
|
[Episode 80890] reward=-112410752.0 actor_loss=0.3334 critic_loss=119734243827.5122 entropy=17.7321 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 80900] reward=-117347955.8 actor_loss=0.2649 critic_loss=127221303462.0540 entropy=17.7192 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 80900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-559748.3 mean_steps=14.2
|
|
[Episode 80910] reward=-119601269.0 actor_loss=0.3406 critic_loss=128167088583.1111 entropy=17.6989 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 80920] reward=-115019184.6 actor_loss=0.3103 critic_loss=118484798122.6667 entropy=17.7080 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 80920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591319.6 mean_steps=13.7
|
|
[Episode 80930] reward=-117910120.5 actor_loss=0.3296 critic_loss=123966963496.4211 entropy=17.6933 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 80940] reward=-119175029.4 actor_loss=0.2727 critic_loss=125140084177.4545 entropy=17.7018 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 80940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495317.1 mean_steps=13.9
|
|
[Episode 80950] reward=-117635519.6 actor_loss=0.2145 critic_loss=123235798454.8571 entropy=17.6870 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 80960] reward=-114913002.8 actor_loss=0.2848 critic_loss=115794249500.4444 entropy=17.6843 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 80960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517545.0 mean_steps=14.2
|
|
[Episode 80970] reward=-116353516.3 actor_loss=0.3919 critic_loss=126765081206.1538 entropy=17.6875 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 80980] reward=-118425861.9 actor_loss=0.2525 critic_loss=126860747776.0000 entropy=17.6866 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 80980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-545738.1 mean_steps=14.6
|
|
[Episode 80990] reward=-117742729.4 actor_loss=0.2456 critic_loss=121238551893.3333 entropy=17.6970 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 81000] reward=-117923395.9 actor_loss=0.2971 critic_loss=121957285660.4444 entropy=17.6984 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 81000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-580586.5 mean_steps=11.8
|
|
[Episode 81010] reward=-118021793.3 actor_loss=0.2011 critic_loss=124143706658.1333 entropy=17.6831 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 81020] reward=-115898519.2 actor_loss=0.2878 critic_loss=122666223206.4000 entropy=17.6776 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 81020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531699.6 mean_steps=12.8
|
|
[Episode 81030] reward=-118377408.1 actor_loss=0.2359 critic_loss=124692251055.1579 entropy=17.6715 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 81040] reward=-117751495.5 actor_loss=0.2495 critic_loss=125528683471.2381 entropy=17.6608 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 81040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581193.8 mean_steps=12.7
|
|
[Episode 81050] reward=-116046983.7 actor_loss=0.2762 critic_loss=123305089706.6667 entropy=17.6547 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 81060] reward=-117999396.9 actor_loss=0.3995 critic_loss=120650208779.3778 entropy=17.6525 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 81060] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-221130.6 mean_steps=16.9
|
|
[Episode 81070] reward=-116752360.4 actor_loss=0.2980 critic_loss=122441959521.5238 entropy=17.6464 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 81080] reward=-123688326.7 actor_loss=0.2786 critic_loss=129987218545.7778 entropy=17.6355 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 81080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571839.3 mean_steps=12.5
|
|
[Episode 81090] reward=-117420449.6 actor_loss=0.2164 critic_loss=119894934764.3077 entropy=17.6500 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 81100] reward=-118524815.3 actor_loss=0.3021 critic_loss=129148559125.9429 entropy=17.6316 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 81100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566921.3 mean_steps=13.2
|
|
[Episode 81110] reward=-120062743.2 actor_loss=0.2612 critic_loss=125378459039.1351 entropy=17.6276 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 81120] reward=-117220427.0 actor_loss=0.3753 critic_loss=127641297160.2581 entropy=17.6201 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 81120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495016.2 mean_steps=13.8
|
|
[Episode 81130] reward=-120248763.3 actor_loss=0.3348 critic_loss=125652616461.4737 entropy=17.6217 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 81140] reward=-117722022.9 actor_loss=0.3228 critic_loss=124108097126.4000 entropy=17.6156 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 81140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-560239.5 mean_steps=14.2
|
|
[Episode 81150] reward=-119429455.8 actor_loss=0.2064 critic_loss=123638708269.5111 entropy=17.6173 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 81160] reward=-118379919.6 actor_loss=0.2693 critic_loss=122179598433.5238 entropy=17.6184 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 81160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-427796.7 mean_steps=15.9
|
|
[Episode 81170] reward=-114569283.5 actor_loss=0.2650 critic_loss=115271504502.1538 entropy=17.6144 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 81180] reward=-115686499.3 actor_loss=0.3218 critic_loss=119445740264.7273 entropy=17.6045 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 81180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-453741.2 mean_steps=16.3
|
|
[Episode 81190] reward=-137433664.9 actor_loss=0.2139 critic_loss=1657153365242.3110 entropy=17.6085 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 81200] reward=-119599025.5 actor_loss=0.3279 critic_loss=154853407182.4516 entropy=17.6174 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 81200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612090.8 mean_steps=11.8
|
|
[Episode 81210] reward=-117402742.3 actor_loss=0.2077 critic_loss=120786082474.6667 entropy=17.6153 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 81220] reward=-114972776.1 actor_loss=0.2915 critic_loss=122904210195.6923 entropy=17.6090 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 81220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386068.6 mean_steps=15.9
|
|
[Episode 81230] reward=-115776417.3 actor_loss=0.3383 critic_loss=124254067790.7692 entropy=17.6019 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 81240] reward=-122720769.9 actor_loss=0.1906 critic_loss=127721399684.4138 entropy=17.5970 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 81240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-509281.1 mean_steps=12.4
|
|
[Episode 81250] reward=-121099236.2 actor_loss=0.2921 critic_loss=128607361954.9091 entropy=17.6008 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 81260] reward=-121712262.8 actor_loss=0.1436 critic_loss=136192333824.0000 entropy=17.6082 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 81260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-442140.7 mean_steps=16.2
|
|
[Episode 81270] reward=-119250292.8 actor_loss=0.2907 critic_loss=127275310917.8182 entropy=17.6005 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 81280] reward=-119669838.9 actor_loss=0.2906 critic_loss=126910613822.5778 entropy=17.6004 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 81280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614049.6 mean_steps=12.7
|
|
[Episode 81290] reward=-116667896.0 actor_loss=0.3064 critic_loss=131776077004.8000 entropy=17.5958 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 81300] reward=-109794838.6 actor_loss=0.3890 critic_loss=115999728753.7778 entropy=17.6030 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 81300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-553751.9 mean_steps=12.3
|
|
[Episode 81310] reward=-124902264.6 actor_loss=0.2001 critic_loss=129959496908.8000 entropy=17.6070 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 81320] reward=-114688620.0 actor_loss=0.3836 critic_loss=124304127317.3333 entropy=17.6006 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 81320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390210.3 mean_steps=15.2
|
|
[Episode 81330] reward=-117503152.5 actor_loss=0.3582 critic_loss=119342137584.9412 entropy=17.5793 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 81340] reward=-119947397.2 actor_loss=0.2549 critic_loss=123457611499.2432 entropy=17.5776 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 81340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-412091.4 mean_steps=14.1
|
|
[Episode 81350] reward=-115695270.1 actor_loss=0.2992 critic_loss=119694915629.5111 entropy=17.5662 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 81360] reward=-119737847.3 actor_loss=0.2972 critic_loss=126496417018.3111 entropy=17.5631 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 81360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-495420.2 mean_steps=15.8
|
|
[Episode 81370] reward=-112667794.6 actor_loss=0.3694 critic_loss=114366175004.4444 entropy=17.5631 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 81380] reward=-119047956.4 actor_loss=0.2534 critic_loss=124374706537.4118 entropy=17.5693 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 81380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-342218.3 mean_steps=15.5
|
|
[Episode 81390] reward=-112716512.5 actor_loss=0.2815 critic_loss=113631824190.5778 entropy=17.5598 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 81400] reward=-113440933.6 actor_loss=0.2533 critic_loss=114011491452.8781 entropy=17.5660 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 81400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-580228.5 mean_steps=13.5
|
|
[Episode 81410] reward=-115726081.2 actor_loss=0.3270 critic_loss=119507796748.1905 entropy=17.5771 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 81420] reward=-120322677.1 actor_loss=0.2512 critic_loss=125850036782.5455 entropy=17.5846 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 81420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-619308.1 mean_steps=13.3
|
|
[Episode 81430] reward=-118047535.9 actor_loss=0.3925 critic_loss=123980597090.4615 entropy=17.5906 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 81440] reward=-119784981.6 actor_loss=0.2914 critic_loss=124434902907.8710 entropy=17.6088 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 81440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548638.7 mean_steps=13.4
|
|
[Episode 81450] reward=-114041700.7 actor_loss=0.3984 critic_loss=121149701688.8889 entropy=17.6204 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 81460] reward=-118853836.3 actor_loss=0.2005 critic_loss=131031691264.0000 entropy=17.6349 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 81460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524883.1 mean_steps=14.2
|
|
[Episode 81470] reward=-120194472.8 actor_loss=0.2358 critic_loss=131853566537.1429 entropy=17.6253 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 81480] reward=-123200588.7 actor_loss=0.2267 critic_loss=130213302389.0286 entropy=17.6326 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 81480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462887.6 mean_steps=13.6
|
|
[Episode 81490] reward=-117556894.8 actor_loss=0.3455 critic_loss=124773548578.1333 entropy=17.6228 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 81500] reward=-116433351.7 actor_loss=0.2809 critic_loss=123012943530.6667 entropy=17.6201 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 81500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383707.4 mean_steps=15.9
|
|
[Episode 81510] reward=-116550068.7 actor_loss=0.3553 critic_loss=120263791411.2000 entropy=17.6102 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 81520] reward=-119160395.6 actor_loss=0.2998 critic_loss=148029368092.4445 entropy=17.6058 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 81520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446486.8 mean_steps=15.3
|
|
[Episode 81530] reward=-117206606.7 actor_loss=0.2393 critic_loss=122753401651.2000 entropy=17.5988 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 81540] reward=-113431690.2 actor_loss=0.3517 critic_loss=125642202067.4783 entropy=17.6002 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 81540] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-412965.6 mean_steps=17.0
|
|
[Episode 81550] reward=-122106355.9 actor_loss=0.2928 critic_loss=131465911539.8095 entropy=17.5999 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 81560] reward=-118021767.3 actor_loss=0.2943 critic_loss=120131917462.5882 entropy=17.6095 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 81560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-574997.6 mean_steps=12.4
|
|
[Episode 81570] reward=-115017287.4 actor_loss=0.3538 critic_loss=121087299310.9333 entropy=17.5935 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 81580] reward=-115651224.1 actor_loss=0.2088 critic_loss=122954702119.8222 entropy=17.6013 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 81580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527581.4 mean_steps=14.2
|
|
[Episode 81590] reward=-114341827.6 actor_loss=0.2469 critic_loss=125084928000.0000 entropy=17.6066 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 81600] reward=-117142497.4 actor_loss=0.2908 critic_loss=122633659313.2308 entropy=17.6002 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 81600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-634488.7 mean_steps=13.1
|
|
[Episode 81610] reward=-114793763.4 actor_loss=0.2772 critic_loss=115467997696.0000 entropy=17.6117 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 81620] reward=-121785896.7 actor_loss=0.2211 critic_loss=250074135552.0000 entropy=17.6235 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 81620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-470983.3 mean_steps=15.6
|
|
[Episode 81630] reward=-123050372.7 actor_loss=0.2885 critic_loss=133112977675.1304 entropy=17.6202 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 81640] reward=-118848415.8 actor_loss=0.2867 critic_loss=124017892583.2258 entropy=17.6088 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 81640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-597217.7 mean_steps=13.6
|
|
[Episode 81650] reward=-109928734.3 actor_loss=0.3884 critic_loss=114283579684.5714 entropy=17.6071 approx_kl=0.0115 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 81660] reward=-114584584.1 actor_loss=0.2819 critic_loss=127560656738.4615 entropy=17.6112 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 81660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-636717.9 mean_steps=11.9
|
|
[Episode 81670] reward=-113633845.0 actor_loss=0.2296 critic_loss=117019467385.9048 entropy=17.6028 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 81680] reward=-117123641.3 actor_loss=0.2481 critic_loss=121571262107.8261 entropy=17.6061 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 81680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461228.7 mean_steps=14.8
|
|
[Episode 81690] reward=-118968408.6 actor_loss=0.3638 critic_loss=121657643932.9032 entropy=17.6251 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 81700] reward=-116514907.2 actor_loss=0.2325 critic_loss=116732121600.0000 entropy=17.6111 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 81700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515145.3 mean_steps=13.9
|
|
[Episode 81710] reward=-119725042.3 actor_loss=0.2751 critic_loss=130484223271.8222 entropy=17.6159 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 81720] reward=-117692918.7 actor_loss=0.3419 critic_loss=120861901328.5161 entropy=17.6093 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 81720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563256.2 mean_steps=13.3
|
|
[Episode 81730] reward=-115544075.6 actor_loss=0.3256 critic_loss=115249157356.3077 entropy=17.6088 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 81740] reward=-113844951.2 actor_loss=0.3650 critic_loss=122908574192.4848 entropy=17.6194 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 81740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-624987.0 mean_steps=12.1
|
|
[Episode 81750] reward=-111466395.0 actor_loss=0.3213 critic_loss=113555550208.0000 entropy=17.6194 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 81760] reward=-121422686.3 actor_loss=0.2347 critic_loss=130028573889.7297 entropy=17.6195 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 81760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-513668.8 mean_steps=14.8
|
|
[Episode 81770] reward=-112883273.2 actor_loss=0.3335 critic_loss=122229683044.8485 entropy=17.6327 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 81780] reward=-114942897.3 actor_loss=0.2246 critic_loss=118140058737.7778 entropy=17.6344 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 81780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426885.9 mean_steps=15.1
|
|
[Episode 81790] reward=-117746861.0 actor_loss=0.4124 critic_loss=125755866704.8421 entropy=17.6361 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 81800] reward=-105551297.3 actor_loss=0.3328 critic_loss=108021118174.6087 entropy=17.6325 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 81800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-494384.1 mean_steps=15.1
|
|
[Episode 81810] reward=-117377099.1 actor_loss=0.2995 critic_loss=132240656745.4118 entropy=17.6389 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 81820] reward=-113651364.3 actor_loss=0.2500 critic_loss=118155457658.8800 entropy=17.6410 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 81820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-549051.5 mean_steps=12.4
|
|
[Episode 81830] reward=-118136319.3 actor_loss=0.3179 critic_loss=123538011229.0909 entropy=17.6415 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 81840] reward=-131509116.2 actor_loss=0.2557 critic_loss=744037994354.7587 entropy=17.6412 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 81840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-594153.8 mean_steps=13.9
|
|
[Episode 81850] reward=-131452073.2 actor_loss=1.3004 critic_loss=635169779712.0000 entropy=17.6612 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 81860] reward=-178920911.6 actor_loss=0.1723 critic_loss=13893473478610.4883 entropy=17.6765 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 81860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-591711.1 mean_steps=12.7
|
|
[Episode 81870] reward=-115397937.1 actor_loss=0.2363 critic_loss=121159824437.8947 entropy=17.6768 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 81880] reward=-111387398.1 actor_loss=0.4294 critic_loss=118077387532.1905 entropy=17.6764 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 81880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558084.9 mean_steps=13.1
|
|
[Episode 81890] reward=-118351492.9 actor_loss=0.3077 critic_loss=125635987046.4000 entropy=17.6957 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 81900] reward=-124441294.6 actor_loss=0.2240 critic_loss=132209926564.1026 entropy=17.7171 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 81900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-520476.2 mean_steps=14.8
|
|
[Episode 81910] reward=-115749774.3 actor_loss=0.2464 critic_loss=127697558062.5455 entropy=17.7270 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 81920] reward=-185861198.5 actor_loss=1.1656 critic_loss=17701876463549.9336 entropy=17.7122 approx_kl=0.0045 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 81920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436520.9 mean_steps=15.2
|
|
[Episode 81930] reward=-113804685.6 actor_loss=0.3533 critic_loss=132781976675.0968 entropy=17.7092 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 81940] reward=-117171812.1 actor_loss=0.2293 critic_loss=122048985586.1622 entropy=17.7192 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 81940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-506572.1 mean_steps=13.0
|
|
[Episode 81950] reward=-115706133.5 actor_loss=0.3069 critic_loss=119935391334.4000 entropy=17.7272 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 81960] reward=-115094260.0 actor_loss=0.3862 critic_loss=118824845312.0000 entropy=17.7202 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 81960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422638.5 mean_steps=15.2
|
|
[Episode 81970] reward=-115787080.4 actor_loss=0.3745 critic_loss=119720423245.9130 entropy=17.7239 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 81980] reward=-121694365.3 actor_loss=0.2957 critic_loss=132419711249.0667 entropy=17.7181 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 81980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495619.0 mean_steps=13.8
|
|
[Episode 81990] reward=-116163049.0 actor_loss=0.3091 critic_loss=120213266747.0769 entropy=17.7224 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 82000] reward=-115160195.8 actor_loss=0.2852 critic_loss=120606075259.2593 entropy=17.7335 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 82000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470473.1 mean_steps=14.4
|
|
[Episode 82010] reward=-128777434.6 actor_loss=0.2806 critic_loss=319487391886.8837 entropy=17.7646 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 82020] reward=-121787614.5 actor_loss=0.3560 critic_loss=259223602014.3158 entropy=17.7642 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 82020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529078.0 mean_steps=14.3
|
|
[Episode 82030] reward=-123257758.3 actor_loss=0.3103 critic_loss=169418132232.8276 entropy=17.7608 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 82040] reward=-141303698.0 actor_loss=0.2509 critic_loss=1970820794987.1628 entropy=17.7665 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 82040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502445.4 mean_steps=14.0
|
|
[Episode 82050] reward=-117898411.6 actor_loss=0.2691 critic_loss=128936778226.1622 entropy=17.7670 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 82060] reward=-116617858.7 actor_loss=0.3535 critic_loss=137879504018.2857 entropy=17.7745 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 82060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493255.0 mean_steps=14.7
|
|
[Episode 82070] reward=-121418457.0 actor_loss=0.3153 critic_loss=145957278326.1538 entropy=17.7640 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 82080] reward=-114448163.9 actor_loss=0.2579 critic_loss=130983268260.9778 entropy=17.7685 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 82080] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-340606.6 mean_steps=17.0
|
|
[Episode 82090] reward=-117387046.5 actor_loss=0.2694 critic_loss=120212638196.6222 entropy=17.7629 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 82100] reward=-115033465.4 actor_loss=0.1770 critic_loss=123242050355.2000 entropy=17.7656 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 82100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-544589.1 mean_steps=12.2
|
|
[Episode 82110] reward=-114937535.0 actor_loss=0.3320 critic_loss=122737490329.6000 entropy=17.7386 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 82120] reward=-116984426.8 actor_loss=0.2554 critic_loss=148080369863.8049 entropy=17.7655 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 82120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-438688.6 mean_steps=13.7
|
|
[Episode 82130] reward=-118856092.5 actor_loss=0.3415 critic_loss=132009147713.8286 entropy=17.7519 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 82140] reward=-118000001.2 actor_loss=0.2313 critic_loss=125381773630.5778 entropy=17.7513 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 82140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471410.4 mean_steps=14.6
|
|
[Episode 82150] reward=-121141336.1 actor_loss=0.1847 critic_loss=132341587968.0000 entropy=17.7431 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 82160] reward=-115112770.5 actor_loss=0.2558 critic_loss=121993065995.3778 entropy=17.7284 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 82160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526302.0 mean_steps=13.8
|
|
[Episode 82170] reward=-115978858.3 actor_loss=0.2370 critic_loss=121778311903.1795 entropy=17.7276 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 82180] reward=-118151689.6 actor_loss=0.3181 critic_loss=124594050025.2444 entropy=17.7221 approx_kl=0.0103 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 82180] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-572471.2 mean_steps=11.8
|
|
[Episode 82190] reward=-114890122.3 actor_loss=0.2791 critic_loss=119769975284.6222 entropy=17.7053 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 82200] reward=-117134482.3 actor_loss=0.3075 critic_loss=122657810204.4444 entropy=17.7008 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 82200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455097.4 mean_steps=15.4
|
|
[Episode 82210] reward=-118372041.8 actor_loss=0.2354 critic_loss=122638855859.8919 entropy=17.6902 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 82220] reward=-116948160.8 actor_loss=0.2959 critic_loss=122437454165.3333 entropy=17.6876 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 82220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541597.0 mean_steps=13.2
|
|
[Episode 82230] reward=-114203763.1 actor_loss=0.3826 critic_loss=126093432877.5111 entropy=17.6973 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 82240] reward=-115002692.5 actor_loss=0.3128 critic_loss=118148881785.2632 entropy=17.6967 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 82240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-406138.6 mean_steps=15.2
|
|
[Episode 82250] reward=-111932668.9 actor_loss=0.4198 critic_loss=114393212635.4286 entropy=17.6947 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 82260] reward=-117453173.1 actor_loss=0.2520 critic_loss=128318944220.6897 entropy=17.6959 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 82260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501998.9 mean_steps=12.9
|
|
[Episode 82270] reward=-116981922.7 actor_loss=0.2379 critic_loss=118510568448.0000 entropy=17.6882 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 82280] reward=-118347378.7 actor_loss=0.3648 critic_loss=120296003089.6552 entropy=17.6807 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 82280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435150.6 mean_steps=15.2
|
|
[Episode 82290] reward=-115678007.9 actor_loss=0.2917 critic_loss=120319219979.1304 entropy=17.6849 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 82300] reward=-116188964.7 actor_loss=0.4012 critic_loss=121976409474.8445 entropy=17.6775 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 82300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437759.9 mean_steps=15.2
|
|
[Episode 82310] reward=-119870529.7 actor_loss=0.2388 critic_loss=129255050210.7429 entropy=17.6546 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 82320] reward=-113794948.6 actor_loss=0.3058 critic_loss=115253493031.8222 entropy=17.6428 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 82320] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-662757.4 mean_steps=11.2
|
|
[Episode 82330] reward=-120051171.2 actor_loss=0.2213 critic_loss=123386110244.5714 entropy=17.6449 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 82340] reward=-113932239.9 actor_loss=0.2475 critic_loss=123471548631.5789 entropy=17.6464 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 82340] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-672606.0 mean_steps=11.4
|
|
[Episode 82350] reward=-112579933.6 actor_loss=0.3042 critic_loss=119293961362.2857 entropy=17.6332 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 82360] reward=-113632099.4 actor_loss=0.3649 critic_loss=127611755269.6889 entropy=17.6170 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 82360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570233.0 mean_steps=13.0
|
|
[Episode 82370] reward=-115415366.6 actor_loss=0.2576 critic_loss=113266344500.9655 entropy=17.6125 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 82380] reward=-111969516.5 actor_loss=0.1664 critic_loss=117804155335.1111 entropy=17.6309 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 82380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-381304.2 mean_steps=15.9
|
|
[Episode 82390] reward=-115801746.3 actor_loss=0.3157 critic_loss=115521769658.1818 entropy=17.6227 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 82400] reward=-115417788.4 actor_loss=0.2744 critic_loss=118158546176.0000 entropy=17.6217 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 82400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-593531.7 mean_steps=12.4
|
|
[Episode 82410] reward=-113153834.0 actor_loss=0.3332 critic_loss=114409759129.6000 entropy=17.6076 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 82420] reward=-111410234.5 actor_loss=0.2444 critic_loss=113857388908.0889 entropy=17.6114 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 82420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-527466.5 mean_steps=13.0
|
|
[Episode 82430] reward=-115803044.2 actor_loss=0.2461 critic_loss=118574266595.5556 entropy=17.6045 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 82440] reward=-118570131.9 actor_loss=0.2778 critic_loss=130675292501.3333 entropy=17.5934 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 82440] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-421858.9 mean_steps=17.1
|
|
[Episode 82450] reward=-114537105.9 actor_loss=0.3211 critic_loss=119076566973.9355 entropy=17.6169 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 82460] reward=-115238972.9 actor_loss=0.3301 critic_loss=121058832839.1111 entropy=17.6137 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 82460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614403.4 mean_steps=12.8
|
|
[Episode 82470] reward=-118141729.4 actor_loss=0.3008 critic_loss=127120385092.2667 entropy=17.5998 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 82480] reward=-120369262.6 actor_loss=0.3444 critic_loss=126888984576.0000 entropy=17.5995 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 82480] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-638682.5 mean_steps=12.1
|
|
[Episode 82490] reward=-117279823.3 actor_loss=0.3357 critic_loss=127358368881.7778 entropy=17.5993 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 82500] reward=-119258318.0 actor_loss=0.3047 critic_loss=151069232241.7778 entropy=17.6061 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 82500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529412.3 mean_steps=14.3
|
|
[Episode 82510] reward=-116243236.6 actor_loss=0.3089 critic_loss=156319640046.3448 entropy=17.6200 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 82520] reward=-125176073.7 actor_loss=0.3873 critic_loss=466109197750.8571 entropy=17.6282 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 82520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-387365.7 mean_steps=15.0
|
|
[Episode 82530] reward=-117510984.3 actor_loss=0.3177 critic_loss=124211900142.9333 entropy=17.6124 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 82540] reward=-109144020.9 actor_loss=0.5096 critic_loss=114537204576.7111 entropy=17.6171 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 82540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-612748.0 mean_steps=12.9
|
|
[Episode 82550] reward=-119366631.6 actor_loss=0.2996 critic_loss=122428340815.6444 entropy=17.6110 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 82560] reward=-119132174.2 actor_loss=0.2770 critic_loss=120298947689.9310 entropy=17.6014 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 82560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-583257.4 mean_steps=12.8
|
|
[Episode 82570] reward=-119583621.2 actor_loss=0.2471 critic_loss=121747036387.5556 entropy=17.5880 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 82580] reward=-116410830.7 actor_loss=0.3753 critic_loss=129961361623.5789 entropy=17.5738 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 82580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560518.3 mean_steps=12.2
|
|
[Episode 82590] reward=-115986845.2 actor_loss=0.3190 critic_loss=119499579847.1111 entropy=17.5621 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 82600] reward=-118659010.1 actor_loss=0.2534 critic_loss=120193295506.2857 entropy=17.5731 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 82600] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-626769.4 mean_steps=12.1
|
|
[Episode 82610] reward=-119196530.0 actor_loss=0.3300 critic_loss=123028860836.9778 entropy=17.5709 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 82620] reward=-120422431.4 actor_loss=0.2762 critic_loss=123150215460.5714 entropy=17.5716 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 82620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-588042.9 mean_steps=14.5
|
|
[Episode 82630] reward=-120220278.4 actor_loss=0.2747 critic_loss=125939514026.6667 entropy=17.5578 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 82640] reward=-118430273.8 actor_loss=0.3298 critic_loss=123146890489.0811 entropy=17.5677 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 82640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478588.7 mean_steps=14.8
|
|
[Episode 82650] reward=-116194631.4 actor_loss=0.3639 critic_loss=114370773937.2308 entropy=17.5807 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 82660] reward=-119086388.8 actor_loss=0.2227 critic_loss=121446982479.4483 entropy=17.5940 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 82660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516183.1 mean_steps=13.8
|
|
[Episode 82670] reward=-115265985.1 actor_loss=0.3244 critic_loss=117024816487.7838 entropy=17.5939 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 82680] reward=-117747294.4 actor_loss=0.3091 critic_loss=119036984797.8667 entropy=17.5886 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 82680] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-385394.4 mean_steps=17.1
|
|
[Episode 82690] reward=-117500588.5 actor_loss=0.3347 critic_loss=121055908003.8400 entropy=17.6081 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 82700] reward=-118079531.2 actor_loss=0.2102 critic_loss=122051348546.0645 entropy=17.6151 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 82700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468107.5 mean_steps=15.0
|
|
[Episode 82710] reward=-117537151.7 actor_loss=0.2986 critic_loss=122950984590.2222 entropy=17.6156 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 82720] reward=-119591298.0 actor_loss=0.3260 critic_loss=127099629112.8889 entropy=17.6098 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 82720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-377255.3 mean_steps=15.2
|
|
[Episode 82730] reward=-114692701.5 actor_loss=0.3815 critic_loss=121097249981.6296 entropy=17.6150 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 82740] reward=-110770736.2 actor_loss=0.2598 critic_loss=118239099471.6444 entropy=17.6106 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 82740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-592297.1 mean_steps=14.7
|
|
[Episode 82750] reward=-114521494.8 actor_loss=0.3520 critic_loss=120245716946.4889 entropy=17.6134 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 82760] reward=-118036743.4 actor_loss=0.2534 critic_loss=134572564728.2424 entropy=17.6209 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 82760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600635.6 mean_steps=12.9
|
|
[Episode 82770] reward=-116197551.7 actor_loss=0.3704 critic_loss=125264375125.3333 entropy=17.6257 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 82780] reward=-173103828.7 actor_loss=1.3593 critic_loss=12033664282864.9414 entropy=17.6171 approx_kl=0.0030 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 82780] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-299578.8 mean_steps=16.4
|
|
[Episode 82790] reward=-116783748.6 actor_loss=0.3458 critic_loss=136595441994.3226 entropy=17.6560 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 82800] reward=-117564543.5 actor_loss=0.3192 critic_loss=124163844960.7111 entropy=17.6564 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 82800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566322.7 mean_steps=13.7
|
|
[Episode 82810] reward=-116564288.8 actor_loss=0.2952 critic_loss=134571765304.8889 entropy=17.6542 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 82820] reward=-119184313.1 actor_loss=0.2493 critic_loss=161015815281.7778 entropy=17.6481 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 82820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-348156.4 mean_steps=16.6
|
|
[Episode 82830] reward=-110125362.9 actor_loss=0.4059 critic_loss=116040037058.2069 entropy=17.6461 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 82840] reward=-115477001.5 actor_loss=0.2602 critic_loss=121388721977.8065 entropy=17.6382 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 82840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491836.1 mean_steps=14.8
|
|
[Episode 82850] reward=-120008872.0 actor_loss=0.3146 critic_loss=137092664206.2222 entropy=17.6408 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 82860] reward=-114838077.8 actor_loss=0.2160 critic_loss=121558099194.3111 entropy=17.6623 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 82860] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-297725.5 mean_steps=17.4
|
|
[Episode 82870] reward=-116772811.3 actor_loss=0.2818 critic_loss=136577587307.7895 entropy=17.6758 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 82880] reward=-116319750.4 actor_loss=0.2645 critic_loss=119250444105.9556 entropy=17.6698 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 82880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-388074.6 mean_steps=15.1
|
|
[Episode 82890] reward=-115520692.6 actor_loss=0.3273 critic_loss=116493083898.3111 entropy=17.6743 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 82900] reward=-116136254.2 actor_loss=0.2825 critic_loss=118928883183.4839 entropy=17.6700 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 82900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-511865.9 mean_steps=15.1
|
|
[Episode 82910] reward=-122089977.5 actor_loss=0.4620 critic_loss=126878448298.6667 entropy=17.6687 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1543 front_blocked=0
|
|
[Episode 82920] reward=-115576907.6 actor_loss=0.2601 critic_loss=121328626710.7556 entropy=17.6565 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 82920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-429044.0 mean_steps=14.4
|
|
[Episode 82930] reward=-122537189.6 actor_loss=0.2745 critic_loss=133117887556.2667 entropy=17.6520 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 82940] reward=-119375816.1 actor_loss=0.1744 critic_loss=125100936396.8000 entropy=17.6506 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 82940] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-289324.0 mean_steps=17.6
|
|
[Episode 82950] reward=-116897270.4 actor_loss=0.2471 critic_loss=129101009715.2000 entropy=17.6402 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 82960] reward=-117124635.0 actor_loss=0.3320 critic_loss=117804179456.0000 entropy=17.6449 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 82960] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-673783.2 mean_steps=12.6
|
|
[Episode 82970] reward=-122146032.7 actor_loss=0.2693 critic_loss=125513204440.1778 entropy=17.6389 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 82980] reward=-118055720.8 actor_loss=0.3375 critic_loss=367353393152.0000 entropy=17.6438 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 82980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-330644.3 mean_steps=15.7
|
|
[Episode 82990] reward=-122774586.1 actor_loss=0.2935 critic_loss=120638956612.2667 entropy=17.6498 approx_kl=0.0008 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 83000] reward=-121708264.0 actor_loss=0.2541 critic_loss=123833715734.7556 entropy=17.6594 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 83000] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-353932.5 mean_steps=16.8
|
|
[Episode 83010] reward=-115879292.1 actor_loss=0.3315 critic_loss=117077746210.1333 entropy=17.6618 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 83020] reward=-112148460.4 actor_loss=0.3082 critic_loss=115035740569.6000 entropy=17.6543 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 83020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-544883.9 mean_steps=12.4
|
|
[Episode 83030] reward=-120658971.0 actor_loss=0.2367 critic_loss=121878597358.9333 entropy=17.6450 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 83040] reward=-116764200.5 actor_loss=0.3004 critic_loss=119344166229.3333 entropy=17.6343 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 83040] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-698436.5 mean_steps=10.7
|
|
[Episode 83050] reward=-120336109.4 actor_loss=0.2689 critic_loss=121750934505.2444 entropy=17.6311 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 83060] reward=-118153389.7 actor_loss=0.1998 critic_loss=119679014047.2889 entropy=17.6325 approx_kl=0.0103 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 83060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-601421.4 mean_steps=12.6
|
|
[Episode 83070] reward=-115309941.4 actor_loss=0.2537 critic_loss=126763154181.6889 entropy=17.6075 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 83080] reward=-122681164.8 actor_loss=0.3375 critic_loss=182735875822.9333 entropy=17.6103 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 83080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577763.9 mean_steps=13.4
|
|
[Episode 83090] reward=-120947141.5 actor_loss=0.3116 critic_loss=206240330956.8000 entropy=17.6258 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 83100] reward=-119618457.3 actor_loss=0.3587 critic_loss=121513109458.4889 entropy=17.6344 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 83100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407813.8 mean_steps=16.4
|
|
[Episode 83110] reward=-124929910.0 actor_loss=0.2362 critic_loss=133104764700.4444 entropy=17.6414 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 83120] reward=-124396088.9 actor_loss=0.2558 critic_loss=132012200556.6061 entropy=17.6365 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 83120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-463976.2 mean_steps=12.8
|
|
[Episode 83130] reward=-117149799.0 actor_loss=0.3516 critic_loss=123385477277.5385 entropy=17.6369 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 83140] reward=-118363135.3 actor_loss=0.3178 critic_loss=124318124714.6667 entropy=17.6191 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 83140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-492080.7 mean_steps=13.3
|
|
[Episode 83150] reward=-118381954.1 actor_loss=0.3254 critic_loss=116308850551.4667 entropy=17.6281 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 83160] reward=-121479757.0 actor_loss=0.3767 critic_loss=169801176795.4286 entropy=17.6385 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 83160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-499566.8 mean_steps=13.4
|
|
[Episode 83170] reward=-120396947.1 actor_loss=0.3115 critic_loss=126496709745.7778 entropy=17.6489 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 83180] reward=-120312671.7 actor_loss=0.2684 critic_loss=123570767714.4615 entropy=17.6470 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 83180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-381161.3 mean_steps=15.1
|
|
[Episode 83190] reward=-122169541.2 actor_loss=0.3599 critic_loss=889297354039.6522 entropy=17.6314 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 83200] reward=-121700948.0 actor_loss=0.3744 critic_loss=161881699279.2381 entropy=17.6301 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 83200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-406344.6 mean_steps=15.2
|
|
[Episode 83210] reward=-141801676.1 actor_loss=0.7836 critic_loss=2095558426624.0000 entropy=17.6343 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 83220] reward=-116576962.2 actor_loss=0.2475 critic_loss=118511031532.3077 entropy=17.6526 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 83220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526888.1 mean_steps=13.1
|
|
[Episode 83230] reward=-120762073.5 actor_loss=0.2737 critic_loss=187461852793.9048 entropy=17.6442 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 83240] reward=-120155286.9 actor_loss=0.2440 critic_loss=125315044693.3333 entropy=17.6427 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 83240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498629.6 mean_steps=14.2
|
|
[Episode 83250] reward=-218831708.8 actor_loss=0.7083 critic_loss=31433621809834.6680 entropy=17.6622 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 83260] reward=-144931288.4 actor_loss=0.2750 critic_loss=2751225439846.3999 entropy=17.6649 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 83260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-617698.5 mean_steps=12.8
|
|
[Episode 83270] reward=-118414324.5 actor_loss=0.3292 critic_loss=153910276915.2000 entropy=17.6905 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 83280] reward=-118296099.7 actor_loss=0.3126 critic_loss=118482294717.9355 entropy=17.6826 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 83280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-466505.6 mean_steps=13.8
|
|
[Episode 83290] reward=-117269284.3 actor_loss=0.2992 critic_loss=122494443246.9333 entropy=17.6827 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 83300] reward=-120599561.1 actor_loss=0.2465 critic_loss=126133036373.3333 entropy=17.6925 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 83300] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-389673.5 mean_steps=17.4
|
|
[Episode 83310] reward=-131194796.9 actor_loss=0.2995 critic_loss=687378757058.5601 entropy=17.6935 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 83320] reward=-119682005.5 actor_loss=0.2293 critic_loss=122590252782.9333 entropy=17.6899 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 83320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-374361.7 mean_steps=16.1
|
|
[Episode 83330] reward=-122280599.8 actor_loss=0.3053 critic_loss=123677949952.0000 entropy=17.6902 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 83340] reward=-124347190.1 actor_loss=0.3019 critic_loss=157951085226.6667 entropy=17.6991 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 83340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-561952.5 mean_steps=12.2
|
|
[Episode 83350] reward=-117361219.7 actor_loss=0.2981 critic_loss=124100936542.3158 entropy=17.6874 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 83360] reward=-119971511.5 actor_loss=0.2240 critic_loss=138510300501.3333 entropy=17.6903 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 83360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477013.6 mean_steps=14.2
|
|
[Episode 83370] reward=-121500699.2 actor_loss=0.3325 critic_loss=127297263254.5882 entropy=17.6872 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 83380] reward=-113918370.6 actor_loss=0.2942 critic_loss=118617896365.4194 entropy=17.6880 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 83380] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-273323.2 mean_steps=18.0
|
|
[Episode 83390] reward=-121266016.8 actor_loss=0.3027 critic_loss=124733407887.3600 entropy=17.6914 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 83400] reward=-114402014.0 actor_loss=0.2317 critic_loss=117654231176.5333 entropy=17.6825 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 83400] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-659771.9 mean_steps=11.4
|
|
[Episode 83410] reward=-112069954.6 actor_loss=0.2922 critic_loss=116691066606.9333 entropy=17.6859 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 83420] reward=-114445610.4 actor_loss=0.3020 critic_loss=117810737643.5200 entropy=17.6812 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 83420] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-582693.0 mean_steps=11.1
|
|
[Episode 83430] reward=-124376052.8 actor_loss=0.3073 critic_loss=131888567565.4737 entropy=17.6705 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 83440] reward=-123870747.9 actor_loss=0.2023 critic_loss=127415538249.1429 entropy=17.6600 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 83440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-369659.1 mean_steps=15.8
|
|
[Episode 83450] reward=-112432126.6 actor_loss=0.3481 critic_loss=122031084410.4348 entropy=17.6653 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 83460] reward=-121783789.3 actor_loss=0.2018 critic_loss=125227254837.8947 entropy=17.6594 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 83460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471797.7 mean_steps=13.7
|
|
[Episode 83470] reward=-113254094.9 actor_loss=0.2247 critic_loss=117732338346.6667 entropy=17.6528 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 83480] reward=-121685468.8 actor_loss=0.2705 critic_loss=130065320345.6000 entropy=17.6426 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 83480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542908.7 mean_steps=13.6
|
|
[Episode 83490] reward=-117754253.8 actor_loss=0.2708 critic_loss=130067566787.0476 entropy=17.6297 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 83500] reward=-113315961.0 actor_loss=0.3631 critic_loss=116254978916.8485 entropy=17.6407 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 83500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538199.1 mean_steps=14.3
|
|
[Episode 83510] reward=-122718101.4 actor_loss=0.1865 critic_loss=127467882440.6487 entropy=17.6410 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 83520] reward=-118524017.4 actor_loss=0.2803 critic_loss=117640646780.1212 entropy=17.6499 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 83520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566307.9 mean_steps=13.2
|
|
[Episode 83530] reward=-117245702.2 actor_loss=0.3024 critic_loss=125123451289.6000 entropy=17.6487 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 83540] reward=-117096784.6 actor_loss=0.2406 critic_loss=120079793212.2353 entropy=17.6488 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 83540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-674533.9 mean_steps=12.1
|
|
[Episode 83550] reward=-122585781.8 actor_loss=0.2520 critic_loss=126759629824.0000 entropy=17.6451 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 83560] reward=-113654108.7 actor_loss=0.3610 critic_loss=115852375808.0000 entropy=17.6507 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 83560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513780.8 mean_steps=13.4
|
|
[Episode 83570] reward=-118324291.6 actor_loss=0.2215 critic_loss=124906241501.8667 entropy=17.6327 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 83580] reward=-119759793.7 actor_loss=0.2523 critic_loss=122380347164.4444 entropy=17.6348 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 83580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495725.3 mean_steps=14.2
|
|
[Episode 83590] reward=-124481355.7 actor_loss=0.1490 critic_loss=125896924728.8889 entropy=17.6067 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 83600] reward=-118699411.3 actor_loss=0.3774 critic_loss=118953910871.4146 entropy=17.5899 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 83600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560967.8 mean_steps=12.8
|
|
[Episode 83610] reward=-112772980.0 actor_loss=0.3041 critic_loss=117467893444.9231 entropy=17.5805 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 83620] reward=-120348927.8 actor_loss=0.3034 critic_loss=123493462447.1579 entropy=17.5919 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 83620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493030.8 mean_steps=14.6
|
|
[Episode 83630] reward=-119931487.5 actor_loss=0.2810 critic_loss=120673389129.1429 entropy=17.6069 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 83640] reward=-121207482.3 actor_loss=0.2235 critic_loss=121482241365.3333 entropy=17.6077 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 83640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-582738.1 mean_steps=12.9
|
|
[Episode 83650] reward=-115964763.5 actor_loss=0.3233 critic_loss=123245557077.3333 entropy=17.6051 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 83660] reward=-120853505.9 actor_loss=0.2809 critic_loss=131442314581.3333 entropy=17.5950 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 83660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556928.0 mean_steps=13.4
|
|
[Episode 83670] reward=-122685760.7 actor_loss=0.1867 critic_loss=127547932964.5714 entropy=17.5882 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 83680] reward=-113512814.9 actor_loss=0.3423 critic_loss=118063062853.8182 entropy=17.5873 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 83680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400846.5 mean_steps=16.4
|
|
[Episode 83690] reward=-118288321.1 actor_loss=0.3217 critic_loss=123537951948.8000 entropy=17.5724 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 83700] reward=-121284474.0 actor_loss=0.3766 critic_loss=123182683659.3778 entropy=17.5688 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 83700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-494937.1 mean_steps=14.9
|
|
[Episode 83710] reward=-113797837.8 actor_loss=0.3072 critic_loss=114747892972.3077 entropy=17.5785 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 83720] reward=-116901972.4 actor_loss=0.4481 critic_loss=122154674312.5333 entropy=17.5924 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 83720] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-302653.1 mean_steps=16.4
|
|
[Episode 83730] reward=-118669535.9 actor_loss=0.2207 critic_loss=124136047684.2667 entropy=17.5939 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 83740] reward=-120027679.1 actor_loss=0.2314 critic_loss=119103976477.2571 entropy=17.5900 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 83740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378654.6 mean_steps=16.0
|
|
[Episode 83750] reward=-118490094.1 actor_loss=0.3939 critic_loss=121267646763.7073 entropy=17.5823 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 83760] reward=-125667184.8 actor_loss=0.3041 critic_loss=128210651136.0000 entropy=17.5844 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 83760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-584178.6 mean_steps=11.7
|
|
[Episode 83770] reward=-124822716.9 actor_loss=0.3110 critic_loss=134033104440.8889 entropy=17.5698 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 83780] reward=-118194778.2 actor_loss=0.3331 critic_loss=122745243109.0526 entropy=17.5589 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 83780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532167.8 mean_steps=13.2
|
|
[Episode 83790] reward=-117397161.0 actor_loss=0.2436 critic_loss=122866240534.7556 entropy=17.5522 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 83800] reward=-115116665.6 actor_loss=0.2442 critic_loss=113756740919.6522 entropy=17.5447 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 83800] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-649891.3 mean_steps=11.3
|
|
[Episode 83810] reward=-164937901.5 actor_loss=1.1413 critic_loss=9365455048463.0586 entropy=17.5444 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 83820] reward=-114659856.2 actor_loss=0.2427 critic_loss=117238013587.9111 entropy=17.5410 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 83820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472919.9 mean_steps=14.7
|
|
[Episode 83830] reward=-116602358.4 actor_loss=0.3470 critic_loss=118408238239.2889 entropy=17.5363 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 83840] reward=-113601112.1 actor_loss=0.4516 critic_loss=124701663232.0000 entropy=17.5317 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 83840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-536106.6 mean_steps=12.5
|
|
[Episode 83850] reward=-114484342.2 actor_loss=0.3540 critic_loss=116566225028.1290 entropy=17.5346 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 83860] reward=-121045016.3 actor_loss=0.3524 critic_loss=126829944649.9556 entropy=17.5292 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 83860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464889.5 mean_steps=14.6
|
|
[Episode 83870] reward=-120012620.7 actor_loss=0.3064 critic_loss=124241880314.3111 entropy=17.5231 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 83880] reward=-109107392.5 actor_loss=0.3859 critic_loss=115005017331.8095 entropy=17.5212 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 83880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-401422.4 mean_steps=14.2
|
|
[Episode 83890] reward=-117583429.6 actor_loss=0.3368 critic_loss=123445765337.2121 entropy=17.5083 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 83900] reward=-116333281.9 actor_loss=0.2834 critic_loss=118589736240.4324 entropy=17.5112 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 83900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570133.6 mean_steps=13.2
|
|
[Episode 83910] reward=-111109711.2 actor_loss=0.4023 critic_loss=108864020239.0588 entropy=17.5364 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 83920] reward=-114127863.6 actor_loss=0.2073 critic_loss=120120834457.6000 entropy=17.5384 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 83920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-417809.0 mean_steps=13.6
|
|
[Episode 83930] reward=-117789711.6 actor_loss=0.2850 critic_loss=116902440504.8889 entropy=17.5523 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 83940] reward=-116676502.0 actor_loss=0.3229 critic_loss=121120089156.2667 entropy=17.5604 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 83940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559231.8 mean_steps=13.7
|
|
[Episode 83950] reward=-116216659.8 actor_loss=0.2011 critic_loss=117854767422.5778 entropy=17.5536 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 83960] reward=-118710217.3 actor_loss=0.2302 critic_loss=118706861215.2889 entropy=17.5553 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 83960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532193.8 mean_steps=13.3
|
|
[Episode 83970] reward=-116611493.3 actor_loss=0.3152 critic_loss=121886333983.0303 entropy=17.5663 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 83980] reward=-109138396.3 actor_loss=0.4110 critic_loss=112846272739.5556 entropy=17.5685 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 83980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477779.8 mean_steps=15.1
|
|
[Episode 83990] reward=-113501169.7 actor_loss=0.2964 critic_loss=120229757952.0000 entropy=17.5795 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 84000] reward=-120115814.7 actor_loss=0.2794 critic_loss=120432195447.4667 entropy=17.5604 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 84000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534493.3 mean_steps=14.1
|
|
[Episode 84010] reward=-521506502.1 actor_loss=2.2375 critic_loss=562390039003136.0000 entropy=17.5815 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 84020] reward=-115431138.6 actor_loss=0.1945 critic_loss=120126913389.7143 entropy=17.5871 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 84020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540510.5 mean_steps=13.3
|
|
[Episode 84030] reward=-115793735.3 actor_loss=0.3412 critic_loss=114243627144.5333 entropy=17.5840 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 84040] reward=-116843338.5 actor_loss=0.3136 critic_loss=124019286016.0000 entropy=17.5879 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 84040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435424.3 mean_steps=15.4
|
|
[Episode 84050] reward=-115977346.6 actor_loss=0.3233 critic_loss=116875536942.5455 entropy=17.6053 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 84060] reward=-112835139.5 actor_loss=0.2990 critic_loss=116888717230.0800 entropy=17.6060 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 84060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580443.4 mean_steps=12.5
|
|
[Episode 84070] reward=-116302577.3 actor_loss=0.3961 critic_loss=150156093547.7895 entropy=17.5983 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 84080] reward=-123868068.0 actor_loss=0.2816 critic_loss=132017712034.9091 entropy=17.5977 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 84080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510739.1 mean_steps=13.9
|
|
[Episode 84090] reward=-119022088.4 actor_loss=0.3180 critic_loss=120255487219.8095 entropy=17.6068 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 84100] reward=-119422165.3 actor_loss=0.2588 critic_loss=122355437568.0000 entropy=17.6109 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 84100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512467.5 mean_steps=13.8
|
|
[Episode 84110] reward=-116345951.7 actor_loss=0.3371 critic_loss=121986372230.7368 entropy=17.6078 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 84120] reward=-117514947.3 actor_loss=0.3418 critic_loss=121080547689.4118 entropy=17.6082 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 84120] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-360006.8 mean_steps=16.7
|
|
[Episode 84130] reward=-118252186.7 actor_loss=0.2315 critic_loss=117926626508.8000 entropy=17.6092 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 84140] reward=-118165276.0 actor_loss=0.2946 critic_loss=116356262731.2941 entropy=17.6197 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 84140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399432.7 mean_steps=15.4
|
|
[Episode 84150] reward=-115383104.3 actor_loss=0.3158 critic_loss=115312851626.6667 entropy=17.6144 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 84160] reward=-117109826.0 actor_loss=0.2202 critic_loss=119777806215.5294 entropy=17.6214 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 84160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-593143.1 mean_steps=13.4
|
|
[Episode 84170] reward=-121675010.6 actor_loss=0.3157 critic_loss=130143670031.0588 entropy=17.6280 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 84180] reward=-113638035.8 actor_loss=0.3469 critic_loss=118641777664.0000 entropy=17.6425 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 84180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505879.3 mean_steps=13.1
|
|
[Episode 84190] reward=-118565273.2 actor_loss=0.2370 critic_loss=125389514888.5333 entropy=17.6337 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 84200] reward=-119915120.6 actor_loss=0.2471 critic_loss=121648138834.5806 entropy=17.6310 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 84200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-432094.7 mean_steps=16.5
|
|
[Episode 84210] reward=-128094694.2 actor_loss=0.2955 critic_loss=570389318314.6666 entropy=17.6271 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 84220] reward=-122470057.1 actor_loss=0.2083 critic_loss=130945749143.7037 entropy=17.6197 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 84220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467123.1 mean_steps=14.8
|
|
[Episode 84230] reward=-113801649.9 actor_loss=0.3253 critic_loss=132620344779.0345 entropy=17.6027 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 84240] reward=-1156068708.5 actor_loss=306.7492 critic_loss=1531933699866624.0000 entropy=17.6098 approx_kl=0.0139 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 84240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-510736.7 mean_steps=15.3
|
|
[Episode 84250] reward=-115451850.3 actor_loss=0.3036 critic_loss=124246962034.7586 entropy=17.6104 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 84260] reward=-117912093.3 actor_loss=0.2656 critic_loss=121511951915.8857 entropy=17.6045 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 84260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473059.9 mean_steps=15.0
|
|
[Episode 84270] reward=-113902530.0 actor_loss=0.2096 critic_loss=114032526950.4000 entropy=17.6057 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 84280] reward=-138796548.2 actor_loss=14.5845 critic_loss=1835256250368.0000 entropy=17.6116 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 84280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-663374.5 mean_steps=13.2
|
|
[Episode 84290] reward=-117683592.8 actor_loss=0.3138 critic_loss=130730123264.0000 entropy=17.6316 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 84300] reward=-388645081.2 actor_loss=119.1141 critic_loss=216139742943641.5938 entropy=17.6341 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 84300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497780.5 mean_steps=15.0
|
|
[Episode 84310] reward=-112776241.7 actor_loss=0.2818 critic_loss=124499590537.8462 entropy=17.6409 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 84320] reward=-116698984.9 actor_loss=0.2808 critic_loss=119462371328.0000 entropy=17.6465 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 84320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498296.2 mean_steps=15.0
|
|
[Episode 84330] reward=-120167543.5 actor_loss=0.3460 critic_loss=128676242897.4545 entropy=17.6440 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 84340] reward=-121379704.8 actor_loss=0.3426 critic_loss=126593895992.8889 entropy=17.6486 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 84340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460074.3 mean_steps=14.2
|
|
[Episode 84350] reward=-123294340.1 actor_loss=0.2651 critic_loss=127827889960.4211 entropy=17.6498 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 84360] reward=-116471887.8 actor_loss=0.3498 critic_loss=120504894508.5217 entropy=17.6502 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 84360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471262.0 mean_steps=13.5
|
|
[Episode 84370] reward=-119868356.4 actor_loss=0.3123 critic_loss=120207882028.1379 entropy=17.6505 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 84380] reward=-122722665.6 actor_loss=0.4100 critic_loss=128685420999.1111 entropy=17.6462 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 84380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557576.6 mean_steps=13.3
|
|
[Episode 84390] reward=-119594283.0 actor_loss=0.2648 critic_loss=125522179072.0000 entropy=17.6463 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 84400] reward=-120423517.1 actor_loss=0.3044 critic_loss=126775351296.0000 entropy=17.6493 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 84400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-593317.4 mean_steps=12.6
|
|
[Episode 84410] reward=-115692434.5 actor_loss=0.3372 critic_loss=117658963595.6364 entropy=17.6553 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 84420] reward=-122338656.3 actor_loss=0.3014 critic_loss=126271359268.5714 entropy=17.6615 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 84420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493881.9 mean_steps=13.8
|
|
[Episode 84430] reward=-108179631.5 actor_loss=0.3388 critic_loss=114901975222.0444 entropy=17.6690 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 84440] reward=-121105210.0 actor_loss=0.2312 critic_loss=121612974715.5862 entropy=17.6694 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 84440] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641911.7 mean_steps=12.2
|
|
[Episode 84450] reward=-116481762.2 actor_loss=0.2129 critic_loss=115883864064.0000 entropy=17.6600 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 84460] reward=-132971690.7 actor_loss=0.4017 critic_loss=1900591258737.7778 entropy=17.6464 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 84460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392166.2 mean_steps=15.9
|
|
[Episode 84470] reward=-114001901.2 actor_loss=0.3317 critic_loss=120953010086.9565 entropy=17.6560 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 84480] reward=-117329697.0 actor_loss=0.4072 critic_loss=113875035883.2432 entropy=17.6550 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 84480] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-729983.4 mean_steps=10.8
|
|
[Episode 84490] reward=-116915021.8 actor_loss=0.3365 critic_loss=119012804835.5556 entropy=17.6596 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 84500] reward=-118766221.2 actor_loss=0.2932 critic_loss=120038487133.0909 entropy=17.6577 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 84500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505571.0 mean_steps=14.1
|
|
[Episode 84510] reward=-115758747.9 actor_loss=0.2734 critic_loss=120216228295.1111 entropy=17.6614 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 84520] reward=-116200889.4 actor_loss=0.3614 critic_loss=114447319040.0000 entropy=17.6515 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 84520] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-276433.1 mean_steps=17.4
|
|
[Episode 84530] reward=-118820298.2 actor_loss=0.2804 critic_loss=122039207799.4667 entropy=17.6564 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 84540] reward=-118820228.2 actor_loss=0.2731 critic_loss=119456300208.5517 entropy=17.6573 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 84540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499563.4 mean_steps=13.8
|
|
[Episode 84550] reward=-119166446.8 actor_loss=0.2588 critic_loss=127376464099.5556 entropy=17.6439 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 84560] reward=-119630074.9 actor_loss=0.3052 critic_loss=134782086894.9333 entropy=17.6473 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 84560] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-587289.4 mean_steps=12.0
|
|
[Episode 84570] reward=-115259061.8 actor_loss=0.2995 critic_loss=122774567450.9474 entropy=17.6312 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 84580] reward=-113638647.1 actor_loss=0.2994 critic_loss=119201529856.0000 entropy=17.6155 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 84580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-385637.0 mean_steps=14.8
|
|
[Episode 84590] reward=-121896705.1 actor_loss=0.2601 critic_loss=121532858880.0000 entropy=17.6080 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 84600] reward=-116129734.0 actor_loss=0.3285 critic_loss=116987145178.0741 entropy=17.6030 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 84600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-632500.0 mean_steps=13.9
|
|
[Episode 84610] reward=-115927925.5 actor_loss=0.4042 critic_loss=122941878763.5200 entropy=17.6015 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 84620] reward=-116794352.7 actor_loss=0.1798 critic_loss=118369262445.7143 entropy=17.6126 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 84620] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-356762.5 mean_steps=16.6
|
|
[Episode 84630] reward=-117906607.3 actor_loss=0.3037 critic_loss=119366150436.5714 entropy=17.6189 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 84640] reward=-118735515.2 actor_loss=0.3702 critic_loss=128509180586.6667 entropy=17.6240 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 84640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-365443.9 mean_steps=14.1
|
|
[Episode 84650] reward=-118016185.8 actor_loss=0.2742 critic_loss=121975700873.8462 entropy=17.6271 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 84660] reward=-118519826.2 actor_loss=0.3716 critic_loss=131554564973.7143 entropy=17.6225 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 84660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-478196.7 mean_steps=16.2
|
|
[Episode 84670] reward=-116508951.9 actor_loss=0.2488 critic_loss=116265236967.6190 entropy=17.6254 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 84680] reward=-114388479.2 actor_loss=0.2392 critic_loss=117742621095.7241 entropy=17.6162 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 84680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553510.1 mean_steps=14.3
|
|
[Episode 84690] reward=-114104377.7 actor_loss=0.3502 critic_loss=114744610816.0000 entropy=17.6016 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 84700] reward=-118069088.0 actor_loss=0.3300 critic_loss=119872537288.3478 entropy=17.5986 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 84700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447980.1 mean_steps=14.3
|
|
[Episode 84710] reward=-119593353.6 actor_loss=0.3009 critic_loss=121948813066.2400 entropy=17.5953 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 84720] reward=-116595685.2 actor_loss=0.1961 critic_loss=123890105958.4000 entropy=17.5975 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 84720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485714.5 mean_steps=13.7
|
|
[Episode 84730] reward=-116893238.3 actor_loss=0.2506 critic_loss=123024639902.4762 entropy=17.5877 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 84740] reward=-114446399.9 actor_loss=0.3009 critic_loss=115294158241.1852 entropy=17.5633 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 84740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488176.0 mean_steps=13.9
|
|
[Episode 84750] reward=-113651063.5 actor_loss=0.2353 critic_loss=115632710334.1714 entropy=17.5535 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 84760] reward=-117609329.6 actor_loss=0.4389 critic_loss=120326790439.8222 entropy=17.5318 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1504 front_blocked=0
|
|
[Eval 84760] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-333272.7 mean_steps=17.1
|
|
[Episode 84770] reward=-121043344.6 actor_loss=0.3189 critic_loss=124783057814.0690 entropy=17.5365 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 84780] reward=-115058473.5 actor_loss=0.2791 critic_loss=115096877153.5238 entropy=17.5598 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 84780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-503156.9 mean_steps=15.8
|
|
[Episode 84790] reward=-113960237.1 actor_loss=0.2579 critic_loss=115664075124.3636 entropy=17.5740 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 84800] reward=-114810887.6 actor_loss=0.2712 critic_loss=114865335896.2759 entropy=17.5763 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 84800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-580368.0 mean_steps=13.4
|
|
[Episode 84810] reward=-123011931.5 actor_loss=0.1989 critic_loss=129546949099.5200 entropy=17.5661 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 84820] reward=-119413822.4 actor_loss=0.2925 critic_loss=120025347601.6552 entropy=17.5553 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 84820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-581837.9 mean_steps=11.6
|
|
[Episode 84830] reward=-119671288.9 actor_loss=0.3023 critic_loss=117590332136.7273 entropy=17.5606 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 84840] reward=-112736278.8 actor_loss=0.4364 critic_loss=117159020498.4889 entropy=17.5841 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 84840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-621062.7 mean_steps=12.9
|
|
[Episode 84850] reward=-122950668.2 actor_loss=0.2353 critic_loss=129483431680.0000 entropy=17.5876 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 84860] reward=-119047437.7 actor_loss=0.2816 critic_loss=122747482732.6061 entropy=17.5895 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 84860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533094.9 mean_steps=14.2
|
|
[Episode 84870] reward=-115700146.6 actor_loss=0.2974 critic_loss=124980877994.6667 entropy=17.5804 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 84880] reward=-116264392.5 actor_loss=0.3706 critic_loss=117524287363.8788 entropy=17.5719 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 84880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534652.6 mean_steps=13.4
|
|
[Episode 84890] reward=-124215321.8 actor_loss=0.3023 critic_loss=133918109995.7073 entropy=17.5797 approx_kl=0.0115 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 84900] reward=-115139397.8 actor_loss=0.3765 critic_loss=123089089270.5185 entropy=17.5771 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 84900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523212.8 mean_steps=13.8
|
|
[Episode 84910] reward=-117987399.7 actor_loss=0.2415 critic_loss=120302704993.1035 entropy=17.5810 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 84920] reward=-114210037.5 actor_loss=0.3035 critic_loss=113432897877.3333 entropy=17.5751 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 84920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-541966.1 mean_steps=12.3
|
|
[Episode 84930] reward=-121051770.2 actor_loss=0.2908 critic_loss=125844140247.5789 entropy=17.5747 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 84940] reward=-119859175.2 actor_loss=0.2288 critic_loss=120749232850.8235 entropy=17.5677 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 84940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585987.0 mean_steps=12.7
|
|
[Episode 84950] reward=-119225781.3 actor_loss=0.2715 critic_loss=126065951350.1538 entropy=17.5658 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 84960] reward=-115357071.0 actor_loss=0.3666 critic_loss=115989113241.6000 entropy=17.5666 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 84960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480310.7 mean_steps=14.6
|
|
[Episode 84970] reward=-116408907.4 actor_loss=0.2453 critic_loss=115078827394.8445 entropy=17.5725 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 84980] reward=-115078266.4 actor_loss=0.3795 critic_loss=117928793573.0526 entropy=17.5646 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 84980] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-307657.3 mean_steps=16.5
|
|
[Episode 84990] reward=-111724647.4 actor_loss=0.3231 critic_loss=118312283964.9524 entropy=17.5701 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 85000] reward=-116547053.1 actor_loss=0.2504 critic_loss=120644005181.7931 entropy=17.5773 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 85000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-590705.5 mean_steps=14.8
|
|
[Episode 85010] reward=-119525595.0 actor_loss=0.2288 critic_loss=115917734632.7273 entropy=17.5838 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 85020] reward=-121476975.9 actor_loss=0.2858 critic_loss=123763273193.7391 entropy=17.5912 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 85020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581300.3 mean_steps=13.4
|
|
[Episode 85030] reward=-115256536.8 actor_loss=0.3142 critic_loss=115633993955.5556 entropy=17.5928 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 85040] reward=-117474601.6 actor_loss=0.2510 critic_loss=119757972573.0909 entropy=17.5979 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 85040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-606386.2 mean_steps=13.4
|
|
[Episode 85050] reward=-118713048.0 actor_loss=0.3682 critic_loss=123555065856.0000 entropy=17.5805 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 85060] reward=-117685764.2 actor_loss=0.2944 critic_loss=116276838912.0000 entropy=17.5708 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 85060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-656255.2 mean_steps=13.2
|
|
[Episode 85070] reward=-119049576.3 actor_loss=0.2227 critic_loss=122723751253.3333 entropy=17.5624 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 85080] reward=-118303052.0 actor_loss=0.3018 critic_loss=119057221159.3846 entropy=17.5571 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 85080] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-693908.2 mean_steps=11.3
|
|
[Episode 85090] reward=-122251611.1 actor_loss=0.2671 critic_loss=125806108190.1176 entropy=17.5592 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 85100] reward=-117331389.2 actor_loss=0.2914 critic_loss=117185860169.1429 entropy=17.5599 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 85100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537890.0 mean_steps=13.4
|
|
[Episode 85110] reward=-119345820.9 actor_loss=0.2357 critic_loss=120510990874.9474 entropy=17.5594 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 85120] reward=-118509267.9 actor_loss=0.3161 critic_loss=119974150799.3600 entropy=17.5674 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 85120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592954.7 mean_steps=12.3
|
|
[Episode 85130] reward=-108790403.0 actor_loss=0.2947 critic_loss=114030880475.4286 entropy=17.5630 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 85140] reward=-116287474.3 actor_loss=0.3343 critic_loss=118989011025.9200 entropy=17.5620 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 85140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454803.5 mean_steps=15.3
|
|
[Episode 85150] reward=-116019339.7 actor_loss=0.2683 critic_loss=116715310622.1176 entropy=17.5621 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 85160] reward=-118291857.4 actor_loss=0.2125 critic_loss=118742222620.4444 entropy=17.5658 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 85160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-404585.7 mean_steps=15.4
|
|
[Episode 85170] reward=-118479218.0 actor_loss=0.2338 critic_loss=123986623201.2800 entropy=17.5716 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 85180] reward=-115696326.0 actor_loss=0.2727 critic_loss=113172502755.5556 entropy=17.5935 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 85180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512101.9 mean_steps=13.9
|
|
[Episode 85190] reward=-115313977.7 actor_loss=0.3925 critic_loss=112534724126.1176 entropy=17.6145 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 85200] reward=-113560025.9 actor_loss=0.2822 critic_loss=115336116815.6444 entropy=17.6034 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 85200] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-351606.3 mean_steps=16.8
|
|
[Episode 85210] reward=-115225220.7 actor_loss=0.3824 critic_loss=118162324229.6889 entropy=17.5912 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 85220] reward=-111341488.4 actor_loss=0.3872 critic_loss=112770819413.3333 entropy=17.5988 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 85220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-417140.5 mean_steps=14.6
|
|
[Episode 85230] reward=-118505595.4 actor_loss=0.3340 critic_loss=125158454067.2000 entropy=17.5947 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 85240] reward=-117188663.1 actor_loss=0.3338 critic_loss=116187348081.7778 entropy=17.6059 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 85240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520268.3 mean_steps=13.8
|
|
[Episode 85250] reward=-115909469.9 actor_loss=0.3758 critic_loss=120657434851.5556 entropy=17.6014 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 85260] reward=-114533326.6 actor_loss=0.3223 critic_loss=116317749065.9556 entropy=17.6024 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 85260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-588348.8 mean_steps=12.3
|
|
[Episode 85270] reward=-118359475.6 actor_loss=0.2851 critic_loss=122209728879.5897 entropy=17.5983 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 85280] reward=-116286721.0 actor_loss=0.2949 critic_loss=116746757867.2432 entropy=17.5857 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 85280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521099.7 mean_steps=14.1
|
|
[Episode 85290] reward=-119068768.9 actor_loss=0.2648 critic_loss=120041445699.3684 entropy=17.5763 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 85300] reward=-114162297.9 actor_loss=0.2021 critic_loss=115123291750.4000 entropy=17.5857 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 85300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455426.8 mean_steps=14.8
|
|
[Episode 85310] reward=-115116470.5 actor_loss=0.3785 critic_loss=116330617811.4783 entropy=17.5955 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 85320] reward=-119522041.3 actor_loss=0.2424 critic_loss=120447440213.3333 entropy=17.5904 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 85320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585547.7 mean_steps=12.5
|
|
[Episode 85330] reward=-118888883.9 actor_loss=0.2473 critic_loss=126450398673.4545 entropy=17.5781 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 85340] reward=-114683634.4 actor_loss=0.2615 critic_loss=113946597083.4286 entropy=17.5713 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 85340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513279.2 mean_steps=14.2
|
|
[Episode 85350] reward=-115934179.1 actor_loss=0.2097 critic_loss=117117892949.3333 entropy=17.5811 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 85360] reward=-113861680.2 actor_loss=0.3611 critic_loss=120058667476.1143 entropy=17.5853 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 85360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576306.2 mean_steps=13.8
|
|
[Episode 85370] reward=-117468603.7 actor_loss=0.3498 critic_loss=118475458787.5556 entropy=17.5854 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 85380] reward=-120572842.1 actor_loss=0.2385 critic_loss=121129169618.8235 entropy=17.5858 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 85380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426532.3 mean_steps=15.1
|
|
[Episode 85390] reward=-116707001.9 actor_loss=0.3714 critic_loss=120079476280.8889 entropy=17.5889 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 85400] reward=-121694753.6 actor_loss=0.3800 critic_loss=124545058982.0540 entropy=17.5963 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 85400] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641737.1 mean_steps=11.9
|
|
[Episode 85410] reward=-119623226.8 actor_loss=0.3336 critic_loss=121644227394.3704 entropy=17.6019 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 85420] reward=-115929750.3 actor_loss=0.2527 critic_loss=113506100257.0323 entropy=17.5971 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 85420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623844.0 mean_steps=12.1
|
|
[Episode 85430] reward=-117834182.9 actor_loss=0.3379 critic_loss=121414655369.8462 entropy=17.5785 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 85440] reward=-113066358.4 actor_loss=0.3179 critic_loss=115503958589.4400 entropy=17.5786 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 85440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507706.4 mean_steps=12.8
|
|
[Episode 85450] reward=-122801062.6 actor_loss=0.3019 critic_loss=127648143962.3529 entropy=17.5695 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 85460] reward=-121652275.7 actor_loss=0.3750 critic_loss=124159193198.7027 entropy=17.5855 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 85460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517911.7 mean_steps=14.2
|
|
[Episode 85470] reward=-108691392.4 actor_loss=0.4997 critic_loss=103061735014.4000 entropy=17.5988 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1523 front_blocked=0
|
|
[Episode 85480] reward=-115600477.3 actor_loss=0.3017 critic_loss=118002689638.4000 entropy=17.6179 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 85480] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-277704.0 mean_steps=16.9
|
|
[Episode 85490] reward=-116650035.7 actor_loss=0.1828 critic_loss=112066227479.2727 entropy=17.6179 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 85500] reward=-113251922.4 actor_loss=0.2520 critic_loss=116829252887.2727 entropy=17.6077 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 85500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482296.8 mean_steps=14.7
|
|
[Episode 85510] reward=-114019698.9 actor_loss=0.3109 critic_loss=120265150919.1111 entropy=17.6184 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 85520] reward=-116337862.4 actor_loss=0.3274 critic_loss=116798158165.3333 entropy=17.6147 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 85520] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-680587.7 mean_steps=10.9
|
|
[Episode 85530] reward=-116346454.0 actor_loss=0.2861 critic_loss=120481816064.0000 entropy=17.6058 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 85540] reward=-112314316.4 actor_loss=0.4028 critic_loss=114508227610.2564 entropy=17.6021 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 85540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-596097.8 mean_steps=13.4
|
|
[Episode 85550] reward=-121100410.7 actor_loss=0.2377 critic_loss=121269119470.3448 entropy=17.5984 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 85560] reward=-118668127.9 actor_loss=0.2242 critic_loss=122843074816.0000 entropy=17.5964 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 85560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524307.4 mean_steps=14.2
|
|
[Episode 85570] reward=-112689699.6 actor_loss=0.2978 critic_loss=117911202702.2222 entropy=17.6004 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 85580] reward=-118521965.9 actor_loss=0.2845 critic_loss=121741784405.3333 entropy=17.5968 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 85580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-644653.0 mean_steps=12.1
|
|
[Episode 85590] reward=-119907804.4 actor_loss=0.2544 critic_loss=122358306816.0000 entropy=17.5943 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 85600] reward=-122565137.7 actor_loss=0.2946 critic_loss=126722774220.8000 entropy=17.5927 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 85600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-619668.2 mean_steps=14.7
|
|
[Episode 85610] reward=-121233520.3 actor_loss=0.2430 critic_loss=129891730188.1905 entropy=17.5913 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 85620] reward=-117772892.3 actor_loss=0.2719 critic_loss=116406858450.8235 entropy=17.5944 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 85620] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-656609.6 mean_steps=12.2
|
|
[Episode 85630] reward=-117884536.6 actor_loss=0.2129 critic_loss=118618467181.7143 entropy=17.5937 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 85640] reward=-116868691.1 actor_loss=0.2228 critic_loss=123308302053.5172 entropy=17.5949 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 85640] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-759198.3 mean_steps=9.9
|
|
[Episode 85650] reward=-122808400.2 actor_loss=0.1864 critic_loss=128441951846.4000 entropy=17.6012 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 85660] reward=-117752237.1 actor_loss=0.2695 critic_loss=127003353634.1333 entropy=17.6029 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 85660] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-281416.0 mean_steps=17.4
|
|
[Episode 85670] reward=-111704550.2 actor_loss=0.3174 critic_loss=112640272091.4286 entropy=17.6022 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 85680] reward=-112416732.4 actor_loss=0.2847 critic_loss=112574360546.7429 entropy=17.6019 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 85680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-609620.5 mean_steps=12.8
|
|
[Episode 85690] reward=-118610859.4 actor_loss=0.2907 critic_loss=121233339572.7059 entropy=17.6052 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 85700] reward=-121773725.6 actor_loss=0.2449 critic_loss=124974018286.9333 entropy=17.6005 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 85700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-392842.6 mean_steps=17.1
|
|
[Episode 85710] reward=-120795822.2 actor_loss=0.3306 critic_loss=130120492935.5294 entropy=17.5945 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 85720] reward=-116961469.5 actor_loss=0.2818 critic_loss=126623569317.6471 entropy=17.6023 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 85720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-566166.4 mean_steps=14.2
|
|
[Episode 85730] reward=-118318818.8 actor_loss=0.3091 critic_loss=127402074824.3478 entropy=17.5959 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 85740] reward=-120990835.2 actor_loss=0.2148 critic_loss=122071011328.0000 entropy=17.5916 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 85740] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-295838.3 mean_steps=16.9
|
|
[Episode 85750] reward=-117729833.6 actor_loss=0.3791 critic_loss=119459170304.0000 entropy=17.5906 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 85760] reward=-116628691.7 actor_loss=0.3288 critic_loss=119237752422.4000 entropy=17.5938 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 85760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-448232.3 mean_steps=16.1
|
|
[Episode 85770] reward=-114209689.0 actor_loss=0.3691 critic_loss=118770048256.0000 entropy=17.5919 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 85780] reward=-116760121.5 actor_loss=0.2336 critic_loss=117065718009.7561 entropy=17.6004 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 85780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475179.1 mean_steps=14.8
|
|
[Episode 85790] reward=-118001348.4 actor_loss=0.3361 critic_loss=118006329659.0769 entropy=17.5979 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 85800] reward=-118462660.1 actor_loss=0.1645 critic_loss=125708506089.2444 entropy=17.5750 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 85800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482467.2 mean_steps=14.8
|
|
[Episode 85810] reward=-112195787.0 actor_loss=0.2665 critic_loss=117082181453.9130 entropy=17.5685 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 85820] reward=-114268038.0 actor_loss=0.3019 critic_loss=123500046874.9474 entropy=17.5576 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 85820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-653100.9 mean_steps=12.1
|
|
[Episode 85830] reward=-127644499.8 actor_loss=0.3663 critic_loss=395637997568.0000 entropy=17.5535 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 85840] reward=-114480646.4 actor_loss=0.4173 critic_loss=109650784529.0667 entropy=17.5514 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 85840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-619119.9 mean_steps=13.8
|
|
[Episode 85850] reward=-115403683.1 actor_loss=0.3344 critic_loss=124441793589.8947 entropy=17.5542 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 85860] reward=-119204123.3 actor_loss=0.2132 critic_loss=122129334784.0000 entropy=17.5583 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 85860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482272.1 mean_steps=14.8
|
|
[Episode 85870] reward=-117515665.4 actor_loss=0.2623 critic_loss=120898879488.0000 entropy=17.5436 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 85880] reward=-117121424.8 actor_loss=0.3019 critic_loss=116081417575.7838 entropy=17.5369 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 85880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414719.4 mean_steps=15.1
|
|
[Episode 85890] reward=-117630395.8 actor_loss=0.3397 critic_loss=124323755061.8947 entropy=17.5289 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 85900] reward=-116581547.1 actor_loss=0.2746 critic_loss=118642141985.3913 entropy=17.5274 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 85900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-630621.8 mean_steps=11.8
|
|
[Episode 85910] reward=-117926268.9 actor_loss=0.2915 critic_loss=125828958680.6154 entropy=17.5299 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 85920] reward=-121666510.0 actor_loss=0.2810 critic_loss=128369388202.6667 entropy=17.5417 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 85920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533193.1 mean_steps=13.6
|
|
[Episode 85930] reward=-114635245.1 actor_loss=0.2534 critic_loss=111233624171.7895 entropy=17.5406 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 85940] reward=-115492890.3 actor_loss=0.3618 critic_loss=120788192109.7143 entropy=17.5491 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 85940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-429778.7 mean_steps=14.5
|
|
[Episode 85950] reward=-115548431.8 actor_loss=0.2283 critic_loss=112269129435.4286 entropy=17.5536 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 85960] reward=-118061810.0 actor_loss=0.1787 critic_loss=116144475460.6829 entropy=17.5676 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 85960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-438791.2 mean_steps=13.6
|
|
[Episode 85970] reward=-115834581.5 actor_loss=0.3079 critic_loss=117042938766.2222 entropy=17.5651 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 85980] reward=-115921946.2 actor_loss=0.3867 critic_loss=117160054979.0476 entropy=17.5607 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 85980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520609.6 mean_steps=14.1
|
|
[Episode 85990] reward=-109344990.0 actor_loss=0.2511 critic_loss=111567701674.6667 entropy=17.5624 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 86000] reward=-110768501.2 actor_loss=0.3344 critic_loss=116425193062.4000 entropy=17.5581 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 86000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-361191.8 mean_steps=16.0
|
|
[Episode 86010] reward=-115477824.6 actor_loss=0.2943 critic_loss=117634530017.2800 entropy=17.5485 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 86020] reward=-117492053.0 actor_loss=0.2084 critic_loss=119061784348.4444 entropy=17.5504 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 86020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469173.4 mean_steps=15.0
|
|
[Episode 86030] reward=-112089632.0 actor_loss=0.3245 critic_loss=115226018247.1111 entropy=17.5495 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 86040] reward=-113284125.6 actor_loss=0.3607 critic_loss=119318415435.8519 entropy=17.5500 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 86040] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-650182.2 mean_steps=12.2
|
|
[Episode 86050] reward=-193396921.8 actor_loss=0.8521 critic_loss=24287051205108.6211 entropy=17.5587 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 86060] reward=-117342576.3 actor_loss=0.2828 critic_loss=121972471928.4706 entropy=17.5613 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 86060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551125.1 mean_steps=13.2
|
|
[Episode 86070] reward=-116179208.6 actor_loss=0.3621 critic_loss=117006785266.5263 entropy=17.5684 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 86080] reward=-130695839.5 actor_loss=0.3958 critic_loss=1562574604580.5715 entropy=17.5634 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 86080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-431767.8 mean_steps=13.8
|
|
[Episode 86090] reward=-116867899.9 actor_loss=0.2487 critic_loss=122348149028.5714 entropy=17.5536 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 86100] reward=-123878102.1 actor_loss=0.2594 critic_loss=123083976960.0000 entropy=17.5547 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 86100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547289.2 mean_steps=13.1
|
|
[Episode 86110] reward=-117345617.8 actor_loss=0.3841 critic_loss=116152066048.0000 entropy=17.5498 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 86120] reward=-114763529.9 actor_loss=0.4177 critic_loss=117719749254.7368 entropy=17.5355 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 86120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438960.0 mean_steps=14.5
|
|
[Episode 86130] reward=-137392880.3 actor_loss=0.2729 critic_loss=3421824103219.2002 entropy=17.5428 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 86140] reward=-127130487.0 actor_loss=0.3363 critic_loss=609555835198.5778 entropy=17.5408 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 86140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431662.3 mean_steps=15.4
|
|
[Episode 86150] reward=-116395868.1 actor_loss=0.3250 critic_loss=121639454310.4000 entropy=17.5454 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 86160] reward=-110942515.4 actor_loss=0.3672 critic_loss=114710318518.8571 entropy=17.5398 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 86160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485164.4 mean_steps=13.8
|
|
[Episode 86170] reward=-117904289.4 actor_loss=0.3015 critic_loss=122693037947.8710 entropy=17.5498 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 86180] reward=-114942019.8 actor_loss=0.2604 critic_loss=115191881355.6364 entropy=17.5544 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 86180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408323.1 mean_steps=14.8
|
|
[Episode 86190] reward=-116566618.4 actor_loss=0.3375 critic_loss=114628648695.7419 entropy=17.5630 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 86200] reward=-112632465.6 actor_loss=0.3575 critic_loss=113276980512.8205 entropy=17.5538 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 86200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526880.9 mean_steps=13.4
|
|
[Episode 86210] reward=-116009512.5 actor_loss=0.2707 critic_loss=117086194801.7778 entropy=17.5545 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 86220] reward=-115181908.8 actor_loss=0.2432 critic_loss=118771360674.9091 entropy=17.5503 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 86220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-523531.0 mean_steps=15.1
|
|
[Episode 86230] reward=-116929270.6 actor_loss=0.2479 critic_loss=117346584733.5385 entropy=17.5348 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 86240] reward=-113261189.6 actor_loss=0.3290 critic_loss=118336125337.6000 entropy=17.5301 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 86240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483519.1 mean_steps=14.0
|
|
[Episode 86250] reward=-117211193.0 actor_loss=0.2065 critic_loss=116587988309.3333 entropy=17.5275 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 86260] reward=-113313449.6 actor_loss=0.3679 critic_loss=113268698994.7586 entropy=17.5278 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 86260] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-581455.9 mean_steps=10.8
|
|
[Episode 86270] reward=-116782636.7 actor_loss=0.4150 critic_loss=115997771452.6316 entropy=17.5256 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 86280] reward=-113999142.5 actor_loss=0.3034 critic_loss=120346947049.7391 entropy=17.5228 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 86280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485133.5 mean_steps=13.8
|
|
[Episode 86290] reward=-116187307.3 actor_loss=0.3394 critic_loss=117854861432.4706 entropy=17.5168 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 86300] reward=-114867297.9 actor_loss=0.3934 critic_loss=119631978213.5172 entropy=17.5282 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 86300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484200.0 mean_steps=13.9
|
|
[Episode 86310] reward=-115843786.7 actor_loss=0.2699 critic_loss=118184463564.8000 entropy=17.5279 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 86320] reward=-120553901.2 actor_loss=0.2437 critic_loss=117602473660.6316 entropy=17.5286 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 86320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405744.9 mean_steps=14.9
|
|
[Episode 86330] reward=-117538490.0 actor_loss=0.3472 critic_loss=115970454978.5600 entropy=17.5309 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 86340] reward=-116562983.9 actor_loss=0.3139 critic_loss=119241248312.8889 entropy=17.5295 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 86340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541530.1 mean_steps=13.4
|
|
[Episode 86350] reward=-119461644.7 actor_loss=0.2744 critic_loss=121071306524.4444 entropy=17.5329 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 86360] reward=-117523315.4 actor_loss=0.2246 critic_loss=129568121651.2000 entropy=17.5376 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 86360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-522504.1 mean_steps=13.3
|
|
[Episode 86370] reward=-118375563.3 actor_loss=0.2847 critic_loss=119554102193.2308 entropy=17.5418 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 86380] reward=-114589147.0 actor_loss=0.2670 critic_loss=114659214441.9310 entropy=17.5437 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 86380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526890.1 mean_steps=13.2
|
|
[Episode 86390] reward=-112934863.9 actor_loss=0.3121 critic_loss=153995256520.3478 entropy=17.5445 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 86400] reward=-113269727.9 actor_loss=0.2262 critic_loss=121333481472.0000 entropy=17.5390 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 86400] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-265198.2 mean_steps=18.3
|
|
[Episode 86410] reward=-117826536.3 actor_loss=0.3029 critic_loss=140933633117.0909 entropy=17.5432 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 86420] reward=-111222998.3 actor_loss=0.3414 critic_loss=115103188237.4737 entropy=17.5506 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 86420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-397936.9 mean_steps=15.9
|
|
[Episode 86430] reward=-112058349.4 actor_loss=0.3239 critic_loss=112629719950.2222 entropy=17.5561 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 86440] reward=-109481319.7 actor_loss=0.2671 critic_loss=107722956800.0000 entropy=17.5433 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 86440] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-218479.1 mean_steps=18.8
|
|
[Episode 86450] reward=-120795693.9 actor_loss=0.3449 critic_loss=124295617422.2222 entropy=17.5474 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 86460] reward=-117634131.2 actor_loss=0.3656 critic_loss=116227612672.0000 entropy=17.5509 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 86460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-531327.9 mean_steps=15.2
|
|
[Episode 86470] reward=-114809608.2 actor_loss=0.3257 critic_loss=116332662084.6829 entropy=17.5544 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 86480] reward=-113620840.5 actor_loss=0.3779 critic_loss=112700237824.0000 entropy=17.5513 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 86480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548730.6 mean_steps=13.4
|
|
[Episode 86490] reward=-116432454.9 actor_loss=0.3217 critic_loss=119195164672.0000 entropy=17.5523 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 86500] reward=-118896892.2 actor_loss=0.3339 critic_loss=123542163228.4444 entropy=17.5494 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 86500] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-711275.4 mean_steps=11.6
|
|
[Episode 86510] reward=-107148179.3 actor_loss=0.3311 critic_loss=111194781816.4706 entropy=17.5535 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 86520] reward=-116039021.1 actor_loss=0.3048 critic_loss=118037187977.8462 entropy=17.5526 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 86520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508456.7 mean_steps=14.4
|
|
[Episode 86530] reward=-114517379.6 actor_loss=0.3621 critic_loss=120398299136.0000 entropy=17.5510 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 86540] reward=-116486733.1 actor_loss=0.3481 critic_loss=118909188739.6572 entropy=17.5536 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 86540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489668.5 mean_steps=14.8
|
|
[Episode 86550] reward=-113014700.5 actor_loss=0.2753 critic_loss=113187083520.0000 entropy=17.5601 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 86560] reward=-113793454.0 actor_loss=0.2976 critic_loss=115923340681.8462 entropy=17.5589 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 86560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532426.5 mean_steps=14.4
|
|
[Episode 86570] reward=-117704409.7 actor_loss=0.3752 critic_loss=149636958050.4615 entropy=17.5557 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 86580] reward=-116695990.6 actor_loss=0.3314 critic_loss=124959342592.0000 entropy=17.5588 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 86580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641284.2 mean_steps=12.1
|
|
[Episode 86590] reward=-115492990.8 actor_loss=0.3564 critic_loss=117341956778.6667 entropy=17.5647 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 86600] reward=-115496648.4 actor_loss=0.3074 critic_loss=118544966087.1111 entropy=17.5615 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 86600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538555.1 mean_steps=14.2
|
|
[Episode 86610] reward=-119299751.3 actor_loss=0.3784 critic_loss=120167912314.4348 entropy=17.5701 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 86620] reward=-117601917.8 actor_loss=0.3511 critic_loss=120845429504.0000 entropy=17.5765 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 86620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-357490.7 mean_steps=15.9
|
|
[Episode 86630] reward=-114943005.1 actor_loss=0.2528 critic_loss=125791304704.0000 entropy=17.5744 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 86640] reward=-119332464.8 actor_loss=0.2648 critic_loss=122112584362.6667 entropy=17.5736 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 86640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-576224.9 mean_steps=12.6
|
|
[Episode 86650] reward=-120126484.7 actor_loss=0.2295 critic_loss=121742008554.0571 entropy=17.5696 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 86660] reward=-110833422.5 actor_loss=0.2929 critic_loss=116255036393.2444 entropy=17.5671 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 86660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-572488.9 mean_steps=13.4
|
|
[Episode 86670] reward=-114726464.4 actor_loss=0.3614 critic_loss=177197231421.7931 entropy=17.5672 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 86680] reward=-118108487.6 actor_loss=0.4032 critic_loss=121141609078.1538 entropy=17.5658 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 86680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492669.3 mean_steps=14.8
|
|
[Episode 86690] reward=-116602970.3 actor_loss=0.2316 critic_loss=119729269059.3684 entropy=17.5628 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 86700] reward=-123098952.6 actor_loss=0.3087 critic_loss=123514286395.0769 entropy=17.5447 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 86700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368250.8 mean_steps=16.1
|
|
[Episode 86710] reward=-116117206.0 actor_loss=0.2869 critic_loss=112380994948.4138 entropy=17.5489 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 86720] reward=-114240979.5 actor_loss=0.2437 critic_loss=112380808578.8445 entropy=17.5547 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 86720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462004.7 mean_steps=15.1
|
|
[Episode 86730] reward=-117760602.8 actor_loss=0.3011 critic_loss=115258724462.7027 entropy=17.5606 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 86740] reward=-120003579.8 actor_loss=0.2007 critic_loss=119220893923.5556 entropy=17.5818 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 86740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-421115.0 mean_steps=14.7
|
|
[Episode 86750] reward=-117232333.4 actor_loss=0.2483 critic_loss=113568564413.6296 entropy=17.5713 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 86760] reward=-123082580.3 actor_loss=0.2384 critic_loss=129672653045.7600 entropy=17.5788 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 86760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602093.1 mean_steps=12.7
|
|
[Episode 86770] reward=-112282567.0 actor_loss=0.3401 critic_loss=113515777884.1600 entropy=17.5839 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 86780] reward=-115080271.9 actor_loss=0.2753 critic_loss=117571373658.3529 entropy=17.5833 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 86780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433483.6 mean_steps=15.7
|
|
[Episode 86790] reward=-113183444.7 actor_loss=0.2880 critic_loss=112087473245.0909 entropy=17.5785 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 86800] reward=-109350140.5 actor_loss=0.4062 critic_loss=120230942720.0000 entropy=17.5806 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 86800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-415731.3 mean_steps=14.9
|
|
[Episode 86810] reward=-119710194.5 actor_loss=0.2158 critic_loss=126607038171.4286 entropy=17.5926 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 86820] reward=-118907734.4 actor_loss=0.2975 critic_loss=132931153920.0000 entropy=17.5965 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 86820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-608924.4 mean_steps=12.6
|
|
[Episode 86830] reward=-119996808.3 actor_loss=0.3040 critic_loss=133799816396.8000 entropy=17.5969 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 86840] reward=-115728104.6 actor_loss=0.2843 critic_loss=118589016590.6286 entropy=17.6021 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 86840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574164.2 mean_steps=13.8
|
|
[Episode 86850] reward=-114238861.0 actor_loss=0.3205 critic_loss=112584097792.0000 entropy=17.5957 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 86860] reward=-119320064.9 actor_loss=0.3151 critic_loss=119613586711.2727 entropy=17.5995 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 86860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-598809.4 mean_steps=13.8
|
|
[Episode 86870] reward=-112393774.2 actor_loss=0.4213 critic_loss=115332807258.3529 entropy=17.5905 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 86880] reward=-121892558.7 actor_loss=0.4638 critic_loss=507518098825.8461 entropy=17.6070 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 86880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-548400.6 mean_steps=12.3
|
|
[Episode 86890] reward=-118135362.1 actor_loss=0.2214 critic_loss=123387166720.0000 entropy=17.6083 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 86900] reward=-117097262.3 actor_loss=0.3029 critic_loss=117917681020.3428 entropy=17.6097 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 86900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550915.2 mean_steps=13.1
|
|
[Episode 86910] reward=-116616449.8 actor_loss=0.4017 critic_loss=139403166720.0000 entropy=17.6228 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 86920] reward=-118188813.4 actor_loss=0.2294 critic_loss=120843017875.9111 entropy=17.6206 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 86920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551900.5 mean_steps=13.1
|
|
[Episode 86930] reward=-118785270.1 actor_loss=0.3159 critic_loss=119391528368.3556 entropy=17.6228 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 86940] reward=-115118946.5 actor_loss=0.4115 critic_loss=117050404119.2727 entropy=17.6136 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 86940] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-354066.5 mean_steps=16.8
|
|
[Episode 86950] reward=-111899892.0 actor_loss=0.2968 critic_loss=110919606636.0889 entropy=17.6196 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 86960] reward=-113079036.0 actor_loss=0.2809 critic_loss=114425128368.3556 entropy=17.6226 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 86960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-579408.7 mean_steps=14.3
|
|
[Episode 86970] reward=-113082068.0 actor_loss=0.3695 critic_loss=111623595576.8889 entropy=17.6140 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 86980] reward=-120784493.0 actor_loss=0.2583 critic_loss=435192431684.2667 entropy=17.6036 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 86980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512988.9 mean_steps=14.2
|
|
[Episode 86990] reward=-118059761.5 actor_loss=0.2663 critic_loss=119657778569.8462 entropy=17.5911 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 87000] reward=-116449069.0 actor_loss=0.3371 critic_loss=124020700501.3333 entropy=17.5894 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 87000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488553.3 mean_steps=13.6
|
|
[Episode 87010] reward=-126380919.8 actor_loss=0.3069 critic_loss=471590799571.8621 entropy=17.5921 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 87020] reward=-116058311.2 actor_loss=0.2816 critic_loss=127834920748.1379 entropy=17.6020 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 87020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540709.5 mean_steps=14.0
|
|
[Episode 87030] reward=-120722792.7 actor_loss=0.3246 critic_loss=125520376832.0000 entropy=17.5879 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 87040] reward=-117387570.2 actor_loss=0.3066 critic_loss=118093790625.1852 entropy=17.5806 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 87040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554555.2 mean_steps=13.2
|
|
[Episode 87050] reward=-119514771.5 actor_loss=0.2212 critic_loss=122610154948.4651 entropy=17.5966 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 87060] reward=-113376079.0 actor_loss=0.2122 critic_loss=117202361587.8095 entropy=17.5910 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 87060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533627.2 mean_steps=14.2
|
|
[Episode 87070] reward=-121490106.5 actor_loss=0.3064 critic_loss=123183565846.7556 entropy=17.5901 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 87080] reward=-114915984.4 actor_loss=0.2957 critic_loss=116270945000.7273 entropy=17.5798 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 87080] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-364403.1 mean_steps=16.6
|
|
[Episode 87090] reward=-122242553.7 actor_loss=0.1875 critic_loss=125556286385.2308 entropy=17.5760 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 87100] reward=-121930808.7 actor_loss=0.2779 critic_loss=150875656370.0869 entropy=17.5818 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 87100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438534.3 mean_steps=14.9
|
|
[Episode 87110] reward=-112334546.6 actor_loss=0.3392 critic_loss=107706526245.4634 entropy=17.5769 approx_kl=0.0115 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 87120] reward=-114709726.9 actor_loss=0.3032 critic_loss=121247744496.4848 entropy=17.5972 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 87120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-520233.0 mean_steps=13.3
|
|
[Episode 87130] reward=-116981286.8 actor_loss=0.2143 critic_loss=121513094972.9524 entropy=17.5907 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 87140] reward=-116802459.2 actor_loss=0.2811 critic_loss=118515023872.0000 entropy=17.5881 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 87140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-502034.1 mean_steps=12.2
|
|
[Episode 87150] reward=-116955379.4 actor_loss=0.2695 critic_loss=142047860660.1482 entropy=17.5955 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 87160] reward=-116071685.1 actor_loss=0.2813 critic_loss=121236560678.7879 entropy=17.5982 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 87160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-480080.6 mean_steps=15.6
|
|
[Episode 87170] reward=-114355265.5 actor_loss=0.3235 critic_loss=114025807416.8889 entropy=17.6079 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 87180] reward=-115826901.5 actor_loss=0.2669 critic_loss=116701962240.0000 entropy=17.5973 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 87180] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-690682.6 mean_steps=11.3
|
|
[Episode 87190] reward=-119506293.9 actor_loss=0.3044 critic_loss=124976882511.4483 entropy=17.5911 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 87200] reward=-111871340.3 actor_loss=0.3699 critic_loss=109580841155.0476 entropy=17.5876 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 87200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-634057.5 mean_steps=12.3
|
|
[Episode 87210] reward=-116223446.8 actor_loss=0.2890 critic_loss=110440001991.1111 entropy=17.6120 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 87220] reward=-111760088.4 actor_loss=0.3052 critic_loss=112803860571.0222 entropy=17.6147 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 87220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-629720.7 mean_steps=12.8
|
|
[Episode 87230] reward=-121613881.8 actor_loss=0.3470 critic_loss=126798989642.3226 entropy=17.5992 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 87240] reward=-118118840.2 actor_loss=0.3551 critic_loss=118826548535.6522 entropy=17.6027 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 87240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502283.9 mean_steps=14.3
|
|
[Episode 87250] reward=-115373089.4 actor_loss=0.2178 critic_loss=118003866282.6667 entropy=17.6022 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 87260] reward=-115735462.6 actor_loss=0.2818 critic_loss=115926677258.2400 entropy=17.5923 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 87260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-474095.4 mean_steps=15.8
|
|
[Episode 87270] reward=-116583537.9 actor_loss=0.2490 critic_loss=115072570533.1613 entropy=17.5875 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 87280] reward=-117868684.5 actor_loss=0.2795 critic_loss=116872223526.7879 entropy=17.6007 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 87280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483953.5 mean_steps=14.6
|
|
[Episode 87290] reward=-126395705.4 actor_loss=0.2644 critic_loss=930994473642.6666 entropy=17.6071 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 87300] reward=-112685761.9 actor_loss=0.2712 critic_loss=108937932500.2927 entropy=17.6056 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 87300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602963.8 mean_steps=12.7
|
|
[Episode 87310] reward=-117358385.4 actor_loss=0.3121 critic_loss=116178048837.8182 entropy=17.6040 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 87320] reward=-112491949.3 actor_loss=0.2812 critic_loss=114539317020.4444 entropy=17.6110 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 87320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465622.1 mean_steps=14.8
|
|
[Episode 87330] reward=-114906189.0 actor_loss=0.2296 critic_loss=117131032576.0000 entropy=17.6188 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 87340] reward=-115827519.8 actor_loss=0.2677 critic_loss=126944123503.3044 entropy=17.6271 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 87340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-420896.6 mean_steps=14.6
|
|
[Episode 87350] reward=-114217965.9 actor_loss=0.3075 critic_loss=114897817873.0667 entropy=17.6336 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 87360] reward=-127426553.5 actor_loss=0.2252 critic_loss=428304155822.8293 entropy=17.6612 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 87360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397493.3 mean_steps=15.5
|
|
[Episode 87370] reward=-117373903.8 actor_loss=0.2969 critic_loss=119841317774.2222 entropy=17.6688 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 87380] reward=-121512088.6 actor_loss=0.2852 critic_loss=125495417605.6889 entropy=17.6927 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 87380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500071.5 mean_steps=14.9
|
|
[Episode 87390] reward=-112601170.2 actor_loss=0.3220 critic_loss=118232474510.2222 entropy=17.6873 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 87400] reward=-118994864.4 actor_loss=0.2827 critic_loss=121418658065.0667 entropy=17.6865 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 87400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446977.1 mean_steps=14.8
|
|
[Episode 87410] reward=-123597943.9 actor_loss=0.3305 critic_loss=1368176637451.3777 entropy=17.6852 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 87420] reward=-120337997.8 actor_loss=0.2996 critic_loss=131355678667.4872 entropy=17.6902 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 87420] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-359040.9 mean_steps=17.1
|
|
[Episode 87430] reward=-119392369.3 actor_loss=0.2590 critic_loss=126140649472.0000 entropy=17.6870 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 87440] reward=-120702263.6 actor_loss=0.2089 critic_loss=120223362048.0000 entropy=17.6876 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 87440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-519143.7 mean_steps=13.1
|
|
[Episode 87450] reward=-117505223.5 actor_loss=0.2391 critic_loss=118449578894.2222 entropy=17.6925 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 87460] reward=-115278905.8 actor_loss=0.1620 critic_loss=118991753728.0000 entropy=17.6920 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 87460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515644.4 mean_steps=14.1
|
|
[Episode 87470] reward=-118788457.2 actor_loss=0.1973 critic_loss=121189147794.2857 entropy=17.6945 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 87480] reward=-117924866.7 actor_loss=0.3460 critic_loss=128699144601.6000 entropy=17.7060 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 87480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-420226.5 mean_steps=16.6
|
|
[Episode 87490] reward=-118207764.7 actor_loss=0.2696 critic_loss=119615840886.1538 entropy=17.6992 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 87500] reward=-117434155.7 actor_loss=0.2574 critic_loss=116942518681.6000 entropy=17.6960 approx_kl=0.0105 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 87500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-483569.9 mean_steps=12.8
|
|
[Episode 87510] reward=-115727309.2 actor_loss=0.2566 critic_loss=116280011776.0000 entropy=17.6990 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 87520] reward=-117701838.2 actor_loss=0.3031 critic_loss=115153835082.9268 entropy=17.6869 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 87520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416810.8 mean_steps=15.8
|
|
[Episode 87530] reward=-120226550.4 actor_loss=0.2556 critic_loss=190016396492.8000 entropy=17.6826 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 87540] reward=-121799859.0 actor_loss=0.3318 critic_loss=126981127281.7778 entropy=17.6551 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 87540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521808.1 mean_steps=14.7
|
|
[Episode 87550] reward=-122386849.0 actor_loss=0.3056 critic_loss=125993959024.3902 entropy=17.6492 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 87560] reward=-119061461.2 actor_loss=0.2370 critic_loss=122825240667.0222 entropy=17.6431 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 87560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524614.8 mean_steps=12.8
|
|
[Episode 87570] reward=-114437344.6 actor_loss=0.3615 critic_loss=114105931366.4000 entropy=17.6497 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 87580] reward=-123987486.4 actor_loss=0.2465 critic_loss=130653045156.1026 entropy=17.6504 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 87580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550444.6 mean_steps=14.2
|
|
[Episode 87590] reward=-120446655.6 actor_loss=0.2112 critic_loss=121304179671.0400 entropy=17.6512 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 87600] reward=-119306558.4 actor_loss=0.3196 critic_loss=119217680883.5122 entropy=17.6549 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 87600] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-624053.9 mean_steps=12.0
|
|
[Episode 87610] reward=-119233195.6 actor_loss=0.2554 critic_loss=125974349500.6316 entropy=17.6490 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 87620] reward=-123328653.7 actor_loss=0.2915 critic_loss=150661762048.0000 entropy=17.6582 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 87620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529779.1 mean_steps=14.0
|
|
[Episode 87630] reward=-126330047.2 actor_loss=0.2967 critic_loss=285539601066.6667 entropy=17.6507 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 87640] reward=-118101606.9 actor_loss=0.3499 critic_loss=260149646677.3333 entropy=17.6602 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 87640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-637784.8 mean_steps=15.3
|
|
[Episode 87650] reward=-114517164.3 actor_loss=0.2703 critic_loss=118015094528.0000 entropy=17.6529 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 87660] reward=-115809701.6 actor_loss=0.3058 critic_loss=162755715364.5714 entropy=17.6633 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 87660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454171.4 mean_steps=15.4
|
|
[Episode 87670] reward=-122814772.2 actor_loss=0.3025 critic_loss=134523554247.1111 entropy=17.6593 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 87680] reward=-116491322.1 actor_loss=0.3283 critic_loss=153063999674.1818 entropy=17.6550 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 87680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450021.2 mean_steps=14.6
|
|
[Episode 87690] reward=-117730646.9 actor_loss=0.2953 critic_loss=119241331164.2791 entropy=17.6546 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 87700] reward=-143898781.6 actor_loss=0.3213 critic_loss=3385433843513.8066 entropy=17.6425 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 87700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553777.6 mean_steps=13.3
|
|
[Episode 87710] reward=-121086418.7 actor_loss=0.3018 critic_loss=117696016716.1081 entropy=17.6386 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 87720] reward=-118722534.3 actor_loss=0.2455 critic_loss=116573394471.3846 entropy=17.6383 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 87720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553942.8 mean_steps=14.4
|
|
[Episode 87730] reward=-119589843.5 actor_loss=0.2969 critic_loss=132160064474.0741 entropy=17.6386 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 87740] reward=-110877488.3 actor_loss=0.3760 critic_loss=138011709618.0869 entropy=17.6352 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 87740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433621.6 mean_steps=14.8
|
|
[Episode 87750] reward=-115915776.0 actor_loss=0.3758 critic_loss=118298352025.6000 entropy=17.6259 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 87760] reward=-118057563.1 actor_loss=0.2633 critic_loss=121488439783.6190 entropy=17.6207 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 87760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-608088.2 mean_steps=12.0
|
|
[Episode 87770] reward=-118326602.8 actor_loss=0.2527 critic_loss=125487202735.1579 entropy=17.6032 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 87780] reward=-122031638.1 actor_loss=0.2982 critic_loss=124004320870.4000 entropy=17.6062 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 87780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-636654.4 mean_steps=13.2
|
|
[Episode 87790] reward=-118176887.0 actor_loss=0.3499 critic_loss=120887384837.6889 entropy=17.6209 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 87800] reward=-115556874.2 actor_loss=0.3301 critic_loss=114855458304.0000 entropy=17.6143 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 87800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-533865.9 mean_steps=15.2
|
|
[Episode 87810] reward=-120519829.8 actor_loss=0.3794 critic_loss=124900757819.0769 entropy=17.6260 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 87820] reward=-118580584.1 actor_loss=0.2978 critic_loss=116921626251.6364 entropy=17.6252 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 87820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-514096.8 mean_steps=12.2
|
|
[Episode 87830] reward=-113483121.0 actor_loss=0.2795 critic_loss=116567604932.9231 entropy=17.6316 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 87840] reward=-111958443.1 actor_loss=0.2866 critic_loss=106680323218.2857 entropy=17.6380 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 87840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-626623.2 mean_steps=12.6
|
|
[Episode 87850] reward=-114561731.0 actor_loss=0.3172 critic_loss=124125707210.1053 entropy=17.6437 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 87860] reward=-120952346.5 actor_loss=0.2432 critic_loss=120314152550.4000 entropy=17.6443 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 87860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-635647.8 mean_steps=11.9
|
|
[Episode 87870] reward=-120143836.6 actor_loss=0.2595 critic_loss=123695969792.0000 entropy=17.6436 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 87880] reward=-119539297.0 actor_loss=0.3205 critic_loss=116909383248.8421 entropy=17.6440 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 87880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468944.2 mean_steps=13.8
|
|
[Episode 87890] reward=-117569782.8 actor_loss=0.3075 critic_loss=202089648583.1111 entropy=17.6448 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 87900] reward=-111151482.1 actor_loss=0.2925 critic_loss=110163054733.2414 entropy=17.6508 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 87900] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-295991.5 mean_steps=18.4
|
|
[Episode 87910] reward=-123988191.5 actor_loss=0.2731 critic_loss=145973695829.3333 entropy=17.6418 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 87920] reward=-113251620.8 actor_loss=0.2560 critic_loss=116324866683.5862 entropy=17.6524 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 87920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-664721.0 mean_steps=12.0
|
|
[Episode 87930] reward=-114967721.6 actor_loss=0.3462 critic_loss=118467057974.3030 entropy=17.6619 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 87940] reward=-114418932.1 actor_loss=0.3325 critic_loss=113850493064.5333 entropy=17.6597 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 87940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551933.4 mean_steps=12.4
|
|
[Episode 87950] reward=-115329570.4 actor_loss=0.3354 critic_loss=117748910034.4889 entropy=17.6618 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 87960] reward=-110959136.7 actor_loss=0.3842 critic_loss=113642108770.4615 entropy=17.6562 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 87960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597746.6 mean_steps=12.7
|
|
[Episode 87970] reward=-116298855.7 actor_loss=0.3784 critic_loss=116713884732.2353 entropy=17.6541 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 87980] reward=-113188526.2 actor_loss=0.2814 critic_loss=116691492864.0000 entropy=17.6473 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 87980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551832.6 mean_steps=12.3
|
|
[Episode 87990] reward=-115305359.9 actor_loss=0.3242 critic_loss=115985925782.5882 entropy=17.6584 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 88000] reward=-114070331.0 actor_loss=0.2993 critic_loss=116705801011.2000 entropy=17.6513 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 88000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587745.5 mean_steps=13.3
|
|
[Episode 88010] reward=-121411947.5 actor_loss=0.2231 critic_loss=125702526293.3333 entropy=17.6444 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 88020] reward=-117341833.4 actor_loss=0.2719 critic_loss=120925573492.3636 entropy=17.6586 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 88020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-634993.6 mean_steps=12.8
|
|
[Episode 88030] reward=-117286566.1 actor_loss=0.2864 critic_loss=127974778302.3590 entropy=17.6688 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 88040] reward=-116725658.9 actor_loss=0.2352 critic_loss=121687840995.5556 entropy=17.6619 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 88040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-408227.2 mean_steps=16.2
|
|
[Episode 88050] reward=-115988161.2 actor_loss=0.2813 critic_loss=115533372958.1176 entropy=17.6678 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 88060] reward=-117791634.8 actor_loss=0.2469 critic_loss=122399358065.7778 entropy=17.6788 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 88060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-415438.0 mean_steps=15.2
|
|
[Episode 88070] reward=-113329083.6 actor_loss=0.1556 critic_loss=116735747358.7200 entropy=17.6803 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 88080] reward=-121781458.7 actor_loss=0.3101 critic_loss=129853377584.7619 entropy=17.6648 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 88080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-561645.8 mean_steps=12.6
|
|
[Episode 88090] reward=-111407002.0 actor_loss=0.3962 critic_loss=167414043569.2308 entropy=17.6690 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 88100] reward=-114030620.6 actor_loss=0.3804 critic_loss=115762212278.8571 entropy=17.6713 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 88100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474795.1 mean_steps=15.1
|
|
[Episode 88110] reward=-114286375.2 actor_loss=0.3392 critic_loss=143327016618.6667 entropy=17.6665 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 88120] reward=-119763894.0 actor_loss=0.3675 critic_loss=212898097834.6667 entropy=17.6621 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 88120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-659239.8 mean_steps=12.9
|
|
[Episode 88130] reward=-120267201.1 actor_loss=0.3414 critic_loss=252218267153.6552 entropy=17.6721 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 88140] reward=-118590535.6 actor_loss=0.2634 critic_loss=120353270930.2857 entropy=17.6890 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 88140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492440.0 mean_steps=14.0
|
|
[Episode 88150] reward=-121076021.4 actor_loss=0.1948 critic_loss=130255527936.0000 entropy=17.6800 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 88160] reward=-115980093.2 actor_loss=0.4333 critic_loss=128391191040.0000 entropy=17.6827 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 88160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-516022.3 mean_steps=13.2
|
|
[Episode 88170] reward=-117741188.6 actor_loss=0.2200 critic_loss=119571466426.1818 entropy=17.6804 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 88180] reward=-115886302.3 actor_loss=0.3334 critic_loss=114375877518.2222 entropy=17.6790 approx_kl=0.0115 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 88180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538152.5 mean_steps=14.1
|
|
[Episode 88190] reward=-115813309.0 actor_loss=0.3924 critic_loss=118034343086.8293 entropy=17.6788 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 88200] reward=-112272222.0 actor_loss=0.3041 critic_loss=114436992731.4286 entropy=17.6690 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 88200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-403020.4 mean_steps=15.2
|
|
[Episode 88210] reward=-114748710.2 actor_loss=0.2580 critic_loss=121150986082.4615 entropy=17.6710 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 88220] reward=-118043093.4 actor_loss=0.2881 critic_loss=122797333747.8095 entropy=17.6758 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 88220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438322.0 mean_steps=15.2
|
|
[Episode 88230] reward=-119896966.7 actor_loss=0.3853 critic_loss=121847163289.6000 entropy=17.6838 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 88240] reward=-117737908.1 actor_loss=0.2751 critic_loss=126583374953.9310 entropy=17.6863 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 88240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-601333.2 mean_steps=13.9
|
|
[Episode 88250] reward=-118092934.5 actor_loss=0.3404 critic_loss=119653042312.5333 entropy=17.6860 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 88260] reward=-116507740.9 actor_loss=0.3829 critic_loss=121551067136.0000 entropy=17.6617 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 88260] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-324737.6 mean_steps=17.8
|
|
[Episode 88270] reward=-115759981.1 actor_loss=0.3244 critic_loss=117661671751.6800 entropy=17.6697 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 88280] reward=-116692072.6 actor_loss=0.3063 critic_loss=121569257244.4444 entropy=17.6827 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 88280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460526.7 mean_steps=14.7
|
|
[Episode 88290] reward=-119663280.2 actor_loss=0.3010 critic_loss=121001216801.3913 entropy=17.6871 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 88300] reward=-122522462.1 actor_loss=0.2219 critic_loss=125025213683.8095 entropy=17.6802 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 88300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447885.3 mean_steps=14.6
|
|
[Episode 88310] reward=-115474467.4 actor_loss=0.2886 critic_loss=118646191010.9091 entropy=17.6813 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 88320] reward=-110933753.8 actor_loss=0.4410 critic_loss=122761948637.8667 entropy=17.6815 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 88320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-611164.5 mean_steps=11.9
|
|
[Episode 88330] reward=-115252635.3 actor_loss=0.3253 critic_loss=116795494673.0667 entropy=17.6847 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 88340] reward=-119034819.1 actor_loss=0.3203 critic_loss=118729347250.0870 entropy=17.6919 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 88340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-394044.5 mean_steps=17.1
|
|
[Episode 88350] reward=-112553713.8 actor_loss=0.2662 critic_loss=109861890730.6667 entropy=17.7057 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 88360] reward=-123742659.4 actor_loss=0.2561 critic_loss=128428386665.4118 entropy=17.6882 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 88360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482321.3 mean_steps=14.7
|
|
[Episode 88370] reward=-116851588.4 actor_loss=0.3055 critic_loss=114283147910.7368 entropy=17.6736 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 88380] reward=-121778907.0 actor_loss=0.2513 critic_loss=124294146340.5714 entropy=17.6754 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 88380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474551.5 mean_steps=13.7
|
|
[Episode 88390] reward=-114741140.0 actor_loss=0.3021 critic_loss=119957811291.0222 entropy=17.6718 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 88400] reward=-118583012.6 actor_loss=0.2917 critic_loss=121885800857.6000 entropy=17.6636 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 88400] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-551534.8 mean_steps=11.7
|
|
[Episode 88410] reward=-112573614.6 actor_loss=0.3145 critic_loss=116726185437.8667 entropy=17.6647 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 88420] reward=-118047252.7 actor_loss=0.2516 critic_loss=124871567918.5455 entropy=17.6795 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 88420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-400778.7 mean_steps=14.5
|
|
[Episode 88430] reward=-118181745.1 actor_loss=0.3073 critic_loss=116344091079.1111 entropy=17.6848 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 88440] reward=-117567197.8 actor_loss=0.2999 critic_loss=122125834842.3529 entropy=17.6879 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 88440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570061.1 mean_steps=13.4
|
|
[Episode 88450] reward=-115115339.9 actor_loss=0.2665 critic_loss=116164803662.7692 entropy=17.6884 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 88460] reward=-116825187.4 actor_loss=0.3746 critic_loss=115867805137.4545 entropy=17.6706 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 88460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-591096.5 mean_steps=12.8
|
|
[Episode 88470] reward=-117308645.4 actor_loss=0.3838 critic_loss=113255053721.6000 entropy=17.6626 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 88480] reward=-115942892.0 actor_loss=0.3577 critic_loss=120143874476.6512 entropy=17.6456 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 88480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613713.3 mean_steps=12.8
|
|
[Episode 88490] reward=-116941253.4 actor_loss=0.2799 critic_loss=116686748482.3704 entropy=17.6398 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 88500] reward=-115493487.0 actor_loss=0.3602 critic_loss=111792880981.3333 entropy=17.6247 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 88500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536741.0 mean_steps=13.4
|
|
[Episode 88510] reward=-116431073.6 actor_loss=0.2261 critic_loss=119269361176.3810 entropy=17.6229 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 88520] reward=-117302636.9 actor_loss=0.3013 critic_loss=118815156410.1818 entropy=17.6201 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 88520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521888.2 mean_steps=14.6
|
|
[Episode 88530] reward=-115922551.5 actor_loss=0.2854 critic_loss=126069359762.2857 entropy=17.6128 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 88540] reward=-117019928.9 actor_loss=0.3336 critic_loss=118650160896.0000 entropy=17.6141 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 88540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545540.5 mean_steps=13.4
|
|
[Episode 88550] reward=-292723247.3 actor_loss=0.9317 critic_loss=107543376628784.7656 entropy=17.6174 approx_kl=0.0028 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 88560] reward=-116602743.9 actor_loss=0.2396 critic_loss=113259055349.7600 entropy=17.6174 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 88560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-573764.1 mean_steps=14.7
|
|
[Episode 88570] reward=-116019406.8 actor_loss=0.2108 critic_loss=121812927244.1905 entropy=17.6260 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 88580] reward=-116128331.9 actor_loss=0.3157 critic_loss=143683246610.9630 entropy=17.6268 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 88580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488604.5 mean_steps=13.9
|
|
[Episode 88590] reward=-116209202.4 actor_loss=0.2261 critic_loss=116006149296.5517 entropy=17.6206 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 88600] reward=-118161910.4 actor_loss=0.3498 critic_loss=126854812160.0000 entropy=17.6216 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 88600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484706.4 mean_steps=13.8
|
|
[Episode 88610] reward=-110783504.2 actor_loss=0.2897 critic_loss=115673739755.5200 entropy=17.6118 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 88620] reward=-119613101.3 actor_loss=0.3346 critic_loss=122346936792.6154 entropy=17.6170 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 88620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509439.5 mean_steps=13.8
|
|
[Episode 88630] reward=-119935244.4 actor_loss=0.3140 critic_loss=131060317986.5946 entropy=17.6226 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 88640] reward=-126594618.9 actor_loss=0.4293 critic_loss=837336817033.8462 entropy=17.6184 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 88640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-466020.6 mean_steps=15.3
|
|
[Episode 88650] reward=-116716322.3 actor_loss=0.2584 critic_loss=119520568785.4545 entropy=17.6167 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 88660] reward=-114199019.1 actor_loss=0.2494 critic_loss=126163041523.8095 entropy=17.5937 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 88660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-404991.6 mean_steps=16.1
|
|
[Episode 88670] reward=-114767762.6 actor_loss=0.2856 critic_loss=116968256433.2308 entropy=17.6111 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 88680] reward=-110301158.3 actor_loss=0.4394 critic_loss=112272628394.6667 entropy=17.6034 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 88680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-565498.8 mean_steps=12.4
|
|
[Episode 88690] reward=-118329766.2 actor_loss=0.3038 critic_loss=116682082183.5294 entropy=17.5908 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 88700] reward=-105174623.8 actor_loss=0.2965 critic_loss=108427694592.0000 entropy=17.5799 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 88700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585355.5 mean_steps=12.8
|
|
[Episode 88710] reward=-120837115.8 actor_loss=0.2973 critic_loss=120467385163.2941 entropy=17.5642 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 88720] reward=-118959710.7 actor_loss=0.2933 critic_loss=121120774553.6000 entropy=17.5706 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 88720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-512626.0 mean_steps=15.2
|
|
[Episode 88730] reward=-115813915.6 actor_loss=0.2477 critic_loss=110848481689.6000 entropy=17.5503 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 88740] reward=-113234994.9 actor_loss=0.2819 critic_loss=111094302208.0000 entropy=17.5608 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 88740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-342792.6 mean_steps=16.6
|
|
[Episode 88750] reward=-118311241.4 actor_loss=0.3068 critic_loss=118441720738.9091 entropy=17.5726 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 88760] reward=-115691390.9 actor_loss=0.3169 critic_loss=110995958988.8000 entropy=17.5721 approx_kl=0.0105 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 88760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-523036.6 mean_steps=14.9
|
|
[Episode 88770] reward=-119074633.9 actor_loss=0.2773 critic_loss=122293460406.8571 entropy=17.5774 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 88780] reward=-122207226.2 actor_loss=0.3855 critic_loss=127081302272.0000 entropy=17.5756 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 88780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445637.2 mean_steps=14.3
|
|
[Episode 88790] reward=-114452202.7 actor_loss=0.2515 critic_loss=113421280656.6956 entropy=17.5813 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 88800] reward=-113516389.1 actor_loss=0.3522 critic_loss=110570039188.2105 entropy=17.5832 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 88800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544141.3 mean_steps=12.9
|
|
[Episode 88810] reward=-117278538.4 actor_loss=0.3169 critic_loss=118773625452.6061 entropy=17.5835 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 88820] reward=-119725788.5 actor_loss=0.2269 critic_loss=116215676928.0000 entropy=17.5825 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 88820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509202.2 mean_steps=13.9
|
|
[Episode 88830] reward=-114076863.1 actor_loss=0.3306 critic_loss=112465060957.0909 entropy=17.5674 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 88840] reward=-120015417.6 actor_loss=0.1961 critic_loss=119278184448.0000 entropy=17.5635 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 88840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-427588.0 mean_steps=16.3
|
|
[Episode 88850] reward=-116303480.4 actor_loss=0.2669 critic_loss=114767079531.7895 entropy=17.5703 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 88860] reward=-113547977.9 actor_loss=0.3334 critic_loss=114007660589.5111 entropy=17.5605 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 88860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503796.0 mean_steps=13.9
|
|
[Episode 88870] reward=-114578321.7 actor_loss=0.3264 critic_loss=111647371810.1333 entropy=17.5658 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 88880] reward=-119978622.9 actor_loss=0.2850 critic_loss=119726291626.6667 entropy=17.5711 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 88880] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-712221.3 mean_steps=10.7
|
|
[Episode 88890] reward=-120211565.9 actor_loss=0.2505 critic_loss=120055203281.4545 entropy=17.5692 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 88900] reward=-118526025.6 actor_loss=0.1135 critic_loss=117460389515.6364 entropy=17.5737 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 88900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-617722.4 mean_steps=13.1
|
|
[Episode 88910] reward=-115633945.8 actor_loss=0.3191 critic_loss=117920198164.4800 entropy=17.5692 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 88920] reward=-115429690.9 actor_loss=0.3923 critic_loss=114491139754.6667 entropy=17.5608 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 88920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419182.9 mean_steps=15.0
|
|
[Episode 88930] reward=-117679388.6 actor_loss=0.1548 critic_loss=118178909661.8667 entropy=17.5653 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 88940] reward=-111628787.9 actor_loss=0.3966 critic_loss=109687474176.0000 entropy=17.5681 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 88940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520971.4 mean_steps=14.2
|
|
[Episode 88950] reward=-119662557.0 actor_loss=0.2396 critic_loss=116523297177.6000 entropy=17.5737 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 88960] reward=-118577230.4 actor_loss=0.2359 critic_loss=125624820318.8148 entropy=17.5712 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 88960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483844.7 mean_steps=13.9
|
|
[Episode 88970] reward=-116905601.4 actor_loss=0.2091 critic_loss=117799719936.0000 entropy=17.5601 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 88980] reward=-112145832.3 actor_loss=0.3078 critic_loss=113805315810.2326 entropy=17.5628 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 88980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-456493.9 mean_steps=15.4
|
|
[Episode 88990] reward=-121018044.2 actor_loss=0.2609 critic_loss=125527444781.1765 entropy=17.5528 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 89000] reward=-113965855.7 actor_loss=0.2538 critic_loss=131081534464.0000 entropy=17.5539 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 89000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463309.1 mean_steps=14.4
|
|
[Episode 89010] reward=-118653694.9 actor_loss=0.4327 critic_loss=122235264161.6842 entropy=17.5516 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 89020] reward=-116055661.6 actor_loss=0.2724 critic_loss=119142279047.5294 entropy=17.5569 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 89020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486033.4 mean_steps=13.6
|
|
[Episode 89030] reward=-121396110.1 actor_loss=0.2599 critic_loss=122030155912.5333 entropy=17.5589 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 89040] reward=-117472841.7 actor_loss=0.3032 critic_loss=120020602564.9231 entropy=17.5534 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 89040] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-298185.9 mean_steps=17.1
|
|
[Episode 89050] reward=-121208534.7 actor_loss=0.2205 critic_loss=121430494139.7333 entropy=17.5652 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 89060] reward=-116107825.2 actor_loss=0.3478 critic_loss=114000934775.4667 entropy=17.5708 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 89060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555322.6 mean_steps=13.4
|
|
[Episode 89070] reward=-120018905.1 actor_loss=0.2648 critic_loss=124849108764.4444 entropy=17.5489 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 89080] reward=-123864347.2 actor_loss=0.3116 critic_loss=125271529881.6000 entropy=17.5467 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 89080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470683.5 mean_steps=13.7
|
|
[Episode 89090] reward=-118026086.2 actor_loss=0.2372 critic_loss=119074229589.3333 entropy=17.5508 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 89100] reward=-118434068.9 actor_loss=0.3705 critic_loss=118122508288.0000 entropy=17.5505 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 89100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517546.0 mean_steps=13.8
|
|
[Episode 89110] reward=-119458339.1 actor_loss=0.2861 critic_loss=120480958553.0435 entropy=17.5466 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 89120] reward=-122368045.6 actor_loss=0.3182 critic_loss=121089667364.5714 entropy=17.5420 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 89120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502838.5 mean_steps=13.7
|
|
[Episode 89130] reward=-116797754.2 actor_loss=0.2409 critic_loss=118833063936.0000 entropy=17.5492 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 89140] reward=-114437245.2 actor_loss=0.2789 critic_loss=112828561221.8182 entropy=17.5432 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 89140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-464125.1 mean_steps=13.9
|
|
[Episode 89150] reward=-116775602.2 actor_loss=0.2880 critic_loss=122090688884.3636 entropy=17.5434 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 89160] reward=-118327527.5 actor_loss=0.2834 critic_loss=121481937408.0000 entropy=17.5341 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 89160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-495787.4 mean_steps=15.8
|
|
[Episode 89170] reward=-115232197.0 actor_loss=0.3009 critic_loss=116689358632.4211 entropy=17.5323 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 89180] reward=-110397353.4 actor_loss=0.3052 critic_loss=107252600558.9333 entropy=17.5341 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 89180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535993.1 mean_steps=13.5
|
|
[Episode 89190] reward=-118371706.9 actor_loss=0.3162 critic_loss=120157018112.0000 entropy=17.5314 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 89200] reward=-113022235.3 actor_loss=0.3503 critic_loss=113104819151.2381 entropy=17.5370 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 89200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-635842.6 mean_steps=12.0
|
|
[Episode 89210] reward=-118285222.8 actor_loss=0.2478 critic_loss=125082814976.0000 entropy=17.5214 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 89220] reward=-115121812.6 actor_loss=0.2919 critic_loss=117076986538.6667 entropy=17.5170 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 89220] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-673223.2 mean_steps=12.3
|
|
[Episode 89230] reward=-112789009.5 actor_loss=0.3058 critic_loss=113968825230.2222 entropy=17.5113 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 89240] reward=-113873707.0 actor_loss=0.3336 critic_loss=112872079750.0952 entropy=17.5010 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 89240] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-427659.9 mean_steps=17.1
|
|
[Episode 89250] reward=-116685381.0 actor_loss=0.3235 critic_loss=115944436470.5185 entropy=17.4943 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 89260] reward=-110295263.4 actor_loss=0.3462 critic_loss=110614093095.8222 entropy=17.4860 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 89260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-521372.2 mean_steps=13.4
|
|
[Episode 89270] reward=-114820962.7 actor_loss=0.3552 critic_loss=113888207360.0000 entropy=17.4813 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 89280] reward=-114914867.1 actor_loss=0.2652 critic_loss=110178988032.0000 entropy=17.4839 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 89280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-606820.1 mean_steps=13.7
|
|
[Episode 89290] reward=-117383673.4 actor_loss=0.3419 critic_loss=116604374272.0000 entropy=17.4752 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 89300] reward=-114747256.8 actor_loss=0.4096 critic_loss=114144774729.1429 entropy=17.4871 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 89300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470146.9 mean_steps=13.8
|
|
[Episode 89310] reward=-107640741.8 actor_loss=0.4835 critic_loss=107575383927.4667 entropy=17.4830 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 89320] reward=-108417265.1 actor_loss=0.2969 critic_loss=107055320112.7619 entropy=17.4841 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 89320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487947.8 mean_steps=14.3
|
|
[Episode 89330] reward=-116383584.0 actor_loss=0.3021 critic_loss=114528018432.0000 entropy=17.4933 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 89340] reward=-114338881.5 actor_loss=0.2156 critic_loss=112680492032.0000 entropy=17.4927 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 89340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-508480.9 mean_steps=12.8
|
|
[Episode 89350] reward=-116678499.5 actor_loss=0.2348 critic_loss=112500365870.5455 entropy=17.4903 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 89360] reward=-114806899.7 actor_loss=0.3600 critic_loss=111377883526.0952 entropy=17.4923 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 89360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543508.4 mean_steps=13.3
|
|
[Episode 89370] reward=-114940906.8 actor_loss=0.4561 critic_loss=113697619968.0000 entropy=17.4911 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 89380] reward=-111095539.3 actor_loss=0.2765 critic_loss=107316442756.7407 entropy=17.4824 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 89380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-607698.5 mean_steps=12.8
|
|
[Episode 89390] reward=-115196402.9 actor_loss=0.3356 critic_loss=112829602529.2800 entropy=17.4792 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 89400] reward=-116854572.0 actor_loss=0.3154 critic_loss=119470574498.9091 entropy=17.4785 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 89400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-388651.2 mean_steps=14.7
|
|
[Episode 89410] reward=-116812167.9 actor_loss=0.2728 critic_loss=153336643584.0000 entropy=17.4813 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 89420] reward=-113390416.3 actor_loss=0.2512 critic_loss=107444795030.5882 entropy=17.4750 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 89420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455050.8 mean_steps=14.6
|
|
[Episode 89430] reward=-115045915.3 actor_loss=0.2983 critic_loss=114209097318.4000 entropy=17.4900 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 89440] reward=-118764066.6 actor_loss=0.3398 critic_loss=120044559701.3333 entropy=17.4979 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 89440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550812.3 mean_steps=13.5
|
|
[Episode 89450] reward=-113421450.3 actor_loss=0.3442 critic_loss=117507513821.8667 entropy=17.4775 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 89460] reward=-108310343.7 actor_loss=0.3489 critic_loss=102501600824.8889 entropy=17.4816 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 89460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-631372.5 mean_steps=12.1
|
|
[Episode 89470] reward=-115431963.9 actor_loss=0.2076 critic_loss=113847718707.2000 entropy=17.4828 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 89480] reward=-114468523.0 actor_loss=0.2851 critic_loss=115251475377.2308 entropy=17.4716 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 89480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512291.4 mean_steps=13.6
|
|
[Episode 89490] reward=-117278451.2 actor_loss=0.2050 critic_loss=120241963300.5714 entropy=17.4699 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 89500] reward=-115389552.9 actor_loss=0.3477 critic_loss=113565006754.9091 entropy=17.4668 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 89500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-652584.4 mean_steps=13.2
|
|
[Episode 89510] reward=-116791829.9 actor_loss=0.2921 critic_loss=119557756820.2105 entropy=17.4653 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 89520] reward=-114555474.8 actor_loss=0.3274 critic_loss=124663379285.3333 entropy=17.4645 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 89520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430212.5 mean_steps=15.3
|
|
[Episode 89530] reward=-112948675.8 actor_loss=0.3572 critic_loss=116145048576.0000 entropy=17.4651 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 89540] reward=-119522833.2 actor_loss=0.2179 critic_loss=127897990875.4286 entropy=17.4710 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 89540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-506803.7 mean_steps=12.8
|
|
[Episode 89550] reward=-113175387.0 actor_loss=0.2135 critic_loss=116269986452.6452 entropy=17.4643 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 89560] reward=-114062621.8 actor_loss=0.2152 critic_loss=118097468893.8667 entropy=17.4495 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 89560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532413.3 mean_steps=14.3
|
|
[Episode 89570] reward=-116221792.2 actor_loss=0.2285 critic_loss=116305620536.8889 entropy=17.4454 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 89580] reward=-110833015.3 actor_loss=0.3144 critic_loss=112481001472.0000 entropy=17.4458 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 89580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-541718.5 mean_steps=12.2
|
|
[Episode 89590] reward=-116373397.8 actor_loss=0.2636 critic_loss=121447198720.0000 entropy=17.4455 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 89600] reward=-115225835.4 actor_loss=0.3011 critic_loss=116744776850.2857 entropy=17.4554 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 89600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448108.1 mean_steps=14.8
|
|
[Episode 89610] reward=-113090208.5 actor_loss=0.3615 critic_loss=121182840921.0435 entropy=17.4459 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 89620] reward=-116329217.0 actor_loss=0.2890 critic_loss=109489908297.1429 entropy=17.4441 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 89620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418784.5 mean_steps=15.3
|
|
[Episode 89630] reward=-118045128.1 actor_loss=0.2760 critic_loss=111742978340.5714 entropy=17.4341 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 89640] reward=-116682840.0 actor_loss=0.2832 critic_loss=112171172897.0323 entropy=17.4299 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 89640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-307727.8 mean_steps=16.7
|
|
[Episode 89650] reward=-118128358.8 actor_loss=0.3236 critic_loss=117457975572.7568 entropy=17.4264 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 89660] reward=-110730291.7 actor_loss=0.3021 critic_loss=109112193609.1429 entropy=17.4196 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 89660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581206.5 mean_steps=12.6
|
|
[Episode 89670] reward=-109328741.2 actor_loss=0.3871 critic_loss=109525834240.0000 entropy=17.4159 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 89680] reward=-118959676.2 actor_loss=0.2762 critic_loss=117264263577.6000 entropy=17.4268 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 89680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-511990.8 mean_steps=14.8
|
|
[Episode 89690] reward=-118242353.3 actor_loss=0.2252 critic_loss=120636596770.1333 entropy=17.4200 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 89700] reward=-111316847.6 actor_loss=0.2920 critic_loss=113363958169.6000 entropy=17.4286 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 89700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471550.3 mean_steps=14.6
|
|
[Episode 89710] reward=-111421128.7 actor_loss=0.2921 critic_loss=109882521867.1304 entropy=17.4337 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 89720] reward=-117868734.5 actor_loss=0.2499 critic_loss=119234339986.2857 entropy=17.4414 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 89720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-432618.8 mean_steps=14.4
|
|
[Episode 89730] reward=-118810329.3 actor_loss=0.2955 critic_loss=275347198976.0000 entropy=17.4374 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 89740] reward=-286491724.8 actor_loss=0.3139 critic_loss=96299765814613.3281 entropy=17.4575 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 89740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554669.0 mean_steps=13.1
|
|
[Episode 89750] reward=-112161935.4 actor_loss=0.3144 critic_loss=108774002903.5789 entropy=17.4589 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 89760] reward=-110694066.4 actor_loss=0.2967 critic_loss=108011414764.3077 entropy=17.4698 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 89760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-687386.3 mean_steps=12.4
|
|
[Episode 89770] reward=-124156552.6 actor_loss=0.1932 critic_loss=129379002686.5778 entropy=17.4786 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 89780] reward=-117647410.9 actor_loss=0.3210 critic_loss=116672812106.9268 entropy=17.4944 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 89780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-591582.0 mean_steps=12.9
|
|
[Episode 89790] reward=-117764208.8 actor_loss=0.2189 critic_loss=114278035651.0476 entropy=17.4885 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 89800] reward=-118159423.1 actor_loss=0.2986 critic_loss=118691153920.0000 entropy=17.4923 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 89800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-550267.6 mean_steps=12.7
|
|
[Episode 89810] reward=-114473657.6 actor_loss=0.3375 critic_loss=114462708027.0769 entropy=17.4974 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 89820] reward=-114074194.1 actor_loss=0.2835 critic_loss=114549929073.7778 entropy=17.4963 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 89820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-404978.0 mean_steps=15.8
|
|
[Episode 89830] reward=-119679310.7 actor_loss=0.3083 critic_loss=535951018363.2593 entropy=17.5082 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 89840] reward=-111768403.6 actor_loss=0.2543 critic_loss=111346915141.8182 entropy=17.5131 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 89840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614519.3 mean_steps=12.8
|
|
[Episode 89850] reward=-117700298.8 actor_loss=0.2870 critic_loss=119135177993.4815 entropy=17.5168 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 89860] reward=-117086060.9 actor_loss=0.4114 critic_loss=119816399985.7778 entropy=17.5261 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 89860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-4625249.0 mean_steps=22.2
|
|
[Episode 89870] reward=-111103837.7 actor_loss=0.3039 critic_loss=117084565876.3636 entropy=17.5271 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 89880] reward=-113996258.1 actor_loss=0.4300 critic_loss=113802244096.0000 entropy=17.5271 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 89880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-597617.7 mean_steps=13.4
|
|
[Episode 89890] reward=-157531425.4 actor_loss=0.3063 critic_loss=5565540856991.2891 entropy=17.5254 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 89900] reward=-117533718.5 actor_loss=0.2673 critic_loss=122900149301.8947 entropy=17.5338 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 89900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514202.0 mean_steps=13.9
|
|
[Episode 89910] reward=-118479884.1 actor_loss=0.2092 critic_loss=118284857476.1290 entropy=17.5280 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 89920] reward=-118120958.9 actor_loss=0.3520 critic_loss=152307434896.6956 entropy=17.5209 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 89920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458761.7 mean_steps=15.7
|
|
[Episode 89930] reward=-115117974.0 actor_loss=0.2445 critic_loss=111922501973.3333 entropy=17.5204 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 89940] reward=-118926538.8 actor_loss=0.2680 critic_loss=121265588153.3793 entropy=17.5240 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 89940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435953.1 mean_steps=15.8
|
|
[Episode 89950] reward=-119663488.0 actor_loss=0.2108 critic_loss=119815265304.9756 entropy=17.5189 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 89960] reward=-110554152.7 actor_loss=0.3170 critic_loss=114575327573.3333 entropy=17.5199 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 89960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-543100.9 mean_steps=14.2
|
|
[Episode 89970] reward=-119400569.2 actor_loss=0.3284 critic_loss=141688472497.2308 entropy=17.5304 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 89980] reward=-115425456.1 actor_loss=0.3362 critic_loss=123837746082.9091 entropy=17.5197 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 89980] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-677942.9 mean_steps=10.7
|
|
[Episode 89990] reward=-114929359.7 actor_loss=0.2926 critic_loss=115785914777.6000 entropy=17.5233 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 90000] reward=-111839613.5 actor_loss=0.3375 critic_loss=120083721784.8889 entropy=17.5302 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 90000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475206.2 mean_steps=13.9
|
|
[Episode 90010] reward=-114727188.5 actor_loss=0.2985 critic_loss=112601581257.6970 entropy=17.5375 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 90020] reward=-118760684.3 actor_loss=0.2952 critic_loss=118672424738.5946 entropy=17.5367 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 90020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-637023.6 mean_steps=11.9
|
|
[Episode 90030] reward=-114528201.5 actor_loss=0.2696 critic_loss=114908083541.3333 entropy=17.5338 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 90040] reward=-113010071.0 actor_loss=0.3399 critic_loss=109384715581.7931 entropy=17.5670 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 90040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417354.8 mean_steps=15.2
|
|
[Episode 90050] reward=-120772100.7 actor_loss=0.3517 critic_loss=123126946876.2353 entropy=17.5647 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 90060] reward=-114589368.8 actor_loss=0.3580 critic_loss=117916311808.0000 entropy=17.5672 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 90060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-504517.7 mean_steps=12.9
|
|
[Episode 90070] reward=-110497877.4 actor_loss=0.2867 critic_loss=110856440402.5806 entropy=17.5605 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 90080] reward=-118565094.9 actor_loss=0.3434 critic_loss=119825618534.4000 entropy=17.5552 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 90080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431724.7 mean_steps=15.2
|
|
[Episode 90090] reward=-120855090.6 actor_loss=0.2673 critic_loss=123973403443.2000 entropy=17.5613 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 90100] reward=-114617066.2 actor_loss=0.3699 critic_loss=115648435931.4286 entropy=17.5683 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 90100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523118.1 mean_steps=13.2
|
|
[Episode 90110] reward=-112173842.2 actor_loss=0.2746 critic_loss=109124166724.2667 entropy=17.5678 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 90120] reward=-118064360.5 actor_loss=0.2687 critic_loss=117485427097.6000 entropy=17.5647 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 90120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531547.7 mean_steps=13.2
|
|
[Episode 90130] reward=-122724834.4 actor_loss=0.2722 critic_loss=121105327321.2121 entropy=17.5629 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 90140] reward=-114816698.1 actor_loss=0.3644 critic_loss=113753506474.6667 entropy=17.5649 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 90140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426833.9 mean_steps=15.2
|
|
[Episode 90150] reward=-114954877.4 actor_loss=0.3302 critic_loss=113590573738.6667 entropy=17.5819 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 90160] reward=-115668456.3 actor_loss=0.2423 critic_loss=141079299686.4000 entropy=17.5883 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 90160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489421.2 mean_steps=15.1
|
|
[Episode 90170] reward=-119744091.3 actor_loss=0.2374 critic_loss=123034424832.0000 entropy=17.5963 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 90180] reward=-110199407.1 actor_loss=0.4058 critic_loss=110839004013.7143 entropy=17.5996 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 90180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-410803.8 mean_steps=14.0
|
|
[Episode 90190] reward=-117263629.4 actor_loss=0.2762 critic_loss=126268708864.0000 entropy=17.5947 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 90200] reward=-112855370.9 actor_loss=0.2489 critic_loss=112722134853.8182 entropy=17.5929 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 90200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-540657.1 mean_steps=12.3
|
|
[Episode 90210] reward=-114321599.2 actor_loss=0.3974 critic_loss=116952904265.1429 entropy=17.5982 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 90220] reward=-121577914.9 actor_loss=0.2841 critic_loss=123088873390.0800 entropy=17.6013 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 90220] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-621862.9 mean_steps=11.8
|
|
[Episode 90230] reward=-116406167.7 actor_loss=0.3128 critic_loss=117163627985.4545 entropy=17.6033 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 90240] reward=-113437787.6 actor_loss=0.3611 critic_loss=122083234566.9189 entropy=17.6088 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 90240] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-330814.1 mean_steps=17.5
|
|
[Episode 90250] reward=-113579650.5 actor_loss=0.2336 critic_loss=111881357653.3333 entropy=17.5998 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 90260] reward=-116904792.4 actor_loss=0.2845 critic_loss=117899205632.0000 entropy=17.5995 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 90260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563516.2 mean_steps=12.8
|
|
[Episode 90270] reward=-112061298.6 actor_loss=0.3350 critic_loss=117029031563.6364 entropy=17.5925 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 90280] reward=-106210899.3 actor_loss=0.2735 critic_loss=103102415758.2222 entropy=17.5778 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 90280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451204.9 mean_steps=15.9
|
|
[Episode 90290] reward=-115131955.2 actor_loss=0.3311 critic_loss=118963390675.8621 entropy=17.5819 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 90300] reward=-111138944.3 actor_loss=0.2909 critic_loss=114481210327.0400 entropy=17.5884 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 90300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-411216.9 mean_steps=16.2
|
|
[Episode 90310] reward=-115699219.9 actor_loss=0.3752 critic_loss=126656647168.0000 entropy=17.5740 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 90320] reward=-113576585.5 actor_loss=0.3484 critic_loss=109013910032.5161 entropy=17.5797 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 90320] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-756663.7 mean_steps=10.0
|
|
[Episode 90330] reward=-118054837.6 actor_loss=0.2486 critic_loss=120185162800.7619 entropy=17.5814 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 90340] reward=-116156773.1 actor_loss=0.3428 critic_loss=116899029902.2222 entropy=17.5742 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 90340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573671.2 mean_steps=13.7
|
|
[Episode 90350] reward=-114219514.3 actor_loss=0.2445 critic_loss=112263378042.8800 entropy=17.5888 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 90360] reward=-118626719.7 actor_loss=0.4033 critic_loss=119077003579.0769 entropy=17.5927 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 90360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399067.1 mean_steps=15.2
|
|
[Episode 90370] reward=-117035372.1 actor_loss=0.2186 critic_loss=117538467020.8000 entropy=17.6004 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 90380] reward=-115537720.5 actor_loss=0.2937 critic_loss=115338494498.1333 entropy=17.6019 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 90380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510662.2 mean_steps=12.8
|
|
[Episode 90390] reward=-118727764.5 actor_loss=0.2771 critic_loss=115553321524.9655 entropy=17.5767 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 90400] reward=-116804754.8 actor_loss=0.3407 critic_loss=115681892937.1429 entropy=17.5762 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 90400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600048.2 mean_steps=12.7
|
|
[Episode 90410] reward=-117593460.0 actor_loss=0.2761 critic_loss=114786132324.1739 entropy=17.5716 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 90420] reward=-116997740.3 actor_loss=0.2726 critic_loss=112487869467.6757 entropy=17.5683 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 90420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534619.5 mean_steps=13.2
|
|
[Episode 90430] reward=-117381862.7 actor_loss=0.3408 critic_loss=120107192320.0000 entropy=17.5544 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 90440] reward=-114811318.1 actor_loss=0.3013 critic_loss=112493201594.1818 entropy=17.5365 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 90440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446176.7 mean_steps=14.6
|
|
[Episode 90450] reward=-116450767.0 actor_loss=0.3223 critic_loss=118419707904.0000 entropy=17.5380 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 90460] reward=-135623966.7 actor_loss=0.7695 critic_loss=3109412368839.1113 entropy=17.5353 approx_kl=0.0042 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 90460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578318.8 mean_steps=13.3
|
|
[Episode 90470] reward=-114258593.2 actor_loss=0.2531 critic_loss=113912987975.6800 entropy=17.5404 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 90480] reward=-113220754.6 actor_loss=0.2753 critic_loss=114439369614.2222 entropy=17.5380 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 90480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428401.9 mean_steps=15.5
|
|
[Episode 90490] reward=-229112062.0 actor_loss=1.3096 critic_loss=36315325935243.6328 entropy=17.5410 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 90500] reward=-114320625.2 actor_loss=0.3952 critic_loss=116058680524.8000 entropy=17.5470 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 90500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522015.2 mean_steps=14.1
|
|
[Episode 90510] reward=-115002757.9 actor_loss=0.3911 critic_loss=117955571985.0667 entropy=17.5481 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 90520] reward=-114364141.0 actor_loss=0.4448 critic_loss=111228436480.0000 entropy=17.5491 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 90520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-548181.3 mean_steps=12.6
|
|
[Episode 90530] reward=-119834105.4 actor_loss=0.4006 critic_loss=117104609962.6667 entropy=17.5390 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 90540] reward=-118990465.4 actor_loss=0.2446 critic_loss=116260025042.8235 entropy=17.5321 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 90540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560653.6 mean_steps=13.5
|
|
[Episode 90550] reward=-120137331.3 actor_loss=0.3146 critic_loss=122313019228.1600 entropy=17.5374 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 90560] reward=-121472316.9 actor_loss=0.2027 critic_loss=124163829975.5789 entropy=17.5248 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 90560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600843.5 mean_steps=12.6
|
|
[Episode 90570] reward=-112393094.2 actor_loss=0.3127 critic_loss=113950812947.6923 entropy=17.5275 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 90580] reward=-113356477.2 actor_loss=0.3319 critic_loss=119748591908.5714 entropy=17.5372 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 90580] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-313429.3 mean_steps=18.1
|
|
[Episode 90590] reward=-107830901.3 actor_loss=0.3900 critic_loss=105441144073.4815 entropy=17.5290 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 90600] reward=-115912538.0 actor_loss=0.3301 critic_loss=116168821915.1515 entropy=17.5327 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 90600] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-284824.7 mean_steps=18.4
|
|
[Episode 90610] reward=-114689835.3 actor_loss=0.2550 critic_loss=112530509368.8889 entropy=17.5244 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 90620] reward=-115412185.6 actor_loss=0.2298 critic_loss=114838854562.9091 entropy=17.5260 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 90620] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-621520.9 mean_steps=11.2
|
|
[Episode 90630] reward=-120177281.0 actor_loss=0.2431 critic_loss=121598138075.4286 entropy=17.5179 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 90640] reward=-118869208.5 actor_loss=0.3357 critic_loss=120009018731.3548 entropy=17.5132 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 90640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-564030.3 mean_steps=12.6
|
|
[Episode 90650] reward=-121060533.9 actor_loss=0.2748 critic_loss=126036006229.3333 entropy=17.5108 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 90660] reward=-110885576.2 actor_loss=0.3711 critic_loss=108047298427.8710 entropy=17.5057 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 90660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452715.0 mean_steps=14.2
|
|
[Episode 90670] reward=-115827910.3 actor_loss=0.3251 critic_loss=111405439658.6667 entropy=17.5006 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 90680] reward=-120425442.6 actor_loss=0.2684 critic_loss=120092684288.0000 entropy=17.5162 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 90680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543518.3 mean_steps=13.5
|
|
[Episode 90690] reward=-119964810.1 actor_loss=0.2365 critic_loss=122000256808.4211 entropy=17.5215 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 90700] reward=-118320811.6 actor_loss=0.3200 critic_loss=114465950747.6757 entropy=17.5379 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 90700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-631172.9 mean_steps=11.9
|
|
[Episode 90710] reward=-113870025.7 actor_loss=0.3443 critic_loss=111687796690.4889 entropy=17.5397 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 90720] reward=-119684962.0 actor_loss=0.3839 critic_loss=115854256677.4634 entropy=17.5362 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 90720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518323.1 mean_steps=15.1
|
|
[Episode 90730] reward=-115351025.4 actor_loss=0.3010 critic_loss=109951887360.0000 entropy=17.5333 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 90740] reward=-120298852.9 actor_loss=0.1941 critic_loss=118133507868.4444 entropy=17.5271 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 90740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577308.1 mean_steps=12.4
|
|
[Episode 90750] reward=-116780067.8 actor_loss=0.1881 critic_loss=116518648399.6444 entropy=17.5184 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 90760] reward=-119476145.5 actor_loss=0.2031 critic_loss=116558633327.5897 entropy=17.5329 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 90760] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-689790.1 mean_steps=10.4
|
|
[Episode 90770] reward=-108867109.7 actor_loss=0.2376 critic_loss=105667806412.8000 entropy=17.5306 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 90780] reward=-119311661.3 actor_loss=0.2259 critic_loss=119822167088.7619 entropy=17.5317 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 90780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445881.6 mean_steps=15.3
|
|
[Episode 90790] reward=-118481034.4 actor_loss=0.2412 critic_loss=114542615254.7097 entropy=17.5199 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 90800] reward=-110616104.9 actor_loss=0.2723 critic_loss=110423647945.6970 entropy=17.5196 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 90800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-572954.6 mean_steps=11.6
|
|
[Episode 90810] reward=-118098925.7 actor_loss=0.2477 critic_loss=113823132805.5652 entropy=17.5318 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 90820] reward=-117541495.8 actor_loss=0.3228 critic_loss=117362809241.6000 entropy=17.5410 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 90820] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-671766.7 mean_steps=10.6
|
|
[Episode 90830] reward=-113207576.3 actor_loss=0.2830 critic_loss=114831475108.1026 entropy=17.5483 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 90840] reward=-114201606.7 actor_loss=0.3140 critic_loss=116578069099.1628 entropy=17.5259 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 90840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458060.0 mean_steps=14.5
|
|
[Episode 90850] reward=-118460409.8 actor_loss=0.3391 critic_loss=118038256207.6444 entropy=17.5236 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 90860] reward=-111573337.7 actor_loss=0.3780 critic_loss=114925828096.0000 entropy=17.5311 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 90860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-569588.6 mean_steps=12.2
|
|
[Episode 90870] reward=-114878141.1 actor_loss=0.2013 critic_loss=118019123079.5294 entropy=17.5291 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 90880] reward=-118113181.6 actor_loss=0.4016 critic_loss=121016599893.3333 entropy=17.5480 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 90880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567307.5 mean_steps=13.2
|
|
[Episode 90890] reward=-114953437.7 actor_loss=0.2652 critic_loss=111548217501.5385 entropy=17.5692 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 90900] reward=-114308416.4 actor_loss=0.2255 critic_loss=126253092310.4865 entropy=17.5646 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 90900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558727.9 mean_steps=13.3
|
|
[Episode 90910] reward=-117140212.2 actor_loss=0.2701 critic_loss=114134971014.7368 entropy=17.5579 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 90920] reward=-112650803.4 actor_loss=0.3332 critic_loss=109694216094.4762 entropy=17.5587 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 90920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430988.5 mean_steps=15.0
|
|
[Episode 90930] reward=-121351546.8 actor_loss=0.3063 critic_loss=131236868411.0769 entropy=17.5483 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 90940] reward=-115333297.6 actor_loss=0.3743 critic_loss=113389723648.0000 entropy=17.5379 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 90940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-352435.2 mean_steps=15.3
|
|
[Episode 90950] reward=-115372088.5 actor_loss=0.2952 critic_loss=112098326937.6000 entropy=17.5499 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 90960] reward=-111816518.8 actor_loss=0.3711 critic_loss=105096126712.2424 entropy=17.5464 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 90960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498210.8 mean_steps=14.5
|
|
[Episode 90970] reward=-116540829.0 actor_loss=0.3494 critic_loss=113367990703.1579 entropy=17.5499 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 90980] reward=-114603980.8 actor_loss=0.2794 critic_loss=113480828830.4762 entropy=17.5385 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 90980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-599781.1 mean_steps=13.7
|
|
[Episode 90990] reward=-113313359.5 actor_loss=0.3848 critic_loss=113465917440.0000 entropy=17.5464 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 91000] reward=-122602412.7 actor_loss=0.2488 critic_loss=122957135872.0000 entropy=17.5584 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 91000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-399702.8 mean_steps=15.8
|
|
[Episode 91010] reward=-119442176.0 actor_loss=0.3348 critic_loss=120467936870.4000 entropy=17.5555 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 91020] reward=-113217067.6 actor_loss=0.3126 critic_loss=110652551623.1111 entropy=17.5662 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 91020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-593696.2 mean_steps=11.6
|
|
[Episode 91030] reward=-110031293.1 actor_loss=0.2782 critic_loss=113988288512.0000 entropy=17.5850 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 91040] reward=-115220293.0 actor_loss=0.3596 critic_loss=118398858581.3333 entropy=17.6085 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 91040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400384.7 mean_steps=15.0
|
|
[Episode 91050] reward=-114292560.5 actor_loss=0.3978 critic_loss=114532942916.2667 entropy=17.5890 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 91060] reward=-117925654.3 actor_loss=0.2983 critic_loss=118163291867.4286 entropy=17.5892 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 91060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-628903.6 mean_steps=12.6
|
|
[Episode 91070] reward=-119609258.4 actor_loss=0.2863 critic_loss=124878265093.6889 entropy=17.5673 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 91080] reward=-119473581.1 actor_loss=0.2200 critic_loss=117421842614.0444 entropy=17.5692 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 91080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610265.3 mean_steps=12.7
|
|
[Episode 91090] reward=-116902330.1 actor_loss=0.3759 critic_loss=116474584385.8286 entropy=17.5673 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 91100] reward=-117317211.8 actor_loss=0.2735 critic_loss=116763641669.8182 entropy=17.5524 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 91100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485680.9 mean_steps=14.6
|
|
[Episode 91110] reward=-119412428.0 actor_loss=0.2840 critic_loss=118184412228.2667 entropy=17.5306 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 91120] reward=-117865782.2 actor_loss=0.2739 critic_loss=115988748775.6190 entropy=17.5249 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 91120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556247.2 mean_steps=13.1
|
|
[Episode 91130] reward=-111098227.4 actor_loss=0.3053 critic_loss=111243924889.6000 entropy=17.5277 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 91140] reward=-115768143.2 actor_loss=0.2994 critic_loss=115543004997.8182 entropy=17.5399 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 91140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552587.5 mean_steps=14.3
|
|
[Episode 91150] reward=-114192822.0 actor_loss=0.4219 critic_loss=118975049272.8889 entropy=17.5419 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 91160] reward=-112349744.0 actor_loss=0.3614 critic_loss=106907766692.9778 entropy=17.5438 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 91160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641280.5 mean_steps=11.9
|
|
[Episode 91170] reward=-117773658.6 actor_loss=0.3438 critic_loss=114464486321.2308 entropy=17.5456 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 91180] reward=-115440249.8 actor_loss=0.2231 critic_loss=113089471010.1333 entropy=17.5486 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 91180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591252.8 mean_steps=13.7
|
|
[Episode 91190] reward=-115125649.4 actor_loss=0.2929 critic_loss=111819201576.9600 entropy=17.5346 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 91200] reward=-117044815.8 actor_loss=0.1882 critic_loss=112931314307.6572 entropy=17.5346 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 91200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545617.0 mean_steps=13.2
|
|
[Episode 91210] reward=-111709855.7 actor_loss=0.2646 critic_loss=108624558125.5111 entropy=17.5274 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 91220] reward=-114230848.9 actor_loss=0.3705 critic_loss=113667720988.4444 entropy=17.5124 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 91220] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-622177.9 mean_steps=11.8
|
|
[Episode 91230] reward=-117059135.5 actor_loss=0.2892 critic_loss=115200229193.9556 entropy=17.5229 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 91240] reward=-117352514.0 actor_loss=0.3367 critic_loss=116770845378.2069 entropy=17.5328 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 91240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445883.7 mean_steps=15.8
|
|
[Episode 91250] reward=-118506612.9 actor_loss=0.3030 critic_loss=121998554033.2308 entropy=17.5323 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 91260] reward=-118683197.7 actor_loss=0.3119 critic_loss=119449784775.1111 entropy=17.5253 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 91260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-523567.4 mean_steps=15.7
|
|
[Episode 91270] reward=-113959441.2 actor_loss=0.2596 critic_loss=110994907136.0000 entropy=17.5237 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 91280] reward=-116877397.7 actor_loss=0.3017 critic_loss=110083064607.2195 entropy=17.5222 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 91280] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-667427.9 mean_steps=11.3
|
|
[Episode 91290] reward=-114112393.5 actor_loss=0.3031 critic_loss=115707060968.7273 entropy=17.5139 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 91300] reward=-120507906.0 actor_loss=0.2898 critic_loss=119619198683.4286 entropy=17.5146 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 91300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-375714.0 mean_steps=15.8
|
|
[Episode 91310] reward=-120115219.0 actor_loss=0.2839 critic_loss=120950445541.0526 entropy=17.5217 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 91320] reward=-111341877.8 actor_loss=0.2617 critic_loss=115088159442.8235 entropy=17.5259 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 91320] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-679573.3 mean_steps=10.3
|
|
[Episode 91330] reward=-117325967.7 actor_loss=0.3160 critic_loss=116134369342.0606 entropy=17.5349 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 91340] reward=-112829871.4 actor_loss=0.1901 critic_loss=111669330202.4828 entropy=17.5394 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 91340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-461523.4 mean_steps=15.4
|
|
[Episode 91350] reward=-118914082.9 actor_loss=0.3521 critic_loss=115621268210.5263 entropy=17.5449 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 91360] reward=-116553118.5 actor_loss=0.2984 critic_loss=114456718936.2759 entropy=17.5461 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 91360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-456438.8 mean_steps=13.4
|
|
[Episode 91370] reward=-116892730.3 actor_loss=0.3259 critic_loss=115396790408.5333 entropy=17.5375 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 91380] reward=-118492119.0 actor_loss=0.2699 critic_loss=113614298225.7778 entropy=17.5205 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 91380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454583.3 mean_steps=14.3
|
|
[Episode 91390] reward=-115644538.7 actor_loss=0.3146 critic_loss=117030674735.4074 entropy=17.5102 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 91400] reward=-115337692.6 actor_loss=0.3114 critic_loss=113842077040.6400 entropy=17.5252 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 91400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-376997.0 mean_steps=15.6
|
|
[Episode 91410] reward=-108469770.5 actor_loss=0.3568 critic_loss=105407970157.7143 entropy=17.5423 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 91420] reward=-114751741.9 actor_loss=0.3147 critic_loss=117309403421.7674 entropy=17.5566 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 91420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-609186.4 mean_steps=11.9
|
|
[Episode 91430] reward=-112680939.1 actor_loss=0.3745 critic_loss=110971838464.0000 entropy=17.5528 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 91440] reward=-118387884.0 actor_loss=0.2556 critic_loss=115292354332.4444 entropy=17.5660 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 91440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-509716.9 mean_steps=14.6
|
|
[Episode 91450] reward=-116964019.7 actor_loss=0.3221 critic_loss=128290474939.7333 entropy=17.5602 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 91460] reward=-120312337.5 actor_loss=0.2830 critic_loss=120568584055.4667 entropy=17.5592 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 91460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591647.5 mean_steps=13.7
|
|
[Episode 91470] reward=-115373514.5 actor_loss=0.3632 critic_loss=113845527552.0000 entropy=17.5666 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 91480] reward=-114336316.9 actor_loss=0.2601 critic_loss=118732788084.3636 entropy=17.5615 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 91480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484458.1 mean_steps=14.3
|
|
[Episode 91490] reward=-118353856.5 actor_loss=0.2678 critic_loss=115972160625.7778 entropy=17.5728 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 91500] reward=-118581618.3 actor_loss=0.2877 critic_loss=116262396849.2308 entropy=17.5822 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 91500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-496540.5 mean_steps=12.8
|
|
[Episode 91510] reward=-122640207.9 actor_loss=0.2630 critic_loss=129055724573.2571 entropy=17.5684 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 91520] reward=-112670532.3 actor_loss=0.2513 critic_loss=103236440268.8000 entropy=17.5630 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 91520] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-674680.3 mean_steps=12.3
|
|
[Episode 91530] reward=-113699269.8 actor_loss=0.2910 critic_loss=116660150272.0000 entropy=17.5588 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 91540] reward=-120288012.2 actor_loss=0.3385 critic_loss=118140490313.1429 entropy=17.5518 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 91540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-532752.1 mean_steps=15.1
|
|
[Episode 91550] reward=-112512385.1 actor_loss=0.5148 critic_loss=113921467436.5217 entropy=17.5579 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 91560] reward=-109538026.8 actor_loss=0.3370 critic_loss=109959029009.0667 entropy=17.5494 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 91560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541416.6 mean_steps=13.3
|
|
[Episode 91570] reward=-120729296.4 actor_loss=0.2493 critic_loss=125552433093.4857 entropy=17.5621 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 91580] reward=-109337308.0 actor_loss=0.3828 critic_loss=118271499946.6667 entropy=17.5548 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 91580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-542723.1 mean_steps=12.4
|
|
[Episode 91590] reward=-117096803.7 actor_loss=0.2941 critic_loss=136173654835.2000 entropy=17.5467 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 91600] reward=-117498438.2 actor_loss=0.2282 critic_loss=127210443337.1429 entropy=17.5515 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 91600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-671333.3 mean_steps=14.0
|
|
[Episode 91610] reward=-114204164.0 actor_loss=0.2860 critic_loss=111069488469.3333 entropy=17.5472 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 91620] reward=-112903116.7 actor_loss=0.3412 critic_loss=122954197978.0741 entropy=17.5533 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 91620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473152.9 mean_steps=14.2
|
|
[Episode 91630] reward=-114288151.5 actor_loss=0.3150 critic_loss=114460404849.7778 entropy=17.5689 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 91640] reward=-114926557.5 actor_loss=0.3339 critic_loss=114738990986.9714 entropy=17.5639 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 91640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484264.4 mean_steps=14.6
|
|
[Episode 91650] reward=-268833996.4 actor_loss=4.4864 critic_loss=90438236320563.2031 entropy=17.5615 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 91660] reward=-113511968.0 actor_loss=0.4103 critic_loss=111210158421.3333 entropy=17.5545 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 91660] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-599209.8 mean_steps=11.2
|
|
[Episode 91670] reward=-114645648.2 actor_loss=0.3690 critic_loss=120515713486.4516 entropy=17.5351 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 91680] reward=-114798728.7 actor_loss=0.3665 critic_loss=120949932335.4074 entropy=17.5079 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 91680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494678.0 mean_steps=13.8
|
|
[Episode 91690] reward=-120596681.6 actor_loss=0.3043 critic_loss=124417554295.4667 entropy=17.5078 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 91700] reward=-114269193.3 actor_loss=0.3273 critic_loss=111797867924.8372 entropy=17.5298 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 91700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-408753.5 mean_steps=15.8
|
|
[Episode 91710] reward=-118286513.6 actor_loss=0.2500 critic_loss=115322167296.0000 entropy=17.5244 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 91720] reward=-116452773.2 actor_loss=0.2379 critic_loss=114849893944.8889 entropy=17.5297 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 91720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546231.5 mean_steps=13.3
|
|
[Episode 91730] reward=-115237443.8 actor_loss=0.2334 critic_loss=113988313770.6667 entropy=17.5556 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 91740] reward=-113045163.8 actor_loss=0.3929 critic_loss=107413177744.6956 entropy=17.5426 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 91740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547150.3 mean_steps=13.4
|
|
[Episode 91750] reward=-115755914.2 actor_loss=0.2105 critic_loss=115534807449.6000 entropy=17.5337 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 91760] reward=-111633424.0 actor_loss=0.2802 critic_loss=109150909196.1905 entropy=17.5029 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 91760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-557439.2 mean_steps=12.7
|
|
[Episode 91770] reward=-120097500.9 actor_loss=0.2379 critic_loss=123888923648.0000 entropy=17.5006 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 91780] reward=-114330288.1 actor_loss=0.3510 critic_loss=109494577906.5263 entropy=17.4888 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 91780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529133.3 mean_steps=13.2
|
|
[Episode 91790] reward=-118642593.4 actor_loss=0.2998 critic_loss=116476654933.3333 entropy=17.4847 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 91800] reward=-114105709.5 actor_loss=0.2686 critic_loss=109970626969.6000 entropy=17.4885 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 91800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429157.2 mean_steps=15.2
|
|
[Episode 91810] reward=-107936826.4 actor_loss=0.3398 critic_loss=108766353635.5556 entropy=17.4771 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 91820] reward=-120514246.2 actor_loss=0.2299 critic_loss=121584657920.0000 entropy=17.4712 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 91820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-612210.8 mean_steps=13.7
|
|
[Episode 91830] reward=-114823821.3 actor_loss=0.2117 critic_loss=110688038539.6364 entropy=17.4836 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 91840] reward=-123343451.0 actor_loss=0.2409 critic_loss=136265762454.5882 entropy=17.4870 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 91840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-583959.9 mean_steps=14.4
|
|
[Episode 91850] reward=-117131744.1 actor_loss=0.3469 critic_loss=116964137030.6207 entropy=17.4743 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 91860] reward=-116785760.0 actor_loss=0.3728 critic_loss=118344487063.7037 entropy=17.4742 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 91860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562399.4 mean_steps=13.1
|
|
[Episode 91870] reward=-113300947.6 actor_loss=0.4047 critic_loss=120899636428.8000 entropy=17.4702 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 91880] reward=-114642657.6 actor_loss=0.3427 critic_loss=113844728514.2069 entropy=17.4883 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 91880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423063.8 mean_steps=15.2
|
|
[Episode 91890] reward=-118009737.5 actor_loss=0.2876 critic_loss=115409495552.0000 entropy=17.4807 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 91900] reward=-118775572.6 actor_loss=0.3046 critic_loss=121773639559.5294 entropy=17.4748 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 91900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531219.2 mean_steps=14.2
|
|
[Episode 91910] reward=-121127929.9 actor_loss=0.3406 critic_loss=120501958519.4667 entropy=17.4677 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 91920] reward=-118248555.9 actor_loss=0.4649 critic_loss=119335820219.7333 entropy=17.4687 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 91920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-457463.0 mean_steps=13.9
|
|
[Episode 91930] reward=-118003652.3 actor_loss=0.3589 critic_loss=120817664000.0000 entropy=17.4833 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 91940] reward=-113616739.5 actor_loss=0.3794 critic_loss=113662551276.3077 entropy=17.4877 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 91940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-542550.6 mean_steps=12.7
|
|
[Episode 91950] reward=-113794689.4 actor_loss=0.3207 critic_loss=118272361972.0930 entropy=17.4898 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 91960] reward=-119555747.0 actor_loss=0.1966 critic_loss=116221166478.2222 entropy=17.4789 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 91960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459770.6 mean_steps=15.1
|
|
[Episode 91970] reward=-114624625.1 actor_loss=0.3013 critic_loss=117920733411.5556 entropy=17.4930 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 91980] reward=-754833535.4 actor_loss=25.8462 critic_loss=976197603184855.6250 entropy=17.4881 approx_kl=0.0032 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 91980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-632916.7 mean_steps=12.4
|
|
[Episode 91990] reward=-112014096.3 actor_loss=0.3792 critic_loss=114025533348.9778 entropy=17.4877 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 92000] reward=-115880137.0 actor_loss=0.2920 critic_loss=113265190804.2105 entropy=17.4981 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 92000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-519482.8 mean_steps=15.1
|
|
[Episode 92010] reward=-117092700.6 actor_loss=0.2522 critic_loss=116402741452.8000 entropy=17.5009 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 92020] reward=-117742409.6 actor_loss=0.2016 critic_loss=129515902098.2857 entropy=17.5055 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 92020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549292.6 mean_steps=14.2
|
|
[Episode 92030] reward=-116728491.5 actor_loss=0.3368 critic_loss=123310940979.2000 entropy=17.4898 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 92040] reward=-116166273.0 actor_loss=0.2181 critic_loss=113509462107.0222 entropy=17.4841 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 92040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485832.1 mean_steps=15.0
|
|
[Episode 92050] reward=-121408831.2 actor_loss=0.4503 critic_loss=287310638925.9130 entropy=17.4958 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 92060] reward=-111101692.2 actor_loss=0.3766 critic_loss=115071792625.3714 entropy=17.5128 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 92060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-511822.5 mean_steps=14.9
|
|
[Episode 92070] reward=-113908474.4 actor_loss=0.2886 critic_loss=113011366115.5556 entropy=17.5067 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 92080] reward=-116315053.6 actor_loss=0.4239 critic_loss=115824452676.2667 entropy=17.5012 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 92080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-419493.4 mean_steps=14.2
|
|
[Episode 92090] reward=-112978875.9 actor_loss=0.3426 critic_loss=115062612992.0000 entropy=17.5021 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 92100] reward=-117219209.2 actor_loss=0.3482 critic_loss=117149034968.6154 entropy=17.4978 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 92100] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-682739.5 mean_steps=11.4
|
|
[Episode 92110] reward=-108770761.9 actor_loss=0.3596 critic_loss=110204837515.6364 entropy=17.5040 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 92120] reward=-117993474.2 actor_loss=0.3563 critic_loss=118644186006.0690 entropy=17.4961 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 92120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441615.9 mean_steps=14.2
|
|
[Episode 92130] reward=-115237202.1 actor_loss=0.2226 critic_loss=114841172195.5556 entropy=17.5030 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 92140] reward=-116143397.2 actor_loss=0.3409 critic_loss=114393767280.6400 entropy=17.5074 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 92140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509683.7 mean_steps=14.1
|
|
[Episode 92150] reward=-115579545.6 actor_loss=0.2988 critic_loss=114286109013.3333 entropy=17.5040 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 92160] reward=-115940300.1 actor_loss=0.2570 critic_loss=121041628537.2632 entropy=17.5185 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 92160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432112.4 mean_steps=15.4
|
|
[Episode 92170] reward=-113944507.0 actor_loss=0.2712 critic_loss=117389527449.6000 entropy=17.5244 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 92180] reward=-114047773.7 actor_loss=0.3500 critic_loss=116951503339.5200 entropy=17.5250 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 92180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474146.6 mean_steps=14.8
|
|
[Episode 92190] reward=-122211025.3 actor_loss=0.2995 critic_loss=209535287902.8148 entropy=17.5285 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 92200] reward=-119079681.5 actor_loss=0.2088 critic_loss=122026431897.6000 entropy=17.5352 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 92200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-639809.5 mean_steps=12.8
|
|
[Episode 92210] reward=-117549562.5 actor_loss=0.2391 critic_loss=123064382879.1351 entropy=17.5353 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 92220] reward=-115285589.8 actor_loss=0.2885 critic_loss=111304736950.0444 entropy=17.5380 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 92220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548613.4 mean_steps=13.4
|
|
[Episode 92230] reward=-115170117.6 actor_loss=0.2647 critic_loss=116805020057.6000 entropy=17.5434 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 92240] reward=-119390905.1 actor_loss=0.2202 critic_loss=113099445979.4286 entropy=17.5511 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 92240] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-291760.9 mean_steps=16.4
|
|
[Episode 92250] reward=-115250280.0 actor_loss=0.2888 critic_loss=116864441275.7333 entropy=17.5522 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 92260] reward=-113354547.4 actor_loss=0.2706 critic_loss=112576253314.8445 entropy=17.5506 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 92260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-432727.0 mean_steps=14.4
|
|
[Episode 92270] reward=-120330193.0 actor_loss=0.2451 critic_loss=119619906355.2000 entropy=17.5538 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 92280] reward=-114851539.9 actor_loss=0.2764 critic_loss=111689978171.0769 entropy=17.5575 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 92280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438915.3 mean_steps=15.6
|
|
[Episode 92290] reward=-116960205.6 actor_loss=0.3067 critic_loss=113507107471.3600 entropy=17.5423 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 92300] reward=-117529914.1 actor_loss=0.1937 critic_loss=114491000921.0435 entropy=17.5360 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 92300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541084.6 mean_steps=13.3
|
|
[Episode 92310] reward=-117987477.0 actor_loss=0.3233 critic_loss=116191518392.3200 entropy=17.5289 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 92320] reward=-116347510.0 actor_loss=0.3832 critic_loss=122397051939.3103 entropy=17.5043 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 92320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410775.7 mean_steps=15.2
|
|
[Episode 92330] reward=-113183153.0 actor_loss=0.3695 critic_loss=108127964013.7143 entropy=17.4986 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 92340] reward=-111132200.5 actor_loss=0.3276 critic_loss=106452931677.0909 entropy=17.4833 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 92340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-467750.1 mean_steps=15.7
|
|
[Episode 92350] reward=-114075155.0 actor_loss=0.3210 critic_loss=110916392487.3846 entropy=17.4756 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 92360] reward=-120989977.8 actor_loss=0.2358 critic_loss=147132440985.6000 entropy=17.4763 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 92360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414269.3 mean_steps=15.4
|
|
[Episode 92370] reward=-116988219.7 actor_loss=0.1987 critic_loss=112720110660.2667 entropy=17.4746 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 92380] reward=-178792608.7 actor_loss=0.8278 critic_loss=14546639519744.0000 entropy=17.4839 approx_kl=0.0042 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 92380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-446031.3 mean_steps=13.7
|
|
[Episode 92390] reward=-117648531.4 actor_loss=0.2637 critic_loss=121648736701.2174 entropy=17.4875 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 92400] reward=-114990634.2 actor_loss=0.3633 critic_loss=116802991991.4667 entropy=17.4961 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 92400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-513330.6 mean_steps=12.6
|
|
[Episode 92410] reward=-114040977.9 actor_loss=0.4443 critic_loss=117078409402.1818 entropy=17.4947 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 92420] reward=-118600826.3 actor_loss=0.2503 critic_loss=146073562589.8667 entropy=17.4882 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 92420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-407840.5 mean_steps=15.2
|
|
[Episode 92430] reward=-118613472.2 actor_loss=0.2637 critic_loss=120443467776.0000 entropy=17.4830 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 92440] reward=-115760859.2 actor_loss=0.2868 critic_loss=112519180620.1081 entropy=17.4667 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 92440] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-633606.4 mean_steps=10.3
|
|
[Episode 92450] reward=-122535728.8 actor_loss=0.3391 critic_loss=124963810110.2703 entropy=17.4479 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 92460] reward=-112393826.4 actor_loss=0.3650 critic_loss=104627476252.4444 entropy=17.4444 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 92460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467595.3 mean_steps=13.9
|
|
[Episode 92470] reward=-114872588.6 actor_loss=0.2967 critic_loss=110690085728.7111 entropy=17.4491 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 92480] reward=-114441439.5 actor_loss=0.2560 critic_loss=213895326141.2174 entropy=17.4471 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 92480] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-312700.3 mean_steps=16.6
|
|
[Episode 92490] reward=-113725810.3 actor_loss=0.3871 critic_loss=118559464594.2857 entropy=17.4549 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 92500] reward=-116993947.7 actor_loss=0.3049 critic_loss=112732828593.2308 entropy=17.4548 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 92500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543258.3 mean_steps=13.4
|
|
[Episode 92510] reward=-116470625.7 actor_loss=0.2790 critic_loss=111758137389.5111 entropy=17.4524 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 92520] reward=-115648522.4 actor_loss=0.3022 critic_loss=111624950306.1333 entropy=17.4625 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 92520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442014.2 mean_steps=14.8
|
|
[Episode 92530] reward=-115409364.3 actor_loss=0.2855 critic_loss=114289421880.8889 entropy=17.4491 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 92540] reward=-119425715.7 actor_loss=0.1471 critic_loss=112772741120.0000 entropy=17.4426 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 92540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564357.7 mean_steps=13.6
|
|
[Episode 92550] reward=-273361112.5 actor_loss=52.2653 critic_loss=40378533325482.6641 entropy=17.4492 approx_kl=0.0042 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 92560] reward=-113752217.4 actor_loss=0.3363 critic_loss=111423671136.7111 entropy=17.4564 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 92560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-667325.4 mean_steps=12.9
|
|
[Episode 92570] reward=-224677049.1 actor_loss=0.9154 critic_loss=44639527749404.4453 entropy=17.4705 approx_kl=0.0045 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 92580] reward=-111135256.3 actor_loss=0.3203 critic_loss=121999248224.7111 entropy=17.4601 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 92580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539194.1 mean_steps=14.5
|
|
[Episode 92590] reward=-268979639.0 actor_loss=0.3053 critic_loss=84670829245235.2031 entropy=17.4678 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 92600] reward=-109996510.2 actor_loss=0.3795 critic_loss=109339675579.7333 entropy=17.4704 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 92600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-521752.8 mean_steps=13.2
|
|
[Episode 92610] reward=-119393567.0 actor_loss=0.3606 critic_loss=125036673554.9630 entropy=17.4668 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 92620] reward=-204735274.9 actor_loss=22.8408 critic_loss=22188490454903.4648 entropy=17.4621 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 92620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-436114.8 mean_steps=14.8
|
|
[Episode 92630] reward=-1071905036.9 actor_loss=13.1159 critic_loss=2093885504379653.7500 entropy=17.4629 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 92640] reward=-119991666.7 actor_loss=0.2927 critic_loss=120160961058.1333 entropy=17.4655 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 92640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-295959.6 mean_steps=16.8
|
|
[Episode 92650] reward=-118239556.8 actor_loss=0.3008 critic_loss=114297577216.0000 entropy=17.4706 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 92660] reward=-112947473.2 actor_loss=0.3354 critic_loss=176116264960.0000 entropy=17.4820 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 92660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-564120.5 mean_steps=12.6
|
|
[Episode 92670] reward=-118169021.5 actor_loss=0.2915 critic_loss=137853455194.8387 entropy=17.4772 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 92680] reward=-1613482324.8 actor_loss=132.2384 critic_loss=3049387743679465.0000 entropy=17.5031 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1087 front_blocked=0
|
|
[Eval 92680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457395.8 mean_steps=14.4
|
|
[Episode 92690] reward=-143316572.6 actor_loss=0.2501 critic_loss=3413367149909.3335 entropy=17.5113 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 92700] reward=-280981898.6 actor_loss=1.6649 critic_loss=72785660593948.4375 entropy=17.5319 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 92700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-629625.6 mean_steps=11.8
|
|
[Episode 92710] reward=-12270065091.2 actor_loss=398.6193 critic_loss=170863751795633216.0000 entropy=17.5437 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 92720] reward=-198600562.9 actor_loss=0.4420 critic_loss=23315870762780.4453 entropy=17.5574 approx_kl=0.0013 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 92720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391895.8 mean_steps=16.1
|
|
[Episode 92730] reward=-132387598.3 actor_loss=0.1965 critic_loss=751489136867.5555 entropy=17.5546 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 92740] reward=-117063851.1 actor_loss=0.2920 critic_loss=134896235178.6667 entropy=17.5657 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 92740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-485383.5 mean_steps=12.7
|
|
[Episode 92750] reward=-28798809890.0 actor_loss=51.0646 critic_loss=1183098480840461568.0000 entropy=17.5669 approx_kl=0.1180 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 92760] reward=-116884018.1 actor_loss=0.3178 critic_loss=116458479988.3636 entropy=17.5640 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 92760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-596790.1 mean_steps=12.6
|
|
[Episode 92770] reward=-114630990.5 actor_loss=0.3141 critic_loss=113033058424.4706 entropy=17.5719 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 92780] reward=-132404827.0 actor_loss=0.3078 critic_loss=843117432376.8889 entropy=17.5682 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 92780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-558452.5 mean_steps=14.2
|
|
[Episode 92790] reward=-238294221.8 actor_loss=23.8044 critic_loss=34905439827831.4648 entropy=17.5633 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 92800] reward=-121699860.8 actor_loss=0.2673 critic_loss=131360165302.8571 entropy=17.5617 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 92800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-606317.8 mean_steps=12.8
|
|
[Episode 92810] reward=-117010802.7 actor_loss=0.2692 critic_loss=124538009258.6667 entropy=17.5612 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 92820] reward=-120973969.4 actor_loss=0.3286 critic_loss=796417388544.0000 entropy=17.5687 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 92820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493551.1 mean_steps=14.0
|
|
[Episode 92830] reward=-121413891.4 actor_loss=0.3595 critic_loss=121293201408.0000 entropy=17.5619 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 92840] reward=-404608657.9 actor_loss=40.4887 critic_loss=208775025320846.2188 entropy=17.5514 approx_kl=0.0216 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 92840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587721.5 mean_steps=13.7
|
|
[Episode 92850] reward=-118914551.6 actor_loss=0.3766 critic_loss=116538206526.5778 entropy=17.5438 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 92860] reward=-112165651.2 actor_loss=0.3283 critic_loss=108109688832.0000 entropy=17.5374 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 92860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-486235.0 mean_steps=16.1
|
|
[Episode 92870] reward=-116836431.7 actor_loss=0.2205 critic_loss=161768004418.3704 entropy=17.5447 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 92880] reward=-116566776.6 actor_loss=0.2603 critic_loss=116030319435.2941 entropy=17.5410 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 92880] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-643740.1 mean_steps=11.3
|
|
[Episode 92890] reward=-121552010.9 actor_loss=0.1461 critic_loss=117114788924.2353 entropy=17.5576 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 92900] reward=-114765415.6 actor_loss=0.2528 critic_loss=110374221637.8182 entropy=17.5686 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 92900] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-404189.2 mean_steps=17.1
|
|
[Episode 92910] reward=-117284178.4 actor_loss=0.3339 critic_loss=113917534208.0000 entropy=17.5939 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 92920] reward=-119257718.2 actor_loss=0.2732 critic_loss=229205789696.0000 entropy=17.5917 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 92920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-625347.3 mean_steps=12.2
|
|
[Episode 92930] reward=-38443550898.9 actor_loss=252.2572 critic_loss=2174080215800610816.0000 entropy=17.6138 approx_kl=0.0532 kl_stop=1 intervention_rate=0.1068 front_blocked=0
|
|
[Episode 92940] reward=-110751262.9 actor_loss=0.3661 critic_loss=113917186594.1333 entropy=17.6218 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 92940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-571558.3 mean_steps=11.6
|
|
[Episode 92950] reward=-121518151.6 actor_loss=0.2850 critic_loss=141745178093.0370 entropy=17.6296 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 92960] reward=-332585299.4 actor_loss=0.3466 critic_loss=147056977778369.4375 entropy=17.6412 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 92960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579856.7 mean_steps=12.8
|
|
[Episode 92970] reward=-119142690.6 actor_loss=0.2682 critic_loss=128118787185.7778 entropy=17.6493 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 92980] reward=-391839979.9 actor_loss=40.8811 critic_loss=185317020500696.1875 entropy=17.6430 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 92980] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-381609.5 mean_steps=17.2
|
|
[Episode 92990] reward=-119232220.5 actor_loss=0.3684 critic_loss=125807094052.5714 entropy=17.6454 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 93000] reward=-116479847.7 actor_loss=0.4031 critic_loss=117287310914.7826 entropy=17.6360 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 93000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505869.2 mean_steps=14.2
|
|
[Episode 93010] reward=-119600904.3 actor_loss=0.1697 critic_loss=128234265941.3333 entropy=17.6461 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 93020] reward=-117818229.7 actor_loss=0.4229 critic_loss=119406724332.3077 entropy=17.6410 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 93020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-626020.1 mean_steps=13.1
|
|
[Episode 93030] reward=-116314569.0 actor_loss=0.3173 critic_loss=121170605215.2889 entropy=17.6558 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 93040] reward=-117473370.9 actor_loss=0.3120 critic_loss=127522878025.1429 entropy=17.6514 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 93040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499329.4 mean_steps=14.2
|
|
[Episode 93050] reward=-115287408.1 actor_loss=0.3955 critic_loss=113652845152.8649 entropy=17.6473 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 93060] reward=-119834808.0 actor_loss=0.1611 critic_loss=130001989176.8889 entropy=17.6386 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 93060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602532.5 mean_steps=13.0
|
|
[Episode 93070] reward=-116867384.5 actor_loss=0.3255 critic_loss=116545010255.6444 entropy=17.6411 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 93080] reward=-116472855.1 actor_loss=0.3469 critic_loss=120044061635.7647 entropy=17.6421 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 93080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446911.5 mean_steps=14.2
|
|
[Episode 93090] reward=-111853615.9 actor_loss=0.3525 critic_loss=104843747584.0000 entropy=17.6333 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 93100] reward=-198175777.9 actor_loss=0.2462 critic_loss=16427373352852.2109 entropy=17.6506 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 93100] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-319801.5 mean_steps=17.6
|
|
[Episode 93110] reward=-117287737.8 actor_loss=0.2987 critic_loss=123760465715.2000 entropy=17.6530 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 93120] reward=-110469846.3 actor_loss=0.2833 critic_loss=116759351933.1555 entropy=17.6572 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 93120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419916.1 mean_steps=15.2
|
|
[Episode 93130] reward=-119276480.8 actor_loss=0.2943 critic_loss=140001891802.5366 entropy=17.6508 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 93140] reward=-118662449.0 actor_loss=0.3435 critic_loss=115958660473.2632 entropy=17.6443 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 93140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584583.8 mean_steps=12.9
|
|
[Episode 93150] reward=-113941950.4 actor_loss=0.3331 critic_loss=113752628455.2258 entropy=17.6396 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 93160] reward=-110348306.6 actor_loss=0.2815 critic_loss=107862129777.7778 entropy=17.6362 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 93160] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-715648.3 mean_steps=9.7
|
|
[Episode 93170] reward=-116919216.6 actor_loss=0.2941 critic_loss=117694048028.4444 entropy=17.6314 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 93180] reward=-114872959.6 actor_loss=0.2700 critic_loss=122161622766.9333 entropy=17.6329 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 93180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447202.6 mean_steps=15.7
|
|
[Episode 93190] reward=-116023671.3 actor_loss=0.1906 critic_loss=111047285145.6000 entropy=17.6184 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 93200] reward=-117010869.9 actor_loss=0.3136 critic_loss=131762333882.1818 entropy=17.6014 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 93200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-436426.9 mean_steps=14.4
|
|
[Episode 93210] reward=-110612696.0 actor_loss=0.3611 critic_loss=110883279121.0667 entropy=17.6041 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 93220] reward=-113179819.8 actor_loss=0.3424 critic_loss=112780483098.9474 entropy=17.6060 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 93220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430600.5 mean_steps=15.7
|
|
[Episode 93230] reward=-116657171.2 actor_loss=0.3811 critic_loss=116467072146.2857 entropy=17.6188 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 93240] reward=-118036956.7 actor_loss=0.3555 critic_loss=172474731178.6667 entropy=17.6230 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 93240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448915.5 mean_steps=14.8
|
|
[Episode 93250] reward=-114541320.1 actor_loss=0.2609 critic_loss=120965377046.7556 entropy=17.6256 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 93260] reward=-115792538.7 actor_loss=0.4007 critic_loss=117701346255.2381 entropy=17.6318 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 93260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523103.3 mean_steps=14.2
|
|
[Episode 93270] reward=-121615305.6 actor_loss=0.2253 critic_loss=127198439742.5778 entropy=17.6359 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 93280] reward=-116235194.1 actor_loss=0.3244 critic_loss=113533481518.5455 entropy=17.6331 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 93280] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-670675.1 mean_steps=11.5
|
|
[Episode 93290] reward=-118510249.7 actor_loss=0.3444 critic_loss=138610436622.6286 entropy=17.6390 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 93300] reward=-118600280.5 actor_loss=0.3983 critic_loss=122123589313.4222 entropy=17.6451 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 93300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-642492.9 mean_steps=13.0
|
|
[Episode 93310] reward=-5739047503.9 actor_loss=281.6769 critic_loss=63864208813294296.0000 entropy=17.6498 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 93320] reward=-158058251.0 actor_loss=0.3091 critic_loss=4000153519340.3076 entropy=17.6424 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 93320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512185.2 mean_steps=14.3
|
|
[Episode 93330] reward=-120312303.5 actor_loss=0.3079 critic_loss=139769069750.0444 entropy=17.6456 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 93340] reward=-113282785.0 actor_loss=0.3090 critic_loss=134391509083.0222 entropy=17.6346 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 93340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450019.7 mean_steps=14.7
|
|
[Episode 93350] reward=-117539908.9 actor_loss=0.3438 critic_loss=202554468864.0000 entropy=17.6191 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 93360] reward=-115087920.9 actor_loss=0.2598 critic_loss=111997528018.4889 entropy=17.6218 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 93360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-593473.5 mean_steps=12.6
|
|
[Episode 93370] reward=-118401962.2 actor_loss=0.1829 critic_loss=113902107579.7333 entropy=17.6211 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 93380] reward=-117629256.4 actor_loss=0.3418 critic_loss=122027634050.8445 entropy=17.6312 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 93380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467300.6 mean_steps=15.1
|
|
[Episode 93390] reward=-118606915.1 actor_loss=0.2337 critic_loss=196349077346.4615 entropy=17.6452 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 93400] reward=-407500595.2 actor_loss=28.9611 critic_loss=150106411455829.3438 entropy=17.6519 approx_kl=0.0037 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 93400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448658.7 mean_steps=15.4
|
|
[Episode 93410] reward=-58449293655.8 actor_loss=97.2409 critic_loss=3478289503867633664.0000 entropy=17.6469 approx_kl=1.5942 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 93420] reward=-32203509668.9 actor_loss=128.2122 critic_loss=1281276389217428992.0000 entropy=17.6509 approx_kl=0.0098 kl_stop=1 intervention_rate=0.0977 front_blocked=0
|
|
[Eval 93420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495372.0 mean_steps=13.9
|
|
[Episode 93430] reward=-17152742615.3 actor_loss=0.3559 critic_loss=497519892494077120.0000 entropy=17.6622 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 93440] reward=-242927175.0 actor_loss=0.3418 critic_loss=71276669039775.2812 entropy=17.6854 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 93440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450736.4 mean_steps=14.8
|
|
[Episode 93450] reward=-117399288.8 actor_loss=0.3074 critic_loss=119128320573.4400 entropy=17.6949 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 93460] reward=-405422637.5 actor_loss=28.6095 critic_loss=166534328614912.0000 entropy=17.6970 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1120 front_blocked=0
|
|
[Eval 93460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-550193.0 mean_steps=12.7
|
|
[Episode 93470] reward=-1179320555.7 actor_loss=8.8861 critic_loss=2189245076500935.0000 entropy=17.7020 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 93480] reward=-434077937.5 actor_loss=5.0057 critic_loss=278026160614058.6562 entropy=17.7031 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 93480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399417.2 mean_steps=15.0
|
|
[Episode 93490] reward=-405943210.2 actor_loss=14.5893 critic_loss=149162428348643.5625 entropy=17.7104 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 93500] reward=-120487145.2 actor_loss=0.2425 critic_loss=120198047880.5333 entropy=17.7060 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 93500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478917.8 mean_steps=13.4
|
|
[Episode 93510] reward=-117433223.3 actor_loss=0.3119 critic_loss=118946194227.2000 entropy=17.6944 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 93520] reward=-112737300.9 actor_loss=0.2633 critic_loss=111202136337.0667 entropy=17.7010 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 93520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450605.4 mean_steps=14.7
|
|
[Episode 93530] reward=-116390431.1 actor_loss=0.3523 critic_loss=118561414758.4000 entropy=17.6915 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 93540] reward=-113993886.6 actor_loss=0.3721 critic_loss=133820970900.2105 entropy=17.6820 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 93540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464594.7 mean_steps=14.5
|
|
[Episode 93550] reward=-120798024.9 actor_loss=0.3655 critic_loss=117381668548.9231 entropy=17.6809 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 93560] reward=-116914316.3 actor_loss=0.3328 critic_loss=116516892672.0000 entropy=17.6766 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 93560] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-615221.4 mean_steps=11.0
|
|
[Episode 93570] reward=-123204510.8 actor_loss=0.2816 critic_loss=127164039987.2000 entropy=17.6732 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 93580] reward=-114574840.9 actor_loss=0.2460 critic_loss=112802571934.8965 entropy=17.6762 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 93580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-546474.2 mean_steps=14.9
|
|
[Episode 93590] reward=-117055487.8 actor_loss=0.2673 critic_loss=118171891525.8182 entropy=17.6757 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 93600] reward=-123482351.5 actor_loss=0.1233 critic_loss=130501727153.2308 entropy=17.6772 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 93600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400466.8 mean_steps=15.2
|
|
[Episode 93610] reward=-113342448.0 actor_loss=0.3447 critic_loss=106858726650.3111 entropy=17.6816 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 93620] reward=-128201784.9 actor_loss=0.3973 critic_loss=460421829754.8800 entropy=17.6921 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 93620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465027.5 mean_steps=14.7
|
|
[Episode 93630] reward=-118452343.7 actor_loss=0.4075 critic_loss=121771478493.8667 entropy=17.7164 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 93640] reward=-115219319.9 actor_loss=0.2956 critic_loss=115801882980.1739 entropy=17.7152 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 93640] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392846.5 mean_steps=16.1
|
|
[Episode 93650] reward=-118278798.4 actor_loss=0.3919 critic_loss=120066632089.6000 entropy=17.7338 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 93660] reward=-122780719.2 actor_loss=0.2309 critic_loss=145477690072.1778 entropy=17.7371 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 93660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431670.7 mean_steps=14.4
|
|
[Episode 93670] reward=-118641854.9 actor_loss=0.3594 critic_loss=122442972577.1852 entropy=17.7329 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 93680] reward=-1857987846.1 actor_loss=0.2379 critic_loss=7384216744493056.0000 entropy=17.7217 approx_kl=0.0572 kl_stop=1 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 93680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-511082.1 mean_steps=13.3
|
|
[Episode 93690] reward=-119211250.0 actor_loss=0.3045 critic_loss=117459666385.4545 entropy=17.7162 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 93700] reward=-914624577.8 actor_loss=0.2760 critic_loss=1745389776116121.5000 entropy=17.7225 approx_kl=0.0036 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 93700] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-257921.6 mean_steps=17.2
|
|
[Episode 93710] reward=-121808971.2 actor_loss=0.3680 critic_loss=123074176400.6956 entropy=17.7157 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 93720] reward=-116668107.1 actor_loss=0.2988 critic_loss=115006495857.7778 entropy=17.7262 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 93720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496278.0 mean_steps=14.2
|
|
[Episode 93730] reward=-115851725.3 actor_loss=0.3072 critic_loss=115816557856.8205 entropy=17.7282 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 93740] reward=-1702556398.5 actor_loss=0.2614 critic_loss=4883568896857429.0000 entropy=17.7341 approx_kl=0.0043 kl_stop=1 intervention_rate=0.1068 front_blocked=0
|
|
[Eval 93740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-18110429.1 mean_steps=17.1
|
|
[Episode 93750] reward=-117564191.7 actor_loss=0.2891 critic_loss=119713258955.0345 entropy=17.7517 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 93760] reward=-12289013844.5 actor_loss=275.3448 critic_loss=200562442462897312.0000 entropy=17.7528 approx_kl=0.0032 kl_stop=1 intervention_rate=0.1035 front_blocked=0
|
|
[Eval 93760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-23262522.3 mean_steps=18.0
|
|
[Episode 93770] reward=-13654930646.3 actor_loss=205.2657 critic_loss=176563383483731520.0000 entropy=17.7559 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 93780] reward=-123507822.2 actor_loss=0.2618 critic_loss=123428901925.9259 entropy=17.7545 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 93780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-553993.7 mean_steps=12.5
|
|
[Episode 93790] reward=-386154759.8 actor_loss=0.3061 critic_loss=228230619463680.0000 entropy=17.7519 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 93800] reward=-3460107199.3 actor_loss=100.0291 critic_loss=23632020311638016.0000 entropy=17.7522 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 93800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-377840.6 mean_steps=15.6
|
|
[Episode 93810] reward=-146111713.9 actor_loss=0.4000 critic_loss=3432709344460.7998 entropy=17.7481 approx_kl=0.0029 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 93820] reward=-125255931.8 actor_loss=2.5248 critic_loss=355776664917.3333 entropy=17.7381 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 93820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-546442.6 mean_steps=14.6
|
|
[Episode 93830] reward=-115705449.0 actor_loss=0.2396 critic_loss=124626973818.8800 entropy=17.7277 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 93840] reward=-116826769.3 actor_loss=0.2818 critic_loss=111312594300.3428 entropy=17.7265 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 93840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487533.9 mean_steps=14.2
|
|
[Episode 93850] reward=-116458938.3 actor_loss=0.2672 critic_loss=113107641048.1778 entropy=17.7224 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 93860] reward=-112616454.3 actor_loss=0.3522 critic_loss=112484669667.5556 entropy=17.7129 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 93860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418779.5 mean_steps=15.5
|
|
[Episode 93870] reward=-116103985.8 actor_loss=0.2403 critic_loss=116589760056.8889 entropy=17.6922 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 93880] reward=-120333588.1 actor_loss=0.2690 critic_loss=125420463589.0526 entropy=17.6895 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 93880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532135.9 mean_steps=13.2
|
|
[Episode 93890] reward=-116756332.2 actor_loss=0.3874 critic_loss=125980492040.2581 entropy=17.6715 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 93900] reward=-114222150.9 actor_loss=0.4380 critic_loss=137492496384.0000 entropy=17.6650 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 93900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-449930.4 mean_steps=12.8
|
|
[Episode 93910] reward=-119106242.5 actor_loss=0.3531 critic_loss=121753612743.1111 entropy=17.6689 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 93920] reward=-120921289.5 actor_loss=0.3069 critic_loss=124580306397.8667 entropy=17.6651 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 93920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485836.5 mean_steps=14.9
|
|
[Episode 93930] reward=-1322231996.6 actor_loss=56.2336 critic_loss=3011471539746315.5000 entropy=17.6606 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 93940] reward=-115697852.9 actor_loss=0.2582 critic_loss=114796829642.1053 entropy=17.6653 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 93940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501039.0 mean_steps=13.2
|
|
[Episode 93950] reward=-305738461.4 actor_loss=0.3364 critic_loss=78617969164288.0000 entropy=17.6616 approx_kl=0.0038 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 93960] reward=-121231264.3 actor_loss=0.3804 critic_loss=121276521403.7333 entropy=17.6586 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 93960] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-589944.7 mean_steps=12.1
|
|
[Episode 93970] reward=-120169200.8 actor_loss=0.3212 critic_loss=126409995150.2222 entropy=17.6669 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 93980] reward=-117760684.4 actor_loss=0.3396 critic_loss=119739247001.6000 entropy=17.6665 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 93980] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-408876.0 mean_steps=17.6
|
|
[Episode 93990] reward=-122860562.3 actor_loss=0.2914 critic_loss=124357403648.0000 entropy=17.6790 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 94000] reward=-13617709339.4 actor_loss=12.7421 critic_loss=321568788476592128.0000 entropy=17.6798 approx_kl=0.0632 kl_stop=1 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 94000] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-674800.3 mean_steps=11.4
|
|
[Episode 94010] reward=-114738031.5 actor_loss=0.2787 critic_loss=107069881821.8667 entropy=17.6822 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 94020] reward=-119244240.5 actor_loss=0.3322 critic_loss=118779841097.1429 entropy=17.6808 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 94020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519791.0 mean_steps=14.2
|
|
[Episode 94030] reward=-124308238.1 actor_loss=0.2600 critic_loss=300055819605.3333 entropy=17.6974 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 94040] reward=-116131145.0 actor_loss=0.2891 critic_loss=130303741952.0000 entropy=17.6839 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 94040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-591039.5 mean_steps=12.8
|
|
[Episode 94050] reward=-119848954.5 actor_loss=0.2103 critic_loss=120385134409.9556 entropy=17.6766 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 94060] reward=-120517151.2 actor_loss=0.2626 critic_loss=120772737210.1818 entropy=17.6804 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 94060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586419.1 mean_steps=12.7
|
|
[Episode 94070] reward=-123142358.2 actor_loss=0.2194 critic_loss=175300198400.0000 entropy=17.6751 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 94080] reward=-115548522.9 actor_loss=0.4388 critic_loss=134795046671.0588 entropy=17.6663 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 94080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530408.9 mean_steps=13.3
|
|
[Episode 94090] reward=-118863456.1 actor_loss=0.2559 critic_loss=116415665052.9032 entropy=17.6726 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 94100] reward=-120808413.6 actor_loss=0.1497 critic_loss=119614564761.6000 entropy=17.6554 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 94100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446656.0 mean_steps=15.5
|
|
[Episode 94110] reward=-120674095.2 actor_loss=0.3541 critic_loss=115521306624.0000 entropy=17.6559 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 94120] reward=-118193279.2 actor_loss=0.2697 critic_loss=122391648044.1379 entropy=17.6640 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 94120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532567.6 mean_steps=14.1
|
|
[Episode 94130] reward=-122753323.7 actor_loss=0.2211 critic_loss=142694180864.0000 entropy=17.6662 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 94140] reward=-117268452.3 actor_loss=0.3157 critic_loss=121806657945.6000 entropy=17.6730 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 94140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510842.0 mean_steps=14.2
|
|
[Episode 94150] reward=-158645814.3 actor_loss=0.3266 critic_loss=5941506746210.4619 entropy=17.6844 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 94160] reward=-124498174.7 actor_loss=0.3399 critic_loss=151449514959.2381 entropy=17.6961 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 94160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485632.1 mean_steps=14.9
|
|
[Episode 94170] reward=-118570653.5 actor_loss=0.2707 critic_loss=114889011734.2609 entropy=17.6887 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 94180] reward=-117555228.5 actor_loss=0.2426 critic_loss=121451082410.6667 entropy=17.6952 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 94180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-478501.5 mean_steps=12.9
|
|
[Episode 94190] reward=-120151560.5 actor_loss=0.3076 critic_loss=123946688954.8108 entropy=17.6871 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 94200] reward=-121280949.6 actor_loss=0.2964 critic_loss=176390720000.0000 entropy=17.6819 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 94200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616921.0 mean_steps=13.1
|
|
[Episode 94210] reward=-115314448.3 actor_loss=0.3245 critic_loss=121880164509.5385 entropy=17.6826 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 94220] reward=-115520183.8 actor_loss=0.3427 critic_loss=114612915638.8571 entropy=17.6731 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 94220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492269.4 mean_steps=14.3
|
|
[Episode 94230] reward=-118906237.4 actor_loss=0.2332 critic_loss=116641402971.0222 entropy=17.6642 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 94240] reward=-120302454.1 actor_loss=0.2706 critic_loss=126567058195.6923 entropy=17.6558 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 94240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501239.4 mean_steps=13.9
|
|
[Episode 94250] reward=-113578668.5 actor_loss=0.3135 critic_loss=109170948815.5676 entropy=17.6829 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 94260] reward=-121587695.4 actor_loss=0.2136 critic_loss=121615973814.8571 entropy=17.6740 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 94260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487023.5 mean_steps=14.8
|
|
[Episode 94270] reward=-120998432.3 actor_loss=0.2590 critic_loss=123329074517.3333 entropy=17.6619 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 94280] reward=-115023353.1 actor_loss=0.3499 critic_loss=111040507904.0000 entropy=17.6536 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 94280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-536602.9 mean_steps=12.6
|
|
[Episode 94290] reward=-119129281.9 actor_loss=0.3391 critic_loss=117354452805.8182 entropy=17.6630 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 94300] reward=-122957341.7 actor_loss=0.2176 critic_loss=122621659225.0435 entropy=17.6652 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 94300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485489.0 mean_steps=14.1
|
|
[Episode 94310] reward=-121529653.5 actor_loss=0.2333 critic_loss=128334017877.3333 entropy=17.6719 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 94320] reward=-120603518.3 actor_loss=0.2944 critic_loss=114025742726.0952 entropy=17.6611 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 94320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-688667.9 mean_steps=12.1
|
|
[Episode 94330] reward=-118179343.0 actor_loss=0.3589 critic_loss=122075282909.8667 entropy=17.6553 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 94340] reward=-123257685.4 actor_loss=0.2339 critic_loss=126696089930.3226 entropy=17.6608 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 94340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624851.7 mean_steps=13.1
|
|
[Episode 94350] reward=-119320694.4 actor_loss=0.3069 critic_loss=121397085649.4545 entropy=17.6598 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 94360] reward=-110796503.3 actor_loss=0.3366 critic_loss=112971539038.8148 entropy=17.6721 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 94360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-585207.4 mean_steps=11.9
|
|
[Episode 94370] reward=-119012659.1 actor_loss=0.2462 critic_loss=115317181160.7273 entropy=17.6738 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 94380] reward=-120156447.9 actor_loss=0.2568 critic_loss=122418114800.9412 entropy=17.6753 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 94380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532474.3 mean_steps=13.2
|
|
[Episode 94390] reward=-118707112.2 actor_loss=0.3307 critic_loss=117895001819.4286 entropy=17.6639 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 94400] reward=-119962073.6 actor_loss=0.2259 critic_loss=117603932914.5263 entropy=17.6546 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 94400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-545490.2 mean_steps=14.6
|
|
[Episode 94410] reward=-117209179.3 actor_loss=0.2172 critic_loss=118152962612.9655 entropy=17.6593 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 94420] reward=-120889869.1 actor_loss=0.2344 critic_loss=115794311115.4872 entropy=17.6466 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 94420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-518911.8 mean_steps=12.1
|
|
[Episode 94430] reward=-117927002.7 actor_loss=0.3843 critic_loss=121214233434.8387 entropy=17.6419 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 94440] reward=-121217538.9 actor_loss=0.2830 critic_loss=121081495552.0000 entropy=17.6540 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 94440] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-696136.2 mean_steps=11.4
|
|
[Episode 94450] reward=-118031304.8 actor_loss=0.3230 critic_loss=122240411648.0000 entropy=17.6545 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 94460] reward=-119362288.1 actor_loss=0.2772 critic_loss=118200121389.5111 entropy=17.6544 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 94460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-354757.6 mean_steps=16.1
|
|
[Episode 94470] reward=-116455482.7 actor_loss=0.2553 critic_loss=110786265816.1778 entropy=17.6659 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 94480] reward=-119990751.2 actor_loss=0.3839 critic_loss=117752220732.2353 entropy=17.6595 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 94480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508100.2 mean_steps=14.0
|
|
[Episode 94490] reward=-117516586.0 actor_loss=0.2796 critic_loss=115441490875.7333 entropy=17.6672 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 94500] reward=-114227755.8 actor_loss=0.4083 critic_loss=112917808437.5814 entropy=17.6783 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 94500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486176.8 mean_steps=15.1
|
|
[Episode 94510] reward=-114658622.9 actor_loss=0.3561 critic_loss=116002325390.2222 entropy=17.6757 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 94520] reward=-121493190.2 actor_loss=0.3863 critic_loss=117208312651.2941 entropy=17.6857 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 94520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-610334.0 mean_steps=13.6
|
|
[Episode 94530] reward=-122384601.8 actor_loss=0.2000 critic_loss=124998462841.2632 entropy=17.6750 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 94540] reward=-120060677.0 actor_loss=0.3477 critic_loss=120456937881.6000 entropy=17.6675 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 94540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-440834.7 mean_steps=16.4
|
|
[Episode 94550] reward=-117638285.1 actor_loss=0.3630 critic_loss=197857537024.0000 entropy=17.6690 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 94560] reward=-115038334.2 actor_loss=0.3038 critic_loss=121235694114.1333 entropy=17.6626 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 94560] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-734448.2 mean_steps=10.2
|
|
[Episode 94570] reward=-116351985.9 actor_loss=0.2371 critic_loss=134621166508.9730 entropy=17.6636 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 94580] reward=-115720774.7 actor_loss=0.3588 critic_loss=115879011879.3846 entropy=17.6512 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 94580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463919.6 mean_steps=15.1
|
|
[Episode 94590] reward=-119756568.1 actor_loss=0.3197 critic_loss=116465870968.4706 entropy=17.6530 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 94600] reward=-115377504.1 actor_loss=0.1946 critic_loss=115281030771.6129 entropy=17.6574 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 94600] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-643042.7 mean_steps=11.2
|
|
[Episode 94610] reward=-115417659.8 actor_loss=0.3707 critic_loss=109780745240.9756 entropy=17.6611 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 94620] reward=-118395034.4 actor_loss=0.3630 critic_loss=115407567360.0000 entropy=17.6493 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 94620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-402581.1 mean_steps=14.3
|
|
[Episode 94630] reward=-120827524.2 actor_loss=0.2270 critic_loss=122287342828.3077 entropy=17.6393 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 94640] reward=-120751672.1 actor_loss=0.3522 critic_loss=127385979418.9474 entropy=17.6489 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 94640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-387748.7 mean_steps=15.2
|
|
[Episode 94650] reward=-119993203.9 actor_loss=0.2471 critic_loss=119685320463.0588 entropy=17.6435 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 94660] reward=-120323195.1 actor_loss=0.2735 critic_loss=118504949584.4571 entropy=17.6485 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 94660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610591.4 mean_steps=12.7
|
|
[Episode 94670] reward=-123600152.9 actor_loss=0.3329 critic_loss=121162935395.0968 entropy=17.6583 approx_kl=0.0116 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 94680] reward=-113631042.2 actor_loss=0.3958 critic_loss=111276895800.8889 entropy=17.6461 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 94680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537401.0 mean_steps=13.7
|
|
[Episode 94690] reward=-118480463.8 actor_loss=0.2863 critic_loss=114817969720.8889 entropy=17.6343 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 94700] reward=-117414988.5 actor_loss=0.3420 critic_loss=128559611904.0000 entropy=17.6246 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 94700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-406145.2 mean_steps=16.4
|
|
[Episode 94710] reward=-113734002.1 actor_loss=0.3479 critic_loss=126264944152.3810 entropy=17.6239 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 94720] reward=-121963029.5 actor_loss=0.3264 critic_loss=118139417551.2381 entropy=17.6225 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 94720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440535.1 mean_steps=14.2
|
|
[Episode 94730] reward=-118011508.3 actor_loss=0.3365 critic_loss=111410313728.0000 entropy=17.6219 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 94740] reward=-119111388.1 actor_loss=0.2922 critic_loss=111516868208.3902 entropy=17.6228 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 94740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-399001.1 mean_steps=14.5
|
|
[Episode 94750] reward=-115967388.2 actor_loss=0.3543 critic_loss=113428384587.2941 entropy=17.6202 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 94760] reward=-114064232.9 actor_loss=0.4153 critic_loss=119430998574.5455 entropy=17.5965 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 94760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497470.9 mean_steps=15.2
|
|
[Episode 94770] reward=-119747332.3 actor_loss=0.2720 critic_loss=130102849851.0769 entropy=17.5967 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 94780] reward=-117775148.0 actor_loss=0.3870 critic_loss=115628044146.7586 entropy=17.5892 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 94780] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371875.4 mean_steps=15.8
|
|
[Episode 94790] reward=-117853694.4 actor_loss=0.3731 critic_loss=133923537327.1579 entropy=17.6047 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 94800] reward=-116764192.1 actor_loss=0.2529 critic_loss=119099511506.8235 entropy=17.6039 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 94800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570019.8 mean_steps=13.8
|
|
[Episode 94810] reward=-118266083.5 actor_loss=0.2329 critic_loss=114021769557.3333 entropy=17.6081 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 94820] reward=-119256946.6 actor_loss=0.3054 critic_loss=121227578026.6667 entropy=17.5989 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 94820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-432229.7 mean_steps=14.5
|
|
[Episode 94830] reward=-121458983.7 actor_loss=0.3359 critic_loss=136376742863.2381 entropy=17.6259 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 94840] reward=-118435938.5 actor_loss=0.2656 critic_loss=122624256409.6000 entropy=17.6176 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 94840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476268.1 mean_steps=13.8
|
|
[Episode 94850] reward=-113648662.9 actor_loss=0.3011 critic_loss=118936873290.3226 entropy=17.6255 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 94860] reward=-117669130.6 actor_loss=0.3224 critic_loss=120770716020.3636 entropy=17.6181 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 94860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527193.8 mean_steps=14.4
|
|
[Episode 94870] reward=-114839209.4 actor_loss=0.3913 critic_loss=116799522745.3793 entropy=17.6231 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 94880] reward=-115078343.9 actor_loss=0.3842 critic_loss=115179831478.0444 entropy=17.6240 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 94880] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-585783.9 mean_steps=11.9
|
|
[Episode 94890] reward=-114767718.3 actor_loss=0.3132 critic_loss=124284983296.0000 entropy=17.6126 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 94900] reward=-113798504.0 actor_loss=0.3642 critic_loss=121578219292.4444 entropy=17.6186 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 94900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-605969.2 mean_steps=14.5
|
|
[Episode 94910] reward=-118227660.5 actor_loss=0.2994 critic_loss=117380705328.7619 entropy=17.6180 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 94920] reward=-113080696.9 actor_loss=0.3195 critic_loss=112463510823.8222 entropy=17.6326 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 94920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450835.6 mean_steps=14.5
|
|
[Episode 94930] reward=-111022296.2 actor_loss=0.3551 critic_loss=108759960689.7778 entropy=17.6234 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 94940] reward=-116107458.8 actor_loss=0.3780 critic_loss=114669448396.8000 entropy=17.6243 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 94940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-527224.6 mean_steps=13.3
|
|
[Episode 94950] reward=-121458312.0 actor_loss=0.2872 critic_loss=120980200691.8095 entropy=17.6182 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 94960] reward=-116737596.4 actor_loss=0.3337 critic_loss=115952204003.5556 entropy=17.6024 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 94960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455345.6 mean_steps=14.7
|
|
[Episode 94970] reward=-118662518.9 actor_loss=0.3419 critic_loss=119350714368.0000 entropy=17.5959 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 94980] reward=-114653934.0 actor_loss=0.3429 critic_loss=119087197345.6842 entropy=17.6026 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 94980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600416.1 mean_steps=12.7
|
|
[Episode 94990] reward=-116297141.4 actor_loss=0.2333 critic_loss=107364401334.0444 entropy=17.5969 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 95000] reward=-115935906.0 actor_loss=0.2785 critic_loss=115848637192.8276 entropy=17.5866 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 95000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484648.5 mean_steps=14.0
|
|
[Episode 95010] reward=-120726895.9 actor_loss=0.2425 critic_loss=116890030838.5185 entropy=17.6000 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 95020] reward=-117406442.2 actor_loss=0.2475 critic_loss=127806939864.1778 entropy=17.5841 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 95020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-481892.4 mean_steps=16.1
|
|
[Episode 95030] reward=-119125404.5 actor_loss=0.1961 critic_loss=127318266096.9412 entropy=17.5858 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 95040] reward=-115171493.6 actor_loss=0.4378 critic_loss=110276243729.0667 entropy=17.5911 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 95040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519328.7 mean_steps=14.1
|
|
[Episode 95050] reward=-115041074.0 actor_loss=0.3495 critic_loss=113444543237.6889 entropy=17.5892 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 95060] reward=-117929984.8 actor_loss=0.3769 critic_loss=126255322908.4444 entropy=17.6136 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 95060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482089.0 mean_steps=13.7
|
|
[Episode 95070] reward=-115943679.8 actor_loss=0.2835 critic_loss=118073042688.0000 entropy=17.6088 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 95080] reward=-121878608.9 actor_loss=0.2978 critic_loss=117831929760.7442 entropy=17.5948 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 95080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-672604.3 mean_steps=12.4
|
|
[Episode 95090] reward=-118044144.7 actor_loss=0.3143 critic_loss=125167652044.8000 entropy=17.5806 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 95100] reward=-117553663.7 actor_loss=0.2589 critic_loss=110959715441.7778 entropy=17.5752 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 95100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-621904.3 mean_steps=12.7
|
|
[Episode 95110] reward=-119921502.3 actor_loss=0.3801 critic_loss=117484894253.5111 entropy=17.5635 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 95120] reward=-113394722.9 actor_loss=0.3434 critic_loss=117869048361.5135 entropy=17.5816 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 95120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641170.0 mean_steps=12.2
|
|
[Episode 95130] reward=-113260183.4 actor_loss=0.3900 critic_loss=116053522295.4667 entropy=17.5666 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 95140] reward=-114009426.1 actor_loss=0.3171 critic_loss=113634521718.1538 entropy=17.5521 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 95140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526557.3 mean_steps=14.7
|
|
[Episode 95150] reward=-114743940.0 actor_loss=0.2807 critic_loss=127876868778.6667 entropy=17.5592 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 95160] reward=-116155183.6 actor_loss=0.2427 critic_loss=112458580178.0513 entropy=17.5582 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 95160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599234.3 mean_steps=12.6
|
|
[Episode 95170] reward=-4828281176.8 actor_loss=12.3830 critic_loss=48504933984501760.0000 entropy=17.5461 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 95180] reward=-251741993.5 actor_loss=0.5336 critic_loss=57423349829847.5781 entropy=17.5344 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 95180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399327.1 mean_steps=15.1
|
|
[Episode 95190] reward=-112527297.8 actor_loss=0.2937 critic_loss=110484251560.2286 entropy=17.5412 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 95200] reward=-560231584.0 actor_loss=21.7236 critic_loss=323180964235556.5625 entropy=17.5448 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 95200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-580308.2 mean_steps=13.7
|
|
[Episode 95210] reward=-510767252.1 actor_loss=143.2209 critic_loss=202251165499392.0000 entropy=17.5742 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1016 front_blocked=0
|
|
[Episode 95220] reward=-119040811.6 actor_loss=0.3914 critic_loss=129748542184.7273 entropy=17.5892 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 95220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-661281.0 mean_steps=13.3
|
|
[Episode 95230] reward=-119068363.5 actor_loss=0.3075 critic_loss=119384638610.2857 entropy=17.5941 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 95240] reward=-118450000.0 actor_loss=0.2692 critic_loss=115653094786.8445 entropy=17.5846 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 95240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462514.0 mean_steps=14.8
|
|
[Episode 95250] reward=-119532963.4 actor_loss=0.2566 critic_loss=119121794571.3778 entropy=17.6014 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 95260] reward=-7069078704.3 actor_loss=244.3522 critic_loss=75698377685204992.0000 entropy=17.6210 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1048 front_blocked=0
|
|
[Eval 95260] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-642453.2 mean_steps=12.6
|
|
[Episode 95270] reward=-121583822.7 actor_loss=0.2504 critic_loss=118138879271.8222 entropy=17.6000 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 95280] reward=-116304354.1 actor_loss=0.3441 critic_loss=116485389243.7333 entropy=17.6073 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 95280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539119.1 mean_steps=13.6
|
|
[Episode 95290] reward=-73049669715.6 actor_loss=308.1007 critic_loss=2564498807500832768.0000 entropy=17.6046 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0658 front_blocked=0
|
|
[Episode 95300] reward=-302246760.1 actor_loss=0.3200 critic_loss=76336344212408.5625 entropy=17.6024 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 95300] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-297556.5 mean_steps=17.2
|
|
[Episode 95310] reward=-264527514.9 actor_loss=0.4797 critic_loss=68238145748992.0000 entropy=17.6038 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 95320] reward=-4870961760.0 actor_loss=115.1880 critic_loss=51986817776609600.0000 entropy=17.6077 approx_kl=-0.0014 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Eval 95320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534985.5 mean_steps=13.2
|
|
[Episode 95330] reward=-132934259.7 actor_loss=0.3372 critic_loss=1058939649084.2354 entropy=17.6147 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 95340] reward=-905711025.9 actor_loss=0.7152 critic_loss=1382446343074611.2500 entropy=17.6265 approx_kl=0.0257 kl_stop=1 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 95340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-26204740.0 mean_steps=22.8
|
|
[Episode 95350] reward=-210523927.1 actor_loss=0.5396 critic_loss=31141345783629.9141 entropy=17.6334 approx_kl=0.0043 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 95360] reward=-3034518110.5 actor_loss=131.1826 critic_loss=18679591233126400.0000 entropy=17.6381 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 95360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436401.0 mean_steps=15.5
|
|
[Episode 95370] reward=-2628451988.1 actor_loss=33.8616 critic_loss=10657263770752342.0000 entropy=17.6668 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 95380] reward=-113872814.0 actor_loss=0.2956 critic_loss=112325125552.3556 entropy=17.6582 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 95380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495543.3 mean_steps=14.8
|
|
[Episode 95390] reward=-1670055886.8 actor_loss=0.4195 critic_loss=6694126256495002.0000 entropy=17.6519 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 95400] reward=-119712946.4 actor_loss=0.1755 critic_loss=174571184128.0000 entropy=17.6587 approx_kl=0.0039 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 95400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394717.8 mean_steps=16.3
|
|
[Episode 95410] reward=-119229560.8 actor_loss=0.2501 critic_loss=210262451244.5217 entropy=17.6657 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 95420] reward=-121183992.8 actor_loss=0.3326 critic_loss=120464479085.7143 entropy=17.6674 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 95420] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-660196.3 mean_steps=11.2
|
|
[Episode 95430] reward=-119036540.9 actor_loss=0.3201 critic_loss=135922440601.6000 entropy=17.6656 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 95440] reward=-120782530.1 actor_loss=0.3756 critic_loss=209684239453.0909 entropy=17.6711 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 95440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560806.3 mean_steps=13.7
|
|
[Episode 95450] reward=-479736653.0 actor_loss=0.3256 critic_loss=367666043337262.5625 entropy=17.6826 approx_kl=0.0028 kl_stop=1 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 95460] reward=-117992519.5 actor_loss=0.3350 critic_loss=123758750666.1053 entropy=17.6846 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 95460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433803.3 mean_steps=15.7
|
|
[Episode 95470] reward=-114978893.4 actor_loss=0.3544 critic_loss=109400189477.4634 entropy=17.6857 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 95480] reward=-123905068.9 actor_loss=0.2564 critic_loss=389358394709.3333 entropy=17.6899 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 95480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409478.6 mean_steps=15.1
|
|
[Episode 95490] reward=-115781665.2 actor_loss=0.2510 critic_loss=120268817408.0000 entropy=17.6847 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 95500] reward=-123569303.4 actor_loss=0.2026 critic_loss=218829073066.6667 entropy=17.6800 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 95500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501844.2 mean_steps=15.2
|
|
[Episode 95510] reward=-115406399.5 actor_loss=0.2611 critic_loss=113669969783.4667 entropy=17.6661 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 95520] reward=-116598405.5 actor_loss=0.3071 critic_loss=120849230362.9474 entropy=17.6696 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 95520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-358142.6 mean_steps=17.1
|
|
[Episode 95530] reward=-113088711.9 actor_loss=0.3802 critic_loss=118430032616.7273 entropy=17.6634 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 95540] reward=-113421794.7 actor_loss=0.3140 critic_loss=112175138548.8696 entropy=17.6647 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 95540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560319.7 mean_steps=12.6
|
|
[Episode 95550] reward=-114905760.5 actor_loss=0.2764 critic_loss=117183099933.2571 entropy=17.6620 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 95560] reward=-115721526.7 actor_loss=0.3092 critic_loss=117522489526.0444 entropy=17.6684 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 95560] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-309848.4 mean_steps=17.4
|
|
[Episode 95570] reward=-120524328.9 actor_loss=0.2752 critic_loss=122881424976.8421 entropy=17.6624 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 95580] reward=-120416801.5 actor_loss=0.2498 critic_loss=123139115690.6667 entropy=17.6546 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 95580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449544.7 mean_steps=14.7
|
|
[Episode 95590] reward=-115583047.0 actor_loss=0.3108 critic_loss=110311289651.2000 entropy=17.6577 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 95600] reward=-121032870.7 actor_loss=0.2459 critic_loss=118403038248.9600 entropy=17.6731 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 95600] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-565984.8 mean_steps=10.9
|
|
[Episode 95610] reward=-118840231.9 actor_loss=0.2961 critic_loss=111976142517.6774 entropy=17.6894 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 95620] reward=-121960282.2 actor_loss=0.2877 critic_loss=130173240500.7059 entropy=17.6838 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 95620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-363823.9 mean_steps=16.1
|
|
[Episode 95630] reward=-121653387.7 actor_loss=0.3548 critic_loss=145006616985.6000 entropy=17.6849 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 95640] reward=-119526611.5 actor_loss=0.3011 critic_loss=127367050581.3333 entropy=17.6632 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 95640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451694.0 mean_steps=15.7
|
|
[Episode 95650] reward=-118135797.8 actor_loss=0.2675 critic_loss=182525095480.8889 entropy=17.6614 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 95660] reward=-112527315.4 actor_loss=0.3560 critic_loss=116732271656.9600 entropy=17.6813 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 95660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-585773.9 mean_steps=14.0
|
|
[Episode 95670] reward=-115667448.9 actor_loss=0.3106 critic_loss=112669499392.0000 entropy=17.6673 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 95680] reward=-119928005.1 actor_loss=0.3416 critic_loss=120970094819.5556 entropy=17.6641 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 95680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473927.4 mean_steps=14.8
|
|
[Episode 95690] reward=-119050984.1 actor_loss=0.2747 critic_loss=118969905798.7368 entropy=17.6580 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 95700] reward=-119573319.8 actor_loss=0.3943 critic_loss=125264864768.0000 entropy=17.6624 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 95700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-578720.3 mean_steps=14.7
|
|
[Episode 95710] reward=-113456041.3 actor_loss=0.2566 critic_loss=124182539753.7391 entropy=17.6653 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 95720] reward=-113058269.1 actor_loss=0.2794 critic_loss=118973964580.5714 entropy=17.6448 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 95720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533600.2 mean_steps=14.2
|
|
[Episode 95730] reward=-114420662.8 actor_loss=0.4421 critic_loss=108717121145.9048 entropy=17.6307 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 95740] reward=-114501110.0 actor_loss=0.3420 critic_loss=109319467008.0000 entropy=17.6227 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 95740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-649832.1 mean_steps=12.3
|
|
[Episode 95750] reward=-116291312.4 actor_loss=0.3409 critic_loss=116042544250.8800 entropy=17.6298 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 95760] reward=-114503024.0 actor_loss=0.1922 critic_loss=113035856083.8621 entropy=17.6226 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 95760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472592.4 mean_steps=15.0
|
|
[Episode 95770] reward=-118709766.4 actor_loss=0.3135 critic_loss=127878916960.7111 entropy=17.6096 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 95780] reward=-115933483.9 actor_loss=0.1825 critic_loss=112816444757.3333 entropy=17.5806 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 95780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399325.6 mean_steps=15.7
|
|
[Episode 95790] reward=-120428277.2 actor_loss=0.4004 critic_loss=120736548727.4667 entropy=17.5733 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 95800] reward=-119942269.6 actor_loss=0.3136 critic_loss=208923151564.8000 entropy=17.5744 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 95800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505870.2 mean_steps=13.4
|
|
[Episode 95810] reward=-120628880.5 actor_loss=0.3468 critic_loss=156120219179.8857 entropy=17.5778 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 95820] reward=-114991032.0 actor_loss=0.3199 critic_loss=114467293769.1429 entropy=17.5774 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 95820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-594117.5 mean_steps=12.8
|
|
[Episode 95830] reward=-120917962.1 actor_loss=0.2638 critic_loss=136724896944.5517 entropy=17.5824 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 95840] reward=-117674800.2 actor_loss=0.2584 critic_loss=118207966646.8571 entropy=17.5731 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 95840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449938.5 mean_steps=15.6
|
|
[Episode 95850] reward=-118204155.6 actor_loss=0.2813 critic_loss=138452112725.3333 entropy=17.5653 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 95860] reward=-116385804.8 actor_loss=0.4081 critic_loss=143114612640.7442 entropy=17.5637 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 95860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491654.5 mean_steps=15.3
|
|
[Episode 95870] reward=-115532763.0 actor_loss=0.3840 critic_loss=116926327163.2593 entropy=17.5491 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 95880] reward=-122251434.6 actor_loss=0.2598 critic_loss=126274628643.3103 entropy=17.5493 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 95880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536380.7 mean_steps=14.6
|
|
[Episode 95890] reward=-118150674.8 actor_loss=0.3624 critic_loss=133566588518.4000 entropy=17.5678 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 95900] reward=-115913512.4 actor_loss=0.2589 critic_loss=116529474030.3448 entropy=17.5673 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 95900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462779.1 mean_steps=14.1
|
|
[Episode 95910] reward=-115799172.9 actor_loss=0.3184 critic_loss=115649136412.4444 entropy=17.5756 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 95920] reward=-115556747.6 actor_loss=0.3119 critic_loss=121010739200.0000 entropy=17.5981 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 95920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-407272.1 mean_steps=15.4
|
|
[Episode 95930] reward=-118740156.9 actor_loss=0.3072 critic_loss=118953340342.8571 entropy=17.6021 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 95940] reward=-120198063.0 actor_loss=0.3756 critic_loss=126545261568.0000 entropy=17.6062 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 95940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471873.5 mean_steps=14.7
|
|
[Episode 95950] reward=-118849462.5 actor_loss=0.2923 critic_loss=119220202882.8445 entropy=17.6033 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 95960] reward=-118610669.2 actor_loss=0.2858 critic_loss=121538166784.0000 entropy=17.6084 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 95960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486841.6 mean_steps=14.1
|
|
[Episode 95970] reward=-113928232.6 actor_loss=0.3410 critic_loss=111893413705.9556 entropy=17.5984 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 95980] reward=-115507574.6 actor_loss=0.3016 critic_loss=122719844608.0000 entropy=17.5760 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 95980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492638.3 mean_steps=14.2
|
|
[Episode 95990] reward=-117353553.8 actor_loss=0.3213 critic_loss=119467244657.7778 entropy=17.5836 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 96000] reward=-117972155.1 actor_loss=0.1787 critic_loss=113135264689.2308 entropy=17.5718 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 96000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487503.2 mean_steps=15.2
|
|
[Episode 96010] reward=-118401419.6 actor_loss=0.2269 critic_loss=112738512076.8000 entropy=17.5675 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 96020] reward=-121695768.9 actor_loss=0.2819 critic_loss=120097293198.2222 entropy=17.5638 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 96020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-355698.7 mean_steps=16.7
|
|
[Episode 96030] reward=-118894765.4 actor_loss=0.3672 critic_loss=118152895055.6444 entropy=17.5598 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 96040] reward=-115953026.1 actor_loss=0.3124 critic_loss=113919664332.8000 entropy=17.5643 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 96040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435534.2 mean_steps=15.6
|
|
[Episode 96050] reward=-115739771.9 actor_loss=0.3040 critic_loss=120604125063.5294 entropy=17.5513 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 96060] reward=-119507730.3 actor_loss=0.3042 critic_loss=136689452178.2857 entropy=17.5416 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 96060] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-277344.3 mean_steps=17.6
|
|
[Episode 96070] reward=-119289776.0 actor_loss=0.2550 critic_loss=118689192960.0000 entropy=17.5276 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 96080] reward=-116856901.0 actor_loss=0.2034 critic_loss=119002820061.8667 entropy=17.5179 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 96080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584203.7 mean_steps=13.0
|
|
[Episode 96090] reward=-114607659.6 actor_loss=0.2908 critic_loss=111805304591.0588 entropy=17.5088 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 96100] reward=-119823873.7 actor_loss=0.3328 critic_loss=117299126272.0000 entropy=17.4977 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 96100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-535011.0 mean_steps=15.2
|
|
[Episode 96110] reward=-121372965.5 actor_loss=0.2499 critic_loss=134200311417.9048 entropy=17.4917 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 96120] reward=-113680249.1 actor_loss=0.3743 critic_loss=112360175908.5714 entropy=17.4916 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 96120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460216.6 mean_steps=14.8
|
|
[Episode 96130] reward=-122299616.7 actor_loss=0.3186 critic_loss=246047129600.0000 entropy=17.4884 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 96140] reward=-113464405.7 actor_loss=0.3837 critic_loss=110812797048.4706 entropy=17.4887 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 96140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-604882.2 mean_steps=13.0
|
|
[Episode 96150] reward=-120780515.8 actor_loss=0.3948 critic_loss=115771952696.8889 entropy=17.5037 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 96160] reward=-118795556.4 actor_loss=0.3754 critic_loss=111137435863.5789 entropy=17.5062 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 96160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478398.2 mean_steps=14.4
|
|
[Episode 96170] reward=-114279469.1 actor_loss=0.2007 critic_loss=113313260885.3333 entropy=17.5094 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 96180] reward=-112498503.8 actor_loss=0.3369 critic_loss=119441569792.0000 entropy=17.4940 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 96180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532463.4 mean_steps=13.4
|
|
[Episode 96190] reward=-115515995.0 actor_loss=0.2892 critic_loss=121682417891.5556 entropy=17.5044 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 96200] reward=-120968127.8 actor_loss=0.3204 critic_loss=119067626259.6923 entropy=17.4939 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 96200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550428.4 mean_steps=14.2
|
|
[Episode 96210] reward=-114738488.5 actor_loss=0.3322 critic_loss=109477108297.1429 entropy=17.5076 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 96220] reward=-110209424.0 actor_loss=0.3403 critic_loss=106949337088.0000 entropy=17.5043 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 96220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530882.9 mean_steps=14.8
|
|
[Episode 96230] reward=-214741662.5 actor_loss=0.6186 critic_loss=33101499643403.3789 entropy=17.4978 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 96240] reward=-114673501.2 actor_loss=0.2940 critic_loss=114367642477.7143 entropy=17.5132 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 96240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454225.5 mean_steps=14.9
|
|
[Episode 96250] reward=-119737469.0 actor_loss=0.2605 critic_loss=120302582897.7778 entropy=17.5088 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 96260] reward=-116556496.0 actor_loss=0.2817 critic_loss=113340450718.4762 entropy=17.4949 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 96260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448355.7 mean_steps=16.0
|
|
[Episode 96270] reward=-115656256.0 actor_loss=0.3531 critic_loss=111336015644.4444 entropy=17.5056 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 96280] reward=-116614372.0 actor_loss=0.3150 critic_loss=109988976503.4667 entropy=17.5088 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 96280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560935.4 mean_steps=13.9
|
|
[Episode 96290] reward=-116439362.6 actor_loss=0.2762 critic_loss=114196658176.0000 entropy=17.5079 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 96300] reward=-115675732.7 actor_loss=0.2950 critic_loss=111225998825.7391 entropy=17.5020 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 96300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-603170.7 mean_steps=13.6
|
|
[Episode 96310] reward=-112858869.1 actor_loss=0.3812 critic_loss=109853064192.0000 entropy=17.4898 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 96320] reward=-118387882.3 actor_loss=0.2392 critic_loss=117633439382.5882 entropy=17.4986 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 96320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409211.2 mean_steps=16.4
|
|
[Episode 96330] reward=-115935299.4 actor_loss=0.2633 critic_loss=110426238156.8000 entropy=17.4961 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 96340] reward=-6331443754.6 actor_loss=0.3807 critic_loss=80074720425057056.0000 entropy=17.5103 approx_kl=0.0121 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 96340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532461.8 mean_steps=13.3
|
|
[Episode 96350] reward=-117328037.2 actor_loss=0.2713 critic_loss=110866776064.0000 entropy=17.5098 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 96360] reward=-114908499.2 actor_loss=0.3072 critic_loss=110723376308.7059 entropy=17.5025 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 96360] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-367867.5 mean_steps=17.4
|
|
[Episode 96370] reward=-115286685.2 actor_loss=0.2596 critic_loss=110300435078.7368 entropy=17.5128 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 96380] reward=-117193400.3 actor_loss=0.3159 critic_loss=114830086144.0000 entropy=17.5022 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 96380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-504636.3 mean_steps=13.3
|
|
[Episode 96390] reward=-120534612.6 actor_loss=0.3624 critic_loss=115837253713.9200 entropy=17.5181 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 96400] reward=-108072736.2 actor_loss=0.4636 critic_loss=109313619968.0000 entropy=17.5114 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 96400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562689.0 mean_steps=12.6
|
|
[Episode 96410] reward=-118500882.0 actor_loss=0.2635 critic_loss=117743882331.0222 entropy=17.5359 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 96420] reward=-117245967.7 actor_loss=0.3621 critic_loss=116985090606.5455 entropy=17.5415 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 96420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528126.6 mean_steps=14.2
|
|
[Episode 96430] reward=-112327010.1 actor_loss=0.2683 critic_loss=111988196190.3158 entropy=17.5591 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 96440] reward=-112955963.8 actor_loss=0.3451 critic_loss=110934436522.6667 entropy=17.5575 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 96440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433356.8 mean_steps=14.9
|
|
[Episode 96450] reward=-118915112.6 actor_loss=0.2629 critic_loss=117857786265.6000 entropy=17.5507 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 96460] reward=-117051050.5 actor_loss=0.3751 critic_loss=113332385109.3333 entropy=17.5568 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 96460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-600353.9 mean_steps=12.1
|
|
[Episode 96470] reward=-112288571.2 actor_loss=0.3590 critic_loss=114021007067.4286 entropy=17.5390 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 96480] reward=-217125845.7 actor_loss=0.1875 critic_loss=24090485447856.5508 entropy=17.5268 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 96480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501047.7 mean_steps=13.6
|
|
[Episode 96490] reward=-133830682.9 actor_loss=0.2717 critic_loss=2639020165984.7109 entropy=17.5226 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 96500] reward=-115092704.4 actor_loss=0.3077 critic_loss=116431818660.9778 entropy=17.5213 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 96500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-678054.8 mean_steps=12.1
|
|
[Episode 96510] reward=-110226369.9 actor_loss=0.2973 critic_loss=104432610553.0811 entropy=17.5190 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 96520] reward=-105222858.4 actor_loss=0.4398 critic_loss=110524865114.3529 entropy=17.5127 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 96520] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-633125.6 mean_steps=10.2
|
|
[Episode 96530] reward=-117923237.0 actor_loss=0.1942 critic_loss=116916511519.2195 entropy=17.5257 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 96540] reward=-17304208909.7 actor_loss=658.4865 critic_loss=201306531639416160.0000 entropy=17.5558 approx_kl=0.0339 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 96540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-97151251.1 mean_steps=19.2
|
|
[Episode 96550] reward=-1466540066.0 actor_loss=0.4137 critic_loss=4573319026180096.0000 entropy=17.5356 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 96560] reward=-108124192.0 actor_loss=0.4107 critic_loss=104514776632.8889 entropy=17.5167 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 96560] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-606889.2 mean_steps=11.9
|
|
[Episode 96570] reward=-113785966.4 actor_loss=0.2661 critic_loss=111400761013.6774 entropy=17.5241 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 96580] reward=-5392983027.3 actor_loss=45.7738 critic_loss=35133571072489336.0000 entropy=17.5333 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 96580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563859.1 mean_steps=12.9
|
|
[Episode 96590] reward=-114085941.5 actor_loss=0.2914 critic_loss=114005366404.7407 entropy=17.5315 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 96600] reward=-110577497.7 actor_loss=0.3934 critic_loss=107160803555.5556 entropy=17.5404 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 96600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-405759.1 mean_steps=16.4
|
|
[Episode 96610] reward=-312978057.4 actor_loss=65.5927 critic_loss=94417745513378.9062 entropy=17.5431 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 96620] reward=-118361799.6 actor_loss=0.3180 critic_loss=127365650659.5556 entropy=17.5451 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 96620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-611265.3 mean_steps=13.2
|
|
[Episode 96630] reward=-2131702896.9 actor_loss=0.4735 critic_loss=9449525349915762.0000 entropy=17.5311 approx_kl=0.0040 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 96640] reward=-112954845.3 actor_loss=0.3896 critic_loss=114621292251.4286 entropy=17.5450 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 96640] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-595593.2 mean_steps=10.7
|
|
[Episode 96650] reward=-118784248.7 actor_loss=0.4163 critic_loss=125040857673.1429 entropy=17.5543 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 96660] reward=-117496664.0 actor_loss=0.2760 critic_loss=132835393536.0000 entropy=17.5585 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 96660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-391099.8 mean_steps=15.5
|
|
[Episode 96670] reward=-116183157.7 actor_loss=0.2952 critic_loss=122320839566.2222 entropy=17.5616 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 96680] reward=-6205095764.3 actor_loss=118.7088 critic_loss=41679897470808152.0000 entropy=17.5548 approx_kl=-0.0003 kl_stop=0 intervention_rate=0.1035 front_blocked=0
|
|
[Eval 96680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-635662.2 mean_steps=12.3
|
|
[Episode 96690] reward=-117132637.9 actor_loss=0.3607 critic_loss=330786641861.4857 entropy=17.5424 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 96700] reward=-115740121.0 actor_loss=0.3389 critic_loss=121764607414.8571 entropy=17.5552 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 96700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-443015.1 mean_steps=14.9
|
|
[Episode 96710] reward=-114006200.1 actor_loss=0.1840 critic_loss=112237359299.0476 entropy=17.5490 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 96720] reward=-112145013.9 actor_loss=0.3468 critic_loss=177197004117.3333 entropy=17.5488 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 96720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-653721.5 mean_steps=12.3
|
|
[Episode 96730] reward=-119305303.3 actor_loss=0.2977 critic_loss=116830603673.6000 entropy=17.5551 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 96740] reward=-3000630402.5 actor_loss=157.5260 critic_loss=18473258045471312.0000 entropy=17.5661 approx_kl=-0.0003 kl_stop=0 intervention_rate=0.1139 front_blocked=0
|
|
[Eval 96740] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-652709.0 mean_steps=10.3
|
|
[Episode 96750] reward=-8220441515.3 actor_loss=339.0754 critic_loss=42273769995714192.0000 entropy=17.5710 approx_kl=0.0047 kl_stop=0 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 96760] reward=-118017764.7 actor_loss=0.3364 critic_loss=124343408230.4000 entropy=17.5734 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 96760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-610681.1 mean_steps=12.1
|
|
[Episode 96770] reward=-675425381.5 actor_loss=0.1858 critic_loss=821066485039832.1250 entropy=17.5818 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 96780] reward=-2102232299.1 actor_loss=0.4756 critic_loss=6029498455956034.0000 entropy=17.6003 approx_kl=0.0020 kl_stop=1 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 96780] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-332833.8 mean_steps=16.8
|
|
[Episode 96790] reward=-5371408713.7 actor_loss=241.3579 critic_loss=33805531127293076.0000 entropy=17.6061 approx_kl=0.0188 kl_stop=1 intervention_rate=0.1113 front_blocked=0
|
|
[Episode 96800] reward=-1945486177.2 actor_loss=0.3562 critic_loss=7429126586839859.0000 entropy=17.6195 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1120 front_blocked=0
|
|
[Eval 96800] success_rate=0.250 qp_infeasible_rate=0.600 mean_return=-48601494187.0 mean_steps=491.2
|
|
[Episode 96810] reward=-116318424.1 actor_loss=0.2211 critic_loss=118713108070.4000 entropy=17.6220 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 96820] reward=-9692720651.0 actor_loss=0.6298 critic_loss=95994308114704576.0000 entropy=17.6308 approx_kl=0.0412 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Eval 96820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-512770.3 mean_steps=15.3
|
|
[Episode 96830] reward=-5563648409.3 actor_loss=0.7227 critic_loss=51073735026383256.0000 entropy=17.6435 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1133 front_blocked=0
|
|
[Episode 96840] reward=-118915071.9 actor_loss=0.4145 critic_loss=122185226467.5556 entropy=17.6477 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 96840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534054.3 mean_steps=13.4
|
|
[Episode 96850] reward=-336356599.7 actor_loss=0.9364 critic_loss=153875981591438.2188 entropy=17.6457 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 96860] reward=-114279480.6 actor_loss=0.2651 critic_loss=112649785461.0286 entropy=17.6627 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 96860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492258.1 mean_steps=13.9
|
|
[Episode 96870] reward=-421278590.7 actor_loss=91.1096 critic_loss=262547105369916.9375 entropy=17.6709 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 96880] reward=-552731086.3 actor_loss=0.2855 critic_loss=320181967263425.4375 entropy=17.6836 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 96880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528586.5 mean_steps=14.4
|
|
[Episode 96890] reward=-142662287.1 actor_loss=0.3706 critic_loss=4109010422603.2939 entropy=17.6828 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 96900] reward=-226244347.8 actor_loss=0.3759 critic_loss=46908158858581.3359 entropy=17.6983 approx_kl=0.0041 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 96900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-375951.0 mean_steps=16.3
|
|
[Episode 96910] reward=-7967660836.0 actor_loss=0.2164 critic_loss=52076302752115736.0000 entropy=17.7204 approx_kl=0.0043 kl_stop=0 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 96920] reward=-120787910.8 actor_loss=0.3181 critic_loss=137758011578.1818 entropy=17.7227 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 96920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468206.4 mean_steps=14.6
|
|
[Episode 96930] reward=-1956771534.1 actor_loss=2.5785 critic_loss=2227327522511985.7500 entropy=17.7171 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 96940] reward=-4669236839.0 actor_loss=19.3347 critic_loss=33739551532908544.0000 entropy=17.7424 approx_kl=0.0308 kl_stop=1 intervention_rate=0.1081 front_blocked=0
|
|
[Eval 96940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543528.2 mean_steps=13.7
|
|
[Episode 96950] reward=-2733536710.5 actor_loss=0.5000 critic_loss=8810302054253804.0000 entropy=17.7486 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1029 front_blocked=0
|
|
[Episode 96960] reward=-1504332647.8 actor_loss=6.1571 critic_loss=2751329474707456.0000 entropy=17.7499 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1120 front_blocked=0
|
|
[Eval 96960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-207913565.9 mean_steps=26.1
|
|
[Episode 96970] reward=-289212306.2 actor_loss=0.3614 critic_loss=92635666299835.7344 entropy=17.7569 approx_kl=-0.0005 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 96980] reward=-840808720.3 actor_loss=1.0105 critic_loss=1370821097572644.5000 entropy=17.7526 approx_kl=0.0034 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 96980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-497962.7 mean_steps=13.2
|
|
[Episode 96990] reward=-110985776.0 actor_loss=0.4499 critic_loss=116138211238.9565 entropy=17.7592 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 97000] reward=-1394873194.4 actor_loss=0.3043 critic_loss=3395297616926534.0000 entropy=17.7465 approx_kl=0.0045 kl_stop=1 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 97000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-547746.9 mean_steps=12.8
|
|
[Episode 97010] reward=-624286311.0 actor_loss=0.4184 critic_loss=646324820464981.3750 entropy=17.7469 approx_kl=0.0044 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 97020] reward=-118643320.9 actor_loss=0.2878 critic_loss=127081391860.8696 entropy=17.7564 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 97020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465448.1 mean_steps=16.1
|
|
[Episode 97030] reward=-324915403.3 actor_loss=37.8376 critic_loss=131676130071893.3281 entropy=17.7556 approx_kl=0.0037 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 97040] reward=-463992923.6 actor_loss=0.2762 critic_loss=339101693771776.0000 entropy=17.7507 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 97040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475178.5 mean_steps=14.8
|
|
[Episode 97050] reward=-112358446.9 actor_loss=0.4650 critic_loss=109059302832.3556 entropy=17.7499 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 97060] reward=-115960941.4 actor_loss=0.2605 critic_loss=149048224790.7556 entropy=17.7310 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 97060] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-718439.1 mean_steps=10.4
|
|
[Episode 97070] reward=-491371315.4 actor_loss=0.3462 critic_loss=145744232911793.2188 entropy=17.7001 approx_kl=0.0035 kl_stop=1 intervention_rate=0.1035 front_blocked=0
|
|
[Episode 97080] reward=-572686018.5 actor_loss=0.3231 critic_loss=584327649820672.0000 entropy=17.6965 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 97080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526518.8 mean_steps=13.8
|
|
[Episode 97090] reward=-113482673.6 actor_loss=0.3206 critic_loss=114402047863.4667 entropy=17.6893 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 97100] reward=-119007655.6 actor_loss=0.3304 critic_loss=218553311049.9556 entropy=17.6851 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 97100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-587465.4 mean_steps=12.9
|
|
[Episode 97110] reward=-118912929.9 actor_loss=0.2442 critic_loss=116869139842.8445 entropy=17.6715 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 97120] reward=-114153111.6 actor_loss=0.2813 critic_loss=112742644303.6444 entropy=17.6343 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 97120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430298.4 mean_steps=15.0
|
|
[Episode 97130] reward=-123938300.6 actor_loss=0.3512 critic_loss=386380628332.0889 entropy=17.6394 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 97140] reward=-111502071.4 actor_loss=0.3467 critic_loss=114227629442.8445 entropy=17.6342 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 97140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469220.6 mean_steps=15.0
|
|
[Episode 97150] reward=-117321183.3 actor_loss=0.2936 critic_loss=118106666689.4222 entropy=17.6108 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 97160] reward=-114782086.8 actor_loss=0.2792 critic_loss=107898757120.0000 entropy=17.6056 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 97160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-633674.7 mean_steps=12.2
|
|
[Episode 97170] reward=-321560348.8 actor_loss=0.6218 critic_loss=109821812119961.5938 entropy=17.6054 approx_kl=0.0114 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 97180] reward=-111179056.0 actor_loss=0.4164 critic_loss=105515244369.1707 entropy=17.6130 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 97180] success_rate=0.200 qp_infeasible_rate=0.750 mean_return=-21794819963.3 mean_steps=171.6
|
|
[Episode 97190] reward=-119437589.0 actor_loss=0.2308 critic_loss=124025954082.5946 entropy=17.6120 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 97200] reward=-117369387.0 actor_loss=0.2583 critic_loss=113744921122.1333 entropy=17.6003 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 97200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-387780.0 mean_steps=15.8
|
|
[Episode 97210] reward=-116626939.8 actor_loss=0.2845 critic_loss=124187908869.6889 entropy=17.6055 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 97220] reward=-119222136.7 actor_loss=0.2004 critic_loss=302913592832.0000 entropy=17.6163 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 97220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-448376.8 mean_steps=16.9
|
|
[Episode 97230] reward=-117998455.7 actor_loss=0.2592 critic_loss=122616883200.0000 entropy=17.6169 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 97240] reward=-170784483.2 actor_loss=0.4844 critic_loss=7303284064256.0000 entropy=17.6132 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 97240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-556884.3 mean_steps=12.8
|
|
[Episode 97250] reward=-163576453.9 actor_loss=0.2838 critic_loss=7930060100244.6455 entropy=17.6165 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 97260] reward=-297243099.5 actor_loss=0.3435 critic_loss=95211423618389.3281 entropy=17.6276 approx_kl=0.0036 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 97260] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-626715.1 mean_steps=12.2
|
|
[Episode 97270] reward=-119023021.4 actor_loss=0.2198 critic_loss=123228727705.6000 entropy=17.6144 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 97280] reward=-118372518.5 actor_loss=0.2962 critic_loss=117184222367.2889 entropy=17.6212 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 97280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440150561.4 mean_steps=35.2
|
|
[Episode 97290] reward=-114106906.5 actor_loss=0.3858 critic_loss=112366905662.5778 entropy=17.6155 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 97300] reward=-6232356490.3 actor_loss=22.0497 critic_loss=73193556575926496.0000 entropy=17.6441 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 97300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-8277723100.5 mean_steps=98.3
|
|
[Episode 97310] reward=-117591315.0 actor_loss=0.3449 critic_loss=124993423184.4571 entropy=17.6432 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 97320] reward=-114901515.8 actor_loss=0.2387 critic_loss=128583974365.8667 entropy=17.6538 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 97320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495922.4 mean_steps=15.1
|
|
[Episode 97330] reward=-120734309.2 actor_loss=0.1873 critic_loss=117799265166.2222 entropy=17.6656 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 97340] reward=-121150666.9 actor_loss=0.2301 critic_loss=128947234952.5333 entropy=17.6794 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 97340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-443706.9 mean_steps=12.8
|
|
[Episode 97350] reward=-4287161397.5 actor_loss=0.3980 critic_loss=32569634588657600.0000 entropy=17.6738 approx_kl=0.0043 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 97360] reward=-1658872835.4 actor_loss=0.3492 critic_loss=5613205459649374.0000 entropy=17.6887 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 97360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-77761947.3 mean_steps=26.0
|
|
[Episode 97370] reward=-112254184.8 actor_loss=0.3334 critic_loss=112988302358.7556 entropy=17.6893 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 97380] reward=-1951687488.8 actor_loss=60.2881 critic_loss=6644841345712128.0000 entropy=17.6889 approx_kl=0.0140 kl_stop=1 intervention_rate=0.1081 front_blocked=0
|
|
[Eval 97380] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-332178.1 mean_steps=16.9
|
|
[Episode 97390] reward=-117172900.2 actor_loss=0.2682 critic_loss=123498947470.2222 entropy=17.6915 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 97400] reward=-114713176.8 actor_loss=0.3346 critic_loss=115855854250.6667 entropy=17.6824 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 97400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451555.6 mean_steps=14.7
|
|
[Episode 97410] reward=-1487956076.9 actor_loss=53.9505 critic_loss=3078766943272960.0000 entropy=17.6833 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 97420] reward=-10036751730.7 actor_loss=1.2106 critic_loss=64581076337108112.0000 entropy=17.7030 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 97420] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-345557.7 mean_steps=16.9
|
|
[Episode 97430] reward=-804260279.1 actor_loss=22.0841 critic_loss=789861921837428.3750 entropy=17.7116 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 97440] reward=-12428487165.1 actor_loss=31.0119 critic_loss=181541715199813216.0000 entropy=17.7180 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 97440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554833.3 mean_steps=12.8
|
|
[Episode 97450] reward=-422582969.7 actor_loss=0.3870 critic_loss=271663287500800.0000 entropy=17.7367 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 97460] reward=-120458126.6 actor_loss=0.3058 critic_loss=124585978902.7556 entropy=17.7315 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 97460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399342.8 mean_steps=15.8
|
|
[Episode 97470] reward=-119093933.7 actor_loss=0.2666 critic_loss=118060789486.9333 entropy=17.7353 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 97480] reward=-119177391.7 actor_loss=0.3720 critic_loss=122921969072.3556 entropy=17.7404 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 97480] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-650657.9 mean_steps=12.3
|
|
[Episode 97490] reward=-120765160.5 actor_loss=0.3373 critic_loss=121725222912.0000 entropy=17.7502 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 97500] reward=-114908858.9 actor_loss=0.3102 critic_loss=113703420632.1778 entropy=17.7617 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 97500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-442205434.7 mean_steps=25.6
|
|
[Episode 97510] reward=-3345271442.7 actor_loss=204.8098 critic_loss=15966383512328602.0000 entropy=17.7672 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 97520] reward=-116501067.4 actor_loss=0.4471 critic_loss=117374430048.7111 entropy=17.7706 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1504 front_blocked=0
|
|
[Eval 97520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-254589695.7 mean_steps=28.0
|
|
[Episode 97530] reward=-7689544391.3 actor_loss=24.1772 critic_loss=92534310397856976.0000 entropy=17.7744 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 97540] reward=-121751778.9 actor_loss=0.2782 critic_loss=123868469657.6000 entropy=17.7746 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 97540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-525603187.9 mean_steps=33.7
|
|
[Episode 97550] reward=-9992403118.9 actor_loss=72.5581 critic_loss=170435494990053376.0000 entropy=17.7728 approx_kl=0.0007 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Episode 97560] reward=-1094808452.2 actor_loss=0.4986 critic_loss=1371348927593235.7500 entropy=17.7967 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1159 front_blocked=0
|
|
[Eval 97560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-377675.6 mean_steps=16.2
|
|
[Episode 97570] reward=-2798254534.3 actor_loss=0.3046 critic_loss=16924354791126540.0000 entropy=17.8178 approx_kl=0.0005 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 97580] reward=-6223360933.6 actor_loss=500.5370 critic_loss=62182872664261704.0000 entropy=17.8215 approx_kl=0.0016 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Eval 97580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-564551.5 mean_steps=12.9
|
|
[Episode 97590] reward=-9563926911.8 actor_loss=42.5608 critic_loss=82849752017890320.0000 entropy=17.8327 approx_kl=0.0035 kl_stop=0 intervention_rate=0.0996 front_blocked=0
|
|
[Episode 97600] reward=-16998757027.3 actor_loss=58.1225 critic_loss=165779444066855584.0000 entropy=17.8352 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 97600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-564255.8 mean_steps=12.7
|
|
[Episode 97610] reward=-27122249016.4 actor_loss=255.6434 critic_loss=542224736481339008.0000 entropy=17.8319 approx_kl=0.0159 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 97620] reward=-950512852.3 actor_loss=0.2625 critic_loss=1712682609928510.5000 entropy=17.8438 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 97620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-399546402.4 mean_steps=37.8
|
|
[Episode 97630] reward=-121376499.0 actor_loss=0.3053 critic_loss=124020238654.5778 entropy=17.8557 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 97640] reward=-119901259.7 actor_loss=0.2984 critic_loss=129883933536.7111 entropy=17.8595 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 97640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-388713.6 mean_steps=17.4
|
|
[Episode 97650] reward=-4177414115.0 actor_loss=21.3921 critic_loss=22049015714895188.0000 entropy=17.8604 approx_kl=0.0033 kl_stop=1 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 97660] reward=-3951104694.0 actor_loss=14.7759 critic_loss=22570364794044416.0000 entropy=17.8566 approx_kl=0.0116 kl_stop=1 intervention_rate=0.1152 front_blocked=0
|
|
[Eval 97660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527864.5 mean_steps=14.2
|
|
[Episode 97670] reward=-9003482841.6 actor_loss=55.6129 critic_loss=99149062330122240.0000 entropy=17.8731 approx_kl=0.0188 kl_stop=1 intervention_rate=0.1146 front_blocked=0
|
|
[Episode 97680] reward=-119985321.9 actor_loss=0.2209 critic_loss=123106513897.2444 entropy=17.8866 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 97680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-535230.0 mean_steps=15.4
|
|
[Episode 97690] reward=-119614937.7 actor_loss=0.3136 critic_loss=192932323145.9556 entropy=17.8852 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 97700] reward=-113797439.4 actor_loss=0.3650 critic_loss=124732103975.8222 entropy=17.9017 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 97700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544172.5 mean_steps=13.8
|
|
[Episode 97710] reward=-115980133.6 actor_loss=0.3074 critic_loss=112380258986.6667 entropy=17.9163 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 97720] reward=-6655862974.9 actor_loss=88.9809 critic_loss=28912237357126996.0000 entropy=17.9090 approx_kl=0.0169 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 97720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581652.2 mean_steps=12.7
|
|
[Episode 97730] reward=-4423487660.7 actor_loss=254.4223 critic_loss=12294609958023988.0000 entropy=17.9035 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 97740] reward=-250299718.1 actor_loss=5.3419 critic_loss=59002765023641.6016 entropy=17.9021 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 97740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-258344453.2 mean_steps=26.6
|
|
[Episode 97750] reward=-115437422.4 actor_loss=0.2529 critic_loss=123640359230.5778 entropy=17.9139 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 97760] reward=-12476355879.3 actor_loss=90.0231 critic_loss=145028976727818240.0000 entropy=17.9128 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1029 front_blocked=0
|
|
[Eval 97760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-185069760.3 mean_steps=25.8
|
|
[Episode 97770] reward=-576645282.8 actor_loss=0.9159 critic_loss=456116153963861.3125 entropy=17.9145 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 97780] reward=-6024283231.5 actor_loss=276.2638 critic_loss=33408931034999468.0000 entropy=17.9145 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 97780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-391168.2 mean_steps=15.6
|
|
[Episode 97790] reward=-125567794.5 actor_loss=0.2568 critic_loss=141345852620.8000 entropy=17.9202 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 97800] reward=-4859618926.4 actor_loss=14.7309 critic_loss=17707217616633856.0000 entropy=17.9220 approx_kl=0.0040 kl_stop=1 intervention_rate=0.1016 front_blocked=0
|
|
[Eval 97800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-265433105.1 mean_steps=27.7
|
|
[Episode 97810] reward=-4282277690.0 actor_loss=2.9881 critic_loss=18515031303028008.0000 entropy=17.9348 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1022 front_blocked=0
|
|
[Episode 97820] reward=-120143580.9 actor_loss=0.2356 critic_loss=132913794798.9333 entropy=17.9520 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 97820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445077.2 mean_steps=14.2
|
|
[Episode 97830] reward=-14972876676.8 actor_loss=160.7056 critic_loss=226493884878815232.0000 entropy=17.9523 approx_kl=0.0040 kl_stop=1 intervention_rate=0.1016 front_blocked=0
|
|
[Episode 97840] reward=-3726338545.7 actor_loss=3.3918 critic_loss=25196913836599980.0000 entropy=17.9667 approx_kl=0.0007 kl_stop=0 intervention_rate=0.1087 front_blocked=0
|
|
[Eval 97840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-615311.3 mean_steps=12.2
|
|
[Episode 97850] reward=-7081131800.7 actor_loss=145.8403 critic_loss=89255745523317472.0000 entropy=17.9651 approx_kl=0.0036 kl_stop=1 intervention_rate=0.1081 front_blocked=0
|
|
[Episode 97860] reward=-1115130663.9 actor_loss=0.3126 critic_loss=2633626811623742.5000 entropy=17.9892 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 97860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-1323239383.0 mean_steps=33.8
|
|
[Episode 97870] reward=-7047980196.1 actor_loss=46.4966 critic_loss=45503507554319600.0000 entropy=17.9880 approx_kl=0.0027 kl_stop=1 intervention_rate=0.1022 front_blocked=0
|
|
[Episode 97880] reward=-124069544.8 actor_loss=0.2004 critic_loss=150023938048.0000 entropy=17.9771 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 97880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-111634591.7 mean_steps=22.8
|
|
[Episode 97890] reward=-8308818528.0 actor_loss=50.1192 critic_loss=46116018628381904.0000 entropy=17.9735 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 97900] reward=-119110467.0 actor_loss=0.3623 critic_loss=122277613158.4000 entropy=17.9806 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 97900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624678443.8 mean_steps=38.1
|
|
[Episode 97910] reward=-5785714131.9 actor_loss=11.5927 critic_loss=26496710947881924.0000 entropy=17.9785 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1016 front_blocked=0
|
|
[Episode 97920] reward=-17250628530.0 actor_loss=122.4959 critic_loss=166560973495904928.0000 entropy=17.9866 approx_kl=0.0590 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Eval 97920] success_rate=0.250 qp_infeasible_rate=0.700 mean_return=-7527739225.0 mean_steps=172.4
|
|
[Episode 97930] reward=-3401014272.6 actor_loss=2.1594 critic_loss=19991209283944448.0000 entropy=18.0046 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1048 front_blocked=0
|
|
[Episode 97940] reward=-534194328.7 actor_loss=19.1322 critic_loss=345543009965893.8125 entropy=18.0090 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 97940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464692.3 mean_steps=15.1
|
|
[Episode 97950] reward=-123954772.7 actor_loss=0.2997 critic_loss=134786777973.6216 entropy=18.0242 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 97960] reward=-125614411.8 actor_loss=0.2123 critic_loss=164028317696.0000 entropy=18.0291 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 97960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619187.2 mean_steps=13.6
|
|
[Episode 97970] reward=-115943031.7 actor_loss=0.2848 critic_loss=127623904733.8667 entropy=18.0170 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 97980] reward=-1072933329.2 actor_loss=0.2470 critic_loss=2210510318576890.2500 entropy=18.0081 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 97980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600176.2 mean_steps=13.1
|
|
[Episode 97990] reward=-3973169015.0 actor_loss=18.4962 critic_loss=29656721333175364.0000 entropy=18.0273 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 98000] reward=-318106633.0 actor_loss=0.2268 critic_loss=130333636587975.1094 entropy=18.0319 approx_kl=0.0016 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 98000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-540636.9 mean_steps=12.3
|
|
[Episode 98010] reward=-5230372613.2 actor_loss=0.1706 critic_loss=20034614581351764.0000 entropy=18.0384 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 98020] reward=-1586910803.6 actor_loss=0.3071 critic_loss=4599416174793076.0000 entropy=18.0417 approx_kl=0.0028 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 98020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513726.0 mean_steps=13.3
|
|
[Episode 98030] reward=-5285392064.8 actor_loss=0.2515 critic_loss=17652468057899008.0000 entropy=18.0439 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 98040] reward=-12531841075.5 actor_loss=13.3826 critic_loss=97138054990257632.0000 entropy=18.0576 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 98040] success_rate=0.500 qp_infeasible_rate=0.450 mean_return=-5867313054.0 mean_steps=176.2
|
|
[Episode 98050] reward=-120720906.9 actor_loss=0.3560 critic_loss=280757404160.0000 entropy=18.0730 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 98060] reward=-122306911.7 actor_loss=0.2789 critic_loss=137990844689.0667 entropy=18.0437 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 98060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-556663.3 mean_steps=14.2
|
|
[Episode 98070] reward=-120090516.8 actor_loss=0.3581 critic_loss=130111714281.2444 entropy=18.0251 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 98080] reward=-113819635.3 actor_loss=0.2991 critic_loss=128423505100.8000 entropy=18.0079 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 98080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-545496.2 mean_steps=12.6
|
|
[Episode 98090] reward=-113295640.5 actor_loss=0.3494 critic_loss=116404224364.0889 entropy=18.0237 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 98100] reward=-119170088.5 actor_loss=0.3446 critic_loss=122689244182.7556 entropy=18.0256 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 98100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-674683.8 mean_steps=13.7
|
|
[Episode 98110] reward=-118630708.0 actor_loss=0.3510 critic_loss=126451564179.9111 entropy=17.9833 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 98120] reward=-122649378.6 actor_loss=0.3005 critic_loss=131542451268.2667 entropy=17.9700 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 98120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619775.3 mean_steps=13.2
|
|
[Episode 98130] reward=-120032545.0 actor_loss=0.2765 critic_loss=124260956569.6000 entropy=17.9500 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 98140] reward=-115607683.7 actor_loss=0.2166 critic_loss=117007585917.1555 entropy=17.9290 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 98140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-549695.9 mean_steps=12.4
|
|
[Episode 98150] reward=-120078334.4 actor_loss=0.3322 critic_loss=125161851829.0732 entropy=17.9120 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 98160] reward=-118625205.5 actor_loss=0.2897 critic_loss=135465852563.9111 entropy=17.9081 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 98160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-454718.5 mean_steps=13.8
|
|
[Episode 98170] reward=-118747992.0 actor_loss=0.1930 critic_loss=118130181694.4390 entropy=17.9023 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 98180] reward=-122088705.0 actor_loss=0.2543 critic_loss=120548264364.6512 entropy=17.8968 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 98180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426885.7 mean_steps=15.7
|
|
[Episode 98190] reward=-116851958.1 actor_loss=0.2823 critic_loss=118416805796.9778 entropy=17.8625 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 98200] reward=-115959576.3 actor_loss=0.3060 critic_loss=116430305143.4667 entropy=17.8395 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 98200] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-715486.4 mean_steps=10.9
|
|
[Episode 98210] reward=-121295083.7 actor_loss=0.2389 critic_loss=130318746510.2222 entropy=17.8324 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 98220] reward=-115708584.3 actor_loss=0.2855 critic_loss=116028144753.7778 entropy=17.8045 approx_kl=0.0106 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 98220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493283.3 mean_steps=15.0
|
|
[Episode 98230] reward=-119830134.7 actor_loss=0.2020 critic_loss=133251465519.4074 entropy=17.7769 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 98240] reward=-115826835.4 actor_loss=0.3382 critic_loss=111207670488.1778 entropy=17.7803 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 98240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-480702.9 mean_steps=15.5
|
|
[Episode 98250] reward=-114343051.2 actor_loss=0.4083 critic_loss=111847203635.2000 entropy=17.7759 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 98260] reward=-113494838.4 actor_loss=0.3523 critic_loss=117229051175.8222 entropy=17.7825 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 98260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518120.6 mean_steps=13.1
|
|
[Episode 98270] reward=-114310787.5 actor_loss=0.3218 critic_loss=112360435907.0476 entropy=17.7867 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 98280] reward=-116235393.5 actor_loss=0.2845 critic_loss=124213911773.4054 entropy=17.7912 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 98280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392757.4 mean_steps=16.1
|
|
[Episode 98290] reward=-119067326.4 actor_loss=0.4288 critic_loss=121357518620.4444 entropy=17.8051 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 98300] reward=-117990816.8 actor_loss=0.3881 critic_loss=118329967684.2667 entropy=17.8161 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 98300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573920.6 mean_steps=13.8
|
|
[Episode 98310] reward=-120216349.9 actor_loss=0.3301 critic_loss=122856451458.8445 entropy=17.8190 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 98320] reward=-120152945.2 actor_loss=0.2742 critic_loss=116661625196.0889 entropy=17.8229 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 98320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401821.3 mean_steps=16.4
|
|
[Episode 98330] reward=-117886873.0 actor_loss=0.2489 critic_loss=124991530598.4000 entropy=17.8202 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 98340] reward=-118551995.6 actor_loss=0.2935 critic_loss=117439590985.1429 entropy=17.8383 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 98340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525492.2 mean_steps=14.0
|
|
[Episode 98350] reward=-116462684.9 actor_loss=0.2013 critic_loss=116624902280.5333 entropy=17.8313 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 98360] reward=-118868018.3 actor_loss=0.3313 critic_loss=126203770950.6207 entropy=17.8338 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 98360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-549779.1 mean_steps=12.7
|
|
[Episode 98370] reward=-122972716.9 actor_loss=0.2127 critic_loss=124228522439.1111 entropy=17.8241 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 98380] reward=-118499825.4 actor_loss=0.3574 critic_loss=118227735165.1555 entropy=17.8223 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 98380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549590.2 mean_steps=13.3
|
|
[Episode 98390] reward=-118832007.5 actor_loss=0.3004 critic_loss=121609928175.4839 entropy=17.8222 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 98400] reward=-115304771.6 actor_loss=0.3857 critic_loss=122694326802.9630 entropy=17.8266 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 98400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461778.3 mean_steps=14.3
|
|
[Episode 98410] reward=-120543004.6 actor_loss=0.2267 critic_loss=125145369258.6667 entropy=17.8137 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 98420] reward=-116180675.3 actor_loss=0.2576 critic_loss=119081675539.6923 entropy=17.8105 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 98420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437245.1 mean_steps=14.9
|
|
[Episode 98430] reward=-121490842.4 actor_loss=0.2802 critic_loss=132391422634.6667 entropy=17.7986 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 98440] reward=-114032828.7 actor_loss=0.2121 critic_loss=114324887365.8182 entropy=17.7952 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 98440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535435.6 mean_steps=13.2
|
|
[Episode 98450] reward=-120059899.2 actor_loss=0.3657 critic_loss=120110212892.4444 entropy=17.7890 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 98460] reward=-117794521.4 actor_loss=0.2966 critic_loss=116506996887.7037 entropy=17.7712 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 98460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476162.4 mean_steps=14.8
|
|
[Episode 98470] reward=-113280249.8 actor_loss=0.2808 critic_loss=110056612411.5349 entropy=17.7799 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 98480] reward=-122329734.3 actor_loss=0.2647 critic_loss=122539140710.4000 entropy=17.7849 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 98480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569953.4 mean_steps=13.6
|
|
[Episode 98490] reward=-119222090.2 actor_loss=0.1867 critic_loss=121531534540.8000 entropy=17.7946 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 98500] reward=-117632463.1 actor_loss=0.3460 critic_loss=120326027077.8182 entropy=17.7991 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 98500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624413.0 mean_steps=12.9
|
|
[Episode 98510] reward=-118996345.1 actor_loss=0.3222 critic_loss=121961444010.6667 entropy=17.7911 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 98520] reward=-119155679.2 actor_loss=0.2422 critic_loss=114250240975.2381 entropy=17.7857 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 98520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559523.5 mean_steps=13.8
|
|
[Episode 98530] reward=-117244725.5 actor_loss=0.2916 critic_loss=116492436716.3077 entropy=17.7784 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 98540] reward=-120982578.2 actor_loss=0.2591 critic_loss=123049302574.5455 entropy=17.7661 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 98540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489026.8 mean_steps=14.1
|
|
[Episode 98550] reward=-116871186.6 actor_loss=0.2806 critic_loss=115297979778.8445 entropy=17.7711 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 98560] reward=-117174193.3 actor_loss=0.3750 critic_loss=118299485798.4000 entropy=17.7770 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 98560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435429.7 mean_steps=14.6
|
|
[Episode 98570] reward=-123160641.5 actor_loss=0.3060 critic_loss=136444058373.6889 entropy=17.7658 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 98580] reward=-116994919.1 actor_loss=0.3037 critic_loss=119935288570.3111 entropy=17.7421 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 98580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413176.4 mean_steps=15.3
|
|
[Episode 98590] reward=-125077400.6 actor_loss=0.2217 critic_loss=129716911900.4444 entropy=17.7160 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 98600] reward=-116593054.8 actor_loss=0.2072 critic_loss=112894997146.7907 entropy=17.6961 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 98600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-538723.4 mean_steps=12.5
|
|
[Episode 98610] reward=-118556286.0 actor_loss=0.2927 critic_loss=118316383042.3704 entropy=17.6708 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 98620] reward=-125099378.1 actor_loss=0.2968 critic_loss=127112157593.6000 entropy=17.6633 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 98620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-370233.7 mean_steps=15.5
|
|
[Episode 98630] reward=-116615178.0 actor_loss=0.2727 critic_loss=110779778298.3111 entropy=17.6514 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 98640] reward=-117517594.7 actor_loss=0.2675 critic_loss=114925778868.1481 entropy=17.6429 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 98640] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-346275.4 mean_steps=16.1
|
|
[Episode 98650] reward=-122784714.4 actor_loss=0.2540 critic_loss=123249828717.7143 entropy=17.6473 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 98660] reward=-116310191.0 actor_loss=0.3326 critic_loss=112753464373.8947 entropy=17.6443 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 98660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-637191.4 mean_steps=12.3
|
|
[Episode 98670] reward=-118016488.3 actor_loss=0.4079 critic_loss=122689781760.0000 entropy=17.6268 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 98680] reward=-118956507.0 actor_loss=0.3264 critic_loss=124243451904.0000 entropy=17.6272 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 98680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534747.6 mean_steps=14.4
|
|
[Episode 98690] reward=-118420188.1 actor_loss=0.3338 critic_loss=118934122587.0222 entropy=17.6161 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 98700] reward=-116250440.5 actor_loss=0.3901 critic_loss=117774384059.7333 entropy=17.6272 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 98700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517062.8 mean_steps=13.2
|
|
[Episode 98710] reward=-113088242.0 actor_loss=0.3780 critic_loss=111313518006.8571 entropy=17.6347 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 98720] reward=-121255188.7 actor_loss=0.2976 critic_loss=117316630235.4286 entropy=17.6202 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 98720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474371.0 mean_steps=14.0
|
|
[Episode 98730] reward=-113805592.2 actor_loss=0.3879 critic_loss=112252178705.0667 entropy=17.6062 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 98740] reward=-120016874.5 actor_loss=0.1874 critic_loss=112906771169.2800 entropy=17.6004 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 98740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-592395.3 mean_steps=14.8
|
|
[Episode 98750] reward=-122151357.7 actor_loss=0.3066 critic_loss=341589607804.3428 entropy=17.5908 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 98760] reward=-111138762.9 actor_loss=0.2987 critic_loss=106557584335.2381 entropy=17.5991 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 98760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-447753.4 mean_steps=14.1
|
|
[Episode 98770] reward=-122452735.8 actor_loss=0.2778 critic_loss=118445925612.3077 entropy=17.6025 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 98780] reward=-118144380.8 actor_loss=0.4132 critic_loss=113494771173.0526 entropy=17.6065 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 98780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-450236.9 mean_steps=14.3
|
|
[Episode 98790] reward=-121242933.9 actor_loss=0.2570 critic_loss=120158863716.1739 entropy=17.6047 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 98800] reward=-117148709.0 actor_loss=0.4166 critic_loss=114013383338.6667 entropy=17.6144 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 98800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554389.9 mean_steps=13.5
|
|
[Episode 98810] reward=-117828200.3 actor_loss=0.3409 critic_loss=114184445041.7778 entropy=17.6160 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 98820] reward=-116343479.1 actor_loss=0.3110 critic_loss=110176021890.8445 entropy=17.6152 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 98820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-420802.4 mean_steps=16.5
|
|
[Episode 98830] reward=-110621781.7 actor_loss=0.4507 critic_loss=110545229824.0000 entropy=17.6237 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 98840] reward=-116571669.2 actor_loss=0.3144 critic_loss=114973048467.9111 entropy=17.6240 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 98840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-490460.2 mean_steps=13.2
|
|
[Episode 98850] reward=-121704364.9 actor_loss=0.2851 critic_loss=117534605494.0444 entropy=17.6076 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 98860] reward=-116694019.3 actor_loss=0.3068 critic_loss=108011998412.8000 entropy=17.6120 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 98860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-349045.4 mean_steps=16.0
|
|
[Episode 98870] reward=-121319614.9 actor_loss=0.2384 critic_loss=118743506313.8462 entropy=17.6046 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 98880] reward=-116476914.5 actor_loss=0.2081 critic_loss=108939522685.1555 entropy=17.6123 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 98880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495804.7 mean_steps=14.7
|
|
[Episode 98890] reward=-118474889.9 actor_loss=0.4642 critic_loss=118570479616.0000 entropy=17.6005 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 98900] reward=-116409474.6 actor_loss=0.3334 critic_loss=111359764666.1818 entropy=17.5969 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 98900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-426850.3 mean_steps=16.9
|
|
[Episode 98910] reward=-117412363.5 actor_loss=0.3075 critic_loss=117454988341.8947 entropy=17.6053 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 98920] reward=-114083472.3 actor_loss=0.3601 critic_loss=108143968256.0000 entropy=17.5942 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 98920] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-734618.7 mean_steps=10.8
|
|
[Episode 98930] reward=-112047076.8 actor_loss=0.3076 critic_loss=107132506892.1905 entropy=17.6088 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 98940] reward=-112764506.0 actor_loss=0.3490 critic_loss=105883487254.7556 entropy=17.6022 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 98940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560464.7 mean_steps=12.7
|
|
[Episode 98950] reward=-120666187.9 actor_loss=0.3229 critic_loss=117245754971.8974 entropy=17.5967 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 98960] reward=-120032991.4 actor_loss=0.3251 critic_loss=115928055808.0000 entropy=17.5876 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 98960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577172.0 mean_steps=13.6
|
|
[Episode 98970] reward=-116691134.8 actor_loss=0.2495 critic_loss=112068099913.9556 entropy=17.5895 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 98980] reward=-119741894.4 actor_loss=0.3162 critic_loss=116312391914.0571 entropy=17.5907 approx_kl=0.0114 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 98980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509267.2 mean_steps=13.9
|
|
[Episode 98990] reward=-113722855.8 actor_loss=0.4050 critic_loss=108380300174.2222 entropy=17.5986 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 99000] reward=-120026283.7 actor_loss=0.2737 critic_loss=119375297929.8462 entropy=17.5960 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 99000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-600125.4 mean_steps=11.8
|
|
[Episode 99010] reward=-114453475.8 actor_loss=0.3233 critic_loss=111417749113.9048 entropy=17.5986 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 99020] reward=-118451037.4 actor_loss=0.3120 critic_loss=111634984868.9778 entropy=17.5876 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 99020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548467.3 mean_steps=13.9
|
|
[Episode 99030] reward=-115587367.2 actor_loss=0.3261 critic_loss=109263840870.4000 entropy=17.5928 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 99040] reward=-114613335.7 actor_loss=0.3959 critic_loss=108445726037.3333 entropy=17.5834 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 99040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-608723.0 mean_steps=13.0
|
|
[Episode 99050] reward=-122499301.6 actor_loss=0.3186 critic_loss=116368246556.4444 entropy=17.5574 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 99060] reward=-110270095.8 actor_loss=0.3970 critic_loss=107674472903.1111 entropy=17.5520 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 99060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536058.1 mean_steps=14.2
|
|
[Episode 99070] reward=-117675080.1 actor_loss=0.4185 critic_loss=119014012063.2889 entropy=17.5608 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 99080] reward=-119391178.3 actor_loss=0.2848 critic_loss=111902792453.6889 entropy=17.5399 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 99080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533957.9 mean_steps=13.2
|
|
[Episode 99090] reward=-115567360.6 actor_loss=0.2424 critic_loss=109088975530.6667 entropy=17.5311 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 99100] reward=-116937654.9 actor_loss=0.3406 critic_loss=109711441373.8667 entropy=17.5122 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 99100] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-349301.4 mean_steps=17.1
|
|
[Episode 99110] reward=-115011255.6 actor_loss=0.3219 critic_loss=107361333156.9778 entropy=17.5221 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 99120] reward=-118755510.8 actor_loss=0.3905 critic_loss=117058151856.3556 entropy=17.5350 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 99120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469729.5 mean_steps=14.7
|
|
[Episode 99130] reward=-117181677.9 actor_loss=0.3094 critic_loss=111729115578.8108 entropy=17.5036 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 99140] reward=-114963526.4 actor_loss=0.3530 critic_loss=110513992681.2444 entropy=17.5007 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 99140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477121.7 mean_steps=14.3
|
|
[Episode 99150] reward=-115919572.8 actor_loss=0.3136 critic_loss=109739207702.7556 entropy=17.5017 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 99160] reward=-115422288.8 actor_loss=0.2629 critic_loss=111315238183.8222 entropy=17.4842 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 99160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468121.0 mean_steps=13.5
|
|
[Episode 99170] reward=-126831677.2 actor_loss=0.2434 critic_loss=527292284563.9111 entropy=17.4904 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 99180] reward=-2066061872.9 actor_loss=0.7413 critic_loss=9513815678910464.0000 entropy=17.5009 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 99180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-465999.1 mean_steps=14.1
|
|
[Episode 99190] reward=-527412506.9 actor_loss=2.9857 critic_loss=477649055589522.3125 entropy=17.4960 approx_kl=0.0040 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 99200] reward=-117976509.4 actor_loss=0.2713 critic_loss=108716700285.1555 entropy=17.5110 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 99200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-416608.6 mean_steps=16.5
|
|
[Episode 99210] reward=-113210952.1 actor_loss=0.5304 critic_loss=108553807644.4444 entropy=17.5077 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1523 front_blocked=0
|
|
[Episode 99220] reward=-111950660.1 actor_loss=0.4065 critic_loss=105469496368.7619 entropy=17.5090 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 99220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427779.1 mean_steps=15.3
|
|
[Episode 99230] reward=-110645242.7 actor_loss=0.4250 critic_loss=103093482118.7368 entropy=17.5019 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 99240] reward=-112837978.2 actor_loss=0.3757 critic_loss=108608181589.3333 entropy=17.5025 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 99240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459265.3 mean_steps=15.0
|
|
[Episode 99250] reward=-116546037.8 actor_loss=0.3760 critic_loss=110510033134.1395 entropy=17.5062 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 99260] reward=-487608412.4 actor_loss=0.3532 critic_loss=434359102341120.0000 entropy=17.5180 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 99260] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-726252.4 mean_steps=9.6
|
|
[Episode 99270] reward=-113388177.0 actor_loss=0.3555 critic_loss=112530185459.8095 entropy=17.5150 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 99280] reward=-120209938.4 actor_loss=0.2439 critic_loss=115497617908.6222 entropy=17.4900 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 99280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584534.2 mean_steps=12.8
|
|
[Episode 99290] reward=-120081946.9 actor_loss=0.2966 critic_loss=123007528201.4815 entropy=17.4950 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 99300] reward=-115258354.6 actor_loss=0.3064 critic_loss=108459509714.4889 entropy=17.4756 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 99300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414025.0 mean_steps=15.4
|
|
[Episode 99310] reward=-113177916.7 actor_loss=0.2960 critic_loss=103541533900.8000 entropy=17.4773 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 99320] reward=-7324718072.3 actor_loss=0.7182 critic_loss=67858911795988256.0000 entropy=17.5052 approx_kl=0.0035 kl_stop=1 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 99320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-401436.6 mean_steps=14.4
|
|
[Episode 99330] reward=-10367038016.6 actor_loss=0.5656 critic_loss=119322339894833968.0000 entropy=17.5076 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1146 front_blocked=0
|
|
[Episode 99340] reward=-6874429652.3 actor_loss=9.4561 critic_loss=61495250118159016.0000 entropy=17.5093 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1087 front_blocked=0
|
|
[Eval 99340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-304489.3 mean_steps=16.7
|
|
[Episode 99350] reward=-114873939.9 actor_loss=0.2977 critic_loss=112556062037.3333 entropy=17.5137 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 99360] reward=-115996432.5 actor_loss=0.3199 critic_loss=123085747541.3333 entropy=17.5296 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 99360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543873.5 mean_steps=13.6
|
|
[Episode 99370] reward=-371913020.3 actor_loss=23.3848 critic_loss=202630635040859.0312 entropy=17.5466 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 99380] reward=-4646286093.1 actor_loss=0.2299 critic_loss=44183085776686288.0000 entropy=17.5801 approx_kl=0.0006 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 99380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-617644.0 mean_steps=12.9
|
|
[Episode 99390] reward=-19841261223.1 actor_loss=0.0938 critic_loss=251910368896832864.0000 entropy=17.5837 approx_kl=-0.0033 kl_stop=0 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 99400] reward=-115515025.1 actor_loss=0.2584 critic_loss=121967388829.5385 entropy=17.6000 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 99400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-337571.5 mean_steps=14.9
|
|
[Episode 99410] reward=-13685033715.2 actor_loss=0.1384 critic_loss=120178046463284208.0000 entropy=17.6161 approx_kl=0.0036 kl_stop=0 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 99420] reward=-12067759643.4 actor_loss=24.0473 critic_loss=73966093775113792.0000 entropy=17.6356 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 99420] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-403506.4 mean_steps=17.4
|
|
[Episode 99430] reward=-118847493.7 actor_loss=0.2758 critic_loss=122944736980.2927 entropy=17.6715 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 99440] reward=-6650652396.9 actor_loss=0.2852 critic_loss=91136722766473168.0000 entropy=17.7020 approx_kl=-0.0007 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Eval 99440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-408400.8 mean_steps=16.6
|
|
[Episode 99450] reward=-4866529036.0 actor_loss=0.2042 critic_loss=32641482489203552.0000 entropy=17.7057 approx_kl=-0.0008 kl_stop=0 intervention_rate=0.1061 front_blocked=0
|
|
[Episode 99460] reward=-4441280324.4 actor_loss=0.2784 critic_loss=42905038228204200.0000 entropy=17.7259 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 99460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-559187.9 mean_steps=14.6
|
|
[Episode 99470] reward=-117812748.6 actor_loss=0.3532 critic_loss=119920961672.5333 entropy=17.7206 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 99480] reward=-2131935137.5 actor_loss=9.7875 critic_loss=9879322868222882.0000 entropy=17.7289 approx_kl=0.0024 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 99480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-502651.4 mean_steps=12.7
|
|
[Episode 99490] reward=-113421951.8 actor_loss=0.3461 critic_loss=112306932212.6222 entropy=17.7364 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 99500] reward=-117459672.9 actor_loss=0.2999 critic_loss=114810453625.9048 entropy=17.7353 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 99500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-440583.2 mean_steps=14.2
|
|
[Episode 99510] reward=-115368194.0 actor_loss=0.2715 critic_loss=121981024326.6207 entropy=17.7396 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 99520] reward=-2790301117.8 actor_loss=0.3944 critic_loss=16860329752845166.0000 entropy=17.7362 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 99520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559291.5 mean_steps=13.6
|
|
[Episode 99530] reward=-115014033.3 actor_loss=0.2662 critic_loss=112071153235.3488 entropy=17.7561 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 99540] reward=-299896544.9 actor_loss=0.3010 critic_loss=109820527917283.5625 entropy=17.7446 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 99540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-412411.1 mean_steps=16.1
|
|
[Episode 99550] reward=-115598711.5 actor_loss=0.3483 critic_loss=114933250925.7143 entropy=17.7527 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 99560] reward=-943221286.2 actor_loss=0.1706 critic_loss=1838259239538597.0000 entropy=17.7666 approx_kl=-0.0001 kl_stop=0 intervention_rate=0.1146 front_blocked=0
|
|
[Eval 99560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-361888.8 mean_steps=15.8
|
|
[Episode 99570] reward=-2149769479.9 actor_loss=0.2856 critic_loss=10159708309343436.0000 entropy=17.7745 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 99580] reward=-3963004744.1 actor_loss=8.5173 critic_loss=27089661031312824.0000 entropy=17.7816 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 99580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-508864.9 mean_steps=15.1
|
|
[Episode 99590] reward=-789730924.2 actor_loss=0.3050 critic_loss=904092954179083.3750 entropy=17.7977 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 99600] reward=-5715244425.7 actor_loss=6.3897 critic_loss=40711939593705880.0000 entropy=17.7965 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1016 front_blocked=0
|
|
[Eval 99600] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-257014.2 mean_steps=17.1
|
|
[Episode 99610] reward=-5701294917.3 actor_loss=0.2028 critic_loss=44734912265890472.0000 entropy=17.7945 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1113 front_blocked=0
|
|
[Episode 99620] reward=-135772221.2 actor_loss=0.3999 critic_loss=1272756342962.0869 entropy=17.7970 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 99620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-428670.3 mean_steps=14.8
|
|
[Episode 99630] reward=-116395073.7 actor_loss=0.2967 critic_loss=123434535594.6667 entropy=17.7823 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 99640] reward=-472612577.2 actor_loss=0.4116 critic_loss=391368930020556.8125 entropy=17.7704 approx_kl=0.0018 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 99640] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-75542647.2 mean_steps=18.1
|
|
[Episode 99650] reward=-812535229.4 actor_loss=13.9147 critic_loss=1212457389391872.0000 entropy=17.7853 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 99660] reward=-295787501.2 actor_loss=0.3640 critic_loss=89913539981767.1094 entropy=17.7890 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 99660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494461.1 mean_steps=14.7
|
|
[Episode 99670] reward=-115607553.7 actor_loss=0.3113 critic_loss=124621194035.2000 entropy=17.7940 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 99680] reward=-117134105.4 actor_loss=0.2935 critic_loss=115646985977.4359 entropy=17.7973 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 99680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-662747.8 mean_steps=12.3
|
|
[Episode 99690] reward=-120282565.8 actor_loss=0.3381 critic_loss=121663110237.0909 entropy=17.7854 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 99700] reward=-122190454.5 actor_loss=0.3048 critic_loss=127545683843.8788 entropy=17.7871 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 99700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-508971.5 mean_steps=15.9
|
|
[Episode 99710] reward=-399489696.0 actor_loss=0.2660 critic_loss=199002233620252.4375 entropy=17.7834 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 99720] reward=-124620248.3 actor_loss=0.2431 critic_loss=124136864517.6889 entropy=17.7937 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 99720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563562.4 mean_steps=13.6
|
|
[Episode 99730] reward=-121766327.4 actor_loss=0.3502 critic_loss=558918399650.3414 entropy=17.7997 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 99740] reward=-119467845.0 actor_loss=0.2994 critic_loss=128519759872.0000 entropy=17.7930 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 99740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-509606.9 mean_steps=14.9
|
|
[Episode 99750] reward=-114324825.0 actor_loss=0.3430 critic_loss=115138363860.1143 entropy=17.8008 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 99760] reward=-117741860.0 actor_loss=0.2343 critic_loss=111986677467.4286 entropy=17.7969 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 99760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-370970.0 mean_steps=16.0
|
|
[Episode 99770] reward=-116658982.2 actor_loss=0.2643 critic_loss=115072078848.0000 entropy=17.8002 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 99780] reward=-120781908.0 actor_loss=0.2924 critic_loss=114520912630.5185 entropy=17.7961 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 99780] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-399503.7 mean_steps=16.1
|
|
[Episode 99790] reward=-116081047.4 actor_loss=0.2427 critic_loss=113873963144.5333 entropy=17.7840 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 99800] reward=-577926986.0 actor_loss=0.6968 critic_loss=628753040778661.6250 entropy=17.7831 approx_kl=0.0034 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 99800] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-667025.0 mean_steps=11.1
|
|
[Episode 99810] reward=-117700846.5 actor_loss=0.2568 critic_loss=111911691080.2051 entropy=17.7868 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 99820] reward=-117473181.3 actor_loss=0.2788 critic_loss=115438772224.0000 entropy=17.8091 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 99820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-506717.0 mean_steps=14.7
|
|
[Episode 99830] reward=-119673974.8 actor_loss=0.3572 critic_loss=115790477019.4286 entropy=17.7950 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 99840] reward=-121380963.4 actor_loss=0.2717 critic_loss=121990160570.1818 entropy=17.7855 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 99840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539927.2 mean_steps=13.2
|
|
[Episode 99850] reward=-119814568.2 actor_loss=0.2943 critic_loss=115508122185.1429 entropy=17.7874 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 99860] reward=-380355236.9 actor_loss=0.2491 critic_loss=141589869913702.4062 entropy=17.7923 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 99860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528291.7 mean_steps=12.9
|
|
[Episode 99870] reward=-119566494.2 actor_loss=0.3768 critic_loss=114408806809.6000 entropy=17.7979 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 99880] reward=-120088513.3 actor_loss=0.3791 critic_loss=115809801690.5366 entropy=17.8078 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 99880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-495445.9 mean_steps=12.8
|
|
[Episode 99890] reward=-118850734.7 actor_loss=0.3299 critic_loss=119531686752.7111 entropy=17.7923 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 99900] reward=-115049283.5 actor_loss=0.3568 critic_loss=106896813442.8445 entropy=17.7912 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 99900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-563757.2 mean_steps=14.2
|
|
[Episode 99910] reward=-122903691.6 actor_loss=0.2322 critic_loss=121628343978.6667 entropy=17.7861 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 99920] reward=-125863511.3 actor_loss=0.2771 critic_loss=124791478016.0000 entropy=17.8072 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 99920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437268.2 mean_steps=15.4
|
|
[Episode 99930] reward=-122819736.0 actor_loss=0.2178 critic_loss=118694806993.4545 entropy=17.7877 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 99940] reward=-122394419.9 actor_loss=0.3219 critic_loss=113837422353.8605 entropy=17.7805 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 99940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-651079.6 mean_steps=12.4
|
|
[Episode 99950] reward=-113082860.2 actor_loss=0.4010 critic_loss=109345512243.2000 entropy=17.7806 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 99960] reward=-114964379.0 actor_loss=0.3100 critic_loss=114932542122.6667 entropy=17.7624 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 99960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460274.3 mean_steps=14.8
|
|
[Episode 99970] reward=-119611737.0 actor_loss=0.2201 critic_loss=115831712972.8000 entropy=17.7625 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 99980] reward=-123441829.0 actor_loss=0.2721 critic_loss=244185779040.7111 entropy=17.7453 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 99980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423240.7 mean_steps=15.3
|
|
[Episode 99990] reward=-118108442.0 actor_loss=0.3206 critic_loss=116191669565.7931 entropy=17.7518 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 100000] reward=-681184427.3 actor_loss=1.8267 critic_loss=870007997528655.6250 entropy=17.7431 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 100000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-385861.4 mean_steps=15.1
|
|
[Episode 100010] reward=-125511129.6 actor_loss=0.2233 critic_loss=123077356514.7429 entropy=17.7575 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 100020] reward=-115643463.4 actor_loss=0.2633 critic_loss=110507134076.8781 entropy=17.7557 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 100020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-614331.4 mean_steps=12.1
|
|
[Episode 100030] reward=-120975493.9 actor_loss=0.3498 critic_loss=114135436083.2000 entropy=17.7503 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 100040] reward=-125888709.8 actor_loss=0.2446 critic_loss=124206688392.5333 entropy=17.7449 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 100040] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-592240.0 mean_steps=12.1
|
|
[Episode 100050] reward=-832975235.3 actor_loss=6.5778 critic_loss=1380642638004224.0000 entropy=17.7591 approx_kl=0.0033 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 100060] reward=-118736974.3 actor_loss=0.2237 critic_loss=116591940169.1429 entropy=17.7553 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 100060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-650051.5 mean_steps=12.3
|
|
[Episode 100070] reward=-112313747.7 actor_loss=0.3773 critic_loss=111245684345.9048 entropy=17.7746 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 100080] reward=-117363375.8 actor_loss=0.3132 critic_loss=118351875094.7556 entropy=17.7797 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 100080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465039.8 mean_steps=14.7
|
|
[Episode 100090] reward=-120433703.1 actor_loss=0.3195 critic_loss=115362894643.2000 entropy=17.7920 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 100100] reward=-116318892.8 actor_loss=0.2790 critic_loss=115521915037.5385 entropy=17.7841 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 100100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534956.6 mean_steps=13.1
|
|
[Episode 100110] reward=-3196099852.2 actor_loss=6.6649 critic_loss=12537155968565248.0000 entropy=17.7723 approx_kl=0.0027 kl_stop=1 intervention_rate=0.1042 front_blocked=0
|
|
[Episode 100120] reward=-120606515.2 actor_loss=0.2762 critic_loss=117096413047.4667 entropy=17.7856 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 100120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-667554.6 mean_steps=13.5
|
|
[Episode 100130] reward=-111743317.5 actor_loss=0.3478 critic_loss=111136360857.6000 entropy=17.7977 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 100140] reward=-117045555.8 actor_loss=0.2882 critic_loss=109009494289.0667 entropy=17.7916 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 100140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462581.4 mean_steps=14.7
|
|
[Episode 100150] reward=-121194721.3 actor_loss=0.2942 critic_loss=110292370636.8000 entropy=17.7794 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 100160] reward=-641429228.7 actor_loss=0.3797 critic_loss=812784280245092.8750 entropy=17.7904 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 100160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-352596.0 mean_steps=16.2
|
|
[Episode 100170] reward=-3133870966.2 actor_loss=2.5168 critic_loss=12835798848433230.0000 entropy=17.8116 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1139 front_blocked=0
|
|
[Episode 100180] reward=-2118629328.0 actor_loss=0.2587 critic_loss=5778867983045973.0000 entropy=17.8207 approx_kl=0.0031 kl_stop=1 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 100180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441482.6 mean_steps=15.0
|
|
[Episode 100190] reward=-119374569.5 actor_loss=0.3102 critic_loss=120179659753.2444 entropy=17.8150 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 100200] reward=-3585023942.9 actor_loss=9.8621 critic_loss=8954994329911296.0000 entropy=17.8191 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1042 front_blocked=0
|
|
[Eval 100200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562599.8 mean_steps=13.8
|
|
[Episode 100210] reward=-4165553762.9 actor_loss=0.2640 critic_loss=21368477818756208.0000 entropy=17.8262 approx_kl=0.0010 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 100220] reward=-927363865.3 actor_loss=0.3465 critic_loss=1503409129419207.0000 entropy=17.8591 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 100220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409036.1 mean_steps=15.8
|
|
[Episode 100230] reward=-255184674.6 actor_loss=0.2931 critic_loss=60541407883719.1094 entropy=17.8650 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 100240] reward=-120900242.5 actor_loss=0.3168 critic_loss=118791429014.0690 entropy=17.8750 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 100240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423327.3 mean_steps=15.5
|
|
[Episode 100250] reward=-117511801.7 actor_loss=0.4012 critic_loss=125422267596.8000 entropy=17.8758 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 100260] reward=-655930260.1 actor_loss=0.9417 critic_loss=782379335358277.8750 entropy=17.8744 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 100260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479030.9 mean_steps=14.7
|
|
[Episode 100270] reward=-123359107.4 actor_loss=0.2372 critic_loss=136147469548.3077 entropy=17.8699 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 100280] reward=-120179219.9 actor_loss=0.3144 critic_loss=127294837108.3636 entropy=17.8831 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 100280] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-371759.4 mean_steps=17.9
|
|
[Episode 100290] reward=-185477251.3 actor_loss=0.2615 critic_loss=15636986045690.3105 entropy=17.8793 approx_kl=0.0009 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 100300] reward=-122787806.0 actor_loss=0.2121 critic_loss=123950765579.3778 entropy=17.8879 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 100300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417294.5 mean_steps=15.6
|
|
[Episode 100310] reward=-119544274.3 actor_loss=0.2379 critic_loss=116913741095.8222 entropy=17.8955 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 100320] reward=-116036912.9 actor_loss=0.3363 critic_loss=119253822122.6667 entropy=17.8872 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 100320] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-670049.5 mean_steps=11.7
|
|
[Episode 100330] reward=-120849909.4 actor_loss=0.2748 critic_loss=121690231967.2889 entropy=17.8698 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 100340] reward=-116704757.1 actor_loss=0.2982 critic_loss=110702679745.4222 entropy=17.8595 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 100340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603553.7 mean_steps=12.7
|
|
[Episode 100350] reward=-119588685.1 actor_loss=0.2819 critic_loss=115803709440.0000 entropy=17.8593 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 100360] reward=-120783719.2 actor_loss=0.3176 critic_loss=118821730596.5714 entropy=17.8546 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 100360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-630563.8 mean_steps=11.9
|
|
[Episode 100370] reward=-117109383.6 actor_loss=0.3817 critic_loss=117425150634.6667 entropy=17.8518 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 100380] reward=-122283498.3 actor_loss=0.2551 critic_loss=114135859980.1905 entropy=17.8496 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 100380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386294.7 mean_steps=15.8
|
|
[Episode 100390] reward=-5176285433.2 actor_loss=0.1907 critic_loss=51253054874064576.0000 entropy=17.8438 approx_kl=-0.0014 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Episode 100400] reward=-6516968229.4 actor_loss=0.3950 critic_loss=62577397765819600.0000 entropy=17.8513 approx_kl=-0.0001 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Eval 100400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-112885528.3 mean_steps=20.9
|
|
[Episode 100410] reward=-2832513830.9 actor_loss=0.2175 critic_loss=12949728096186824.0000 entropy=17.8675 approx_kl=-0.0006 kl_stop=0 intervention_rate=0.1074 front_blocked=0
|
|
[Episode 100420] reward=-11275623963.5 actor_loss=30.3355 critic_loss=105301160313679600.0000 entropy=17.8896 approx_kl=0.0035 kl_stop=0 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 100420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-353779.9 mean_steps=15.7
|
|
[Episode 100430] reward=-1879527559.8 actor_loss=40.2596 critic_loss=7645751656548232.0000 entropy=17.8952 approx_kl=0.0136 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 100440] reward=-272503318.6 actor_loss=0.3879 critic_loss=70124784815672.8906 entropy=17.9135 approx_kl=0.0010 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 100440] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-394979.1 mean_steps=17.2
|
|
[Episode 100450] reward=-407023896.4 actor_loss=0.2886 critic_loss=293512165021013.3125 entropy=17.9162 approx_kl=0.0000 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 100460] reward=-351585835.2 actor_loss=0.3226 critic_loss=135459169841971.2031 entropy=17.9192 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 100460] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-316439.7 mean_steps=17.9
|
|
[Episode 100470] reward=-111564170.3 actor_loss=0.4029 critic_loss=119887178410.6667 entropy=17.9041 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 100480] reward=-4438500666.4 actor_loss=28.7639 critic_loss=39619431816821328.0000 entropy=17.8983 approx_kl=0.0005 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Eval 100480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-606011.3 mean_steps=13.2
|
|
[Episode 100490] reward=-115342366.5 actor_loss=0.3898 critic_loss=120377908155.7333 entropy=17.8994 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 100500] reward=-115974362.5 actor_loss=0.3099 critic_loss=112527362389.3333 entropy=17.9004 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 100500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-504197.1 mean_steps=13.7
|
|
[Episode 100510] reward=-116892692.8 actor_loss=0.2349 critic_loss=120707450977.5238 entropy=17.9278 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 100520] reward=-125486421.2 actor_loss=0.2521 critic_loss=140888393318.4000 entropy=17.9119 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 100520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471343.3 mean_steps=14.1
|
|
[Episode 100530] reward=-116895929.9 actor_loss=0.3748 critic_loss=116897431552.0000 entropy=17.9086 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 100540] reward=-119496760.7 actor_loss=0.2675 critic_loss=117810408106.6667 entropy=17.9077 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 100540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-642729.7 mean_steps=13.0
|
|
[Episode 100550] reward=-3602164309.2 actor_loss=0.2975 critic_loss=26623997991021592.0000 entropy=17.9115 approx_kl=-0.0016 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 100560] reward=-4971075751.9 actor_loss=52.5178 critic_loss=46640161404800208.0000 entropy=17.9254 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 100560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432516.0 mean_steps=15.8
|
|
[Episode 100570] reward=-120769252.0 actor_loss=0.1888 critic_loss=119253437170.5263 entropy=17.9221 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 100580] reward=-2367338104.5 actor_loss=0.3876 critic_loss=6811818310412971.0000 entropy=17.9200 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1139 front_blocked=0
|
|
[Eval 100580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-376636.7 mean_steps=15.2
|
|
[Episode 100590] reward=-120488549.7 actor_loss=0.3375 critic_loss=119382490624.0000 entropy=17.9084 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 100600] reward=-1317542047.1 actor_loss=15.8937 critic_loss=3725379489796681.0000 entropy=17.9304 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 100600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457196.7 mean_steps=15.0
|
|
[Episode 100610] reward=-118027493.3 actor_loss=0.2544 critic_loss=115661297314.3415 entropy=17.9400 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 100620] reward=-120133478.2 actor_loss=0.2860 critic_loss=125802692243.9111 entropy=17.9436 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 100620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493277.5 mean_steps=13.5
|
|
[Episode 100630] reward=-114624095.3 actor_loss=0.3290 critic_loss=116810445172.3636 entropy=17.9488 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 100640] reward=-121120952.4 actor_loss=0.2450 critic_loss=116471524920.8889 entropy=17.9506 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 100640] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-674757.9 mean_steps=11.5
|
|
[Episode 100650] reward=-114451942.5 actor_loss=0.2763 critic_loss=116520982573.5111 entropy=17.9466 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 100660] reward=-6070163360.9 actor_loss=0.2851 critic_loss=75325316062324688.0000 entropy=17.9470 approx_kl=0.0011 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Eval 100660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-465415.8 mean_steps=13.2
|
|
[Episode 100670] reward=-119484385.9 actor_loss=0.2559 critic_loss=117329344193.4222 entropy=17.9267 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 100680] reward=-114610714.2 actor_loss=0.2617 critic_loss=116641849161.9556 entropy=17.9059 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 100680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-382484.6 mean_steps=16.2
|
|
[Episode 100690] reward=-114828107.9 actor_loss=0.3287 critic_loss=119683852101.8182 entropy=17.9164 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 100700] reward=-118244299.8 actor_loss=0.3411 critic_loss=130587649489.4545 entropy=17.9102 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 100700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494440.7 mean_steps=14.4
|
|
[Episode 100710] reward=-116141220.1 actor_loss=0.3680 critic_loss=116231736524.8000 entropy=17.9223 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 100720] reward=-6457684910.7 actor_loss=7.1721 critic_loss=74444913336478736.0000 entropy=17.9018 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 100720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524863.7 mean_steps=14.2
|
|
[Episode 100730] reward=-116398216.0 actor_loss=0.3383 critic_loss=122373146112.0000 entropy=17.8976 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 100740] reward=-114390564.6 actor_loss=0.3788 critic_loss=119262202356.6222 entropy=17.8904 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 100740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577704.1 mean_steps=13.6
|
|
[Episode 100750] reward=-121018381.3 actor_loss=0.2325 critic_loss=123105015621.8182 entropy=17.8908 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 100760] reward=-111247313.9 actor_loss=0.3972 critic_loss=117083453912.6154 entropy=17.8905 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 100760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-608975.9 mean_steps=13.2
|
|
[Episode 100770] reward=-3869519010.5 actor_loss=0.3300 critic_loss=26354577078636452.0000 entropy=17.8940 approx_kl=-0.0016 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 100780] reward=-118813735.4 actor_loss=0.4300 critic_loss=116047458918.4000 entropy=17.8992 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 100780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425636.2 mean_steps=15.1
|
|
[Episode 100790] reward=-119460835.0 actor_loss=0.2825 critic_loss=116755681652.3636 entropy=17.8885 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 100800] reward=-122172375.8 actor_loss=0.2268 critic_loss=123445764096.0000 entropy=17.9053 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 100800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556697.7 mean_steps=13.2
|
|
[Episode 100810] reward=-10717269232.4 actor_loss=0.4080 critic_loss=109008469186622992.0000 entropy=17.9145 approx_kl=-0.0005 kl_stop=0 intervention_rate=0.1081 front_blocked=0
|
|
[Episode 100820] reward=-19161117084.4 actor_loss=1.2924 critic_loss=197117033469211072.0000 entropy=17.9222 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 100820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435752.2 mean_steps=15.7
|
|
[Episode 100830] reward=-23025610618.5 actor_loss=0.0636 critic_loss=241643281263059136.0000 entropy=17.9177 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0645 front_blocked=0
|
|
[Episode 100840] reward=-119754052.5 actor_loss=0.2626 critic_loss=121433267521.8286 entropy=17.9325 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 100840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-627987.0 mean_steps=12.9
|
|
[Episode 100850] reward=-124185092.2 actor_loss=0.1746 critic_loss=135703223434.3784 entropy=17.9478 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 100860] reward=-4025003681.2 actor_loss=0.3708 critic_loss=28969112554828960.0000 entropy=17.9476 approx_kl=-0.0028 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 100860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-593170.2 mean_steps=12.8
|
|
[Episode 100870] reward=-51268296095.4 actor_loss=0.0624 critic_loss=962038289576731392.0000 entropy=17.9356 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0469 front_blocked=0
|
|
[Episode 100880] reward=-22888346034.7 actor_loss=0.9732 critic_loss=263218994880708608.0000 entropy=17.9434 approx_kl=0.0108 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 100880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-582254.3 mean_steps=12.8
|
|
[Episode 100890] reward=-23024742412.1 actor_loss=0.1298 critic_loss=233301757653313120.0000 entropy=17.9408 approx_kl=0.0526 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 100900] reward=-21937645818.0 actor_loss=114.0159 critic_loss=292234794064098112.0000 entropy=17.9600 approx_kl=0.0043 kl_stop=0 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 100900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551756.3 mean_steps=13.2
|
|
[Episode 100910] reward=-11617612976.7 actor_loss=3.0631 critic_loss=124851243277702480.0000 entropy=17.9808 approx_kl=0.0007 kl_stop=0 intervention_rate=0.1087 front_blocked=0
|
|
[Episode 100920] reward=-1138488833.9 actor_loss=0.1639 critic_loss=2567321512143895.0000 entropy=17.9801 approx_kl=0.0003 kl_stop=0 intervention_rate=0.1139 front_blocked=0
|
|
[Eval 100920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397467.9 mean_steps=15.3
|
|
[Episode 100930] reward=-11048588508.2 actor_loss=9.5451 critic_loss=72328048396992512.0000 entropy=17.9876 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 100940] reward=-17215733299.5 actor_loss=191.8068 critic_loss=152649635985358848.0000 entropy=17.9975 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 100940] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-294650.9 mean_steps=17.4
|
|
[Episode 100950] reward=-8953488086.2 actor_loss=0.2409 critic_loss=157821950090251840.0000 entropy=17.9997 approx_kl=-0.0015 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 100960] reward=-119962766.5 actor_loss=0.3256 critic_loss=120884575232.0000 entropy=17.9976 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 100960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-400432.4 mean_steps=14.4
|
|
[Episode 100970] reward=-123148680.0 actor_loss=0.3950 critic_loss=126862573125.1892 entropy=17.9957 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 100980] reward=-10308177247.5 actor_loss=0.1833 critic_loss=108305744199295520.0000 entropy=17.9996 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1094 front_blocked=0
|
|
[Eval 100980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480062.5 mean_steps=15.1
|
|
[Episode 100990] reward=-3296450356.3 actor_loss=0.3617 critic_loss=17212585119079174.0000 entropy=18.0089 approx_kl=-0.0002 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 101000] reward=-4739740912.7 actor_loss=0.2751 critic_loss=45693431737611424.0000 entropy=18.0361 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 101000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462166.6 mean_steps=15.8
|
|
[Episode 101010] reward=-120326932.6 actor_loss=0.3594 critic_loss=125024485376.0000 entropy=18.0388 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 101020] reward=-9821719493.4 actor_loss=0.1570 critic_loss=90789417585248400.0000 entropy=18.0529 approx_kl=0.0031 kl_stop=0 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 101020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-334753.2 mean_steps=16.4
|
|
[Episode 101030] reward=-14731205554.7 actor_loss=0.5165 critic_loss=262050090079897888.0000 entropy=18.0423 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1139 front_blocked=0
|
|
[Episode 101040] reward=-35676911623.1 actor_loss=0.1466 critic_loss=551673446157312896.0000 entropy=18.0442 approx_kl=0.0143 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 101040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-412873.1 mean_steps=14.5
|
|
[Episode 101050] reward=-16514355882.0 actor_loss=1.4059 critic_loss=204068441855535776.0000 entropy=18.0504 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1016 front_blocked=0
|
|
[Episode 101060] reward=-8542084609.9 actor_loss=0.2506 critic_loss=104684518646855392.0000 entropy=18.0522 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 101060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-352120.8 mean_steps=16.1
|
|
[Episode 101070] reward=-25963377052.8 actor_loss=2.5009 critic_loss=496821644580659008.0000 entropy=18.0558 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0996 front_blocked=0
|
|
[Episode 101080] reward=-124180916.9 actor_loss=0.3044 critic_loss=152706544071.1111 entropy=18.0603 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 101080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-545034.7 mean_steps=14.3
|
|
[Episode 101090] reward=-12695351453.9 actor_loss=3.0963 critic_loss=151850523437418272.0000 entropy=18.0564 approx_kl=0.0244 kl_stop=1 intervention_rate=0.0977 front_blocked=0
|
|
[Episode 101100] reward=-17590778730.5 actor_loss=16.9229 critic_loss=266423279991112640.0000 entropy=18.0722 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0996 front_blocked=0
|
|
[Eval 101100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482041.8 mean_steps=15.1
|
|
[Episode 101110] reward=-31573625317.9 actor_loss=0.0419 critic_loss=408765277512837824.0000 entropy=18.0755 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Episode 101120] reward=-25062725931.2 actor_loss=1.3217 critic_loss=216452016453777984.0000 entropy=18.0801 approx_kl=0.0070 kl_stop=0 intervention_rate=0.0553 front_blocked=0
|
|
[Eval 101120] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-343088.0 mean_steps=16.7
|
|
[Episode 101130] reward=-21458504102.3 actor_loss=114.7861 critic_loss=366249215670219904.0000 entropy=18.0953 approx_kl=0.0014 kl_stop=0 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 101140] reward=-11349068615.2 actor_loss=0.2216 critic_loss=147948758846406656.0000 entropy=18.1098 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1120 front_blocked=0
|
|
[Eval 101140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549328.5 mean_steps=13.3
|
|
[Episode 101150] reward=-43380819935.5 actor_loss=45.7823 critic_loss=508387651729152064.0000 entropy=18.1422 approx_kl=0.0017 kl_stop=0 intervention_rate=0.0326 front_blocked=0
|
|
[Episode 101160] reward=-25550068119.8 actor_loss=0.0978 critic_loss=410088690285243840.0000 entropy=18.1537 approx_kl=0.0181 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 101160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-522092.1 mean_steps=13.2
|
|
[Episode 101170] reward=-29920811402.9 actor_loss=3.8097 critic_loss=370208937065813504.0000 entropy=18.1574 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 101180] reward=-12027932857.3 actor_loss=8.1780 critic_loss=134015890827493872.0000 entropy=18.1581 approx_kl=0.0019 kl_stop=1 intervention_rate=0.1042 front_blocked=0
|
|
[Eval 101180] success_rate=0.350 qp_infeasible_rate=0.600 mean_return=-8723592801.4 mean_steps=173.8
|
|
[Episode 101190] reward=-4881440561.6 actor_loss=0.2038 critic_loss=30921819531399076.0000 entropy=18.1793 approx_kl=0.0008 kl_stop=0 intervention_rate=0.1087 front_blocked=0
|
|
[Episode 101200] reward=-123415515.8 actor_loss=0.1664 critic_loss=133774465069.5111 entropy=18.2262 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 101200] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-323837.1 mean_steps=16.8
|
|
[Episode 101210] reward=-11308572683.0 actor_loss=0.2900 critic_loss=110070313569438192.0000 entropy=18.2198 approx_kl=-0.0005 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 101220] reward=-13434640474.0 actor_loss=0.1697 critic_loss=140945318959469904.0000 entropy=18.2168 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 101220] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-343669.5 mean_steps=17.0
|
|
[Episode 101230] reward=-120431290.4 actor_loss=0.2236 critic_loss=128247246210.8445 entropy=18.2016 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 101240] reward=-38927139382.2 actor_loss=73.0115 critic_loss=482382790692327360.0000 entropy=18.2009 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0599 front_blocked=0
|
|
[Eval 101240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390975.3 mean_steps=15.7
|
|
[Episode 101250] reward=-11904395289.9 actor_loss=0.2992 critic_loss=126153852455660208.0000 entropy=18.2009 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1087 front_blocked=0
|
|
[Episode 101260] reward=-9739405400.7 actor_loss=0.2909 critic_loss=89066630802285824.0000 entropy=18.2285 approx_kl=-0.0001 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 101260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559397.1 mean_steps=13.4
|
|
[Episode 101270] reward=-122829000.2 actor_loss=0.3289 critic_loss=162827801161.1429 entropy=18.2489 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 101280] reward=-4543643055.2 actor_loss=0.1819 critic_loss=24942467470857920.0000 entropy=18.2554 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1055 front_blocked=0
|
|
[Eval 101280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-276021229.1 mean_steps=26.6
|
|
[Episode 101290] reward=-19642004970.6 actor_loss=32.6621 critic_loss=210939480097342688.0000 entropy=18.2613 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 101300] reward=-120508266.2 actor_loss=0.3090 critic_loss=128383447255.5789 entropy=18.2566 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 101300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517732.4 mean_steps=13.4
|
|
[Episode 101310] reward=-1514313436.1 actor_loss=0.2028 critic_loss=4998491241409975.0000 entropy=18.2552 approx_kl=0.0023 kl_stop=1 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 101320] reward=-4257048270.5 actor_loss=0.2987 critic_loss=15853252284182254.0000 entropy=18.2587 approx_kl=0.0033 kl_stop=0 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 101320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-481020.5 mean_steps=16.0
|
|
[Episode 101330] reward=-445598312.8 actor_loss=0.2645 critic_loss=341872416321262.9375 entropy=18.2652 approx_kl=-0.0004 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 101340] reward=-119647919.0 actor_loss=0.2827 critic_loss=126307837360.3556 entropy=18.2991 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 101340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504392.6 mean_steps=14.3
|
|
[Episode 101350] reward=-2208113007.3 actor_loss=0.1965 critic_loss=7164663319413100.0000 entropy=18.2945 approx_kl=0.0005 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Episode 101360] reward=-126131419.0 actor_loss=0.3416 critic_loss=136571232256.0000 entropy=18.3063 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 101360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-489661.0 mean_steps=16.2
|
|
[Episode 101370] reward=-127926204.3 actor_loss=0.2164 critic_loss=142893145019.7333 entropy=18.3048 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 101380] reward=-138412227.7 actor_loss=0.3730 critic_loss=1262639043197.1555 entropy=18.2723 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 101380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464067.3 mean_steps=15.0
|
|
[Episode 101390] reward=-125415136.1 actor_loss=0.3381 critic_loss=133107115440.3556 entropy=18.2589 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 101400] reward=-5814769229.2 actor_loss=0.1637 critic_loss=24813244694379452.0000 entropy=18.2617 approx_kl=0.0031 kl_stop=0 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 101400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-317496504.2 mean_steps=32.2
|
|
[Episode 101410] reward=-5954215392.1 actor_loss=0.2420 critic_loss=24038907760729564.0000 entropy=18.2720 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 101420] reward=-21911194360.6 actor_loss=0.1038 critic_loss=158693905378976288.0000 entropy=18.2873 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 101420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512980.9 mean_steps=14.6
|
|
[Episode 101430] reward=-8363157362.4 actor_loss=0.2028 critic_loss=39851139432651888.0000 entropy=18.3000 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 101440] reward=-18415143857.5 actor_loss=0.0269 critic_loss=131251831080024656.0000 entropy=18.3113 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0553 front_blocked=0
|
|
[Eval 101440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480231.5 mean_steps=14.8
|
|
[Episode 101450] reward=-20245367458.5 actor_loss=2.5746 critic_loss=132918844495633952.0000 entropy=18.3121 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Episode 101460] reward=-23493320661.3 actor_loss=-0.0281 critic_loss=181401975670970048.0000 entropy=18.3175 approx_kl=0.0046 kl_stop=0 intervention_rate=0.0469 front_blocked=0
|
|
[Eval 101460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492728.1 mean_steps=14.2
|
|
[Episode 101470] reward=-6899277789.0 actor_loss=0.4525 critic_loss=41905090660118840.0000 entropy=18.3283 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 101480] reward=-9543867144.7 actor_loss=0.1630 critic_loss=66163709010987512.0000 entropy=18.3359 approx_kl=0.0006 kl_stop=0 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 101480] success_rate=0.400 qp_infeasible_rate=0.550 mean_return=-10484734477.2 mean_steps=174.5
|
|
[Episode 101490] reward=-11049879428.8 actor_loss=1.7291 critic_loss=87316088740792608.0000 entropy=18.3467 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 101500] reward=-5708323109.3 actor_loss=0.1564 critic_loss=33662886128065648.0000 entropy=18.3602 approx_kl=0.0008 kl_stop=0 intervention_rate=0.0983 front_blocked=0
|
|
[Eval 101500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-456115.1 mean_steps=15.9
|
|
[Episode 101510] reward=-126769604.5 actor_loss=0.1809 critic_loss=252367161480.5333 entropy=18.3696 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 101520] reward=-13965823453.1 actor_loss=0.1658 critic_loss=102862191933632608.0000 entropy=18.3551 approx_kl=-0.0002 kl_stop=0 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 101520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528814.2 mean_steps=14.3
|
|
[Episode 101530] reward=-40873513870.5 actor_loss=6.0124 critic_loss=420647493515782848.0000 entropy=18.3616 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0339 front_blocked=0
|
|
[Episode 101540] reward=-18167595482.7 actor_loss=4.4172 critic_loss=220824409741268800.0000 entropy=18.3714 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 101540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385497.6 mean_steps=16.4
|
|
[Episode 101550] reward=-42420215719.0 actor_loss=-0.0221 critic_loss=532071859854350720.0000 entropy=18.3769 approx_kl=0.0002 kl_stop=0 intervention_rate=0.0397 front_blocked=0
|
|
[Episode 101560] reward=-39816770827.2 actor_loss=-0.0555 critic_loss=410434447642064000.0000 entropy=18.3813 approx_kl=0.0018 kl_stop=0 intervention_rate=0.0397 front_blocked=0
|
|
[Eval 101560] success_rate=0.300 qp_infeasible_rate=0.650 mean_return=-8588922178.1 mean_steps=173.3
|
|
[Episode 101570] reward=-43236140749.2 actor_loss=-0.1518 critic_loss=437996976112262592.0000 entropy=18.3839 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0150 front_blocked=0
|
|
[Episode 101580] reward=-47486700657.0 actor_loss=-0.1525 critic_loss=499879581886237888.0000 entropy=18.3879 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0098 front_blocked=0
|
|
[Eval 101580] success_rate=0.350 qp_infeasible_rate=0.600 mean_return=-11154423267.7 mean_steps=173.6
|
|
[Episode 101590] reward=-50713368431.8 actor_loss=-0.1645 critic_loss=557288510020113024.0000 entropy=18.3955 approx_kl=0.0060 kl_stop=0 intervention_rate=0.0059 front_blocked=0
|
|
[Episode 101600] reward=-51011052836.5 actor_loss=-0.1225 critic_loss=667149273089616512.0000 entropy=18.4054 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0169 front_blocked=0
|
|
[Eval 101600] success_rate=0.500 qp_infeasible_rate=0.450 mean_return=-8841781451.5 mean_steps=176.2
|
|
[Episode 101610] reward=-55530573236.9 actor_loss=-0.1278 critic_loss=730520396941725184.0000 entropy=18.4163 approx_kl=0.0001 kl_stop=0 intervention_rate=0.0202 front_blocked=0
|
|
[Episode 101620] reward=-26522624804.9 actor_loss=2.1317 critic_loss=287894679217207168.0000 entropy=18.4351 approx_kl=0.0023 kl_stop=0 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 101620] success_rate=0.400 qp_infeasible_rate=0.450 mean_return=-46237766073.3 mean_steps=493.6
|
|
[Episode 101630] reward=-22539331808.2 actor_loss=0.0944 critic_loss=290744946645864704.0000 entropy=18.4546 approx_kl=-0.0015 kl_stop=0 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 101640] reward=-70873192814.8 actor_loss=-0.1869 critic_loss=1155502641749239808.0000 entropy=18.4870 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0000 front_blocked=0
|
|
[Eval 101640] success_rate=0.350 qp_infeasible_rate=0.450 mean_return=-58890765541.9 mean_steps=652.6
|
|
[Episode 101650] reward=-70738183326.1 actor_loss=-0.1835 critic_loss=949980640940485632.0000 entropy=18.4995 approx_kl=0.0049 kl_stop=0 intervention_rate=0.0013 front_blocked=0
|
|
[Episode 101660] reward=-46532261659.0 actor_loss=-0.0054 critic_loss=587830195024210560.0000 entropy=18.5197 approx_kl=0.0050 kl_stop=0 intervention_rate=0.0436 front_blocked=0
|
|
[Eval 101660] success_rate=0.600 qp_infeasible_rate=0.150 mean_return=-59989790661.5 mean_steps=815.8
|
|
[Episode 101670] reward=-57046956958.3 actor_loss=51.8092 critic_loss=793475971063812352.0000 entropy=18.5215 approx_kl=0.0043 kl_stop=0 intervention_rate=0.0221 front_blocked=0
|
|
[Episode 101680] reward=-64518184348.0 actor_loss=-0.1702 critic_loss=854945781231683712.0000 entropy=18.5302 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0078 front_blocked=0
|
|
[Eval 101680] success_rate=0.300 qp_infeasible_rate=0.550 mean_return=-30732116663.7 mean_steps=492.3
|
|
[Episode 101690] reward=-69261912788.2 actor_loss=-0.1735 critic_loss=879422110366171136.0000 entropy=18.5249 approx_kl=0.0016 kl_stop=0 intervention_rate=0.0026 front_blocked=0
|
|
[Episode 101700] reward=-76235142879.3 actor_loss=-0.1786 critic_loss=1378499625244187392.0000 entropy=18.5387 approx_kl=0.0036 kl_stop=0 intervention_rate=0.0072 front_blocked=0
|
|
[Eval 101700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-3218461813.2 mean_steps=59.3
|
|
[Episode 101710] reward=-54416218741.3 actor_loss=-0.0963 critic_loss=768689113884949760.0000 entropy=18.5590 approx_kl=0.0013 kl_stop=0 intervention_rate=0.0267 front_blocked=0
|
|
[Episode 101720] reward=-71890732135.9 actor_loss=-0.1341 critic_loss=1215023520216794112.0000 entropy=18.5720 approx_kl=-0.0020 kl_stop=0 intervention_rate=0.0143 front_blocked=0
|
|
[Eval 101720] success_rate=0.550 qp_infeasible_rate=0.400 mean_return=-20477115889.7 mean_steps=176.7
|
|
[Episode 101730] reward=-69998915475.9 actor_loss=-0.1687 critic_loss=1068376344695406592.0000 entropy=18.5764 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0065 front_blocked=0
|
|
[Episode 101740] reward=-45137297378.3 actor_loss=-0.1701 critic_loss=456820996191108736.0000 entropy=18.5786 approx_kl=-0.0024 kl_stop=0 intervention_rate=0.0098 front_blocked=0
|
|
[Eval 101740] success_rate=0.300 qp_infeasible_rate=0.600 mean_return=-14034948222.0 mean_steps=332.9
|
|
[Episode 101750] reward=-45790846775.5 actor_loss=-0.1709 critic_loss=495258643752340672.0000 entropy=18.5909 approx_kl=0.0006 kl_stop=0 intervention_rate=0.0117 front_blocked=0
|
|
[Episode 101760] reward=-47758852693.0 actor_loss=-0.1557 critic_loss=577830413556369152.0000 entropy=18.5912 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0137 front_blocked=0
|
|
[Eval 101760] success_rate=0.450 qp_infeasible_rate=0.400 mean_return=-17172724880.5 mean_steps=494.6
|
|
[Episode 101770] reward=-53542387941.5 actor_loss=18.9878 critic_loss=679035018219466880.0000 entropy=18.5990 approx_kl=0.0011 kl_stop=0 intervention_rate=0.0078 front_blocked=0
|
|
[Episode 101780] reward=-38401473357.7 actor_loss=5.5504 critic_loss=356552244572297600.0000 entropy=18.6115 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0111 front_blocked=0
|
|
[Eval 101780] success_rate=0.500 qp_infeasible_rate=0.250 mean_return=-26425854489.7 mean_steps=814.4
|
|
[Episode 101790] reward=-46579571508.3 actor_loss=-0.1822 critic_loss=461937398489832704.0000 entropy=18.6212 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0052 front_blocked=0
|
|
[Episode 101800] reward=-33588986329.1 actor_loss=0.5270 critic_loss=248262554823813568.0000 entropy=18.6444 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0052 front_blocked=0
|
|
[Eval 101800] success_rate=0.400 qp_infeasible_rate=0.450 mean_return=-14573961688.7 mean_steps=493.7
|
|
[Episode 101810] reward=-47520495435.0 actor_loss=-0.1310 critic_loss=523447833529942016.0000 entropy=18.6555 approx_kl=0.0011 kl_stop=0 intervention_rate=0.0208 front_blocked=0
|
|
[Episode 101820] reward=-27701243425.3 actor_loss=1.1957 critic_loss=274486473073570624.0000 entropy=18.6665 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Eval 101820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460487.2 mean_steps=15.4
|
|
[Episode 101830] reward=-33629711108.8 actor_loss=-0.0836 critic_loss=336899051946624768.0000 entropy=18.6546 approx_kl=0.0230 kl_stop=1 intervention_rate=0.0352 front_blocked=0
|
|
[Episode 101840] reward=-26660278193.9 actor_loss=-0.0195 critic_loss=185394908268144608.0000 entropy=18.6621 approx_kl=0.0003 kl_stop=0 intervention_rate=0.0182 front_blocked=0
|
|
[Eval 101840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-509534.7 mean_steps=15.2
|
|
[Episode 101850] reward=-35760087201.7 actor_loss=-0.1282 critic_loss=310986632731680192.0000 entropy=18.6696 approx_kl=0.0047 kl_stop=0 intervention_rate=0.0254 front_blocked=0
|
|
[Episode 101860] reward=-11669220944.2 actor_loss=0.3429 critic_loss=69138089704206040.0000 entropy=18.6912 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 101860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-667444.2 mean_steps=12.7
|
|
[Episode 101870] reward=-23653929773.9 actor_loss=0.6684 critic_loss=166678065375865152.0000 entropy=18.7206 approx_kl=0.0036 kl_stop=0 intervention_rate=0.0423 front_blocked=0
|
|
[Episode 101880] reward=-34644470642.9 actor_loss=-0.1784 critic_loss=261899674080445888.0000 entropy=18.7357 approx_kl=-0.0010 kl_stop=0 intervention_rate=0.0085 front_blocked=0
|
|
[Eval 101880] success_rate=0.450 qp_infeasible_rate=0.500 mean_return=-3223684533.3 mean_steps=175.1
|
|
[Episode 101890] reward=-18789812086.5 actor_loss=0.0281 critic_loss=169509911976121312.0000 entropy=18.7451 approx_kl=0.0010 kl_stop=0 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 101900] reward=-25169495797.1 actor_loss=-0.1030 critic_loss=162311634405358144.0000 entropy=18.7455 approx_kl=0.0007 kl_stop=0 intervention_rate=0.0273 front_blocked=0
|
|
[Eval 101900] success_rate=0.550 qp_infeasible_rate=0.350 mean_return=-6706083144.8 mean_steps=336.8
|
|
[Episode 101910] reward=-11173626155.3 actor_loss=-0.0221 critic_loss=47738610457667176.0000 entropy=18.7731 approx_kl=-0.0000 kl_stop=0 intervention_rate=0.0566 front_blocked=0
|
|
[Episode 101920] reward=-9142767999.9 actor_loss=0.0200 critic_loss=54279182029306080.0000 entropy=18.7858 approx_kl=0.0013 kl_stop=0 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 101920] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-300784.1 mean_steps=18.1
|
|
[Episode 101930] reward=-10756227851.3 actor_loss=-0.0146 critic_loss=60278530837193248.0000 entropy=18.8033 approx_kl=0.0019 kl_stop=0 intervention_rate=0.0671 front_blocked=0
|
|
[Episode 101940] reward=-1558266159.7 actor_loss=32.3129 critic_loss=3196755056892313.5000 entropy=18.8122 approx_kl=-0.0001 kl_stop=0 intervention_rate=0.1146 front_blocked=0
|
|
[Eval 101940] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-281007.2 mean_steps=18.0
|
|
[Episode 101950] reward=-2160754488.2 actor_loss=0.2260 critic_loss=9104933072853038.0000 entropy=18.8544 approx_kl=0.0006 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 101960] reward=-1225361189.8 actor_loss=0.2695 critic_loss=2503121432640808.0000 entropy=18.8469 approx_kl=0.0003 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Eval 101960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-345393.4 mean_steps=17.6
|
|
[Episode 101970] reward=-130666304.1 actor_loss=0.1292 critic_loss=153796563945.2444 entropy=18.8247 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 101980] reward=-127462517.4 actor_loss=0.2358 critic_loss=146466920129.4222 entropy=18.7691 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 101980] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-334374.6 mean_steps=17.1
|
|
[Episode 101990] reward=-1589773150.7 actor_loss=0.2479 critic_loss=6849663718968525.0000 entropy=18.7571 approx_kl=0.0001 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 102000] reward=-131916042.6 actor_loss=0.2981 critic_loss=165774770904.1778 entropy=18.7459 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 102000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-494947.3 mean_steps=15.4
|
|
[Episode 102010] reward=-17432669803.2 actor_loss=0.0051 critic_loss=107428456730364832.0000 entropy=18.7563 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 102020] reward=-6984619476.9 actor_loss=0.1322 critic_loss=35625022848499712.0000 entropy=18.7338 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0990 front_blocked=0
|
|
[Eval 102020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446517.5 mean_steps=15.7
|
|
[Episode 102030] reward=-1547611290.0 actor_loss=0.3156 critic_loss=3465884065745188.5000 entropy=18.7204 approx_kl=0.0034 kl_stop=1 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 102040] reward=-725002801.8 actor_loss=0.2185 critic_loss=1042795671951769.6250 entropy=18.7298 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 102040] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-400241.8 mean_steps=17.9
|
|
[Episode 102050] reward=-125573609.6 actor_loss=0.2522 critic_loss=151565920574.5778 entropy=18.7019 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 102060] reward=-131863023.9 actor_loss=0.2357 critic_loss=161280379562.6667 entropy=18.6689 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 102060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480311.0 mean_steps=15.0
|
|
[Episode 102070] reward=-128947793.8 actor_loss=0.2805 critic_loss=146463170377.9556 entropy=18.6642 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 102080] reward=-480095884.4 actor_loss=0.3479 critic_loss=292977133637541.0000 entropy=18.6382 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 102080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427350.3 mean_steps=14.3
|
|
[Episode 102090] reward=-130710325.5 actor_loss=0.2321 critic_loss=146495911981.5111 entropy=18.6247 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 102100] reward=-129619969.1 actor_loss=0.2754 critic_loss=146880726812.4445 entropy=18.6140 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 102100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-477411.6 mean_steps=17.5
|
|
[Episode 102110] reward=-131474591.4 actor_loss=0.2334 critic_loss=159591396875.3778 entropy=18.6025 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 102120] reward=-2999365912.1 actor_loss=0.2577 critic_loss=17305149419110036.0000 entropy=18.6179 approx_kl=0.0006 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 102120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584748.3 mean_steps=12.9
|
|
[Episode 102130] reward=-31995813776.4 actor_loss=-0.1372 critic_loss=254082382725603520.0000 entropy=18.6288 approx_kl=0.0044 kl_stop=0 intervention_rate=0.0215 front_blocked=0
|
|
[Episode 102140] reward=-2866525332.5 actor_loss=0.2397 critic_loss=11646993206922444.0000 entropy=18.6476 approx_kl=-0.0006 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 102140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-421370.0 mean_steps=16.8
|
|
[Episode 102150] reward=-8476656490.8 actor_loss=0.1063 critic_loss=48827588248653008.0000 entropy=18.6447 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 102160] reward=-129838165.6 actor_loss=0.2121 critic_loss=152169479099.7333 entropy=18.6544 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 102160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-310279.8 mean_steps=17.0
|
|
[Episode 102170] reward=-139241665.2 actor_loss=0.2538 critic_loss=572007245414.4000 entropy=18.6509 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 102180] reward=-20401099573.3 actor_loss=1.3283 critic_loss=162858207743923552.0000 entropy=18.6451 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Eval 102180] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-284548.4 mean_steps=17.9
|
|
[Episode 102190] reward=-1631246644.9 actor_loss=0.2458 critic_loss=5785218154828049.0000 entropy=18.6589 approx_kl=-0.0004 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 102200] reward=-126722259.6 actor_loss=0.3267 critic_loss=139207363606.7556 entropy=18.6791 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 102200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442113.8 mean_steps=15.9
|
|
[Episode 102210] reward=-5948604008.7 actor_loss=0.5231 critic_loss=62385950171262704.0000 entropy=18.6614 approx_kl=-0.0001 kl_stop=0 intervention_rate=0.1074 front_blocked=0
|
|
[Episode 102220] reward=-127210052.8 actor_loss=0.2853 critic_loss=142054712353.0323 entropy=18.6575 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 102220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528917.6 mean_steps=14.6
|
|
[Episode 102230] reward=-1649097142.6 actor_loss=0.2972 critic_loss=6049741116334990.0000 entropy=18.6604 approx_kl=-0.0025 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 102240] reward=-122979278.0 actor_loss=0.2662 critic_loss=135928638668.8000 entropy=18.6768 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 102240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-602989.8 mean_steps=12.1
|
|
[Episode 102250] reward=-128146767.2 actor_loss=0.2505 critic_loss=157566621403.4286 entropy=18.6744 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 102260] reward=-128773729.4 actor_loss=0.2306 critic_loss=155021242459.0222 entropy=18.6669 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 102260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-510540.8 mean_steps=15.1
|
|
[Episode 102270] reward=-126664980.7 actor_loss=0.1989 critic_loss=156294766020.4651 entropy=18.6522 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 102280] reward=-128931543.6 actor_loss=0.3291 critic_loss=151368143485.1555 entropy=18.6365 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 102280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-529738.3 mean_steps=15.4
|
|
[Episode 102290] reward=-125196454.3 actor_loss=0.3037 critic_loss=140896561834.6667 entropy=18.6378 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 102300] reward=-120782756.6 actor_loss=0.3157 critic_loss=138151510198.0444 entropy=18.6114 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 102300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-519815.3 mean_steps=15.3
|
|
[Episode 102310] reward=-132736026.7 actor_loss=0.3716 critic_loss=359158531646.4390 entropy=18.6018 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 102320] reward=-133872292.8 actor_loss=0.3055 critic_loss=221549482621.1555 entropy=18.5892 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 102320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-488520.3 mean_steps=16.1
|
|
[Episode 102330] reward=-129523865.0 actor_loss=0.2923 critic_loss=147575908670.5778 entropy=18.5840 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 102340] reward=-129508804.3 actor_loss=0.2725 critic_loss=146792817914.3111 entropy=18.5642 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 102340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550469.5 mean_steps=14.7
|
|
[Episode 102350] reward=-131328901.5 actor_loss=0.1951 critic_loss=138040304162.1333 entropy=18.5531 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 102360] reward=-129149069.4 actor_loss=0.3235 critic_loss=135808099896.8889 entropy=18.5708 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 102360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517128.5 mean_steps=15.4
|
|
[Episode 102370] reward=-127415784.5 actor_loss=0.3488 critic_loss=138193303233.4222 entropy=18.5512 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 102380] reward=-3604282154.4 actor_loss=0.2486 critic_loss=22316204562602756.0000 entropy=18.5493 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Eval 102380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-422888.6 mean_steps=13.5
|
|
[Episode 102390] reward=-125864755.7 actor_loss=0.2655 critic_loss=136687220940.8000 entropy=18.5434 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 102400] reward=-133617163.7 actor_loss=0.3311 critic_loss=275187437372.9524 entropy=18.5560 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 102400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489249.6 mean_steps=14.6
|
|
[Episode 102410] reward=-133609316.1 actor_loss=0.3047 critic_loss=211283947304.4211 entropy=18.5616 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 102420] reward=-128153031.3 actor_loss=0.2901 critic_loss=137328098067.6923 entropy=18.5515 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 102420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-579992.5 mean_steps=14.4
|
|
[Episode 102430] reward=-944528618.2 actor_loss=0.6562 critic_loss=1295914395448115.2500 entropy=18.5330 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 102440] reward=-128494861.1 actor_loss=0.2544 critic_loss=134172604461.5111 entropy=18.5103 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 102440] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-274526.3 mean_steps=17.4
|
|
[Episode 102450] reward=-129164038.7 actor_loss=0.2509 critic_loss=138337409768.7273 entropy=18.4945 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 102460] reward=-120949030.0 actor_loss=0.3806 critic_loss=126980531814.4000 entropy=18.4757 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 102460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556952.6 mean_steps=13.6
|
|
[Episode 102470] reward=-128944696.8 actor_loss=0.2341 critic_loss=135213104215.7714 entropy=18.4787 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 102480] reward=-128194876.8 actor_loss=0.1965 critic_loss=137226345793.8286 entropy=18.4713 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 102480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507059.2 mean_steps=13.2
|
|
[Episode 102490] reward=-130732189.4 actor_loss=0.3171 critic_loss=148093052994.0645 entropy=18.4631 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 102500] reward=-121952025.6 actor_loss=0.2949 critic_loss=138209983601.7778 entropy=18.4630 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 102500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525803.0 mean_steps=14.1
|
|
[Episode 102510] reward=-2736615760.3 actor_loss=0.2742 critic_loss=16389476135498184.0000 entropy=18.4699 approx_kl=-0.0003 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 102520] reward=-130077469.0 actor_loss=0.2473 critic_loss=151957982776.8889 entropy=18.4855 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 102520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500220.9 mean_steps=15.2
|
|
[Episode 102530] reward=-1177096888.8 actor_loss=0.2017 critic_loss=1675475114373211.0000 entropy=18.4874 approx_kl=-0.0006 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Episode 102540] reward=-126006405.8 actor_loss=0.2494 critic_loss=133917866141.5385 entropy=18.4893 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 102540] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-356480.6 mean_steps=16.9
|
|
[Episode 102550] reward=-121430075.6 actor_loss=0.3396 critic_loss=139052624058.1818 entropy=18.4916 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 102560] reward=-123899476.4 actor_loss=0.3210 critic_loss=130697640991.0303 entropy=18.4941 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 102560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567867.1 mean_steps=12.7
|
|
[Episode 102570] reward=-126148808.1 actor_loss=0.2334 critic_loss=135818427155.6923 entropy=18.4759 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 102580] reward=-128749678.2 actor_loss=0.2067 critic_loss=138430210048.0000 entropy=18.4535 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 102580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567098.6 mean_steps=12.8
|
|
[Episode 102590] reward=-128542221.6 actor_loss=0.2640 critic_loss=134061563721.9556 entropy=18.4315 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 102600] reward=-125024613.9 actor_loss=0.3201 critic_loss=134100481755.4286 entropy=18.4341 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 102600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-616877.8 mean_steps=14.2
|
|
[Episode 102610] reward=-124960985.2 actor_loss=0.2477 critic_loss=134067522218.6667 entropy=18.4159 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 102620] reward=-127544620.5 actor_loss=0.3200 critic_loss=133997210702.7692 entropy=18.3785 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 102620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-647743.4 mean_steps=13.2
|
|
[Episode 102630] reward=-126616739.7 actor_loss=0.2549 critic_loss=139385532274.7586 entropy=18.3560 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 102640] reward=-130341543.7 actor_loss=0.2933 critic_loss=143643093674.6667 entropy=18.3492 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 102640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503486.4 mean_steps=15.0
|
|
[Episode 102650] reward=-123446504.4 actor_loss=0.3724 critic_loss=127831164700.4444 entropy=18.3354 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 102660] reward=-130213023.1 actor_loss=0.2139 critic_loss=142326288875.5200 entropy=18.3237 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 102660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-637478.4 mean_steps=12.1
|
|
[Episode 102670] reward=-120330830.6 actor_loss=0.3191 critic_loss=125865789758.5778 entropy=18.3092 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 102680] reward=-126960631.6 actor_loss=0.3039 critic_loss=133825916712.4211 entropy=18.3148 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 102680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437820.1 mean_steps=14.8
|
|
[Episode 102690] reward=-125377614.7 actor_loss=0.2669 critic_loss=132169597838.2222 entropy=18.3007 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 102700] reward=-122135450.1 actor_loss=0.1728 critic_loss=123201762378.9268 entropy=18.2905 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 102700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462754.7 mean_steps=15.8
|
|
[Episode 102710] reward=-122352329.2 actor_loss=0.3475 critic_loss=126107076432.4571 entropy=18.2811 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 102720] reward=-123849484.1 actor_loss=0.3578 critic_loss=140051379541.3333 entropy=18.2659 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 102720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-621819.1 mean_steps=11.9
|
|
[Episode 102730] reward=-123375291.0 actor_loss=0.3231 critic_loss=123770700422.7368 entropy=18.2498 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 102740] reward=-131014436.3 actor_loss=0.3891 critic_loss=516139781026.9091 entropy=18.2504 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 102740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-644395.5 mean_steps=13.2
|
|
[Episode 102750] reward=-119071813.2 actor_loss=0.2545 critic_loss=124665962296.1951 entropy=18.2338 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 102760] reward=-125023658.0 actor_loss=0.2097 critic_loss=126610370104.8889 entropy=18.2234 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 102760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430253.5 mean_steps=15.3
|
|
[Episode 102770] reward=-126751468.6 actor_loss=0.1632 critic_loss=131180907283.6923 entropy=18.2186 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 102780] reward=-123040034.5 actor_loss=0.2506 critic_loss=123989355178.6667 entropy=18.2069 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 102780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535050.2 mean_steps=14.4
|
|
[Episode 102790] reward=-125576912.1 actor_loss=0.1545 critic_loss=126416738713.6000 entropy=18.1998 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 102800] reward=-125565679.8 actor_loss=0.2578 critic_loss=124984557999.1579 entropy=18.1909 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 102800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-373925.7 mean_steps=15.3
|
|
[Episode 102810] reward=-127904605.5 actor_loss=0.3580 critic_loss=132165976844.1905 entropy=18.1800 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 102820] reward=-120122123.8 actor_loss=0.2280 critic_loss=125351971498.6667 entropy=18.1630 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 102820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-384818.1 mean_steps=15.2
|
|
[Episode 102830] reward=-119149634.7 actor_loss=0.3407 critic_loss=118599659520.0000 entropy=18.1374 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 102840] reward=-121722453.5 actor_loss=0.2806 critic_loss=125710016876.0889 entropy=18.1426 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 102840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417990.9 mean_steps=15.2
|
|
[Episode 102850] reward=-119303256.5 actor_loss=0.3854 critic_loss=118746230387.6129 entropy=18.1246 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 102860] reward=-117649909.6 actor_loss=0.2327 critic_loss=116326412483.0476 entropy=18.1034 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 102860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547691.0 mean_steps=14.4
|
|
[Episode 102870] reward=-122452208.2 actor_loss=0.2444 critic_loss=126198805925.6471 entropy=18.0972 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 102880] reward=-123509502.4 actor_loss=0.2803 critic_loss=126378151389.8667 entropy=18.0765 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 102880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475931.8 mean_steps=13.9
|
|
[Episode 102890] reward=-122833112.6 actor_loss=0.2775 critic_loss=119682353444.5714 entropy=18.0529 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 102900] reward=-123884083.9 actor_loss=0.3377 critic_loss=121376974524.6316 entropy=18.0289 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 102900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541972.1 mean_steps=14.3
|
|
[Episode 102910] reward=-122470508.5 actor_loss=0.2962 critic_loss=123335798935.7037 entropy=18.0108 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 102920] reward=-121274800.5 actor_loss=0.1705 critic_loss=123439633274.4348 entropy=18.0176 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 102920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454218.5 mean_steps=14.6
|
|
[Episode 102930] reward=-118738375.1 actor_loss=0.2062 critic_loss=121418338619.0769 entropy=17.9988 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 102940] reward=-124367102.2 actor_loss=0.2213 critic_loss=121543081098.3784 entropy=18.0017 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 102940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-568945.7 mean_steps=14.3
|
|
[Episode 102950] reward=-120176135.3 actor_loss=0.2829 critic_loss=122052724736.0000 entropy=18.0001 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 102960] reward=-123732115.7 actor_loss=0.2794 critic_loss=119413839553.4222 entropy=18.0064 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 102960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556118.5 mean_steps=13.2
|
|
[Episode 102970] reward=-128578407.0 actor_loss=0.2924 critic_loss=128296638133.6774 entropy=17.9948 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 102980] reward=-123993052.0 actor_loss=0.2901 critic_loss=123814508305.8605 entropy=17.9729 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 102980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555549.1 mean_steps=12.7
|
|
[Episode 102990] reward=-125501504.8 actor_loss=0.2255 critic_loss=120736825344.0000 entropy=17.9665 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 103000] reward=-122903175.3 actor_loss=0.2893 critic_loss=123874842721.5238 entropy=17.9565 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 103000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513504.8 mean_steps=14.2
|
|
[Episode 103010] reward=-117937123.5 actor_loss=0.3927 critic_loss=115371055891.6923 entropy=17.9447 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 103020] reward=-118512463.4 actor_loss=0.3494 critic_loss=119255884712.2286 entropy=17.9185 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 103020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-412248.3 mean_steps=16.2
|
|
[Episode 103030] reward=-123697360.4 actor_loss=0.3363 critic_loss=123566649835.5200 entropy=17.9305 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 103040] reward=-121646031.0 actor_loss=0.3288 critic_loss=120581482819.3684 entropy=17.9262 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 103040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486835.4 mean_steps=14.6
|
|
[Episode 103050] reward=-119382942.7 actor_loss=0.2321 critic_loss=116848872749.1765 entropy=17.9227 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 103060] reward=-127663372.5 actor_loss=0.2978 critic_loss=130668671622.7368 entropy=17.9119 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 103060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452845.8 mean_steps=15.7
|
|
[Episode 103070] reward=-123374009.4 actor_loss=0.2519 critic_loss=123408955659.1304 entropy=17.9123 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 103080] reward=-126958424.2 actor_loss=0.2175 critic_loss=123740659712.0000 entropy=17.9066 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 103080] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-367271.5 mean_steps=17.9
|
|
[Episode 103090] reward=-117469137.4 actor_loss=0.3152 critic_loss=111573346233.3793 entropy=17.9309 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 103100] reward=-121503905.9 actor_loss=0.2789 critic_loss=118437249871.4483 entropy=17.9238 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 103100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448277.3 mean_steps=15.4
|
|
[Episode 103110] reward=-121557763.8 actor_loss=0.3459 critic_loss=119723424367.3044 entropy=17.9159 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 103120] reward=-125500383.9 actor_loss=0.2590 critic_loss=123768488881.2308 entropy=17.9070 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 103120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-618939.8 mean_steps=12.1
|
|
[Episode 103130] reward=-126367820.1 actor_loss=0.2787 critic_loss=124772115524.2667 entropy=17.8917 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 103140] reward=-125381859.2 actor_loss=0.2069 critic_loss=123215023353.0811 entropy=17.8676 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 103140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-395336.7 mean_steps=15.3
|
|
[Episode 103150] reward=-117017369.7 actor_loss=0.3278 critic_loss=110759548791.4667 entropy=17.8475 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 103160] reward=-118416945.8 actor_loss=0.3048 critic_loss=117414731229.8667 entropy=17.8435 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 103160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551880.0 mean_steps=13.2
|
|
[Episode 103170] reward=-115661499.3 actor_loss=0.3781 critic_loss=108772912469.3333 entropy=17.8408 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 103180] reward=-121239749.4 actor_loss=0.2285 critic_loss=120298391096.8889 entropy=17.8422 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 103180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-594408.4 mean_steps=14.7
|
|
[Episode 103190] reward=-118280044.2 actor_loss=0.2657 critic_loss=112848421741.7143 entropy=17.8447 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 103200] reward=-121075834.2 actor_loss=0.2360 critic_loss=113174064878.9333 entropy=17.8469 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 103200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475950.3 mean_steps=14.2
|
|
[Episode 103210] reward=-117853137.9 actor_loss=0.2870 critic_loss=112275109821.9355 entropy=17.8459 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 103220] reward=-116918874.6 actor_loss=0.3188 critic_loss=113235909563.7333 entropy=17.8408 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 103220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-607061.4 mean_steps=12.8
|
|
[Episode 103230] reward=-120762268.6 actor_loss=0.3253 critic_loss=119102800253.0233 entropy=17.8314 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 103240] reward=-120186307.1 actor_loss=0.2714 critic_loss=114777652617.8462 entropy=17.8351 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 103240] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-302296.6 mean_steps=16.7
|
|
[Episode 103250] reward=-116293044.5 actor_loss=0.2063 critic_loss=111887834368.0000 entropy=17.8282 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 103260] reward=-116069382.7 actor_loss=0.3316 critic_loss=110630691002.1818 entropy=17.8008 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 103260] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-282939.5 mean_steps=16.5
|
|
[Episode 103270] reward=-122235447.5 actor_loss=0.2799 critic_loss=121417414842.1818 entropy=17.7957 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 103280] reward=-118877548.0 actor_loss=0.3298 critic_loss=112069637764.7407 entropy=17.7872 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 103280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529281.2 mean_steps=14.6
|
|
[Episode 103290] reward=-116995728.4 actor_loss=0.3299 critic_loss=117492887096.8889 entropy=17.7696 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 103300] reward=-121004918.1 actor_loss=0.3651 critic_loss=113689152090.3529 entropy=17.7552 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 103300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469036.9 mean_steps=14.2
|
|
[Episode 103310] reward=-119215843.6 actor_loss=0.1989 critic_loss=112090473708.3077 entropy=17.7412 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 103320] reward=-116637011.9 actor_loss=0.3635 critic_loss=110974595163.0222 entropy=17.7219 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 103320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525111.1 mean_steps=14.2
|
|
[Episode 103330] reward=-120902874.6 actor_loss=0.2127 critic_loss=119113648225.5238 entropy=17.7123 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 103340] reward=-113660461.2 actor_loss=0.3207 critic_loss=103921361542.7368 entropy=17.7081 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 103340] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-649762.1 mean_steps=10.3
|
|
[Episode 103350] reward=-117374929.2 actor_loss=0.2553 critic_loss=113795280680.4211 entropy=17.7011 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 103360] reward=-118749320.2 actor_loss=0.2669 critic_loss=117287020013.0370 entropy=17.6941 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 103360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477158.1 mean_steps=14.7
|
|
[Episode 103370] reward=-113741131.0 actor_loss=0.2579 critic_loss=104848168158.6087 entropy=17.7051 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 103380] reward=-117968517.6 actor_loss=0.3440 critic_loss=108979029424.3556 entropy=17.6814 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 103380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577889.2 mean_steps=13.3
|
|
[Episode 103390] reward=-121735023.3 actor_loss=0.2386 critic_loss=114411067339.4872 entropy=17.6902 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 103400] reward=-121598507.0 actor_loss=0.3215 critic_loss=119312354690.8445 entropy=17.6688 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 103400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624634.5 mean_steps=13.2
|
|
[Episode 103410] reward=-121396011.1 actor_loss=0.3542 critic_loss=116786806601.9556 entropy=17.6429 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 103420] reward=-114305668.2 actor_loss=0.3373 critic_loss=105092466278.4000 entropy=17.6111 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 103420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462951.6 mean_steps=14.9
|
|
[Episode 103430] reward=-117814108.1 actor_loss=0.3511 critic_loss=111919201848.8889 entropy=17.6144 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 103440] reward=-121529982.7 actor_loss=0.2161 critic_loss=115164448722.4889 entropy=17.6157 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 103440] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-570526.4 mean_steps=11.8
|
|
[Episode 103450] reward=-121656710.7 actor_loss=0.3466 critic_loss=111975896769.4222 entropy=17.6184 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 103460] reward=-120798823.9 actor_loss=0.2354 critic_loss=111571781586.4889 entropy=17.6067 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 103460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407460.5 mean_steps=16.4
|
|
[Episode 103470] reward=-117641406.3 actor_loss=0.3223 critic_loss=113335214080.0000 entropy=17.6023 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 103480] reward=-116055471.0 actor_loss=0.3973 critic_loss=109678459835.7333 entropy=17.6001 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 103480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-427260.3 mean_steps=16.4
|
|
[Episode 103490] reward=-122553290.0 actor_loss=0.2644 critic_loss=115505965511.1111 entropy=17.6039 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 103500] reward=-126289047.4 actor_loss=0.1196 critic_loss=114256896248.2424 entropy=17.6290 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 103500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-516333.6 mean_steps=14.9
|
|
[Episode 103510] reward=-119106561.9 actor_loss=0.3130 critic_loss=117997127680.0000 entropy=17.6270 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 103520] reward=-114649569.2 actor_loss=0.2395 critic_loss=106216127692.8000 entropy=17.6114 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 103520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437351.8 mean_steps=15.4
|
|
[Episode 103530] reward=-118841374.4 actor_loss=0.3382 critic_loss=119997364087.4667 entropy=17.6090 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 103540] reward=-121513057.8 actor_loss=0.2526 critic_loss=122593441382.4000 entropy=17.6004 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 103540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487373.3 mean_steps=14.8
|
|
[Episode 103550] reward=-118356918.2 actor_loss=0.2511 critic_loss=118529984466.4889 entropy=17.5879 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 103560] reward=-110946772.7 actor_loss=0.3194 critic_loss=105077397185.4222 entropy=17.5955 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 103560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499345.5 mean_steps=14.3
|
|
[Episode 103570] reward=-118195979.4 actor_loss=0.3320 critic_loss=113182732573.7674 entropy=17.6044 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 103580] reward=-118896950.6 actor_loss=0.2357 critic_loss=110653891242.6667 entropy=17.6041 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 103580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-555787.7 mean_steps=14.7
|
|
[Episode 103590] reward=-112983665.2 actor_loss=0.3155 critic_loss=107883790336.0000 entropy=17.5855 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 103600] reward=-118779955.0 actor_loss=0.2959 critic_loss=111667923538.5806 entropy=17.6010 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 103600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542377.1 mean_steps=13.4
|
|
[Episode 103610] reward=-192488508.1 actor_loss=0.3387 critic_loss=18844213354313.9570 entropy=17.5944 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 103620] reward=-117956483.8 actor_loss=0.3184 critic_loss=114520682613.0286 entropy=17.6046 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 103620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-545803.9 mean_steps=14.5
|
|
[Episode 103630] reward=-120020066.0 actor_loss=0.2873 critic_loss=134228135298.8445 entropy=17.6146 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 103640] reward=-115366572.2 actor_loss=0.2159 critic_loss=108859405471.2889 entropy=17.6148 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 103640] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-632613.9 mean_steps=12.4
|
|
[Episode 103650] reward=-115476396.5 actor_loss=0.3039 critic_loss=109429603623.8222 entropy=17.6057 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 103660] reward=-109840141.0 actor_loss=0.4080 critic_loss=99984689444.5714 entropy=17.6316 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 103660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-497741.7 mean_steps=13.1
|
|
[Episode 103670] reward=-115961916.8 actor_loss=0.3189 critic_loss=110491990071.3513 entropy=17.6148 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 103680] reward=-656575404.9 actor_loss=0.9746 critic_loss=720061553471943.1250 entropy=17.6337 approx_kl=0.0026 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 103680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-606400.1 mean_steps=12.9
|
|
[Episode 103690] reward=-123556016.9 actor_loss=0.1896 critic_loss=126913059288.6154 entropy=17.6287 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 103700] reward=-115722985.0 actor_loss=0.2507 critic_loss=108763673653.8947 entropy=17.6424 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 103700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-562224.9 mean_steps=14.7
|
|
[Episode 103710] reward=-113709510.0 actor_loss=0.3463 critic_loss=119535654466.7826 entropy=17.6421 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 103720] reward=-114578309.7 actor_loss=0.2889 critic_loss=118387131857.4545 entropy=17.6608 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 103720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590923.0 mean_steps=12.8
|
|
[Episode 103730] reward=-114498824.5 actor_loss=0.3646 critic_loss=107626710205.6296 entropy=17.6429 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 103740] reward=-113580254.9 actor_loss=0.3251 critic_loss=107997085096.5854 entropy=17.6262 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 103740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-474709.1 mean_steps=12.8
|
|
[Episode 103750] reward=-113516047.1 actor_loss=0.3332 critic_loss=104423056452.2667 entropy=17.6211 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 103760] reward=-116075739.1 actor_loss=0.3826 critic_loss=110505246720.0000 entropy=17.6295 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 103760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545554.4 mean_steps=13.8
|
|
[Episode 103770] reward=-114996426.0 actor_loss=0.3757 critic_loss=103519052218.8108 entropy=17.6165 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 103780] reward=-117567653.5 actor_loss=0.2402 critic_loss=129673793959.7241 entropy=17.6106 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 103780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-630202.0 mean_steps=13.0
|
|
[Episode 103790] reward=-115916975.5 actor_loss=0.2954 critic_loss=110625192398.4516 entropy=17.5990 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 103800] reward=-116574896.0 actor_loss=0.3043 critic_loss=107719526466.0645 entropy=17.5944 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 103800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-392060.7 mean_steps=14.4
|
|
[Episode 103810] reward=-118352803.0 actor_loss=0.1956 critic_loss=116242906914.5946 entropy=17.6189 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 103820] reward=-118286137.8 actor_loss=0.2798 critic_loss=139382859422.8965 entropy=17.6113 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 103820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529622.5 mean_steps=14.4
|
|
[Episode 103830] reward=-114050680.0 actor_loss=0.2183 critic_loss=111143472332.8000 entropy=17.6124 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 103840] reward=-115635993.0 actor_loss=0.2527 critic_loss=105559125309.7931 entropy=17.6097 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 103840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-618994.9 mean_steps=13.1
|
|
[Episode 103850] reward=-115965094.6 actor_loss=0.2808 critic_loss=112476317598.4762 entropy=17.6148 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 103860] reward=-119614386.2 actor_loss=0.3021 critic_loss=110893977600.0000 entropy=17.6230 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 103860] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-335284.7 mean_steps=17.0
|
|
[Episode 103870] reward=-113661278.4 actor_loss=0.3319 critic_loss=106471065190.4000 entropy=17.6089 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 103880] reward=-111860757.6 actor_loss=0.3230 critic_loss=106332354241.4222 entropy=17.6203 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 103880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517590.5 mean_steps=13.1
|
|
[Episode 103890] reward=-115424486.5 actor_loss=0.2477 critic_loss=114081835690.6667 entropy=17.6197 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 103900] reward=-696529595.1 actor_loss=0.4776 critic_loss=1010060435617655.5000 entropy=17.6084 approx_kl=-0.0023 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 103900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463096.1 mean_steps=14.2
|
|
[Episode 103910] reward=-121747551.7 actor_loss=0.2912 critic_loss=115875549683.5122 entropy=17.6098 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 103920] reward=-115963854.4 actor_loss=0.3003 critic_loss=109172336776.5333 entropy=17.6004 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 103920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-589656.6 mean_steps=13.8
|
|
[Episode 103930] reward=-115805514.4 actor_loss=0.3423 critic_loss=106199067058.4242 entropy=17.6038 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 103940] reward=-113769037.6 actor_loss=0.3676 critic_loss=104765215708.6897 entropy=17.6106 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 103940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-602655.5 mean_steps=12.2
|
|
[Episode 103950] reward=-116225594.5 actor_loss=0.3128 critic_loss=109586756812.8000 entropy=17.6047 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 103960] reward=-119234233.4 actor_loss=0.3154 critic_loss=114622512241.7778 entropy=17.6017 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 103960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426042.9 mean_steps=15.8
|
|
[Episode 103970] reward=-114820180.6 actor_loss=0.2868 critic_loss=104050538566.6207 entropy=17.5969 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 103980] reward=-116801557.8 actor_loss=0.2411 critic_loss=106489723352.6154 entropy=17.6053 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 103980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555449.1 mean_steps=13.5
|
|
[Episode 103990] reward=-116074227.7 actor_loss=0.3460 critic_loss=109150832640.0000 entropy=17.5908 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 104000] reward=-116552828.2 actor_loss=0.4109 critic_loss=110138341376.0000 entropy=17.6000 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 104000] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-682307.1 mean_steps=11.3
|
|
[Episode 104010] reward=-113306111.1 actor_loss=0.3319 critic_loss=105773451311.6279 entropy=17.6090 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 104020] reward=-122845292.5 actor_loss=0.3071 critic_loss=124636027107.5556 entropy=17.6199 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 104020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-553940.5 mean_steps=12.5
|
|
[Episode 104030] reward=-114883917.3 actor_loss=0.3248 critic_loss=112269661388.8000 entropy=17.6046 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 104040] reward=-120543812.7 actor_loss=0.2741 critic_loss=111853897374.8965 entropy=17.6040 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 104040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-598021.9 mean_steps=14.2
|
|
[Episode 104050] reward=-117587879.4 actor_loss=0.3656 critic_loss=109746128310.8571 entropy=17.6029 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 104060] reward=-114963309.9 actor_loss=0.3192 critic_loss=109819402831.6444 entropy=17.6127 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 104060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-373228.5 mean_steps=15.2
|
|
[Episode 104070] reward=-116604004.0 actor_loss=0.2392 critic_loss=112875639528.7273 entropy=17.6134 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 104080] reward=-116158761.3 actor_loss=0.2700 critic_loss=111145098873.9048 entropy=17.6233 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 104080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-724164.3 mean_steps=12.4
|
|
[Episode 104090] reward=-113761137.3 actor_loss=0.3665 critic_loss=112228149384.5333 entropy=17.6286 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 104100] reward=-117069591.1 actor_loss=0.2336 critic_loss=112207259602.4889 entropy=17.6133 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 104100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427863.3 mean_steps=15.8
|
|
[Episode 104110] reward=-120850374.4 actor_loss=0.3023 critic_loss=116040643523.7647 entropy=17.6174 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 104120] reward=-114035070.3 actor_loss=0.3568 critic_loss=105822611228.4444 entropy=17.6134 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 104120] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-312477.0 mean_steps=16.6
|
|
[Episode 104130] reward=-119293075.2 actor_loss=0.3692 critic_loss=212542741897.8462 entropy=17.6141 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 104140] reward=-117043459.4 actor_loss=0.2623 critic_loss=122619172756.2105 entropy=17.5959 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 104140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541651.7 mean_steps=13.4
|
|
[Episode 104150] reward=-117528027.8 actor_loss=0.2944 critic_loss=110816168813.7143 entropy=17.6212 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 104160] reward=-192115774.9 actor_loss=0.3061 critic_loss=20279636472627.1992 entropy=17.6211 approx_kl=0.0037 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 104160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-343088.4 mean_steps=16.9
|
|
[Episode 104170] reward=-111242062.1 actor_loss=0.3273 critic_loss=105215105942.0690 entropy=17.6271 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 104180] reward=-113999576.7 actor_loss=0.2993 critic_loss=102242967055.5152 entropy=17.6366 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 104180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505206.1 mean_steps=14.3
|
|
[Episode 104190] reward=-117799211.5 actor_loss=0.3378 critic_loss=116565767719.3846 entropy=17.6384 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 104200] reward=-115371337.0 actor_loss=0.3757 critic_loss=109277895959.2727 entropy=17.6355 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 104200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-557501.6 mean_steps=12.8
|
|
[Episode 104210] reward=-119694537.4 actor_loss=0.2249 critic_loss=113340612022.8571 entropy=17.6341 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 104220] reward=-124735886.1 actor_loss=0.3148 critic_loss=411468073761.3913 entropy=17.6373 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 104220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-426309.4 mean_steps=16.6
|
|
[Episode 104230] reward=-123469387.9 actor_loss=0.2184 critic_loss=120681535897.6000 entropy=17.6429 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 104240] reward=-116757792.3 actor_loss=0.2442 critic_loss=108259597425.7778 entropy=17.6425 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 104240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458292.7 mean_steps=14.8
|
|
[Episode 104250] reward=-115524584.1 actor_loss=0.4833 critic_loss=108643095062.2609 entropy=17.6401 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Episode 104260] reward=-120750579.4 actor_loss=0.3333 critic_loss=111957566610.2857 entropy=17.6340 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 104260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487179.7 mean_steps=13.8
|
|
[Episode 104270] reward=-117089272.3 actor_loss=0.2272 critic_loss=110948492462.8293 entropy=17.6357 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 104280] reward=-120920677.2 actor_loss=0.3218 critic_loss=113249876038.6207 entropy=17.6379 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 104280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549371.1 mean_steps=13.8
|
|
[Episode 104290] reward=-117333523.1 actor_loss=0.3212 critic_loss=109898064671.2195 entropy=17.6277 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 104300] reward=-111503134.2 actor_loss=0.3258 critic_loss=104639055313.4545 entropy=17.6207 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 104300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454167.1 mean_steps=15.0
|
|
[Episode 104310] reward=-117904819.0 actor_loss=0.2828 critic_loss=115540018790.4000 entropy=17.6231 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 104320] reward=-117960881.0 actor_loss=0.3268 critic_loss=104072731111.6190 entropy=17.6257 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 104320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-626594.4 mean_steps=12.7
|
|
[Episode 104330] reward=-115976435.6 actor_loss=0.3567 critic_loss=105099189030.7879 entropy=17.6141 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 104340] reward=-112660041.7 actor_loss=0.3961 critic_loss=104387177312.7111 entropy=17.6030 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 104340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504719.3 mean_steps=14.3
|
|
[Episode 104350] reward=-116971162.9 actor_loss=0.2054 critic_loss=107830652836.9778 entropy=17.6010 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 104360] reward=-114951525.0 actor_loss=0.2651 critic_loss=107937754368.0000 entropy=17.6134 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 104360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-443452.3 mean_steps=16.5
|
|
[Episode 104370] reward=-116884495.0 actor_loss=0.2837 critic_loss=112626381619.2000 entropy=17.6197 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 104380] reward=-118581864.7 actor_loss=0.2920 critic_loss=110077602474.6667 entropy=17.5995 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 104380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-548740.0 mean_steps=12.7
|
|
[Episode 104390] reward=-118162798.7 actor_loss=0.2661 critic_loss=115995544675.0968 entropy=17.6025 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 104400] reward=-116062335.4 actor_loss=0.2370 critic_loss=109215760068.9231 entropy=17.6115 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 104400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437896.0 mean_steps=14.6
|
|
[Episode 104410] reward=-116940092.0 actor_loss=0.3559 critic_loss=113429276672.0000 entropy=17.6074 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 104420] reward=-117887850.9 actor_loss=0.3082 critic_loss=112852181975.0400 entropy=17.6196 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 104420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560874.8 mean_steps=12.7
|
|
[Episode 104430] reward=-111569093.3 actor_loss=0.3251 critic_loss=108419428219.8710 entropy=17.6101 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 104440] reward=-233173345.9 actor_loss=0.4266 critic_loss=53817603351596.5234 entropy=17.6188 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 104440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413970.4 mean_steps=15.3
|
|
[Episode 104450] reward=-111479707.3 actor_loss=0.3807 critic_loss=105908088721.2973 entropy=17.6100 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 104460] reward=-110246226.5 actor_loss=0.3569 critic_loss=107017755215.6444 entropy=17.6086 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 104460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499204.5 mean_steps=14.1
|
|
[Episode 104470] reward=-118941152.1 actor_loss=0.2076 critic_loss=112297229789.8667 entropy=17.6052 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 104480] reward=-211157575.3 actor_loss=0.2058 critic_loss=21044021285236.3633 entropy=17.6106 approx_kl=0.0038 kl_stop=1 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 104480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452832.6 mean_steps=15.3
|
|
[Episode 104490] reward=-114197330.3 actor_loss=0.3101 critic_loss=110662160042.6667 entropy=17.6005 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 104500] reward=-125289458.8 actor_loss=0.2754 critic_loss=1270032353052.4443 entropy=17.6024 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 104500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-367098.3 mean_steps=16.1
|
|
[Episode 104510] reward=-115936138.4 actor_loss=0.2189 critic_loss=119073372346.1818 entropy=17.5905 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 104520] reward=-118341434.5 actor_loss=0.4463 critic_loss=116739921547.6364 entropy=17.5951 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 104520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581263.5 mean_steps=12.9
|
|
[Episode 104530] reward=-129482659.9 actor_loss=0.3606 critic_loss=1491156820150.0444 entropy=17.5910 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 104540] reward=-118591341.6 actor_loss=0.2415 critic_loss=114125175193.6000 entropy=17.5970 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 104540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462234.6 mean_steps=13.7
|
|
[Episode 104550] reward=-109643985.8 actor_loss=0.3928 critic_loss=107168938393.6000 entropy=17.6026 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 104560] reward=-129544455.4 actor_loss=0.2690 critic_loss=1276621475524.9231 entropy=17.5908 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 104560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-632339.0 mean_steps=12.7
|
|
[Episode 104570] reward=-119043682.1 actor_loss=0.1924 critic_loss=111591072524.1905 entropy=17.5958 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 104580] reward=-107450885.3 actor_loss=0.4027 critic_loss=97663674504.5333 entropy=17.5928 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 104580] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-733920.0 mean_steps=11.6
|
|
[Episode 104590] reward=-116306810.4 actor_loss=0.3612 critic_loss=108792089114.9474 entropy=17.6023 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 104600] reward=-114147068.7 actor_loss=0.3765 critic_loss=103388819547.0222 entropy=17.6167 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 104600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-562045.3 mean_steps=14.3
|
|
[Episode 104610] reward=-112616832.3 actor_loss=0.3674 critic_loss=111476406521.7561 entropy=17.6056 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 104620] reward=-117733110.1 actor_loss=0.2713 critic_loss=107107133940.6222 entropy=17.5924 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 104620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-604120.9 mean_steps=12.5
|
|
[Episode 104630] reward=-114157821.2 actor_loss=0.2915 critic_loss=106130340067.5556 entropy=17.5986 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 104640] reward=-117655038.5 actor_loss=0.3131 critic_loss=109097041538.9767 entropy=17.6208 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 104640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-566576.3 mean_steps=14.3
|
|
[Episode 104650] reward=-112069024.0 actor_loss=0.2524 critic_loss=107180999098.8108 entropy=17.6081 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 104660] reward=-113321517.5 actor_loss=0.2746 critic_loss=107596169431.5789 entropy=17.6196 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 104660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506861.9 mean_steps=14.1
|
|
[Episode 104670] reward=-117136467.8 actor_loss=0.3024 critic_loss=118798646649.2632 entropy=17.6084 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 104680] reward=-116238856.4 actor_loss=0.2985 critic_loss=109522484906.6667 entropy=17.6114 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 104680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-666430.1 mean_steps=12.2
|
|
[Episode 104690] reward=-117566558.1 actor_loss=0.2172 critic_loss=105777311948.8000 entropy=17.6086 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 104700] reward=-112837373.3 actor_loss=0.2815 critic_loss=108379803296.9143 entropy=17.6098 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 104700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435497.6 mean_steps=14.7
|
|
[Episode 104710] reward=-112431046.6 actor_loss=0.2478 critic_loss=109384711463.8222 entropy=17.5994 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 104720] reward=-117131080.2 actor_loss=0.1777 critic_loss=106637857914.8800 entropy=17.5999 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 104720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-648907.8 mean_steps=12.3
|
|
[Episode 104730] reward=-115270096.3 actor_loss=0.2449 critic_loss=109155354760.5333 entropy=17.5782 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 104740] reward=-117763414.9 actor_loss=0.3115 critic_loss=110311539825.7778 entropy=17.5625 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 104740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-500845.9 mean_steps=13.1
|
|
[Episode 104750] reward=-114557067.1 actor_loss=0.2447 critic_loss=113716070809.6000 entropy=17.5666 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 104760] reward=-119482431.1 actor_loss=0.3074 critic_loss=117347776609.5238 entropy=17.5554 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 104760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549819.3 mean_steps=13.2
|
|
[Episode 104770] reward=-109502192.4 actor_loss=0.2618 critic_loss=105695818496.0000 entropy=17.5638 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 104780] reward=-123734900.6 actor_loss=0.2042 critic_loss=122831078617.2121 entropy=17.5681 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 104780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467449.4 mean_steps=14.8
|
|
[Episode 104790] reward=-121424564.7 actor_loss=0.2302 critic_loss=118922035882.6667 entropy=17.5685 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 104800] reward=-115204149.7 actor_loss=0.1896 critic_loss=107713622562.1333 entropy=17.5584 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 104800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590288.5 mean_steps=12.7
|
|
[Episode 104810] reward=-116684613.5 actor_loss=0.3648 critic_loss=108877286306.9091 entropy=17.5738 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 104820] reward=-113491895.1 actor_loss=0.3114 critic_loss=109824775633.4545 entropy=17.6065 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 104820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390869.4 mean_steps=14.9
|
|
[Episode 104830] reward=-117152330.4 actor_loss=0.2420 critic_loss=119758399406.0800 entropy=17.6221 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 104840] reward=-118764575.3 actor_loss=0.3020 critic_loss=119843046286.2222 entropy=17.6460 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 104840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-584814.4 mean_steps=11.8
|
|
[Episode 104850] reward=-159872358.3 actor_loss=0.3278 critic_loss=10343148992011.3770 entropy=17.6312 approx_kl=-0.0010 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 104860] reward=-116029312.7 actor_loss=0.2611 critic_loss=111790136797.8667 entropy=17.6561 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 104860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498296.7 mean_steps=14.2
|
|
[Episode 104870] reward=-114278659.2 actor_loss=0.2845 critic_loss=108245706524.4444 entropy=17.6674 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 104880] reward=-113569513.2 actor_loss=0.2774 critic_loss=111751012562.0513 entropy=17.6653 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 104880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-444559.1 mean_steps=14.7
|
|
[Episode 104890] reward=-115904546.7 actor_loss=0.2260 critic_loss=114022090174.3590 entropy=17.6714 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 104900] reward=-113709324.4 actor_loss=0.3993 critic_loss=115224251733.3333 entropy=17.6477 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 104900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-567779.8 mean_steps=14.2
|
|
[Episode 104910] reward=-112836349.1 actor_loss=0.4138 critic_loss=106848634197.3333 entropy=17.6448 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 104920] reward=-117049231.9 actor_loss=0.2497 critic_loss=107920385274.3111 entropy=17.6537 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 104920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-653862.1 mean_steps=12.2
|
|
[Episode 104930] reward=-114941495.2 actor_loss=0.3659 critic_loss=115388336264.5333 entropy=17.6419 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 104940] reward=-113147038.8 actor_loss=0.2983 critic_loss=101196437003.3778 entropy=17.6385 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 104940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443810.6 mean_steps=15.4
|
|
[Episode 104950] reward=-113190344.3 actor_loss=0.3968 critic_loss=103944027977.9556 entropy=17.6433 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 104960] reward=-115419545.2 actor_loss=0.3442 critic_loss=110208288491.2432 entropy=17.6407 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 104960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-406929.6 mean_steps=16.9
|
|
[Episode 104970] reward=-110926167.9 actor_loss=0.3783 critic_loss=103324117284.5714 entropy=17.6460 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 104980] reward=-118003869.2 actor_loss=0.4079 critic_loss=108264530448.5161 entropy=17.6423 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 104980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551011.8 mean_steps=13.4
|
|
[Episode 104990] reward=-113200616.5 actor_loss=0.3019 critic_loss=106726523972.2667 entropy=17.6415 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 105000] reward=-115229889.0 actor_loss=0.3293 critic_loss=107228109790.9677 entropy=17.6145 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 105000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523945.0 mean_steps=13.6
|
|
[Episode 105010] reward=-114832683.9 actor_loss=0.2934 critic_loss=105661854105.6000 entropy=17.5824 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 105020] reward=-115559366.0 actor_loss=0.2475 critic_loss=108448106632.5333 entropy=17.5878 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 105020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-607728.4 mean_steps=12.7
|
|
[Episode 105030] reward=-118905663.7 actor_loss=0.2624 critic_loss=110159175680.0000 entropy=17.5803 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 105040] reward=-111315954.7 actor_loss=0.3167 critic_loss=113448985122.1333 entropy=17.5859 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 105040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480067.7 mean_steps=13.7
|
|
[Episode 105050] reward=-116349312.1 actor_loss=0.3598 critic_loss=104944549888.0000 entropy=17.5959 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 105060] reward=-117665707.1 actor_loss=0.2969 critic_loss=109544778752.0000 entropy=17.6143 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 105060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425005.5 mean_steps=15.4
|
|
[Episode 105070] reward=-112012778.3 actor_loss=0.3576 critic_loss=103859481622.7556 entropy=17.6208 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 105080] reward=-114101525.9 actor_loss=0.3373 critic_loss=109484465744.8421 entropy=17.6319 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 105080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485370.7 mean_steps=13.7
|
|
[Episode 105090] reward=-109835644.2 actor_loss=0.3449 critic_loss=106117232867.5556 entropy=17.6333 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 105100] reward=-116342960.5 actor_loss=0.2882 critic_loss=108753388588.5217 entropy=17.6255 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 105100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-472796.2 mean_steps=13.1
|
|
[Episode 105110] reward=-114708592.7 actor_loss=0.2679 critic_loss=109527384808.7273 entropy=17.6338 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 105120] reward=-112038769.5 actor_loss=0.3459 critic_loss=104012384200.6487 entropy=17.6248 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 105120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491432.3 mean_steps=14.9
|
|
[Episode 105130] reward=-116017579.2 actor_loss=0.2878 critic_loss=109305467825.2308 entropy=17.6296 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 105140] reward=-115824537.7 actor_loss=0.2497 critic_loss=107295427242.6667 entropy=17.6480 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 105140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477688.4 mean_steps=14.0
|
|
[Episode 105150] reward=-118194021.0 actor_loss=0.3757 critic_loss=111445947970.7826 entropy=17.6611 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 105160] reward=-117998924.8 actor_loss=0.2200 critic_loss=109920005558.8571 entropy=17.6509 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 105160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-620518.7 mean_steps=11.8
|
|
[Episode 105170] reward=-109070948.0 actor_loss=0.3258 critic_loss=99421885781.3333 entropy=17.6642 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 105180] reward=-114820284.4 actor_loss=0.3697 critic_loss=105288807174.9189 entropy=17.6558 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 105180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-573419.7 mean_steps=15.3
|
|
[Episode 105190] reward=-115036688.9 actor_loss=0.3329 critic_loss=105538585804.8000 entropy=17.6526 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 105200] reward=-115521206.8 actor_loss=0.3458 critic_loss=105687857561.6000 entropy=17.6481 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 105200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529342.0 mean_steps=13.3
|
|
[Episode 105210] reward=-116687715.8 actor_loss=0.2841 critic_loss=177103199625.8462 entropy=17.6353 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 105220] reward=-111500680.4 actor_loss=0.3127 critic_loss=119544805255.5294 entropy=17.6229 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 105220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474373.7 mean_steps=14.9
|
|
[Episode 105230] reward=-119187378.4 actor_loss=0.1997 critic_loss=113355242216.7273 entropy=17.6309 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 105240] reward=-117147135.0 actor_loss=0.2588 critic_loss=108847517828.1290 entropy=17.6306 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 105240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-614700.0 mean_steps=14.0
|
|
[Episode 105250] reward=-114847345.5 actor_loss=0.3596 critic_loss=124769042022.4000 entropy=17.6340 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 105260] reward=-116613718.1 actor_loss=0.2740 critic_loss=106117669956.2667 entropy=17.6299 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 105260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537450.7 mean_steps=13.9
|
|
[Episode 105270] reward=-117052825.2 actor_loss=0.2796 critic_loss=105191671125.3333 entropy=17.6383 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 105280] reward=-120028750.4 actor_loss=0.3424 critic_loss=107026148374.7556 entropy=17.6289 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 105280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549497.9 mean_steps=13.3
|
|
[Episode 105290] reward=-112596873.6 actor_loss=0.3173 critic_loss=101897538290.5263 entropy=17.6157 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 105300] reward=-111851786.9 actor_loss=0.3214 critic_loss=115159908352.0000 entropy=17.6242 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 105300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-582238.1 mean_steps=13.4
|
|
[Episode 105310] reward=-113521186.8 actor_loss=0.3544 critic_loss=108150305951.2889 entropy=17.6231 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 105320] reward=-117872568.8 actor_loss=0.2983 critic_loss=111948844782.9333 entropy=17.6162 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 105320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510598.3 mean_steps=13.9
|
|
[Episode 105330] reward=-117090599.3 actor_loss=0.2948 critic_loss=116219834641.0667 entropy=17.6145 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 105340] reward=-112132593.2 actor_loss=0.2900 critic_loss=101205734377.2444 entropy=17.5823 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 105340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-581861.5 mean_steps=14.6
|
|
[Episode 105350] reward=-114028111.7 actor_loss=0.2779 critic_loss=104420644363.3778 entropy=17.5813 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 105360] reward=-115316683.7 actor_loss=0.2435 critic_loss=107341932544.0000 entropy=17.5745 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 105360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538092.2 mean_steps=13.6
|
|
[Episode 105370] reward=-114947922.1 actor_loss=0.2731 critic_loss=109754742676.2105 entropy=17.5732 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 105380] reward=-112490030.6 actor_loss=0.3114 critic_loss=108149024722.4889 entropy=17.5609 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 105380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567630.9 mean_steps=13.0
|
|
[Episode 105390] reward=-117838274.9 actor_loss=0.2383 critic_loss=107217623160.4706 entropy=17.5571 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 105400] reward=-120395564.6 actor_loss=0.1692 critic_loss=249762529826.1333 entropy=17.5770 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 105400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-594365.4 mean_steps=14.7
|
|
[Episode 105410] reward=-109640143.9 actor_loss=0.2437 critic_loss=102488406086.6207 entropy=17.5822 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 105420] reward=-115065728.2 actor_loss=0.2944 critic_loss=105174659276.8000 entropy=17.5797 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 105420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-680025.8 mean_steps=12.2
|
|
[Episode 105430] reward=-109010451.7 actor_loss=0.2937 critic_loss=102313459968.0000 entropy=17.5775 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 105440] reward=-110068552.3 actor_loss=0.4320 critic_loss=99222521735.5294 entropy=17.5840 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 105440] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-647337.5 mean_steps=12.1
|
|
[Episode 105450] reward=-116703567.1 actor_loss=0.3050 critic_loss=105690416560.3556 entropy=17.5902 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 105460] reward=-114704966.9 actor_loss=0.3046 critic_loss=109850717262.7692 entropy=17.6025 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 105460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507624.0 mean_steps=13.0
|
|
[Episode 105470] reward=-118537600.2 actor_loss=0.2998 critic_loss=108370354722.1333 entropy=17.6078 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 105480] reward=-114646386.0 actor_loss=0.2579 critic_loss=105527786154.6667 entropy=17.6137 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 105480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523915.5 mean_steps=13.4
|
|
[Episode 105490] reward=-118312745.3 actor_loss=0.2473 critic_loss=108441904317.6296 entropy=17.6219 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 105500] reward=-112396768.7 actor_loss=0.3722 critic_loss=107203689050.3529 entropy=17.6042 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 105500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512463.0 mean_steps=13.9
|
|
[Episode 105510] reward=-116553167.9 actor_loss=0.3901 critic_loss=110429132390.4000 entropy=17.5850 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 105520] reward=-115878059.3 actor_loss=0.3247 critic_loss=106303267726.2222 entropy=17.5905 approx_kl=0.0103 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 105520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597444.7 mean_steps=12.7
|
|
[Episode 105530] reward=-113928634.8 actor_loss=0.1969 critic_loss=106864555956.1481 entropy=17.5945 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 105540] reward=-117137993.3 actor_loss=0.2954 critic_loss=109979124456.7273 entropy=17.6116 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 105540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-410484.7 mean_steps=16.6
|
|
[Episode 105550] reward=-116810978.3 actor_loss=0.2774 critic_loss=108800153099.3778 entropy=17.5969 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 105560] reward=-110686368.6 actor_loss=0.2714 critic_loss=105358169253.1613 entropy=17.5843 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 105560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481507.5 mean_steps=14.0
|
|
[Episode 105570] reward=-110779445.4 actor_loss=0.3911 critic_loss=108026794683.3171 entropy=17.5842 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 105580] reward=-116842462.7 actor_loss=0.3021 critic_loss=112625342297.9460 entropy=17.5781 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 105580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-606914.6 mean_steps=12.1
|
|
[Episode 105590] reward=-115142262.8 actor_loss=0.2420 critic_loss=103018902323.2000 entropy=17.5825 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 105600] reward=-117096263.1 actor_loss=0.3216 critic_loss=104107387325.2174 entropy=17.5789 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 105600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-363864.9 mean_steps=15.2
|
|
[Episode 105610] reward=-112535511.4 actor_loss=0.2676 critic_loss=101784575635.9111 entropy=17.5849 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 105620] reward=-120161782.1 actor_loss=0.3627 critic_loss=117335712719.2381 entropy=17.5832 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 105620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-448867.8 mean_steps=16.6
|
|
[Episode 105630] reward=-115022417.5 actor_loss=0.3180 critic_loss=102452789430.0444 entropy=17.5768 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 105640] reward=-112868860.9 actor_loss=0.3202 critic_loss=107860315042.9091 entropy=17.5668 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 105640] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-649924.6 mean_steps=12.1
|
|
[Episode 105650] reward=-112841893.4 actor_loss=0.5563 critic_loss=105343985506.4615 entropy=17.5750 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1536 front_blocked=0
|
|
[Episode 105660] reward=-115756582.5 actor_loss=0.3135 critic_loss=105589956608.0000 entropy=17.6037 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 105660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-439824.3 mean_steps=16.4
|
|
[Episode 105670] reward=-116688995.0 actor_loss=0.2039 critic_loss=109629743650.1333 entropy=17.6042 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 105680] reward=-114122638.4 actor_loss=0.2476 critic_loss=104158834961.0667 entropy=17.6135 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 105680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-644643.3 mean_steps=11.9
|
|
[Episode 105690] reward=-113445168.4 actor_loss=0.3026 critic_loss=108588465265.7778 entropy=17.6090 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 105700] reward=-115661534.3 actor_loss=0.3436 critic_loss=109800851206.2439 entropy=17.6095 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 105700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-425036.9 mean_steps=14.5
|
|
[Episode 105710] reward=-120620754.6 actor_loss=0.2566 critic_loss=114471638942.4762 entropy=17.5984 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 105720] reward=-116426944.3 actor_loss=0.3167 critic_loss=122082009391.4074 entropy=17.5877 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 105720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449996.9 mean_steps=15.7
|
|
[Episode 105730] reward=-112086208.4 actor_loss=0.3315 critic_loss=103290993777.7778 entropy=17.5947 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 105740] reward=-115233994.8 actor_loss=0.2715 critic_loss=101492558116.5714 entropy=17.6078 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 105740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488602.0 mean_steps=14.7
|
|
[Episode 105750] reward=-120498438.2 actor_loss=0.3099 critic_loss=109952010674.4242 entropy=17.6130 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 105760] reward=-119426747.1 actor_loss=0.2765 critic_loss=111797367674.4348 entropy=17.6192 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 105760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-415501.2 mean_steps=15.2
|
|
[Episode 105770] reward=-117926589.5 actor_loss=0.2085 critic_loss=118140781288.7273 entropy=17.6175 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 105780] reward=-115471347.3 actor_loss=0.3096 critic_loss=105476095114.3784 entropy=17.6162 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 105780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477359.6 mean_steps=13.8
|
|
[Episode 105790] reward=-115255203.1 actor_loss=0.3375 critic_loss=107202710732.8000 entropy=17.6059 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 105800] reward=-111069748.0 actor_loss=0.3074 critic_loss=98327244544.0000 entropy=17.5915 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 105800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-413232.8 mean_steps=14.2
|
|
[Episode 105810] reward=-117385717.2 actor_loss=0.2112 critic_loss=104931627404.3871 entropy=17.6006 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 105820] reward=-118215986.3 actor_loss=0.3055 critic_loss=108799902753.0323 entropy=17.5944 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 105820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-575698.6 mean_steps=14.5
|
|
[Episode 105830] reward=-119274473.6 actor_loss=0.3478 critic_loss=113837086401.4222 entropy=17.5805 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 105840] reward=-117946542.2 actor_loss=0.2666 critic_loss=108521270294.7556 entropy=17.5619 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 105840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-417382.4 mean_steps=14.5
|
|
[Episode 105850] reward=-111394846.4 actor_loss=0.3654 critic_loss=103039101246.5778 entropy=17.5495 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 105860] reward=-118071733.9 actor_loss=0.2208 critic_loss=107501467329.4222 entropy=17.5282 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 105860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-620329.3 mean_steps=11.8
|
|
[Episode 105870] reward=-114354413.1 actor_loss=0.1909 critic_loss=101459258208.7111 entropy=17.5125 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 105880] reward=-117608581.4 actor_loss=0.2729 critic_loss=106062383240.5333 entropy=17.5237 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 105880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-438783.1 mean_steps=13.9
|
|
[Episode 105890] reward=-116728153.0 actor_loss=0.3097 critic_loss=104895003761.7778 entropy=17.5142 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 105900] reward=-112652862.6 actor_loss=0.3463 critic_loss=99778686520.8889 entropy=17.5167 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 105900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542130.8 mean_steps=14.7
|
|
[Episode 105910] reward=-117113474.0 actor_loss=0.3677 critic_loss=116656322787.5556 entropy=17.5245 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 105920] reward=-121657967.9 actor_loss=0.2694 critic_loss=113697807655.8222 entropy=17.5416 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 105920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-480477.4 mean_steps=13.1
|
|
[Episode 105930] reward=-121289377.3 actor_loss=0.2888 critic_loss=114524803444.3636 entropy=17.5595 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 105940] reward=-114420875.1 actor_loss=0.3816 critic_loss=107572256221.8667 entropy=17.5553 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 105940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440162.7 mean_steps=14.3
|
|
[Episode 105950] reward=-116215650.5 actor_loss=0.3401 critic_loss=103213484243.8621 entropy=17.5674 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 105960] reward=-113424688.2 actor_loss=0.2843 critic_loss=107039072512.0000 entropy=17.5786 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 105960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470814.6 mean_steps=14.8
|
|
[Episode 105970] reward=-118287923.6 actor_loss=0.2693 critic_loss=108165195217.4545 entropy=17.5810 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 105980] reward=-118395675.9 actor_loss=0.2789 critic_loss=110689909898.3784 entropy=17.5816 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 105980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-377400.9 mean_steps=16.0
|
|
[Episode 105990] reward=-117602703.9 actor_loss=0.2377 critic_loss=110035476772.5714 entropy=17.5885 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 106000] reward=-119431091.5 actor_loss=0.2755 critic_loss=114577978880.0000 entropy=17.5856 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 106000] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-350363.8 mean_steps=17.8
|
|
[Episode 106010] reward=-112900743.5 actor_loss=0.3485 critic_loss=100406566729.9556 entropy=17.5866 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 106020] reward=-115370215.9 actor_loss=0.2125 critic_loss=111239474907.4286 entropy=17.5851 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 106020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-559920.1 mean_steps=11.9
|
|
[Episode 106030] reward=-116789914.7 actor_loss=0.2456 critic_loss=108279610529.6842 entropy=17.5887 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 106040] reward=-119400720.7 actor_loss=0.2998 critic_loss=111915417053.8667 entropy=17.5553 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 106040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-382074.1 mean_steps=16.2
|
|
[Episode 106050] reward=-118094452.9 actor_loss=0.2676 critic_loss=106760390610.4889 entropy=17.5755 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 106060] reward=-121488578.5 actor_loss=0.1510 critic_loss=117088652939.6364 entropy=17.5878 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 106060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-352019.2 mean_steps=16.2
|
|
[Episode 106070] reward=-114212530.5 actor_loss=0.2969 critic_loss=112714888533.3333 entropy=17.5691 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 106080] reward=-118647958.0 actor_loss=0.2192 critic_loss=116480379289.6000 entropy=17.5598 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 106080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575100.1 mean_steps=12.8
|
|
[Episode 106090] reward=-115620256.6 actor_loss=0.1986 critic_loss=119649500455.8222 entropy=17.5499 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 106100] reward=-107659983.1 actor_loss=0.2204 critic_loss=95774212096.0000 entropy=17.5291 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 106100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436589.7 mean_steps=15.8
|
|
[Episode 106110] reward=-112576898.9 actor_loss=0.2637 critic_loss=106286972381.8667 entropy=17.5287 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 106120] reward=-119606565.3 actor_loss=0.2627 critic_loss=110550301354.6667 entropy=17.5414 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 106120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-656152.4 mean_steps=12.2
|
|
[Episode 106130] reward=-121136743.3 actor_loss=0.2752 critic_loss=116607045905.0667 entropy=17.5521 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 106140] reward=-114231566.2 actor_loss=0.3180 critic_loss=103312590262.8571 entropy=17.5388 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 106140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603163.3 mean_steps=12.9
|
|
[Episode 106150] reward=-116052196.9 actor_loss=0.3274 critic_loss=106976631193.6000 entropy=17.5164 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 106160] reward=-119314998.0 actor_loss=0.2922 critic_loss=110555937382.4000 entropy=17.5101 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 106160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-628209.7 mean_steps=11.9
|
|
[Episode 106170] reward=-112199996.6 actor_loss=0.2376 critic_loss=101239631963.0222 entropy=17.5048 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 106180] reward=-113534440.6 actor_loss=0.2754 critic_loss=106143628544.0000 entropy=17.4952 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 106180] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-646550.7 mean_steps=12.4
|
|
[Episode 106190] reward=-118479839.5 actor_loss=0.2132 critic_loss=109692020177.4545 entropy=17.4862 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 106200] reward=-112846716.0 actor_loss=0.3532 critic_loss=104675029232.9412 entropy=17.4792 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 106200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503844.7 mean_steps=14.7
|
|
[Episode 106210] reward=-118218917.1 actor_loss=0.2845 critic_loss=110843704661.3333 entropy=17.4879 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 106220] reward=-118062892.7 actor_loss=0.3203 critic_loss=108571168626.7586 entropy=17.4857 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 106220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449743.2 mean_steps=15.6
|
|
[Episode 106230] reward=-120068258.4 actor_loss=0.2218 critic_loss=112915309968.6956 entropy=17.5012 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 106240] reward=-114345697.0 actor_loss=0.3757 critic_loss=103058565802.6667 entropy=17.4878 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 106240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483364.0 mean_steps=14.8
|
|
[Episode 106250] reward=-141240058.2 actor_loss=0.2531 critic_loss=2251237116404.6221 entropy=17.4984 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 106260] reward=-130920011.4 actor_loss=0.3019 critic_loss=977871875723.6364 entropy=17.5165 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 106260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505987.3 mean_steps=12.9
|
|
[Episode 106270] reward=-116346971.5 actor_loss=0.2863 critic_loss=119105951185.4545 entropy=17.5245 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 106280] reward=-115336970.7 actor_loss=0.2917 critic_loss=109021792392.5333 entropy=17.5316 approx_kl=0.0105 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 106280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-579658.8 mean_steps=13.4
|
|
[Episode 106290] reward=-9069619001.4 actor_loss=0.9487 critic_loss=159368706016044640.0000 entropy=17.5158 approx_kl=0.0005 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 106300] reward=-114210594.7 actor_loss=0.2284 critic_loss=104472253599.2889 entropy=17.5198 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 106300] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-557749.5 mean_steps=11.2
|
|
[Episode 106310] reward=-112648228.8 actor_loss=0.2699 critic_loss=97002800859.4286 entropy=17.5141 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 106320] reward=-113032794.6 actor_loss=0.3591 critic_loss=103251891723.3778 entropy=17.4989 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 106320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-607522.8 mean_steps=12.9
|
|
[Episode 106330] reward=-112958188.0 actor_loss=0.1682 critic_loss=105935775636.2105 entropy=17.5042 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 106340] reward=-115678356.9 actor_loss=0.2781 critic_loss=107197260458.6667 entropy=17.5043 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 106340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386167.8 mean_steps=16.2
|
|
[Episode 106350] reward=-117873863.5 actor_loss=0.1977 critic_loss=106301007510.5882 entropy=17.5029 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 106360] reward=-112041640.5 actor_loss=0.3284 critic_loss=104593022976.0000 entropy=17.5044 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 106360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-539100.1 mean_steps=11.3
|
|
[Episode 106370] reward=-115497582.6 actor_loss=0.2210 critic_loss=104440055440.4103 entropy=17.5139 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 106380] reward=-115930715.5 actor_loss=0.2670 critic_loss=101026133333.3333 entropy=17.5252 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 106380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521716.0 mean_steps=14.1
|
|
[Episode 106390] reward=-111435489.2 actor_loss=0.4440 critic_loss=102574978194.2857 entropy=17.5183 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 106400] reward=-112503953.7 actor_loss=0.1956 critic_loss=105831538688.0000 entropy=17.5193 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 106400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478528.4 mean_steps=14.8
|
|
[Episode 106410] reward=-119153191.6 actor_loss=0.2559 critic_loss=110591108096.0000 entropy=17.5297 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 106420] reward=-113741818.1 actor_loss=0.3819 critic_loss=105254152874.6667 entropy=17.5236 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 106420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496976.1 mean_steps=14.2
|
|
[Episode 106430] reward=-114780102.2 actor_loss=0.3352 critic_loss=103337130954.1053 entropy=17.5303 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 106440] reward=-115469594.4 actor_loss=0.2791 critic_loss=110321561014.8571 entropy=17.5259 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 106440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486361.9 mean_steps=14.8
|
|
[Episode 106450] reward=-114759253.4 actor_loss=0.3581 critic_loss=112883185152.0000 entropy=17.5661 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 106460] reward=-112352074.9 actor_loss=0.3800 critic_loss=104553209249.1852 entropy=17.5590 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 106460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553891.3 mean_steps=14.3
|
|
[Episode 106470] reward=-117993090.0 actor_loss=0.2857 critic_loss=103099054806.7097 entropy=17.5208 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 106480] reward=-118459517.6 actor_loss=0.2700 critic_loss=104082234611.8095 entropy=17.5163 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 106480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-641393.3 mean_steps=12.9
|
|
[Episode 106490] reward=-119999679.5 actor_loss=0.2796 critic_loss=112852762259.9111 entropy=17.4944 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 106500] reward=-115849303.0 actor_loss=0.3225 critic_loss=109935639507.4783 entropy=17.5020 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 106500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-683028.9 mean_steps=12.1
|
|
[Episode 106510] reward=-115540351.0 actor_loss=0.3523 critic_loss=110106556211.2000 entropy=17.5054 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 106520] reward=-114160218.0 actor_loss=0.3854 critic_loss=103890075940.5714 entropy=17.4983 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 106520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492301.6 mean_steps=13.8
|
|
[Episode 106530] reward=-118073530.5 actor_loss=0.2968 critic_loss=107077517676.0889 entropy=17.4936 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 106540] reward=-113567990.5 actor_loss=0.2584 critic_loss=101162540327.8222 entropy=17.5073 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 106540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445309.6 mean_steps=14.7
|
|
[Episode 106550] reward=-117791406.0 actor_loss=0.2857 critic_loss=108165026981.1613 entropy=17.5205 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 106560] reward=-119166142.4 actor_loss=0.3148 critic_loss=109343765663.2889 entropy=17.5152 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 106560] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-580083.5 mean_steps=11.6
|
|
[Episode 106570] reward=-117540362.8 actor_loss=0.2970 critic_loss=109932753481.1429 entropy=17.5210 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 106580] reward=-114406224.7 actor_loss=0.3734 critic_loss=107892705667.4595 entropy=17.5212 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 106580] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-355857.3 mean_steps=17.0
|
|
[Episode 106590] reward=-116900908.3 actor_loss=0.2392 critic_loss=107226735432.2051 entropy=17.5170 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 106600] reward=-113792075.8 actor_loss=0.3153 critic_loss=104388872765.4400 entropy=17.4896 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 106600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401417.7 mean_steps=16.0
|
|
[Episode 106610] reward=-110986157.9 actor_loss=0.3381 critic_loss=102417046674.2857 entropy=17.4963 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 106620] reward=-119106659.4 actor_loss=0.3099 critic_loss=116949700699.0222 entropy=17.5091 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 106620] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-629793.7 mean_steps=12.2
|
|
[Episode 106630] reward=-120814040.8 actor_loss=0.2652 critic_loss=114341318018.8445 entropy=17.5114 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 106640] reward=-113698579.5 actor_loss=0.3244 critic_loss=102686770608.3556 entropy=17.5143 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 106640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440277.2 mean_steps=14.4
|
|
[Episode 106650] reward=-119868151.4 actor_loss=0.3203 critic_loss=112381514789.9259 entropy=17.5099 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 106660] reward=-117502054.2 actor_loss=0.4284 critic_loss=110086997606.4000 entropy=17.5196 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 106660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-360040.0 mean_steps=15.8
|
|
[Episode 106670] reward=-117050701.2 actor_loss=0.2827 critic_loss=106092970348.0889 entropy=17.5020 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 106680] reward=-117186355.8 actor_loss=0.2931 critic_loss=111651164160.0000 entropy=17.5021 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 106680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-562379.1 mean_steps=14.4
|
|
[Episode 106690] reward=-121377419.2 actor_loss=0.2215 critic_loss=112332402346.6667 entropy=17.4951 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 106700] reward=-120526886.9 actor_loss=0.3136 critic_loss=146309210567.1111 entropy=17.4903 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 106700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460294.6 mean_steps=14.6
|
|
[Episode 106710] reward=-118651639.7 actor_loss=0.3384 critic_loss=109491005576.5333 entropy=17.5063 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 106720] reward=-112184209.5 actor_loss=0.4087 critic_loss=103987045080.1778 entropy=17.5126 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 106720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-469237.9 mean_steps=15.6
|
|
[Episode 106730] reward=-121525985.2 actor_loss=0.3068 critic_loss=108784760513.4222 entropy=17.5278 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 106740] reward=-113569314.9 actor_loss=0.3190 critic_loss=104297726225.0667 entropy=17.5377 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 106740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541650.3 mean_steps=14.1
|
|
[Episode 106750] reward=-118458983.5 actor_loss=0.3398 critic_loss=110598479462.4000 entropy=17.5411 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 106760] reward=-115660371.7 actor_loss=0.3355 critic_loss=110397918890.6667 entropy=17.5419 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 106760] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-346528.2 mean_steps=17.1
|
|
[Episode 106770] reward=-120284593.4 actor_loss=0.3817 critic_loss=108174008320.0000 entropy=17.5501 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 106780] reward=-113326768.5 actor_loss=0.3919 critic_loss=133567084316.4444 entropy=17.5402 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 106780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-654845.5 mean_steps=12.2
|
|
[Episode 106790] reward=-1202142331.4 actor_loss=0.2846 critic_loss=3137035657281536.0000 entropy=17.5466 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 106800] reward=-118035950.6 actor_loss=0.3158 critic_loss=113283913366.5882 entropy=17.5527 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 106800] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-661614.0 mean_steps=11.6
|
|
[Episode 106810] reward=-612819347.1 actor_loss=0.3436 critic_loss=751571832324460.1250 entropy=17.5456 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 106820] reward=-4090684449.3 actor_loss=0.2879 critic_loss=35383477896408816.0000 entropy=17.5731 approx_kl=0.0008 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 106820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455007.0 mean_steps=14.3
|
|
[Episode 106830] reward=-2910819418.4 actor_loss=0.3510 critic_loss=18401696541153688.0000 entropy=17.5671 approx_kl=-0.0015 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 106840] reward=-6831018621.3 actor_loss=0.3938 critic_loss=93705665924050304.0000 entropy=17.5802 approx_kl=-0.0002 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 106840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532611.1 mean_steps=14.2
|
|
[Episode 106850] reward=-115866823.2 actor_loss=0.2151 critic_loss=108574233236.6452 entropy=17.5934 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 106860] reward=-258905533.2 actor_loss=0.3170 critic_loss=90779663478146.8438 entropy=17.6005 approx_kl=-0.0019 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 106860] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-642293.2 mean_steps=11.1
|
|
[Episode 106870] reward=-113148385.7 actor_loss=0.3842 critic_loss=105436083200.0000 entropy=17.6285 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 106880] reward=-2385769420.6 actor_loss=0.7458 critic_loss=12490054091232598.0000 entropy=17.6412 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 106880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492559.9 mean_steps=14.0
|
|
[Episode 106890] reward=-113839949.4 actor_loss=0.3027 critic_loss=108369895424.0000 entropy=17.6303 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 106900] reward=-2623308358.7 actor_loss=0.2768 critic_loss=15148885485953570.0000 entropy=17.6230 approx_kl=-0.0000 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 106900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470697.6 mean_steps=14.8
|
|
[Episode 106910] reward=-2333867647.9 actor_loss=0.3800 critic_loss=11988423063773730.0000 entropy=17.6279 approx_kl=-0.0020 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 106920] reward=-9975307488.5 actor_loss=0.8502 critic_loss=157371669918462784.0000 entropy=17.6338 approx_kl=0.0003 kl_stop=0 intervention_rate=0.1016 front_blocked=0
|
|
[Eval 106920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-606066.1 mean_steps=13.1
|
|
[Episode 106930] reward=-124289478.7 actor_loss=0.3235 critic_loss=723592150311.8223 entropy=17.6469 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 106940] reward=-5978134703.8 actor_loss=0.2784 critic_loss=72758708889737984.0000 entropy=17.6676 approx_kl=0.0010 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 106940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-423116.7 mean_steps=16.6
|
|
[Episode 106950] reward=-112906396.4 actor_loss=0.2405 critic_loss=108849397104.6400 entropy=17.6780 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 106960] reward=-2136253937.1 actor_loss=0.2285 critic_loss=9110723619344292.0000 entropy=17.6927 approx_kl=-0.0001 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 106960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448421.1 mean_steps=15.6
|
|
[Episode 106970] reward=-119970370.5 actor_loss=0.2545 critic_loss=114219442566.0952 entropy=17.7043 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 106980] reward=-119638432.4 actor_loss=0.2173 critic_loss=110301702553.6000 entropy=17.7086 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 106980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536244.2 mean_steps=14.3
|
|
[Episode 106990] reward=-118815554.5 actor_loss=0.2710 critic_loss=112876236604.9524 entropy=17.7073 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 107000] reward=-14174182736.9 actor_loss=0.6042 critic_loss=250171073214021632.0000 entropy=17.7145 approx_kl=0.0013 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Eval 107000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433199.6 mean_steps=14.8
|
|
[Episode 107010] reward=-116002895.9 actor_loss=0.2822 critic_loss=112628996687.6444 entropy=17.7208 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 107020] reward=-180972265.5 actor_loss=0.2658 critic_loss=16772640060939.3770 entropy=17.7366 approx_kl=0.0008 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 107020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-623770.1 mean_steps=13.2
|
|
[Episode 107030] reward=-117451388.8 actor_loss=0.3359 critic_loss=113181334807.2727 entropy=17.7401 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 107040] reward=-516949722.2 actor_loss=0.3014 critic_loss=496085564403074.8750 entropy=17.7719 approx_kl=0.0012 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 107040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-401975.6 mean_steps=15.3
|
|
[Episode 107050] reward=-8950831866.9 actor_loss=0.3033 critic_loss=155459885485732672.0000 entropy=17.7792 approx_kl=-0.0024 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 107060] reward=-4214223697.5 actor_loss=0.3772 critic_loss=51072144599993552.0000 entropy=17.7915 approx_kl=-0.0006 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 107060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-598506.0 mean_steps=12.8
|
|
[Episode 107070] reward=-19520537254.1 actor_loss=0.2377 critic_loss=396553970889560640.0000 entropy=17.8063 approx_kl=0.0034 kl_stop=1 intervention_rate=0.1061 front_blocked=0
|
|
[Episode 107080] reward=-16957301451.7 actor_loss=0.2948 critic_loss=506398356498849600.0000 entropy=17.8329 approx_kl=-0.0047 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Eval 107080] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-328762.9 mean_steps=18.0
|
|
[Episode 107090] reward=-118455869.8 actor_loss=0.4061 critic_loss=119060410276.9778 entropy=17.8520 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 107100] reward=-10821379225.2 actor_loss=0.2746 critic_loss=219869123740015008.0000 entropy=17.8693 approx_kl=-0.0002 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 107100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467987.9 mean_steps=13.8
|
|
[Episode 107110] reward=-8223700507.6 actor_loss=0.2483 critic_loss=132295695011751888.0000 entropy=17.8818 approx_kl=-0.0003 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 107120] reward=-724940638.8 actor_loss=0.3117 critic_loss=1556858699979889.7500 entropy=17.8692 approx_kl=0.0003 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 107120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529947.9 mean_steps=13.6
|
|
[Episode 107130] reward=-38741228318.7 actor_loss=0.1285 critic_loss=758778819301488896.0000 entropy=17.9021 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 107140] reward=-2873403802.5 actor_loss=0.2936 critic_loss=17949354808923478.0000 entropy=17.9246 approx_kl=0.0003 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 107140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-444517.4 mean_steps=13.8
|
|
[Episode 107150] reward=-667684005.5 actor_loss=0.3189 critic_loss=874586548242568.5000 entropy=17.9523 approx_kl=-0.0013 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 107160] reward=-4707677236.7 actor_loss=0.1492 critic_loss=25298028485940928.0000 entropy=17.9681 approx_kl=-0.0012 kl_stop=0 intervention_rate=0.1048 front_blocked=0
|
|
[Eval 107160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537343.4 mean_steps=14.4
|
|
[Episode 107170] reward=-12701014454.5 actor_loss=0.4594 critic_loss=209544565244297216.0000 entropy=17.9843 approx_kl=0.0032 kl_stop=1 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 107180] reward=-119737276.0 actor_loss=0.4220 critic_loss=131081048941.7143 entropy=17.9930 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 107180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500112.4 mean_steps=15.2
|
|
[Episode 107190] reward=-119650915.9 actor_loss=0.3877 critic_loss=117168309910.5882 entropy=18.0049 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 107200] reward=-8712077115.3 actor_loss=0.2532 critic_loss=146092256307689824.0000 entropy=18.0301 approx_kl=0.0031 kl_stop=1 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 107200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466031.7 mean_steps=14.9
|
|
[Episode 107210] reward=-516946686.9 actor_loss=0.3748 critic_loss=600162406830353.1250 entropy=18.0315 approx_kl=-0.0023 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 107220] reward=-122392938.1 actor_loss=0.2959 critic_loss=123869776691.2000 entropy=18.0257 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 107220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-460534.3 mean_steps=14.0
|
|
[Episode 107230] reward=-1836723484.4 actor_loss=0.3813 critic_loss=7003752441251248.0000 entropy=18.0281 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 107240] reward=-1078021165.7 actor_loss=0.2279 critic_loss=2059682022290591.2500 entropy=18.0265 approx_kl=0.0009 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 107240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482711.6 mean_steps=15.2
|
|
[Episode 107250] reward=-122170463.2 actor_loss=0.2396 critic_loss=123744931072.0000 entropy=18.0523 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 107260] reward=-122024138.5 actor_loss=0.3271 critic_loss=117574710886.4000 entropy=18.0541 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 107260] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-669539.9 mean_steps=10.3
|
|
[Episode 107270] reward=-123477590.4 actor_loss=0.2284 critic_loss=122957542377.2444 entropy=18.0501 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 107280] reward=-124993946.9 actor_loss=0.2043 critic_loss=122690409085.1555 entropy=18.0352 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 107280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603993.5 mean_steps=12.5
|
|
[Episode 107290] reward=-121738459.5 actor_loss=0.3289 critic_loss=125639270035.9111 entropy=18.0336 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 107300] reward=-124160734.9 actor_loss=0.2976 critic_loss=120616771115.8857 entropy=18.0340 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 107300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449472.7 mean_steps=14.7
|
|
[Episode 107310] reward=-123567017.6 actor_loss=0.3131 critic_loss=122513002951.1111 entropy=18.0309 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 107320] reward=-122407045.5 actor_loss=0.3189 critic_loss=122185115420.4444 entropy=18.0164 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 107320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541176.0 mean_steps=14.3
|
|
[Episode 107330] reward=-120016333.6 actor_loss=0.2774 critic_loss=118444775316.2105 entropy=18.0160 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 107340] reward=-122407959.1 actor_loss=0.2186 critic_loss=118037067161.6000 entropy=18.0129 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 107340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421630.0 mean_steps=15.4
|
|
[Episode 107350] reward=-119835292.8 actor_loss=0.3057 critic_loss=117159046868.2927 entropy=18.0257 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 107360] reward=-121529238.1 actor_loss=0.3416 critic_loss=124280692553.9556 entropy=18.0338 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 107360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-443437.9 mean_steps=15.0
|
|
[Episode 107370] reward=-120172361.8 actor_loss=0.2992 critic_loss=114712730191.6444 entropy=18.0278 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 107380] reward=-123482805.0 actor_loss=0.3269 critic_loss=120250619431.3846 entropy=18.0255 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 107380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397659.4 mean_steps=15.3
|
|
[Episode 107390] reward=-118059085.2 actor_loss=0.3507 critic_loss=114717157888.0000 entropy=18.0113 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 107400] reward=-119710794.1 actor_loss=0.2329 critic_loss=122518519417.9048 entropy=18.0072 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 107400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-411524.6 mean_steps=14.4
|
|
[Episode 107410] reward=-125364634.7 actor_loss=0.2700 critic_loss=123983922097.2308 entropy=17.9955 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 107420] reward=-114804727.2 actor_loss=0.2930 critic_loss=112335758056.7273 entropy=18.0143 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 107420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465522.6 mean_steps=14.8
|
|
[Episode 107430] reward=-123367000.9 actor_loss=0.2653 critic_loss=149704988619.4872 entropy=18.0241 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 107440] reward=-119346535.3 actor_loss=0.2033 critic_loss=135228376529.4545 entropy=18.0288 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 107440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-527119.4 mean_steps=14.8
|
|
[Episode 107450] reward=-118985230.1 actor_loss=0.3273 critic_loss=111810196945.4545 entropy=18.0320 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 107460] reward=-120463211.7 actor_loss=0.2458 critic_loss=119321244636.6897 entropy=18.0321 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 107460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-475565.0 mean_steps=15.9
|
|
[Episode 107470] reward=-117590500.8 actor_loss=0.2463 critic_loss=110920115486.7200 entropy=18.0339 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 107480] reward=-117919156.2 actor_loss=0.2674 critic_loss=109797537529.4359 entropy=18.0321 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 107480] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-415265.1 mean_steps=17.9
|
|
[Episode 107490] reward=-374876113.2 actor_loss=0.3492 critic_loss=204548008423208.4062 entropy=18.0208 approx_kl=0.0039 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 107500] reward=-121324964.3 actor_loss=0.2968 critic_loss=116654353732.6829 entropy=18.0170 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 107500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-504518.0 mean_steps=13.2
|
|
[Episode 107510] reward=-119283957.1 actor_loss=0.2572 critic_loss=122088082697.4815 entropy=18.0008 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 107520] reward=-122989453.1 actor_loss=0.2484 critic_loss=120052020370.2857 entropy=17.9901 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 107520] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-629640.0 mean_steps=10.8
|
|
[Episode 107530] reward=-123029944.8 actor_loss=0.2209 critic_loss=115158162909.8667 entropy=17.9925 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 107540] reward=-126316421.0 actor_loss=0.2008 critic_loss=124910230528.0000 entropy=17.9921 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 107540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-572207.9 mean_steps=14.6
|
|
[Episode 107550] reward=-114624083.8 actor_loss=0.2953 critic_loss=113192141892.2667 entropy=17.9978 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 107560] reward=-123302558.3 actor_loss=0.2129 critic_loss=120335442168.2424 entropy=18.0026 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 107560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434681.4 mean_steps=16.0
|
|
[Episode 107570] reward=-121955374.2 actor_loss=0.2563 critic_loss=119039725808.9412 entropy=17.9965 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 107580] reward=-127724961.0 actor_loss=0.2577 critic_loss=126547339946.6667 entropy=17.9749 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 107580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479244.8 mean_steps=14.8
|
|
[Episode 107590] reward=-117045389.9 actor_loss=0.3478 critic_loss=116198271268.5714 entropy=17.9665 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 107600] reward=-119100954.0 actor_loss=0.2245 critic_loss=117779163347.8621 entropy=17.9545 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 107600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-456057.7 mean_steps=15.6
|
|
[Episode 107610] reward=-122056761.5 actor_loss=0.2737 critic_loss=123447749836.8000 entropy=17.9826 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 107620] reward=-124651826.5 actor_loss=0.3578 critic_loss=129107238912.0000 entropy=17.9921 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 107620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465497.3 mean_steps=14.8
|
|
[Episode 107630] reward=-117712406.9 actor_loss=0.3062 critic_loss=111533826247.8049 entropy=17.9962 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 107640] reward=-2603679617.3 actor_loss=0.3267 critic_loss=14704778601894162.0000 entropy=17.9923 approx_kl=-0.0010 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 107640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551441.3 mean_steps=13.6
|
|
[Episode 107650] reward=-2813952159.5 actor_loss=0.3019 critic_loss=17354287615003124.0000 entropy=18.0014 approx_kl=-0.0026 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 107660] reward=-5772798968.0 actor_loss=17.6465 critic_loss=67314109988266896.0000 entropy=18.0156 approx_kl=-0.0004 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 107660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-662988.8 mean_steps=12.2
|
|
[Episode 107670] reward=-6236780485.1 actor_loss=0.2803 critic_loss=46754711809445800.0000 entropy=18.0293 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1139 front_blocked=0
|
|
[Episode 107680] reward=-5883214012.2 actor_loss=0.2896 critic_loss=68081609095372256.0000 entropy=18.0554 approx_kl=-0.0007 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 107680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448269.2 mean_steps=14.7
|
|
[Episode 107690] reward=-2544335448.4 actor_loss=0.2674 critic_loss=14183240174339504.0000 entropy=18.0741 approx_kl=-0.0011 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 107700] reward=-30302056261.9 actor_loss=0.1832 critic_loss=618119387109136128.0000 entropy=18.0873 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0951 front_blocked=0
|
|
[Eval 107700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488810.3 mean_steps=13.9
|
|
[Episode 107710] reward=-8417129032.9 actor_loss=0.3376 critic_loss=137070836215470848.0000 entropy=18.0967 approx_kl=-0.0030 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 107720] reward=-17985925104.0 actor_loss=50.5460 critic_loss=238508988028891968.0000 entropy=18.1152 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 107720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-579390.3 mean_steps=14.3
|
|
[Episode 107730] reward=-22539874461.5 actor_loss=33.9411 critic_loss=300534576318350016.0000 entropy=18.1294 approx_kl=0.0047 kl_stop=0 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 107740] reward=-16437546627.7 actor_loss=0.2011 critic_loss=341815618951521408.0000 entropy=18.1245 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0990 front_blocked=0
|
|
[Eval 107740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475314.2 mean_steps=14.8
|
|
[Episode 107750] reward=-23181856759.4 actor_loss=0.1243 critic_loss=612877486039556992.0000 entropy=18.1454 approx_kl=-0.0014 kl_stop=0 intervention_rate=0.0970 front_blocked=0
|
|
[Episode 107760] reward=-20414430013.2 actor_loss=0.1199 critic_loss=288055038406819840.0000 entropy=18.1408 approx_kl=0.0014 kl_stop=0 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 107760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401922.6 mean_steps=16.3
|
|
[Episode 107770] reward=-14375132380.8 actor_loss=0.0960 critic_loss=153786758069083616.0000 entropy=18.1531 approx_kl=-0.0002 kl_stop=0 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 107780] reward=-36805117189.8 actor_loss=-0.0144 critic_loss=795641657737650048.0000 entropy=18.1794 approx_kl=-0.0000 kl_stop=0 intervention_rate=0.0677 front_blocked=0
|
|
[Eval 107780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513970.5 mean_steps=14.0
|
|
[Episode 107790] reward=-27067662707.8 actor_loss=0.1536 critic_loss=525125881070237952.0000 entropy=18.1937 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 107800] reward=-12726600377.7 actor_loss=0.2293 critic_loss=173419416265226816.0000 entropy=18.2011 approx_kl=0.0000 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Eval 107800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-347137.7 mean_steps=15.9
|
|
[Episode 107810] reward=-13552635010.5 actor_loss=0.0943 critic_loss=197951824119592096.0000 entropy=18.2167 approx_kl=-0.0014 kl_stop=0 intervention_rate=0.0990 front_blocked=0
|
|
[Episode 107820] reward=-5019258144.3 actor_loss=0.2125 critic_loss=50889405147073016.0000 entropy=18.2366 approx_kl=-0.0016 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 107820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-406106.4 mean_steps=16.7
|
|
[Episode 107830] reward=-7536634654.8 actor_loss=24.5616 critic_loss=110941539477272080.0000 entropy=18.2431 approx_kl=-0.0029 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Episode 107840] reward=-9883109616.9 actor_loss=15.0788 critic_loss=105213522746773056.0000 entropy=18.2634 approx_kl=-0.0023 kl_stop=0 intervention_rate=0.0977 front_blocked=0
|
|
[Eval 107840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575088.0 mean_steps=13.9
|
|
[Episode 107850] reward=-118674600.0 actor_loss=0.2416 critic_loss=117470034875.7333 entropy=18.2695 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 107860] reward=-1221406478.2 actor_loss=0.1768 critic_loss=3099860311346016.5000 entropy=18.2799 approx_kl=-0.0014 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 107860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477828.0 mean_steps=15.3
|
|
[Episode 107870] reward=-3303747118.4 actor_loss=0.2696 critic_loss=29966321974806664.0000 entropy=18.2762 approx_kl=-0.0034 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 107880] reward=-128036455.9 actor_loss=0.2591 critic_loss=127967521213.2174 entropy=18.2921 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 107880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513473.7 mean_steps=14.4
|
|
[Episode 107890] reward=-5614984623.9 actor_loss=0.1895 critic_loss=65024153971455136.0000 entropy=18.2990 approx_kl=0.0000 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Episode 107900] reward=-7529499037.5 actor_loss=0.2953 critic_loss=110453739425622336.0000 entropy=18.2965 approx_kl=-0.0033 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 107900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499058.1 mean_steps=14.0
|
|
[Episode 107910] reward=-13502172653.4 actor_loss=0.2271 critic_loss=200137642355026976.0000 entropy=18.3114 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 107920] reward=-5241043355.9 actor_loss=0.3342 critic_loss=55089571157047888.0000 entropy=18.3263 approx_kl=-0.0058 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 107920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442682.6 mean_steps=15.7
|
|
[Episode 107930] reward=-11752842936.1 actor_loss=0.2104 critic_loss=117383360110960464.0000 entropy=18.3438 approx_kl=0.0044 kl_stop=0 intervention_rate=0.0977 front_blocked=0
|
|
[Episode 107940] reward=-11473149088.5 actor_loss=0.1634 critic_loss=145851387637883520.0000 entropy=18.3522 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 107940] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-274129.9 mean_steps=18.1
|
|
[Episode 107950] reward=-2229609650.5 actor_loss=0.2392 critic_loss=10636844183876040.0000 entropy=18.3747 approx_kl=-0.0012 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 107960] reward=-555133364.1 actor_loss=0.2305 critic_loss=547009131597733.0000 entropy=18.3883 approx_kl=-0.0008 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 107960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441894.3 mean_steps=15.9
|
|
[Episode 107970] reward=-1873333813.0 actor_loss=0.2255 critic_loss=7675536484178330.0000 entropy=18.3828 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 107980] reward=-6102344472.3 actor_loss=0.1997 critic_loss=63210112833296960.0000 entropy=18.3901 approx_kl=0.0020 kl_stop=1 intervention_rate=0.1107 front_blocked=0
|
|
[Eval 107980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-349486.3 mean_steps=16.0
|
|
[Episode 107990] reward=-119488092.3 actor_loss=0.3330 critic_loss=123986919780.1739 entropy=18.3911 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 108000] reward=-124883267.5 actor_loss=0.2245 critic_loss=168975730278.4000 entropy=18.4025 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 108000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448915.9 mean_steps=14.6
|
|
[Episode 108010] reward=-122817312.4 actor_loss=0.2297 critic_loss=130707673641.5135 entropy=18.4194 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 108020] reward=-1160818186.1 actor_loss=0.6234 critic_loss=2891936067969206.0000 entropy=18.4316 approx_kl=0.0010 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 108020] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-233609567.0 mean_steps=25.2
|
|
[Episode 108030] reward=-127527658.6 actor_loss=0.2524 critic_loss=129324417752.1778 entropy=18.4599 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 108040] reward=-4337693221.7 actor_loss=0.1982 critic_loss=37019083667488400.0000 entropy=18.4560 approx_kl=-0.0012 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Eval 108040] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-214148.6 mean_steps=18.2
|
|
[Episode 108050] reward=-7752712925.7 actor_loss=0.2169 critic_loss=62504395156808592.0000 entropy=18.4438 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1055 front_blocked=0
|
|
[Episode 108060] reward=-7133979016.9 actor_loss=0.2082 critic_loss=55577210004258448.0000 entropy=18.4637 approx_kl=0.0019 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Eval 108060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-583387.3 mean_steps=13.9
|
|
[Episode 108070] reward=-123007008.7 actor_loss=0.3251 critic_loss=129398410931.8919 entropy=18.4606 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 108080] reward=-5870239909.1 actor_loss=0.1570 critic_loss=39124854783997088.0000 entropy=18.4676 approx_kl=-0.0001 kl_stop=0 intervention_rate=0.1074 front_blocked=0
|
|
[Eval 108080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-421526.9 mean_steps=16.8
|
|
[Episode 108090] reward=-1687966881.3 actor_loss=0.2778 critic_loss=7743550989754550.0000 entropy=18.4634 approx_kl=-0.0026 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 108100] reward=-2448498053.1 actor_loss=0.2451 critic_loss=13134202906782378.0000 entropy=18.4865 approx_kl=0.0000 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 108100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399930.9 mean_steps=15.6
|
|
[Episode 108110] reward=-1126861530.1 actor_loss=0.2168 critic_loss=2605561212746913.5000 entropy=18.4968 approx_kl=0.0004 kl_stop=1 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 108120] reward=-8457513231.3 actor_loss=0.2450 critic_loss=136328919365236240.0000 entropy=18.5078 approx_kl=-0.0011 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Eval 108120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-512598.6 mean_steps=14.9
|
|
[Episode 108130] reward=-134683727.0 actor_loss=0.2898 critic_loss=325538874336.9697 entropy=18.5044 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 108140] reward=-125037881.3 actor_loss=0.2377 critic_loss=127681548405.0286 entropy=18.4957 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 108140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-466408.6 mean_steps=15.8
|
|
[Episode 108150] reward=-1416194448.4 actor_loss=0.2506 critic_loss=6109807886338913.0000 entropy=18.5163 approx_kl=-0.0024 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 108160] reward=-282760812.1 actor_loss=0.2420 critic_loss=79436328252939.3750 entropy=18.5242 approx_kl=0.0009 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 108160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-596868.0 mean_steps=12.9
|
|
[Episode 108170] reward=-1310894733.5 actor_loss=0.2206 critic_loss=3627592901960954.5000 entropy=18.5378 approx_kl=-0.0005 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 108180] reward=-121139555.0 actor_loss=0.2940 critic_loss=127250520291.5556 entropy=18.5433 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 108180] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-306994.4 mean_steps=17.8
|
|
[Episode 108190] reward=-121967121.7 actor_loss=0.2265 critic_loss=142629242652.4445 entropy=18.5434 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 108200] reward=-4962489080.3 actor_loss=0.2505 critic_loss=30257477255592424.0000 entropy=18.5478 approx_kl=0.0027 kl_stop=1 intervention_rate=0.1094 front_blocked=0
|
|
[Eval 108200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478950.9 mean_steps=15.0
|
|
[Episode 108210] reward=-128016416.8 actor_loss=0.3113 critic_loss=134387430741.3333 entropy=18.5428 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 108220] reward=-7139408813.4 actor_loss=83.3398 critic_loss=63536730329079064.0000 entropy=18.5557 approx_kl=0.0341 kl_stop=1 intervention_rate=0.1087 front_blocked=0
|
|
[Eval 108220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502447.5 mean_steps=14.3
|
|
[Episode 108230] reward=-6524446191.5 actor_loss=4.6957 critic_loss=35060578067502420.0000 entropy=18.5467 approx_kl=0.0196 kl_stop=1 intervention_rate=0.0983 front_blocked=0
|
|
[Episode 108240] reward=-127692191.5 actor_loss=0.2510 critic_loss=137756033954.9091 entropy=18.5394 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 108240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483693.7 mean_steps=15.4
|
|
[Episode 108250] reward=-130299083.3 actor_loss=0.2332 critic_loss=146726998946.9091 entropy=18.5590 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 108260] reward=-127834831.7 actor_loss=0.3826 critic_loss=146237246410.1053 entropy=18.5631 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 108260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407313.5 mean_steps=16.2
|
|
[Episode 108270] reward=-7485941435.9 actor_loss=0.2451 critic_loss=109225319603861360.0000 entropy=18.5902 approx_kl=-0.0002 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 108280] reward=-2212981809.4 actor_loss=0.2462 critic_loss=10127218993441450.0000 entropy=18.6032 approx_kl=-0.0022 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 108280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394198.1 mean_steps=16.4
|
|
[Episode 108290] reward=-130576483.0 actor_loss=0.3546 critic_loss=377189449728.0000 entropy=18.6059 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 108300] reward=-3941192427.2 actor_loss=0.3096 critic_loss=33121802378088720.0000 entropy=18.6248 approx_kl=-0.0007 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 108300] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-351830.0 mean_steps=17.9
|
|
[Episode 108310] reward=-9490470625.2 actor_loss=0.1767 critic_loss=111036539572561152.0000 entropy=18.6343 approx_kl=-0.0004 kl_stop=0 intervention_rate=0.1035 front_blocked=0
|
|
[Episode 108320] reward=-15707527422.4 actor_loss=0.8343 critic_loss=174710284529696768.0000 entropy=18.6440 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 108320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-425766.5 mean_steps=14.6
|
|
[Episode 108330] reward=-3680179216.4 actor_loss=0.2372 critic_loss=28394421067535700.0000 entropy=18.6530 approx_kl=0.0149 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 108340] reward=-1320685486.1 actor_loss=0.2890 critic_loss=3515090928280098.0000 entropy=18.6508 approx_kl=0.0003 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 108340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486017.2 mean_steps=15.3
|
|
[Episode 108350] reward=-8978227317.3 actor_loss=0.1476 critic_loss=86878387341984480.0000 entropy=18.6704 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1009 front_blocked=0
|
|
[Episode 108360] reward=-7739613934.4 actor_loss=0.1985 critic_loss=118008909308164688.0000 entropy=18.6620 approx_kl=-0.0000 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Eval 108360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498114.1 mean_steps=13.8
|
|
[Episode 108370] reward=-124716418.0 actor_loss=0.2327 critic_loss=141712384728.1778 entropy=18.6677 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 108380] reward=-5276632143.5 actor_loss=0.2501 critic_loss=45925302848530384.0000 entropy=18.6630 approx_kl=0.0016 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Eval 108380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482062.3 mean_steps=14.0
|
|
[Episode 108390] reward=-678032010.9 actor_loss=0.2645 critic_loss=851950294444714.6250 entropy=18.6732 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 108400] reward=-131491976.6 actor_loss=0.1974 critic_loss=142569413017.6000 entropy=18.6732 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 108400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592373.6 mean_steps=12.9
|
|
[Episode 108410] reward=-3307074221.0 actor_loss=0.2759 critic_loss=22900030562480220.0000 entropy=18.6888 approx_kl=-0.0011 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 108420] reward=-6093543572.4 actor_loss=0.1099 critic_loss=35459164191873796.0000 entropy=18.7138 approx_kl=0.0010 kl_stop=0 intervention_rate=0.0990 front_blocked=0
|
|
[Eval 108420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-631561.7 mean_steps=12.8
|
|
[Episode 108430] reward=-991410501.7 actor_loss=0.3158 critic_loss=2071100950431516.5000 entropy=18.7264 approx_kl=-0.0007 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 108440] reward=-5335151603.7 actor_loss=0.2445 critic_loss=57953773082156872.0000 entropy=18.7519 approx_kl=-0.0011 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 108440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449836.1 mean_steps=15.7
|
|
[Episode 108450] reward=-128293561.3 actor_loss=0.2524 critic_loss=149953627672.3810 entropy=18.7581 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 108460] reward=-4836941967.4 actor_loss=0.4561 critic_loss=48587238633682440.0000 entropy=18.7747 approx_kl=-0.0025 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 108460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506342.0 mean_steps=14.1
|
|
[Episode 108470] reward=-130180581.1 actor_loss=0.2926 critic_loss=146397370260.2105 entropy=18.7815 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 108480] reward=-128567626.9 actor_loss=0.2788 critic_loss=227230927624.8276 entropy=18.7685 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 108480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-429891.3 mean_steps=16.6
|
|
[Episode 108490] reward=-6902877840.3 actor_loss=0.1288 critic_loss=53821301871982296.0000 entropy=18.7685 approx_kl=0.0040 kl_stop=1 intervention_rate=0.1022 front_blocked=0
|
|
[Episode 108500] reward=-127417512.6 actor_loss=0.3421 critic_loss=151088115438.9333 entropy=18.7696 approx_kl=0.0115 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 108500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-413259.6 mean_steps=16.9
|
|
[Episode 108510] reward=-130688926.7 actor_loss=0.3401 critic_loss=180041149098.6667 entropy=18.7644 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 108520] reward=-703928363.6 actor_loss=0.2508 critic_loss=952169024046876.5000 entropy=18.7708 approx_kl=-0.0039 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 108520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592748.7 mean_steps=14.1
|
|
[Episode 108530] reward=-1246663591.1 actor_loss=0.2678 critic_loss=3419561615472139.5000 entropy=18.7620 approx_kl=0.0001 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 108540] reward=-126575192.1 actor_loss=0.2380 critic_loss=134068493056.0000 entropy=18.7609 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 108540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466278.1 mean_steps=14.9
|
|
[Episode 108550] reward=-133101711.0 actor_loss=0.3267 critic_loss=143050203318.0444 entropy=18.7571 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 108560] reward=-126689978.6 actor_loss=0.2470 critic_loss=145990447319.5789 entropy=18.7444 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 108560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-582827.2 mean_steps=13.0
|
|
[Episode 108570] reward=-125497857.0 actor_loss=0.3653 critic_loss=135356369578.6667 entropy=18.7352 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 108580] reward=-124654066.5 actor_loss=0.2498 critic_loss=150932521688.1778 entropy=18.7281 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 108580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-423583.2 mean_steps=13.7
|
|
[Episode 108590] reward=-126837490.0 actor_loss=0.3744 critic_loss=138615254317.1765 entropy=18.6999 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 108600] reward=-130495939.8 actor_loss=0.2200 critic_loss=148786054394.3111 entropy=18.6870 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 108600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563587.9 mean_steps=14.2
|
|
[Episode 108610] reward=-129895055.9 actor_loss=0.1644 critic_loss=139600643510.8571 entropy=18.6683 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 108620] reward=-140415182.9 actor_loss=0.2465 critic_loss=1030846728730.9474 entropy=18.6460 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 108620] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-601968.7 mean_steps=12.2
|
|
[Episode 108630] reward=-131520566.4 actor_loss=0.2283 critic_loss=138681708544.0000 entropy=18.6140 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 108640] reward=-124306664.9 actor_loss=0.2672 critic_loss=133468413587.9111 entropy=18.6028 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 108640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-530197.5 mean_steps=12.7
|
|
[Episode 108650] reward=-125984311.4 actor_loss=0.3235 critic_loss=135284267651.6572 entropy=18.5876 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 108660] reward=-127828445.9 actor_loss=0.2006 critic_loss=136948893958.5641 entropy=18.5680 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 108660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518335.2 mean_steps=14.6
|
|
[Episode 108670] reward=-129495701.7 actor_loss=0.2702 critic_loss=143362489871.5151 entropy=18.5413 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 108680] reward=-131375306.9 actor_loss=0.2652 critic_loss=135387544780.8000 entropy=18.5301 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 108680] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-417710.3 mean_steps=18.1
|
|
[Episode 108690] reward=-127018665.4 actor_loss=0.2006 critic_loss=127114096515.8788 entropy=18.5127 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 108700] reward=-132896221.3 actor_loss=0.2870 critic_loss=137610067148.8000 entropy=18.5017 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 108700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-502012.5 mean_steps=16.3
|
|
[Episode 108710] reward=-125473537.7 actor_loss=0.2759 critic_loss=127460950562.1333 entropy=18.4894 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 108720] reward=-130107448.5 actor_loss=0.3729 critic_loss=134624756039.6800 entropy=18.4657 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 108720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452311.7 mean_steps=16.2
|
|
[Episode 108730] reward=-130853968.4 actor_loss=0.2553 critic_loss=133315893248.0000 entropy=18.4684 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 108740] reward=-126222368.7 actor_loss=0.3124 critic_loss=137610411239.2258 entropy=18.4527 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 108740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558168.6 mean_steps=13.7
|
|
[Episode 108750] reward=-125993525.4 actor_loss=0.2194 critic_loss=127072015397.9259 entropy=18.4470 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 108760] reward=-125191153.1 actor_loss=0.2122 critic_loss=129105308876.8000 entropy=18.4314 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 108760] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-186102.2 mean_steps=19.2
|
|
[Episode 108770] reward=-129046350.6 actor_loss=0.2311 critic_loss=130670387200.0000 entropy=18.4058 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 108780] reward=-126026811.1 actor_loss=0.2779 critic_loss=138482130215.8222 entropy=18.3964 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 108780] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-376246.0 mean_steps=16.6
|
|
[Episode 108790] reward=-128177344.3 actor_loss=0.2108 critic_loss=125486243475.9111 entropy=18.3734 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 108800] reward=-127465417.5 actor_loss=0.2232 critic_loss=129938655550.5778 entropy=18.3484 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 108800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466535.9 mean_steps=15.2
|
|
[Episode 108810] reward=-120369001.3 actor_loss=0.2744 critic_loss=133761415031.4667 entropy=18.3252 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 108820] reward=-130533043.1 actor_loss=0.1498 critic_loss=130342304153.6000 entropy=18.3168 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 108820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475169.8 mean_steps=15.3
|
|
[Episode 108830] reward=-125634564.7 actor_loss=0.3332 critic_loss=122699472661.9429 entropy=18.3109 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 108840] reward=-119277020.4 actor_loss=0.3436 critic_loss=119729110129.7778 entropy=18.2990 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 108840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-644864.7 mean_steps=12.5
|
|
[Episode 108850] reward=-126050579.4 actor_loss=0.2741 critic_loss=124770474666.6667 entropy=18.2812 approx_kl=0.0115 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 108860] reward=-126416990.1 actor_loss=0.3550 critic_loss=132501305935.6444 entropy=18.2572 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 108860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516399.9 mean_steps=14.4
|
|
[Episode 108870] reward=-120703614.4 actor_loss=0.2600 critic_loss=121042124117.3333 entropy=18.2427 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 108880] reward=-128008713.9 actor_loss=0.2957 critic_loss=128239877431.6522 entropy=18.2583 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 108880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571868.0 mean_steps=13.8
|
|
[Episode 108890] reward=-123099043.3 actor_loss=0.1909 critic_loss=123816358884.3243 entropy=18.2521 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 108900] reward=-125797127.3 actor_loss=0.2998 critic_loss=125143363128.8889 entropy=18.2447 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 108900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-619111.8 mean_steps=12.2
|
|
[Episode 108910] reward=-125223627.4 actor_loss=0.2475 critic_loss=122541693734.7879 entropy=18.2319 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 108920] reward=-123011395.4 actor_loss=0.2969 critic_loss=121065449009.5484 entropy=18.2140 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 108920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499916.2 mean_steps=14.4
|
|
[Episode 108930] reward=-342432988.9 actor_loss=0.3212 critic_loss=163196162591948.8125 entropy=18.2141 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 108940] reward=-124589938.5 actor_loss=0.3094 critic_loss=129197393296.6956 entropy=18.2187 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 108940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561823.6 mean_steps=13.5
|
|
[Episode 108950] reward=-123705299.8 actor_loss=0.3608 critic_loss=133957138220.1379 entropy=18.2348 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 108960] reward=-120113364.9 actor_loss=0.3419 critic_loss=124576403304.2963 entropy=18.2448 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 108960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-521432.1 mean_steps=16.3
|
|
[Episode 108970] reward=-125322430.1 actor_loss=0.3268 critic_loss=126526911829.3333 entropy=18.2474 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 108980] reward=-122587502.7 actor_loss=0.2067 critic_loss=118896338261.3333 entropy=18.2445 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 108980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438593.5 mean_steps=15.8
|
|
[Episode 108990] reward=-124872751.1 actor_loss=0.3224 critic_loss=124180562944.0000 entropy=18.2285 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 109000] reward=-128078887.1 actor_loss=0.1775 critic_loss=128941368570.3111 entropy=18.2297 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 109000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-601795.0 mean_steps=13.1
|
|
[Episode 109010] reward=-120700660.6 actor_loss=0.1892 critic_loss=120806898073.6000 entropy=18.2219 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 109020] reward=-121092246.4 actor_loss=0.2985 critic_loss=116799911034.8800 entropy=18.1962 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 109020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501201.2 mean_steps=13.3
|
|
[Episode 109030] reward=-119474145.1 actor_loss=0.3731 critic_loss=113376084787.2000 entropy=18.1784 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 109040] reward=-127924094.7 actor_loss=0.2602 critic_loss=126105400002.2069 entropy=18.1627 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 109040] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-267786.6 mean_steps=16.6
|
|
[Episode 109050] reward=-121459313.3 actor_loss=0.3469 critic_loss=126074462704.4848 entropy=18.1622 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 109060] reward=-125047739.4 actor_loss=0.3818 critic_loss=120947582566.4000 entropy=18.1501 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 109060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424962.9 mean_steps=14.8
|
|
[Episode 109070] reward=-125593357.1 actor_loss=0.2806 critic_loss=129410623078.4000 entropy=18.1392 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 109080] reward=-127339279.9 actor_loss=0.3035 critic_loss=130444987960.8889 entropy=18.1338 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 109080] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-403300.9 mean_steps=17.2
|
|
[Episode 109090] reward=-122057669.7 actor_loss=0.2562 critic_loss=114173547613.0909 entropy=18.1262 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 109100] reward=-123642487.6 actor_loss=0.2137 critic_loss=122637369890.1333 entropy=18.1047 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 109100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507255.5 mean_steps=13.6
|
|
[Episode 109110] reward=-121562993.8 actor_loss=0.2834 critic_loss=116253622462.5116 entropy=18.0968 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 109120] reward=-123763130.2 actor_loss=0.3129 critic_loss=119268662503.2258 entropy=18.0819 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 109120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454434.1 mean_steps=14.9
|
|
[Episode 109130] reward=-123835213.1 actor_loss=0.2111 critic_loss=118601806901.8947 entropy=18.0830 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 109140] reward=-123673762.7 actor_loss=0.2747 critic_loss=120306560279.2727 entropy=18.0664 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 109140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504448.4 mean_steps=15.1
|
|
[Episode 109150] reward=-124661756.6 actor_loss=0.1947 critic_loss=118411909722.3529 entropy=18.0585 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 109160] reward=-119694963.0 actor_loss=0.2906 critic_loss=123478386149.0526 entropy=18.0663 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 109160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485266.0 mean_steps=14.3
|
|
[Episode 109170] reward=-121496550.6 actor_loss=0.2979 critic_loss=118255692640.7111 entropy=18.0607 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 109180] reward=-126479970.7 actor_loss=0.2223 critic_loss=133552252878.0488 entropy=18.0402 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 109180] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-586028.7 mean_steps=11.9
|
|
[Episode 109190] reward=-119813368.5 actor_loss=0.2862 critic_loss=116724333350.7879 entropy=18.0274 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 109200] reward=-124136204.7 actor_loss=0.2907 critic_loss=121782566209.8286 entropy=18.0207 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 109200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503242.2 mean_steps=14.2
|
|
[Episode 109210] reward=-123820444.0 actor_loss=0.3766 critic_loss=115230197168.3556 entropy=18.0075 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 109220] reward=-123869319.8 actor_loss=0.3503 critic_loss=119816788570.3529 entropy=17.9982 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 109220] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-695183.6 mean_steps=11.7
|
|
[Episode 109230] reward=-121726599.9 actor_loss=0.2652 critic_loss=114011953624.6154 entropy=17.9790 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 109240] reward=-125360450.4 actor_loss=0.2852 critic_loss=127976738816.0000 entropy=17.9641 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 109240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-494446.7 mean_steps=15.1
|
|
[Episode 109250] reward=-120464453.2 actor_loss=0.2738 critic_loss=117213770729.2444 entropy=17.9533 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 109260] reward=-115842999.2 actor_loss=0.3079 critic_loss=109145975271.6190 entropy=17.9610 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 109260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-496036.4 mean_steps=15.9
|
|
[Episode 109270] reward=-119338381.8 actor_loss=0.2548 critic_loss=110087154565.1200 entropy=17.9470 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 109280] reward=-120772751.7 actor_loss=0.2933 critic_loss=120816109300.8696 entropy=17.9342 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 109280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509291.1 mean_steps=14.1
|
|
[Episode 109290] reward=-116592301.7 actor_loss=0.3386 critic_loss=113446293900.3871 entropy=17.9401 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 109300] reward=-119080318.5 actor_loss=0.2906 critic_loss=115738289038.2222 entropy=17.9209 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 109300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419939.9 mean_steps=15.3
|
|
[Episode 109310] reward=-118754878.8 actor_loss=0.3332 critic_loss=117956882804.3636 entropy=17.9072 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 109320] reward=-118418612.7 actor_loss=0.2142 critic_loss=118378046392.5581 entropy=17.9094 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 109320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-277690.4 mean_steps=15.4
|
|
[Episode 109330] reward=-121667159.9 actor_loss=0.2292 critic_loss=118088934238.3158 entropy=17.9076 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 109340] reward=-116927261.4 actor_loss=0.3592 critic_loss=112031871385.6000 entropy=17.9131 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 109340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513968.7 mean_steps=14.2
|
|
[Episode 109350] reward=-119198836.3 actor_loss=0.3055 critic_loss=110525136523.6364 entropy=17.9068 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 109360] reward=-117130738.7 actor_loss=0.3840 critic_loss=114661987669.3333 entropy=17.8987 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 109360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551728.6 mean_steps=12.7
|
|
[Episode 109370] reward=-122266247.8 actor_loss=0.2965 critic_loss=111322927012.9778 entropy=17.8924 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 109380] reward=-117393594.9 actor_loss=0.2727 critic_loss=113063725843.6923 entropy=17.8821 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 109380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-456158.2 mean_steps=13.8
|
|
[Episode 109390] reward=-117595102.8 actor_loss=0.2750 critic_loss=110416218248.5333 entropy=17.8779 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 109400] reward=-117062435.0 actor_loss=0.3205 critic_loss=112242519062.7556 entropy=17.8647 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 109400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-439696.1 mean_steps=13.6
|
|
[Episode 109410] reward=-117225561.3 actor_loss=0.3529 critic_loss=114298094980.4138 entropy=17.8519 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 109420] reward=-119317912.1 actor_loss=0.2606 critic_loss=115718762354.7586 entropy=17.8519 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 109420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-582437.3 mean_steps=11.7
|
|
[Episode 109430] reward=-113232532.1 actor_loss=0.2475 critic_loss=106105839988.3636 entropy=17.8502 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 109440] reward=-120393429.9 actor_loss=0.2727 critic_loss=146470573541.0526 entropy=17.8481 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 109440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575301.6 mean_steps=13.8
|
|
[Episode 109450] reward=-117903864.5 actor_loss=0.4844 critic_loss=114904675840.0000 entropy=17.8332 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 109460] reward=-117079278.0 actor_loss=0.2802 critic_loss=115265194302.5778 entropy=17.8284 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 109460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-466554.8 mean_steps=13.4
|
|
[Episode 109470] reward=-121335290.9 actor_loss=0.3042 critic_loss=112228260352.0000 entropy=17.8229 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 109480] reward=-117129226.0 actor_loss=0.3551 critic_loss=108881765808.3556 entropy=17.8149 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 109480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-559262.0 mean_steps=14.6
|
|
[Episode 109490] reward=-119909901.2 actor_loss=0.3671 critic_loss=113447721187.5556 entropy=17.7977 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 109500] reward=-120069841.0 actor_loss=0.1444 critic_loss=111408891858.4889 entropy=17.7947 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 109500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539512.1 mean_steps=13.3
|
|
[Episode 109510] reward=-112940754.5 actor_loss=0.2541 critic_loss=106367215773.5385 entropy=17.7807 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 109520] reward=-120718645.5 actor_loss=0.3200 critic_loss=112132904598.5882 entropy=17.7753 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 109520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419904.1 mean_steps=15.2
|
|
[Episode 109530] reward=-121039466.7 actor_loss=0.2464 critic_loss=299814091539.6923 entropy=17.7845 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 109540] reward=-120757197.8 actor_loss=0.3233 critic_loss=114433880155.0222 entropy=17.7946 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 109540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435045.3 mean_steps=15.4
|
|
[Episode 109550] reward=-121145685.7 actor_loss=0.2633 critic_loss=120560920348.4444 entropy=17.7999 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 109560] reward=-121391670.3 actor_loss=0.3493 critic_loss=120336754845.5385 entropy=17.7999 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 109560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-582581.4 mean_steps=12.4
|
|
[Episode 109570] reward=-115617258.3 actor_loss=0.3358 critic_loss=109078498547.8095 entropy=17.7997 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 109580] reward=-119009957.5 actor_loss=0.3363 critic_loss=111947383414.1538 entropy=17.7895 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 109580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-408785.4 mean_steps=16.4
|
|
[Episode 109590] reward=-118297322.1 actor_loss=0.2729 critic_loss=108425834105.9048 entropy=17.8006 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 109600] reward=-118052933.8 actor_loss=0.3202 critic_loss=108261207939.8788 entropy=17.8172 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 109600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-508368.4 mean_steps=13.1
|
|
[Episode 109610] reward=-113407445.3 actor_loss=0.4669 critic_loss=110893438657.4222 entropy=17.8040 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 109620] reward=-122773047.6 actor_loss=0.2854 critic_loss=117933606725.8182 entropy=17.7914 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 109620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-573459.4 mean_steps=14.3
|
|
[Episode 109630] reward=-123917289.3 actor_loss=0.2233 critic_loss=117767326014.5778 entropy=17.7988 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 109640] reward=-116078883.4 actor_loss=0.3857 critic_loss=111332880570.1818 entropy=17.7812 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 109640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531339.3 mean_steps=14.2
|
|
[Episode 109650] reward=-113146549.4 actor_loss=0.3280 critic_loss=100682979220.2105 entropy=17.7544 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 109660] reward=-123814568.7 actor_loss=0.2797 critic_loss=114520577686.5882 entropy=17.7506 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 109660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580316.2 mean_steps=12.7
|
|
[Episode 109670] reward=-118186023.4 actor_loss=0.2121 critic_loss=115517441780.8696 entropy=17.7374 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 109680] reward=-122713025.5 actor_loss=0.2411 critic_loss=110360091073.5610 entropy=17.7330 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 109680] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-643809.0 mean_steps=11.2
|
|
[Episode 109690] reward=-121234434.1 actor_loss=0.2083 critic_loss=109945546654.4762 entropy=17.7278 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 109700] reward=-118854142.6 actor_loss=0.3658 critic_loss=108608882326.5882 entropy=17.7257 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 109700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490862.1 mean_steps=15.1
|
|
[Episode 109710] reward=-117443285.6 actor_loss=0.3409 critic_loss=131081324032.0000 entropy=17.7211 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 109720] reward=-122197094.6 actor_loss=0.2903 critic_loss=112983881578.1463 entropy=17.7305 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 109720] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-401143.6 mean_steps=17.2
|
|
[Episode 109730] reward=-118705145.0 actor_loss=0.3444 critic_loss=115057987343.0588 entropy=17.7330 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 109740] reward=-112977656.6 actor_loss=0.3457 critic_loss=104277409063.8222 entropy=17.7433 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 109740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523003.4 mean_steps=14.1
|
|
[Episode 109750] reward=-115401285.7 actor_loss=0.3872 critic_loss=106338140766.8148 entropy=17.7448 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 109760] reward=-121232795.2 actor_loss=0.2368 critic_loss=115171443052.0889 entropy=17.7426 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 109760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-592331.2 mean_steps=11.9
|
|
[Episode 109770] reward=-117708375.8 actor_loss=0.2804 critic_loss=109819047749.8182 entropy=17.7536 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 109780] reward=-118107793.1 actor_loss=0.4209 critic_loss=107161384277.3333 entropy=17.7589 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 109780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515911.1 mean_steps=14.2
|
|
[Episode 109790] reward=-119021348.2 actor_loss=0.2698 critic_loss=108965402487.4667 entropy=17.7546 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 109800] reward=-118401829.8 actor_loss=0.2061 critic_loss=107913474288.9412 entropy=17.7421 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 109800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-428057.3 mean_steps=13.8
|
|
[Episode 109810] reward=-116386241.6 actor_loss=0.3524 critic_loss=108448088064.0000 entropy=17.7413 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 109820] reward=-117415696.1 actor_loss=0.2961 critic_loss=106225334499.5556 entropy=17.7347 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 109820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524045.3 mean_steps=13.1
|
|
[Episode 109830] reward=-116384381.2 actor_loss=0.3072 critic_loss=107645840588.8000 entropy=17.7202 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 109840] reward=-122808462.1 actor_loss=0.2913 critic_loss=116474771456.0000 entropy=17.7218 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 109840] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-690226.8 mean_steps=11.3
|
|
[Episode 109850] reward=-120027818.6 actor_loss=0.2510 critic_loss=110167213056.0000 entropy=17.7124 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 109860] reward=-119874448.9 actor_loss=0.2986 critic_loss=125269689533.6296 entropy=17.7073 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 109860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463479.3 mean_steps=14.8
|
|
[Episode 109870] reward=-123003815.0 actor_loss=0.2440 critic_loss=115698932589.7143 entropy=17.7092 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 109880] reward=-125288995.3 actor_loss=0.2833 critic_loss=119064206449.7778 entropy=17.7057 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 109880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572259.7 mean_steps=12.3
|
|
[Episode 109890] reward=-119813948.2 actor_loss=0.2524 critic_loss=107139977801.1429 entropy=17.7006 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 109900] reward=-112896431.9 actor_loss=0.3505 critic_loss=102537404112.5926 entropy=17.6908 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 109900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-638842.9 mean_steps=12.0
|
|
[Episode 109910] reward=-122583477.5 actor_loss=0.2895 critic_loss=116317149646.4516 entropy=17.7060 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 109920] reward=-117412096.3 actor_loss=0.3199 critic_loss=109606135216.3556 entropy=17.7210 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 109920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-567289.5 mean_steps=11.7
|
|
[Episode 109930] reward=-118585718.6 actor_loss=0.2549 critic_loss=109251013290.6667 entropy=17.7122 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 109940] reward=-121747258.5 actor_loss=0.2668 critic_loss=112807872325.8182 entropy=17.7071 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 109940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575065.2 mean_steps=13.3
|
|
[Episode 109950] reward=-123065419.9 actor_loss=0.3056 critic_loss=112111436504.1778 entropy=17.6851 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 109960] reward=-123661036.0 actor_loss=0.2875 critic_loss=113370410188.8000 entropy=17.6656 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 109960] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-681771.2 mean_steps=12.2
|
|
[Episode 109970] reward=-121245209.3 actor_loss=0.2190 critic_loss=108733320555.3548 entropy=17.6692 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 109980] reward=-120309683.9 actor_loss=0.2344 critic_loss=108682980669.7931 entropy=17.6698 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 109980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-389155.3 mean_steps=15.9
|
|
[Episode 109990] reward=-115628793.3 actor_loss=0.2645 critic_loss=106096450218.6667 entropy=17.6612 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 110000] reward=-114296016.5 actor_loss=0.2808 critic_loss=105426060174.2222 entropy=17.6561 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 110000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-667536.8 mean_steps=12.4
|
|
[Episode 110010] reward=-119019118.8 actor_loss=0.3398 critic_loss=112187083854.7692 entropy=17.6399 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 110020] reward=-111991312.7 actor_loss=0.4370 critic_loss=101201701336.6154 entropy=17.6412 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 110020] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-744769.2 mean_steps=11.1
|
|
[Episode 110030] reward=-120915227.6 actor_loss=0.2550 critic_loss=106481848228.9778 entropy=17.6361 approx_kl=0.0108 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 110040] reward=-119932553.1 actor_loss=0.3309 critic_loss=109435620192.7111 entropy=17.6465 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 110040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-450993.1 mean_steps=13.9
|
|
[Episode 110050] reward=-121858290.0 actor_loss=0.2999 critic_loss=114290405243.8710 entropy=17.6315 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 110060] reward=-119457561.0 actor_loss=0.3300 critic_loss=111044638358.5882 entropy=17.6090 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 110060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-574244.3 mean_steps=12.7
|
|
[Episode 110070] reward=-115160603.7 actor_loss=0.2917 critic_loss=192808452667.5349 entropy=17.6010 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 110080] reward=-123128219.5 actor_loss=0.2740 critic_loss=109648132698.3529 entropy=17.5819 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 110080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538127.0 mean_steps=13.7
|
|
[Episode 110090] reward=-121758724.0 actor_loss=0.2543 critic_loss=113518512742.4000 entropy=17.5774 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 110100] reward=-115842446.6 actor_loss=0.2790 critic_loss=108114112401.2973 entropy=17.5764 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 110100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-558496.4 mean_steps=12.4
|
|
[Episode 110110] reward=-118252847.1 actor_loss=0.1830 critic_loss=109161486493.5385 entropy=17.5964 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 110120] reward=-118876161.0 actor_loss=0.3555 critic_loss=107700606174.6087 entropy=17.6030 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 110120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-407857.2 mean_steps=15.1
|
|
[Episode 110130] reward=-110665114.7 actor_loss=0.3548 critic_loss=99845034621.1555 entropy=17.5959 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 110140] reward=-118228066.2 actor_loss=0.3810 critic_loss=110355454179.5556 entropy=17.6038 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 110140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-511127.7 mean_steps=15.8
|
|
[Episode 110150] reward=-117808022.1 actor_loss=0.1768 critic_loss=108622807230.5116 entropy=17.5923 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 110160] reward=-119605606.0 actor_loss=0.2750 critic_loss=115032520411.4286 entropy=17.5879 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 110160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433765.3 mean_steps=15.6
|
|
[Episode 110170] reward=-112105127.2 actor_loss=0.3670 critic_loss=102036592139.3778 entropy=17.5968 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 110180] reward=-119119714.3 actor_loss=0.2679 critic_loss=108704859932.4444 entropy=17.5952 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 110180] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-708574.7 mean_steps=11.4
|
|
[Episode 110190] reward=-114593099.3 actor_loss=0.3804 critic_loss=105328572643.5556 entropy=17.5886 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 110200] reward=-114858733.5 actor_loss=0.3148 critic_loss=106938417152.0000 entropy=17.6091 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 110200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-568109.1 mean_steps=14.1
|
|
[Episode 110210] reward=-124126486.1 actor_loss=0.2710 critic_loss=111945829717.3333 entropy=17.5903 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 110220] reward=-116100817.9 actor_loss=0.2573 critic_loss=106404058663.3846 entropy=17.5909 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 110220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510069.0 mean_steps=13.8
|
|
[Episode 110230] reward=-119324896.9 actor_loss=0.2420 critic_loss=108990008398.7692 entropy=17.5908 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 110240] reward=-119669068.4 actor_loss=0.3092 critic_loss=112660071310.2222 entropy=17.5939 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 110240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-532038.5 mean_steps=15.2
|
|
[Episode 110250] reward=-121734239.9 actor_loss=0.2500 critic_loss=106807287504.5926 entropy=17.6101 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 110260] reward=-119744437.3 actor_loss=0.2438 critic_loss=110516743021.7143 entropy=17.6029 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 110260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-591157.8 mean_steps=14.7
|
|
[Episode 110270] reward=-113414954.5 actor_loss=0.3535 critic_loss=107487328548.5714 entropy=17.5917 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 110280] reward=-117355418.1 actor_loss=0.3145 critic_loss=108400167253.3333 entropy=17.5998 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 110280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-515018.9 mean_steps=14.8
|
|
[Episode 110290] reward=-118154506.8 actor_loss=0.2056 critic_loss=103213430465.4222 entropy=17.6146 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 110300] reward=-114875206.8 actor_loss=0.3038 critic_loss=105572268259.5556 entropy=17.6090 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 110300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561647.9 mean_steps=13.8
|
|
[Episode 110310] reward=-116161649.6 actor_loss=0.3680 critic_loss=108730979214.2222 entropy=17.6106 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 110320] reward=-114771963.2 actor_loss=0.3626 critic_loss=100422898639.2381 entropy=17.5933 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 110320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-475407.2 mean_steps=15.8
|
|
[Episode 110330] reward=-114668171.1 actor_loss=0.3134 critic_loss=105560888542.6087 entropy=17.5992 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 110340] reward=-119219207.7 actor_loss=0.3415 critic_loss=110256476842.6667 entropy=17.5782 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 110340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-669693.0 mean_steps=13.3
|
|
[Episode 110350] reward=-114040690.8 actor_loss=0.3449 critic_loss=105489421835.3778 entropy=17.5727 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 110360] reward=-119050222.4 actor_loss=0.2892 critic_loss=106793823904.9143 entropy=17.5782 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 110360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481774.8 mean_steps=13.8
|
|
[Episode 110370] reward=-120606957.2 actor_loss=0.2332 critic_loss=109801396321.5238 entropy=17.5697 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 110380] reward=-116680463.7 actor_loss=0.3331 critic_loss=103000164165.8182 entropy=17.5643 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 110380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467035.0 mean_steps=14.8
|
|
[Episode 110390] reward=-120289541.6 actor_loss=0.2874 critic_loss=112693624832.0000 entropy=17.5438 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 110400] reward=-111454848.7 actor_loss=0.3479 critic_loss=110093538024.7273 entropy=17.5809 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 110400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506144.3 mean_steps=14.6
|
|
[Episode 110410] reward=-120930688.6 actor_loss=0.2610 critic_loss=113336780208.3556 entropy=17.5787 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 110420] reward=-116010545.6 actor_loss=0.2639 critic_loss=107173452549.6889 entropy=17.5485 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 110420] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-618793.0 mean_steps=11.1
|
|
[Episode 110430] reward=-118668170.7 actor_loss=0.2930 critic_loss=111159090107.7333 entropy=17.5255 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 110440] reward=-117467310.6 actor_loss=0.3377 critic_loss=106363744040.4211 entropy=17.5411 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 110440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-611370.8 mean_steps=12.8
|
|
[Episode 110450] reward=-113685962.9 actor_loss=0.2839 critic_loss=104679081392.3556 entropy=17.5528 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 110460] reward=-117110351.7 actor_loss=0.2853 critic_loss=105071073529.7561 entropy=17.5826 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 110460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-684349.8 mean_steps=12.5
|
|
[Episode 110470] reward=-118328228.0 actor_loss=0.2302 critic_loss=109548490843.0222 entropy=17.5785 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 110480] reward=-120128962.0 actor_loss=0.3286 critic_loss=111745425408.0000 entropy=17.5988 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 110480] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-355181.6 mean_steps=17.3
|
|
[Episode 110490] reward=-117045299.3 actor_loss=0.2890 critic_loss=111545341269.3333 entropy=17.6133 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 110500] reward=-118743979.7 actor_loss=0.3078 critic_loss=107243042406.4000 entropy=17.6181 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 110500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-656944.3 mean_steps=12.0
|
|
[Episode 110510] reward=-113805551.8 actor_loss=0.4917 critic_loss=119420568765.6296 entropy=17.6240 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 110520] reward=-112090298.1 actor_loss=0.3663 critic_loss=101679439216.6400 entropy=17.6204 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 110520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-503456.2 mean_steps=13.1
|
|
[Episode 110530] reward=-159275253.8 actor_loss=0.2030 critic_loss=7077991948834.1338 entropy=17.6011 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 110540] reward=-113989227.0 actor_loss=0.2906 critic_loss=103069084009.4118 entropy=17.6031 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 110540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-388306.2 mean_steps=15.0
|
|
[Episode 110550] reward=-115684993.7 actor_loss=0.2912 critic_loss=101336414435.5556 entropy=17.6024 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 110560] reward=-112013632.4 actor_loss=0.4162 critic_loss=99386946807.1724 entropy=17.6134 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 110560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529411.1 mean_steps=13.4
|
|
[Episode 110570] reward=-193361037.6 actor_loss=0.3359 critic_loss=19941722388343.4648 entropy=17.6107 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 110580] reward=-145974986.8 actor_loss=0.4029 critic_loss=3646246991579.4287 entropy=17.6253 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 110580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-619596.9 mean_steps=12.3
|
|
[Episode 110590] reward=-118854183.8 actor_loss=0.3227 critic_loss=108258082451.9111 entropy=17.6268 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 110600] reward=-115293848.3 actor_loss=0.2773 critic_loss=103289324248.1778 entropy=17.6199 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 110600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-560414.9 mean_steps=14.4
|
|
[Episode 110610] reward=-116095834.8 actor_loss=0.2921 critic_loss=111509209998.2222 entropy=17.6189 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 110620] reward=-119852672.4 actor_loss=0.3510 critic_loss=113452791905.5238 entropy=17.6180 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 110620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-397465.3 mean_steps=15.9
|
|
[Episode 110630] reward=-124313165.5 actor_loss=0.2273 critic_loss=114555397120.0000 entropy=17.6248 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 110640] reward=-192861532.7 actor_loss=0.3527 critic_loss=21657946102897.7773 entropy=17.6191 approx_kl=0.0024 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 110640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475693.4 mean_steps=13.8
|
|
[Episode 110650] reward=-132274873.8 actor_loss=0.3540 critic_loss=1087731670129.7778 entropy=17.6307 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 110660] reward=-122411862.2 actor_loss=0.1902 critic_loss=109093050823.1111 entropy=17.6275 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 110660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-334151.2 mean_steps=15.9
|
|
[Episode 110670] reward=-123333554.0 actor_loss=0.2305 critic_loss=115760730535.7241 entropy=17.6306 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 110680] reward=-115529212.2 actor_loss=0.2131 critic_loss=102330172120.1778 entropy=17.6342 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 110680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460037.1 mean_steps=14.7
|
|
[Episode 110690] reward=-116555336.9 actor_loss=0.2701 critic_loss=109803861606.4000 entropy=17.6404 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 110700] reward=-117381820.3 actor_loss=0.2541 critic_loss=101449309297.7778 entropy=17.6694 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 110700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-425822.3 mean_steps=14.7
|
|
[Episode 110710] reward=-115710153.2 actor_loss=0.3211 critic_loss=115220127015.8222 entropy=17.6536 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 110720] reward=-120223525.6 actor_loss=0.2297 critic_loss=198153545318.4000 entropy=17.6539 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 110720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-401130.7 mean_steps=15.3
|
|
[Episode 110730] reward=-121134358.4 actor_loss=0.2821 critic_loss=304861249536.0000 entropy=17.6450 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 110740] reward=-119351043.3 actor_loss=0.3280 critic_loss=119688496965.8182 entropy=17.6547 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 110740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502760.1 mean_steps=14.1
|
|
[Episode 110750] reward=-152131932.2 actor_loss=0.3638 critic_loss=2573324831948.7998 entropy=17.6694 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 110760] reward=-117369553.5 actor_loss=0.3698 critic_loss=107692937116.0976 entropy=17.6761 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 110760] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-656913.5 mean_steps=11.4
|
|
[Episode 110770] reward=-142101550.8 actor_loss=0.3688 critic_loss=2304729434404.5713 entropy=17.6859 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 110780] reward=-120240790.0 actor_loss=0.2994 critic_loss=134008231548.5405 entropy=17.6972 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 110780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493529.4 mean_steps=14.8
|
|
[Episode 110790] reward=-123341193.2 actor_loss=0.2040 critic_loss=118767816248.8889 entropy=17.6837 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 110800] reward=-118614729.0 actor_loss=0.2512 critic_loss=109472949713.4545 entropy=17.6944 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 110800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518590.4 mean_steps=14.3
|
|
[Episode 110810] reward=-115581194.8 actor_loss=0.3263 critic_loss=112452379238.4000 entropy=17.6781 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 110820] reward=-113447728.7 actor_loss=0.3717 critic_loss=106698530230.8571 entropy=17.6753 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 110820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-507845.7 mean_steps=15.3
|
|
[Episode 110830] reward=-140410727.5 actor_loss=0.2954 critic_loss=1629307147969.4221 entropy=17.6850 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 110840] reward=-116710835.7 actor_loss=0.2179 critic_loss=108726060464.3556 entropy=17.7004 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 110840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555761.5 mean_steps=12.9
|
|
[Episode 110850] reward=-117259577.0 actor_loss=0.3350 critic_loss=111389338530.9091 entropy=17.7047 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 110860] reward=-114731879.3 actor_loss=0.2157 critic_loss=108108055134.8148 entropy=17.7013 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 110860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-406762.5 mean_steps=14.4
|
|
[Episode 110870] reward=-118073361.5 actor_loss=0.3745 critic_loss=110645276987.0769 entropy=17.7007 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 110880] reward=-120648870.4 actor_loss=0.2402 critic_loss=112065146060.8000 entropy=17.6875 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 110880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-498784.9 mean_steps=16.2
|
|
[Episode 110890] reward=-115827327.5 actor_loss=0.3038 critic_loss=104802072985.6000 entropy=17.6760 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 110900] reward=-116140425.2 actor_loss=0.3439 critic_loss=103589319475.2000 entropy=17.6797 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 110900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567738.5 mean_steps=12.5
|
|
[Episode 110910] reward=-116393596.3 actor_loss=0.2639 critic_loss=108834080491.2432 entropy=17.6694 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 110920] reward=-117634702.0 actor_loss=0.2725 critic_loss=106694443008.0000 entropy=17.6636 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 110920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518054.5 mean_steps=13.3
|
|
[Episode 110930] reward=-116063113.9 actor_loss=0.3068 critic_loss=99828965558.0444 entropy=17.6446 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 110940] reward=-117849255.4 actor_loss=0.2982 critic_loss=106705304832.0000 entropy=17.6484 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 110940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498030.4 mean_steps=15.2
|
|
[Episode 110950] reward=-121099695.5 actor_loss=0.2890 critic_loss=109315765277.2571 entropy=17.6495 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 110960] reward=-121498546.7 actor_loss=0.2272 critic_loss=112591403053.5111 entropy=17.6517 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 110960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432468.1 mean_steps=15.3
|
|
[Episode 110970] reward=-118243809.5 actor_loss=0.1910 critic_loss=105403500316.4444 entropy=17.6419 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 110980] reward=-115760605.4 actor_loss=0.3652 critic_loss=106014935970.9091 entropy=17.6404 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 110980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413780.4 mean_steps=15.7
|
|
[Episode 110990] reward=-119348382.6 actor_loss=0.2996 critic_loss=108017652349.1555 entropy=17.6456 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 111000] reward=-117243804.2 actor_loss=0.2532 critic_loss=103520827619.5556 entropy=17.6443 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 111000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569758.8 mean_steps=13.8
|
|
[Episode 111010] reward=-116520573.2 actor_loss=0.3520 critic_loss=104435404800.0000 entropy=17.6349 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 111020] reward=-319844444.4 actor_loss=0.4206 critic_loss=141698596984989.5312 entropy=17.6316 approx_kl=0.0017 kl_stop=1 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 111020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524886.8 mean_steps=13.2
|
|
[Episode 111030] reward=-120320679.5 actor_loss=0.4702 critic_loss=113098697788.2353 entropy=17.6314 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 111040] reward=-115796504.0 actor_loss=0.3732 critic_loss=103132853452.8000 entropy=17.6300 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 111040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548436.4 mean_steps=14.2
|
|
[Episode 111050] reward=-117152417.5 actor_loss=0.3076 critic_loss=108253568824.1951 entropy=17.6133 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 111060] reward=-177842969.7 actor_loss=0.2755 critic_loss=16319042138203.0215 entropy=17.6115 approx_kl=-0.0001 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 111060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-453966.0 mean_steps=13.9
|
|
[Episode 111070] reward=-120028107.2 actor_loss=0.1504 critic_loss=110685040366.9333 entropy=17.6012 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 111080] reward=-136696431.1 actor_loss=0.3659 critic_loss=2256497432517.4858 entropy=17.5941 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 111080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-628789.2 mean_steps=12.2
|
|
[Episode 111090] reward=-121835376.9 actor_loss=0.2447 critic_loss=108549324800.0000 entropy=17.5882 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 111100] reward=-121247001.4 actor_loss=0.3133 critic_loss=114303199914.6667 entropy=17.5876 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 111100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552728.9 mean_steps=14.2
|
|
[Episode 111110] reward=-115899917.8 actor_loss=0.3378 critic_loss=107520698368.0000 entropy=17.5902 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 111120] reward=-115107314.8 actor_loss=0.3380 critic_loss=107303516160.0000 entropy=17.5881 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 111120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-640396.9 mean_steps=13.1
|
|
[Episode 111130] reward=-117182914.3 actor_loss=0.3109 critic_loss=105879742958.3448 entropy=17.5834 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 111140] reward=-113231975.4 actor_loss=0.3463 critic_loss=107338247996.9524 entropy=17.5874 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 111140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518363.4 mean_steps=13.0
|
|
[Episode 111150] reward=-111261985.2 actor_loss=0.3210 critic_loss=105948097012.6222 entropy=17.5788 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 111160] reward=-114457202.4 actor_loss=0.4161 critic_loss=107032017053.5385 entropy=17.5707 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 111160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524425.1 mean_steps=13.4
|
|
[Episode 111170] reward=-127215072.6 actor_loss=0.3482 critic_loss=120836349152.7805 entropy=17.5749 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 111180] reward=-117636705.5 actor_loss=0.2874 critic_loss=106129971553.1035 entropy=17.5915 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 111180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424732.3 mean_steps=15.6
|
|
[Episode 111190] reward=-116179654.8 actor_loss=0.3011 critic_loss=107384968229.9259 entropy=17.5903 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 111200] reward=-116172634.7 actor_loss=0.3900 critic_loss=105054398584.4706 entropy=17.6033 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 111200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447698.2 mean_steps=14.7
|
|
[Episode 111210] reward=-116277943.3 actor_loss=0.2768 critic_loss=110253150580.3636 entropy=17.6034 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 111220] reward=-119837307.6 actor_loss=0.2700 critic_loss=108344443672.7742 entropy=17.6083 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 111220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-333441.6 mean_steps=15.7
|
|
[Episode 111230] reward=-119509902.6 actor_loss=0.3116 critic_loss=109803575237.4857 entropy=17.6222 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 111240] reward=-116537088.7 actor_loss=0.3017 critic_loss=106784575583.2558 entropy=17.6274 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 111240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456873.3 mean_steps=15.0
|
|
[Episode 111250] reward=-122652374.9 actor_loss=0.2796 critic_loss=109525759850.1463 entropy=17.6144 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 111260] reward=-113212128.1 actor_loss=0.3856 critic_loss=105331668172.8000 entropy=17.6249 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 111260] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-622875.7 mean_steps=12.1
|
|
[Episode 111270] reward=-113917698.1 actor_loss=0.3818 critic_loss=101349344665.6000 entropy=17.6256 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 111280] reward=-115894287.6 actor_loss=0.2603 critic_loss=100324301209.6000 entropy=17.6274 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 111280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495758.2 mean_steps=15.1
|
|
[Episode 111290] reward=-113062196.6 actor_loss=0.2178 critic_loss=107857948034.8445 entropy=17.6223 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 111300] reward=-114047699.3 actor_loss=0.2443 critic_loss=104567531227.4286 entropy=17.6377 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 111300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-548711.5 mean_steps=12.5
|
|
[Episode 111310] reward=-119588673.5 actor_loss=0.2660 critic_loss=109688353199.1579 entropy=17.6579 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 111320] reward=-121666383.0 actor_loss=0.2806 critic_loss=110461756666.3111 entropy=17.6535 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 111320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-532114.9 mean_steps=12.6
|
|
[Episode 111330] reward=-119460423.3 actor_loss=0.2966 critic_loss=108730400768.0000 entropy=17.6575 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 111340] reward=-117014461.5 actor_loss=0.2818 critic_loss=109688464452.2667 entropy=17.6459 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 111340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-405323.0 mean_steps=16.1
|
|
[Episode 111350] reward=-121723732.7 actor_loss=0.2693 critic_loss=109592518283.6364 entropy=17.6325 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 111360] reward=-114209969.4 actor_loss=0.3939 critic_loss=100563845120.0000 entropy=17.6161 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 111360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515990.1 mean_steps=14.2
|
|
[Episode 111370] reward=-121231384.8 actor_loss=0.3413 critic_loss=114028656685.5111 entropy=17.6151 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 111380] reward=-115927126.4 actor_loss=0.2708 critic_loss=100594289027.4595 entropy=17.6101 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 111380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-612880.5 mean_steps=12.8
|
|
[Episode 111390] reward=-117876628.2 actor_loss=0.2935 critic_loss=107657656183.4667 entropy=17.6085 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 111400] reward=-118491912.7 actor_loss=0.2635 critic_loss=108491192596.7568 entropy=17.6073 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 111400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-519292.4 mean_steps=15.4
|
|
[Episode 111410] reward=-116503533.6 actor_loss=0.2505 critic_loss=107254420457.2444 entropy=17.6111 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 111420] reward=-115316959.8 actor_loss=0.3502 critic_loss=103747739465.9556 entropy=17.6141 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 111420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-473894.1 mean_steps=15.7
|
|
[Episode 111430] reward=-118953246.6 actor_loss=0.2715 critic_loss=108024967259.0222 entropy=17.6251 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 111440] reward=-112621917.0 actor_loss=0.3639 critic_loss=96535319893.3333 entropy=17.6327 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 111440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445790.5 mean_steps=14.3
|
|
[Episode 111450] reward=-118645282.5 actor_loss=0.2728 critic_loss=106772403968.0000 entropy=17.6375 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 111460] reward=-116130989.3 actor_loss=0.2931 critic_loss=106619356228.2667 entropy=17.6270 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 111460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470502.5 mean_steps=13.8
|
|
[Episode 111470] reward=-113695541.5 actor_loss=0.4009 critic_loss=103906083297.8824 entropy=17.6191 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 111480] reward=-122108540.7 actor_loss=0.1886 critic_loss=112398610887.1111 entropy=17.6324 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 111480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447986.5 mean_steps=14.7
|
|
[Episode 111490] reward=-114651716.3 actor_loss=0.2064 critic_loss=100196855027.8095 entropy=17.6293 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 111500] reward=-115221500.0 actor_loss=0.2805 critic_loss=100379833509.1613 entropy=17.6271 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 111500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456684.4 mean_steps=14.6
|
|
[Episode 111510] reward=-112336603.5 actor_loss=0.3179 critic_loss=103208486684.4444 entropy=17.6314 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 111520] reward=-119907051.9 actor_loss=0.2448 critic_loss=105483998367.2889 entropy=17.6273 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 111520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-426993.3 mean_steps=14.3
|
|
[Episode 111530] reward=-119793627.1 actor_loss=0.3157 critic_loss=108012620003.5556 entropy=17.6205 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 111540] reward=-117759514.8 actor_loss=0.2745 critic_loss=105542699300.5714 entropy=17.6125 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 111540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-602463.5 mean_steps=11.8
|
|
[Episode 111550] reward=-112789311.3 actor_loss=0.3878 critic_loss=106056119149.7143 entropy=17.6111 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 111560] reward=-115371576.9 actor_loss=0.3725 critic_loss=104005754424.8889 entropy=17.6157 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 111560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-553825.0 mean_steps=12.4
|
|
[Episode 111570] reward=-118848662.9 actor_loss=0.2923 critic_loss=105639025322.6667 entropy=17.6118 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 111580] reward=-116078339.1 actor_loss=0.3037 critic_loss=104905854585.9048 entropy=17.5998 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 111580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-613570.0 mean_steps=13.8
|
|
[Episode 111590] reward=-113151628.0 actor_loss=0.3283 critic_loss=104056438374.4000 entropy=17.6057 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 111600] reward=-116355617.4 actor_loss=0.3055 critic_loss=107539663530.6667 entropy=17.6124 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 111600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528774.2 mean_steps=13.8
|
|
[Episode 111610] reward=-119039804.0 actor_loss=0.2922 critic_loss=107480440475.8261 entropy=17.6202 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 111620] reward=-117084238.6 actor_loss=0.1964 critic_loss=109573816905.1429 entropy=17.6257 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 111620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-568942.7 mean_steps=14.0
|
|
[Episode 111630] reward=-113940258.1 actor_loss=0.3605 critic_loss=103237834306.7826 entropy=17.6283 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 111640] reward=-116901035.4 actor_loss=0.3097 critic_loss=103038003738.9474 entropy=17.6247 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 111640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477010.6 mean_steps=13.8
|
|
[Episode 111650] reward=-115263440.1 actor_loss=0.3531 critic_loss=103772598467.0476 entropy=17.6318 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 111660] reward=-119290308.0 actor_loss=0.3530 critic_loss=107362042077.4054 entropy=17.6463 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 111660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545561.0 mean_steps=13.3
|
|
[Episode 111670] reward=-122544286.2 actor_loss=0.3335 critic_loss=112361504983.5789 entropy=17.6516 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 111680] reward=-115884330.2 actor_loss=0.2865 critic_loss=110542512624.4848 entropy=17.6515 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 111680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481508.2 mean_steps=14.8
|
|
[Episode 111690] reward=-119014295.5 actor_loss=0.2582 critic_loss=105978150521.9048 entropy=17.6486 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 111700] reward=-116873323.2 actor_loss=0.3609 critic_loss=105147606812.4444 entropy=17.6451 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 111700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590921.7 mean_steps=13.3
|
|
[Episode 111710] reward=-118168721.4 actor_loss=0.2033 critic_loss=103867524647.3846 entropy=17.6351 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 111720] reward=-116119138.5 actor_loss=0.3213 critic_loss=103937545739.3778 entropy=17.6257 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 111720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-477202.3 mean_steps=15.6
|
|
[Episode 111730] reward=-117317319.1 actor_loss=0.2844 critic_loss=108669785793.4222 entropy=17.6314 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 111740] reward=-113596953.8 actor_loss=0.2821 critic_loss=100355314810.8800 entropy=17.6248 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 111740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-579384.9 mean_steps=14.4
|
|
[Episode 111750] reward=-118130163.9 actor_loss=0.2935 critic_loss=110458067968.0000 entropy=17.6226 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 111760] reward=-119242912.4 actor_loss=0.3153 critic_loss=111162707835.8710 entropy=17.6162 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 111760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475638.9 mean_steps=14.4
|
|
[Episode 111770] reward=-117767925.1 actor_loss=0.3340 critic_loss=110556210597.6471 entropy=17.6171 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 111780] reward=-122465927.9 actor_loss=0.2239 critic_loss=106906554572.8000 entropy=17.6207 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 111780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-594872.0 mean_steps=11.7
|
|
[Episode 111790] reward=-114388967.7 actor_loss=0.2564 critic_loss=104407262276.2667 entropy=17.6238 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 111800] reward=-117892238.9 actor_loss=0.2715 critic_loss=107141895261.0909 entropy=17.6308 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 111800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550740.2 mean_steps=13.9
|
|
[Episode 111810] reward=-120024167.6 actor_loss=0.3138 critic_loss=109113917986.1333 entropy=17.6452 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 111820] reward=-122207415.8 actor_loss=0.2512 critic_loss=108932679634.4889 entropy=17.6592 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 111820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533615.3 mean_steps=13.2
|
|
[Episode 111830] reward=-111227107.4 actor_loss=0.2229 critic_loss=101783410005.3333 entropy=17.6480 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 111840] reward=-122972687.3 actor_loss=0.2576 critic_loss=112394805248.0000 entropy=17.6397 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 111840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462658.6 mean_steps=14.2
|
|
[Episode 111850] reward=-116191134.3 actor_loss=0.2508 critic_loss=102113626323.8621 entropy=17.6465 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 111860] reward=-121236956.9 actor_loss=0.3049 critic_loss=115604087183.6098 entropy=17.6392 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 111860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599865.7 mean_steps=13.1
|
|
[Episode 111870] reward=-120548454.8 actor_loss=0.2069 critic_loss=103997669740.0889 entropy=17.6563 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 111880] reward=-117141961.2 actor_loss=0.2435 critic_loss=108958568903.1111 entropy=17.6548 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 111880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502506.5 mean_steps=13.8
|
|
[Episode 111890] reward=-114534529.4 actor_loss=0.2232 critic_loss=99759343934.5778 entropy=17.6586 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 111900] reward=-115463050.4 actor_loss=0.3849 critic_loss=103130181132.4878 entropy=17.6657 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 111900] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-390057.0 mean_steps=16.9
|
|
[Episode 111910] reward=-118546338.4 actor_loss=0.3131 critic_loss=108604832274.9630 entropy=17.6545 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 111920] reward=-124544434.0 actor_loss=0.3094 critic_loss=114357543594.6667 entropy=17.6371 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 111920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-611241.4 mean_steps=12.8
|
|
[Episode 111930] reward=-114417255.6 actor_loss=0.3403 critic_loss=102328606910.5116 entropy=17.6428 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 111940] reward=-113839489.7 actor_loss=0.3025 critic_loss=107291076926.5778 entropy=17.6430 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 111940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-451971.1 mean_steps=13.8
|
|
[Episode 111950] reward=-116203890.6 actor_loss=0.2619 critic_loss=106599644461.1765 entropy=17.6383 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 111960] reward=-115551060.6 actor_loss=0.3255 critic_loss=99108829047.4667 entropy=17.6220 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 111960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-554455.6 mean_steps=14.3
|
|
[Episode 111970] reward=-120466455.7 actor_loss=0.2122 critic_loss=110852799719.2258 entropy=17.6122 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 111980] reward=-112755091.5 actor_loss=0.4078 critic_loss=103196052759.2727 entropy=17.6036 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 111980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494677.4 mean_steps=14.0
|
|
[Episode 111990] reward=-110767443.3 actor_loss=0.3128 critic_loss=97193792307.2000 entropy=17.5937 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 112000] reward=-115891982.9 actor_loss=0.3240 critic_loss=112974566815.1351 entropy=17.5899 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 112000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-609059.0 mean_steps=12.2
|
|
[Episode 112010] reward=-120463530.1 actor_loss=0.1719 critic_loss=106589042005.3333 entropy=17.5851 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 112020] reward=-114111976.0 actor_loss=0.2970 critic_loss=100783330497.7297 entropy=17.5743 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 112020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-394711.4 mean_steps=15.2
|
|
[Episode 112030] reward=-109936267.6 actor_loss=0.2831 critic_loss=99653473730.5600 entropy=17.5798 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 112040] reward=-117703212.6 actor_loss=0.2909 critic_loss=111680876668.1212 entropy=17.5714 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 112040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-388016.3 mean_steps=15.9
|
|
[Episode 112050] reward=-117329868.0 actor_loss=0.3485 critic_loss=106340536671.0857 entropy=17.5580 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 112060] reward=-120549272.1 actor_loss=0.2626 critic_loss=112556039314.2857 entropy=17.5573 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 112060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558419.8 mean_steps=13.4
|
|
[Episode 112070] reward=-116646704.5 actor_loss=0.3236 critic_loss=111139297143.4667 entropy=17.5479 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 112080] reward=-122179466.8 actor_loss=0.2489 critic_loss=111593379498.6667 entropy=17.5560 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 112080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473788.1 mean_steps=15.1
|
|
[Episode 112090] reward=-119981950.6 actor_loss=0.2675 critic_loss=105251557376.0000 entropy=17.5544 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 112100] reward=-116482120.7 actor_loss=0.4051 critic_loss=107192191906.9091 entropy=17.5564 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 112100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-353185.8 mean_steps=15.8
|
|
[Episode 112110] reward=-117085396.4 actor_loss=0.3271 critic_loss=105425735270.4000 entropy=17.5686 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 112120] reward=-115960771.8 actor_loss=0.2291 critic_loss=107811641935.6444 entropy=17.5710 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 112120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-599983.9 mean_steps=12.0
|
|
[Episode 112130] reward=-119602212.3 actor_loss=0.2839 critic_loss=107289006899.2000 entropy=17.5548 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 112140] reward=-114297504.1 actor_loss=0.3105 critic_loss=98356540757.3333 entropy=17.5478 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 112140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580158.9 mean_steps=12.7
|
|
[Episode 112150] reward=-119972815.2 actor_loss=0.2938 critic_loss=111356658574.2222 entropy=17.5353 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 112160] reward=-108968682.6 actor_loss=0.4419 critic_loss=99890767751.5294 entropy=17.5327 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 112160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-596544.8 mean_steps=12.0
|
|
[Episode 112170] reward=-114564552.0 actor_loss=0.3117 critic_loss=116419339514.3111 entropy=17.5374 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 112180] reward=-117951653.3 actor_loss=0.3645 critic_loss=106388183110.6207 entropy=17.5443 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 112180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505204.8 mean_steps=14.3
|
|
[Episode 112190] reward=-118902761.9 actor_loss=0.2900 critic_loss=111124705703.7241 entropy=17.5169 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 112200] reward=-113909491.7 actor_loss=0.2939 critic_loss=102651136594.5806 entropy=17.5073 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 112200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463878.3 mean_steps=14.8
|
|
[Episode 112210] reward=-116766315.6 actor_loss=0.2208 critic_loss=109826839756.8000 entropy=17.4969 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 112220] reward=-117794786.1 actor_loss=0.3009 critic_loss=109292378521.6000 entropy=17.4936 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 112220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580221.2 mean_steps=12.8
|
|
[Episode 112230] reward=-118055251.9 actor_loss=0.3364 critic_loss=109579333332.2927 entropy=17.4876 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 112240] reward=-125001760.6 actor_loss=0.3119 critic_loss=148737106830.2222 entropy=17.4841 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 112240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-539085.6 mean_steps=12.2
|
|
[Episode 112250] reward=-114910898.1 actor_loss=0.2327 critic_loss=112077860310.4865 entropy=17.4864 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 112260] reward=-113699254.0 actor_loss=0.3755 critic_loss=101912656430.5455 entropy=17.4811 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 112260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505104.5 mean_steps=12.9
|
|
[Episode 112270] reward=-116213412.3 actor_loss=0.3248 critic_loss=114082090117.5652 entropy=17.4954 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 112280] reward=-113723846.0 actor_loss=0.3245 critic_loss=98013940030.5778 entropy=17.5031 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 112280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-349427.9 mean_steps=15.8
|
|
[Episode 112290] reward=-116670754.5 actor_loss=0.2517 critic_loss=113604973454.2222 entropy=17.4945 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 112300] reward=-122059529.0 actor_loss=0.2830 critic_loss=115651977871.3600 entropy=17.4798 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 112300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550976.2 mean_steps=14.5
|
|
[Episode 112310] reward=-120084850.4 actor_loss=0.2906 critic_loss=104068320987.4286 entropy=17.4928 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 112320] reward=-120969628.0 actor_loss=0.3179 critic_loss=247243322163.2000 entropy=17.4869 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 112320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-384368.7 mean_steps=15.2
|
|
[Episode 112330] reward=-117268420.2 actor_loss=0.2957 critic_loss=106129471351.4667 entropy=17.4701 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 112340] reward=-113293774.4 actor_loss=0.2291 critic_loss=103561207625.9556 entropy=17.4721 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 112340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476709.2 mean_steps=14.9
|
|
[Episode 112350] reward=-121117644.1 actor_loss=0.3659 critic_loss=444723584667.8261 entropy=17.4565 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 112360] reward=-120559258.5 actor_loss=0.2569 critic_loss=222196625648.9412 entropy=17.4561 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 112360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-598717.3 mean_steps=13.6
|
|
[Episode 112370] reward=-116954391.8 actor_loss=0.4430 critic_loss=111510857386.6667 entropy=17.4530 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 112380] reward=-115189206.7 actor_loss=0.2199 critic_loss=97416207737.2632 entropy=17.4574 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 112380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-593203.3 mean_steps=12.9
|
|
[Episode 112390] reward=-121952780.0 actor_loss=0.3169 critic_loss=112375785917.2174 entropy=17.4641 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 112400] reward=-117814110.0 actor_loss=0.3105 critic_loss=106108268211.8919 entropy=17.4976 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 112400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474914.3 mean_steps=14.4
|
|
[Episode 112410] reward=-117817483.7 actor_loss=0.3074 critic_loss=105573628859.7333 entropy=17.5077 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 112420] reward=-120008761.3 actor_loss=0.3004 critic_loss=132431938628.2667 entropy=17.5215 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 112420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-468753.4 mean_steps=15.2
|
|
[Episode 112430] reward=-116952941.9 actor_loss=0.3054 critic_loss=105480626904.1778 entropy=17.5167 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 112440] reward=-124179578.1 actor_loss=0.2391 critic_loss=113027403138.8445 entropy=17.5246 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 112440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368334.7 mean_steps=15.8
|
|
[Episode 112450] reward=-115497803.3 actor_loss=0.2656 critic_loss=104291692270.9333 entropy=17.5259 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 112460] reward=-116374276.3 actor_loss=0.2972 critic_loss=104001879517.8667 entropy=17.5374 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 112460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-432991.1 mean_steps=16.2
|
|
[Episode 112470] reward=-121209532.0 actor_loss=0.2727 critic_loss=107657159384.1778 entropy=17.5383 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 112480] reward=-115942613.0 actor_loss=0.2649 critic_loss=104588756218.3111 entropy=17.5466 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 112480] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-688441.6 mean_steps=11.1
|
|
[Episode 112490] reward=-116382749.6 actor_loss=0.3165 critic_loss=104987772882.4889 entropy=17.5296 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 112500] reward=-113909327.8 actor_loss=0.3137 critic_loss=108634309336.1778 entropy=17.5514 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 112500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-623749.4 mean_steps=13.7
|
|
[Episode 112510] reward=-116335384.0 actor_loss=0.3011 critic_loss=105772434606.8293 entropy=17.5598 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 112520] reward=-121372678.5 actor_loss=0.3193 critic_loss=109798691271.1111 entropy=17.5793 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 112520] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-663041.9 mean_steps=12.2
|
|
[Episode 112530] reward=-114832900.7 actor_loss=0.3171 critic_loss=104267714468.9778 entropy=17.5708 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 112540] reward=-115792527.8 actor_loss=0.3362 critic_loss=115497772151.0698 entropy=17.5621 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 112540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502896.7 mean_steps=13.9
|
|
[Episode 112550] reward=-116321044.9 actor_loss=0.2944 critic_loss=107772483811.5556 entropy=17.5658 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 112560] reward=-121371203.8 actor_loss=0.3440 critic_loss=116182537860.7407 entropy=17.5720 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 112560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567345.7 mean_steps=13.5
|
|
[Episode 112570] reward=-119860569.6 actor_loss=0.2822 critic_loss=109562036906.6667 entropy=17.5792 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 112580] reward=-117244393.5 actor_loss=0.2313 critic_loss=107359009391.3044 entropy=17.5884 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 112580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-663053.8 mean_steps=13.2
|
|
[Episode 112590] reward=-150039643.1 actor_loss=0.5384 critic_loss=5984933886907.7334 entropy=17.5865 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 112600] reward=-112727487.2 actor_loss=0.4143 critic_loss=103168010194.4889 entropy=17.5675 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 112600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-461893.4 mean_steps=16.4
|
|
[Episode 112610] reward=-219075843.3 actor_loss=0.3603 critic_loss=34282171426406.3984 entropy=17.5669 approx_kl=-0.0000 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 112620] reward=-118252088.0 actor_loss=0.2863 critic_loss=105936810116.1290 entropy=17.5688 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 112620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-437085.4 mean_steps=13.7
|
|
[Episode 112630] reward=-117462293.5 actor_loss=0.2986 critic_loss=110477510246.4000 entropy=17.5706 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 112640] reward=-108486149.4 actor_loss=0.3479 critic_loss=96555609739.6364 entropy=17.5730 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 112640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-390174.2 mean_steps=17.0
|
|
[Episode 112650] reward=-126862907.6 actor_loss=0.4498 critic_loss=610606326116.1740 entropy=17.5796 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 112660] reward=-120352875.6 actor_loss=0.2910 critic_loss=122691637885.1555 entropy=17.5835 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 112660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371279.3 mean_steps=15.5
|
|
[Episode 112670] reward=-112343446.6 actor_loss=0.2967 critic_loss=98252340816.8421 entropy=17.5876 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 112680] reward=-126449884.7 actor_loss=0.2145 critic_loss=237029647155.2000 entropy=17.5826 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 112680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537639.8 mean_steps=14.3
|
|
[Episode 112690] reward=-113320405.6 actor_loss=0.3434 critic_loss=97803770499.6572 entropy=17.5990 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 112700] reward=-140902320.0 actor_loss=0.3318 critic_loss=2800403672142.7690 entropy=17.6024 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 112700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-468205.8 mean_steps=16.6
|
|
[Episode 112710] reward=-867024073.1 actor_loss=3.7261 critic_loss=941426632294400.0000 entropy=17.6402 approx_kl=0.0043 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 112720] reward=-120091354.9 actor_loss=0.3178 critic_loss=129103965922.2326 entropy=17.6648 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 112720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-444903.8 mean_steps=13.8
|
|
[Episode 112730] reward=-161353284.9 actor_loss=0.2819 critic_loss=8521707066345.2441 entropy=17.6726 approx_kl=0.0009 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 112740] reward=-399708174.9 actor_loss=0.3599 critic_loss=246578313929978.3125 entropy=17.6661 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 112740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-414132.9 mean_steps=14.7
|
|
[Episode 112750] reward=-110493939.8 actor_loss=0.3609 critic_loss=109688517427.2000 entropy=17.6720 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 112760] reward=-113108656.6 actor_loss=0.2809 critic_loss=102083116263.2258 entropy=17.6817 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 112760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-558223.2 mean_steps=12.8
|
|
[Episode 112770] reward=-124124505.0 actor_loss=0.1828 critic_loss=236019349953.5610 entropy=17.6909 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 112780] reward=-122804625.3 actor_loss=0.3166 critic_loss=111884916599.4667 entropy=17.6849 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 112780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-543063.6 mean_steps=12.4
|
|
[Episode 112790] reward=-118007424.5 actor_loss=0.3925 critic_loss=104790583046.2439 entropy=17.6717 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 112800] reward=-123142748.8 actor_loss=0.2826 critic_loss=118373833659.7333 entropy=17.6686 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 112800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424073.9 mean_steps=15.5
|
|
[Episode 112810] reward=-110946855.0 actor_loss=0.2870 critic_loss=112358647320.3810 entropy=17.6683 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 112820] reward=-115925903.7 actor_loss=0.3778 critic_loss=111924700891.4286 entropy=17.6820 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 112820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488374.5 mean_steps=14.7
|
|
[Episode 112830] reward=-119896061.6 actor_loss=0.2151 critic_loss=104047987370.6667 entropy=17.6750 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 112840] reward=-121451972.7 actor_loss=0.3824 critic_loss=111146550522.3111 entropy=17.6662 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 112840] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-737223.5 mean_steps=10.1
|
|
[Episode 112850] reward=-117921294.8 actor_loss=0.2781 critic_loss=111135311280.3556 entropy=17.6713 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 112860] reward=-115119142.7 actor_loss=0.4213 critic_loss=107712528566.0444 entropy=17.6952 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 112860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-395509.4 mean_steps=15.2
|
|
[Episode 112870] reward=-122303479.6 actor_loss=0.2636 critic_loss=111410970624.0000 entropy=17.6986 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 112880] reward=-1709568074.7 actor_loss=2.4355 critic_loss=4932124478506888.0000 entropy=17.6986 approx_kl=0.0038 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 112880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496510.0 mean_steps=13.8
|
|
[Episode 112890] reward=-961555758.5 actor_loss=0.2358 critic_loss=1916808479672456.5000 entropy=17.7136 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 112900] reward=-13937250175.8 actor_loss=37.2161 critic_loss=84487757578085632.0000 entropy=17.7389 approx_kl=0.0085 kl_stop=0 intervention_rate=0.0677 front_blocked=0
|
|
[Eval 112900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-399985.1 mean_steps=15.9
|
|
[Episode 112910] reward=-8855765592.7 actor_loss=0.3720 critic_loss=86554318615571888.0000 entropy=17.7324 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 112920] reward=-2400193185.7 actor_loss=0.4546 critic_loss=10420003168696028.0000 entropy=17.7256 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 112920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-499410.3 mean_steps=13.2
|
|
[Episode 112930] reward=-119939683.8 actor_loss=0.2210 critic_loss=109496991561.9556 entropy=17.7426 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 112940] reward=-171048602.6 actor_loss=0.7744 critic_loss=12379537680771.4590 entropy=17.7375 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 112940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-48728331.6 mean_steps=21.4
|
|
[Episode 112950] reward=-1165308839.3 actor_loss=0.3599 critic_loss=2502521723971174.5000 entropy=17.7429 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 112960] reward=-116819668.4 actor_loss=0.3105 critic_loss=106749935616.0000 entropy=17.7346 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 112960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616531.9 mean_steps=12.8
|
|
[Episode 112970] reward=-368789877.7 actor_loss=0.3972 critic_loss=172415808176128.0000 entropy=17.7216 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 112980] reward=-2306673988.1 actor_loss=0.2767 critic_loss=6283452077904145.0000 entropy=17.7314 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1139 front_blocked=0
|
|
[Eval 112980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451634.1 mean_steps=15.4
|
|
[Episode 112990] reward=-170779208.6 actor_loss=0.2488 critic_loss=13685677102785.4219 entropy=17.7391 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 113000] reward=-1045805247.3 actor_loss=0.2942 critic_loss=1708406808497981.0000 entropy=17.7594 approx_kl=0.0031 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 113000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563334.2 mean_steps=13.3
|
|
[Episode 113010] reward=-121992481.9 actor_loss=0.2874 critic_loss=113725045145.6000 entropy=17.7831 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 113020] reward=-118732111.7 actor_loss=0.3260 critic_loss=107381810426.3111 entropy=17.7960 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 113020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-466962.8 mean_steps=13.8
|
|
[Episode 113030] reward=-125687813.1 actor_loss=0.2910 critic_loss=120107540844.0889 entropy=17.8055 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 113040] reward=-122248397.4 actor_loss=0.2944 critic_loss=108930903426.8445 entropy=17.8058 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 113040] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-637083.4 mean_steps=12.3
|
|
[Episode 113050] reward=-118309952.4 actor_loss=0.4426 critic_loss=121793594557.6296 entropy=17.8085 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 113060] reward=-2164226757.0 actor_loss=0.1796 critic_loss=6632427786658702.0000 entropy=17.7952 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Eval 113060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472082.4 mean_steps=15.0
|
|
[Episode 113070] reward=-950077224.6 actor_loss=0.3659 critic_loss=1967707378767189.2500 entropy=17.8114 approx_kl=0.0038 kl_stop=1 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 113080] reward=-1590654597.5 actor_loss=34.9512 critic_loss=2552309649244160.0000 entropy=17.8436 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 113080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-371478.8 mean_steps=15.2
|
|
[Episode 113090] reward=-2235645634.6 actor_loss=0.2972 critic_loss=6766020590138461.0000 entropy=17.8513 approx_kl=0.0033 kl_stop=1 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 113100] reward=-113906564.2 actor_loss=0.4058 critic_loss=101690002090.6667 entropy=17.8518 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 113100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431198.1 mean_steps=14.4
|
|
[Episode 113110] reward=-921244435.4 actor_loss=0.3196 critic_loss=1542613400009659.7500 entropy=17.8238 approx_kl=0.0013 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 113120] reward=-120576790.4 actor_loss=0.2917 critic_loss=113800593817.6000 entropy=17.7848 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 113120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541092.7 mean_steps=14.2
|
|
[Episode 113130] reward=-117336934.4 actor_loss=0.2724 critic_loss=102555027748.5714 entropy=17.7665 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 113140] reward=-115953967.2 actor_loss=0.3543 critic_loss=106264752947.2000 entropy=17.7747 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 113140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-640665.6 mean_steps=13.2
|
|
[Episode 113150] reward=-808672457.4 actor_loss=0.2645 critic_loss=1391865066384822.7500 entropy=17.7647 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 113160] reward=-117730941.5 actor_loss=0.3246 critic_loss=107417679007.2889 entropy=17.7764 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 113160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383958.5 mean_steps=16.1
|
|
[Episode 113170] reward=-116219193.0 actor_loss=0.3830 critic_loss=116304235724.8000 entropy=17.7798 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 113180] reward=-119337289.0 actor_loss=0.2548 critic_loss=119737074392.1778 entropy=17.7723 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 113180] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-617739.6 mean_steps=12.1
|
|
[Episode 113190] reward=-119616638.3 actor_loss=0.3592 critic_loss=107975656880.3556 entropy=17.7714 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 113200] reward=-117358562.9 actor_loss=0.3977 critic_loss=114330369441.1852 entropy=17.7719 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 113200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427967.4 mean_steps=15.4
|
|
[Episode 113210] reward=-114477484.7 actor_loss=0.2666 critic_loss=108237710801.4545 entropy=17.7788 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 113220] reward=-1730660733.2 actor_loss=0.6313 critic_loss=5890248385669803.0000 entropy=17.7929 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 113220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508425.0 mean_steps=14.2
|
|
[Episode 113230] reward=-11720670268.5 actor_loss=53.4071 critic_loss=58316794005842888.0000 entropy=17.8089 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 113240] reward=-123891698.2 actor_loss=0.2562 critic_loss=142286171704.8889 entropy=17.8127 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 113240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503195.4 mean_steps=14.4
|
|
[Episode 113250] reward=-6211935482.9 actor_loss=0.3853 critic_loss=38178882033211440.0000 entropy=17.8227 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Episode 113260] reward=-8127775664.3 actor_loss=9.8602 critic_loss=39588273356188328.0000 entropy=17.8278 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 113260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-107465167.7 mean_steps=23.4
|
|
[Episode 113270] reward=-5392415850.3 actor_loss=0.2088 critic_loss=22661931188250216.0000 entropy=17.8292 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1016 front_blocked=0
|
|
[Episode 113280] reward=-3295019145.9 actor_loss=0.2772 critic_loss=8480344414296262.0000 entropy=17.8620 approx_kl=0.0043 kl_stop=1 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 113280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486075.4 mean_steps=15.2
|
|
[Episode 113290] reward=-118054665.1 actor_loss=0.3514 critic_loss=110367454185.2444 entropy=17.8764 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 113300] reward=-3375459537.1 actor_loss=0.8341 critic_loss=9642728178301884.0000 entropy=17.8552 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 113300] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-608536.3 mean_steps=11.8
|
|
[Episode 113310] reward=-120043580.7 actor_loss=0.3413 critic_loss=110296576455.1111 entropy=17.8501 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 113320] reward=-117276974.0 actor_loss=0.3438 critic_loss=110894216169.2444 entropy=17.8513 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 113320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443985.1 mean_steps=15.4
|
|
[Episode 113330] reward=-457568412.1 actor_loss=0.3589 critic_loss=265373921662293.3438 entropy=17.8610 approx_kl=0.0031 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 113340] reward=-122496398.2 actor_loss=0.2136 critic_loss=112106966220.8000 entropy=17.8650 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 113340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424652.9 mean_steps=14.3
|
|
[Episode 113350] reward=-5781895708.8 actor_loss=1.0397 critic_loss=37811863429316608.0000 entropy=17.8612 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 113360] reward=-4765868330.3 actor_loss=9.5865 critic_loss=16337263816762982.0000 entropy=17.8655 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 113360] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-724894.2 mean_steps=10.7
|
|
[Episode 113370] reward=-3533639181.1 actor_loss=0.3081 critic_loss=9357598172491868.0000 entropy=17.8825 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1003 front_blocked=0
|
|
[Episode 113380] reward=-118618493.8 actor_loss=0.3106 critic_loss=108358131347.9111 entropy=17.8947 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 113380] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-328446.4 mean_steps=16.9
|
|
[Episode 113390] reward=-2465508561.9 actor_loss=43.3344 critic_loss=6754005413348190.0000 entropy=17.9142 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1087 front_blocked=0
|
|
[Episode 113400] reward=-127996667.8 actor_loss=0.2724 critic_loss=121400454530.8445 entropy=17.9291 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 113400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405219.7 mean_steps=15.6
|
|
[Episode 113410] reward=-249371194.7 actor_loss=0.2940 critic_loss=67745453930359.4688 entropy=17.9295 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 113420] reward=-1412822068.2 actor_loss=0.3462 critic_loss=4011771809077657.5000 entropy=17.9510 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 113420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-329153.8 mean_steps=15.6
|
|
[Episode 113430] reward=-4487888471.8 actor_loss=0.2336 critic_loss=42782609803656848.0000 entropy=17.9501 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 113440] reward=-123797177.0 actor_loss=0.2612 critic_loss=115144830520.8889 entropy=17.9527 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 113440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542752.0 mean_steps=14.2
|
|
[Episode 113450] reward=-119607014.7 actor_loss=0.3118 critic_loss=117790116158.5778 entropy=17.9622 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 113460] reward=-725206898.4 actor_loss=0.3791 critic_loss=1047474713037730.8750 entropy=17.9430 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 113460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390030.1 mean_steps=14.8
|
|
[Episode 113470] reward=-3602220505.9 actor_loss=0.2985 critic_loss=15800387518158166.0000 entropy=17.9504 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1146 front_blocked=0
|
|
[Episode 113480] reward=-111209452.1 actor_loss=0.3556 critic_loss=115298367169.4222 entropy=17.9739 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 113480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410205.0 mean_steps=15.1
|
|
[Episode 113490] reward=-117885723.3 actor_loss=0.4077 critic_loss=110352834377.9556 entropy=17.9688 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 113500] reward=-115472044.9 actor_loss=0.3818 critic_loss=111186064725.3333 entropy=17.9765 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 113500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-398959.3 mean_steps=14.8
|
|
[Episode 113510] reward=-123589834.5 actor_loss=0.2891 critic_loss=119661557805.5111 entropy=17.9578 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 113520] reward=-122029774.7 actor_loss=0.1534 critic_loss=117679430769.7778 entropy=17.9632 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 113520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-642995.9 mean_steps=12.9
|
|
[Episode 113530] reward=-3756545789.9 actor_loss=0.2942 critic_loss=12086483454496312.0000 entropy=17.9854 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1074 front_blocked=0
|
|
[Episode 113540] reward=-2377589517.4 actor_loss=0.4103 critic_loss=6771910011169451.0000 entropy=17.9915 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 113540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549752.7 mean_steps=13.5
|
|
[Episode 113550] reward=-123705720.5 actor_loss=0.2885 critic_loss=118992069882.3111 entropy=17.9708 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 113560] reward=-128784507.4 actor_loss=0.2396 critic_loss=128273277474.1333 entropy=17.9751 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 113560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-361395.5 mean_steps=15.8
|
|
[Episode 113570] reward=-122205551.6 actor_loss=0.2300 critic_loss=118075765555.2000 entropy=17.9783 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 113580] reward=-6839030164.6 actor_loss=0.1739 critic_loss=40758513646401400.0000 entropy=17.9814 approx_kl=0.0059 kl_stop=0 intervention_rate=0.0990 front_blocked=0
|
|
[Eval 113580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474242.9 mean_steps=15.1
|
|
[Episode 113590] reward=-5904750485.1 actor_loss=0.2518 critic_loss=72899086459833920.0000 entropy=17.9844 approx_kl=0.0002 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 113600] reward=-8980318720.1 actor_loss=0.2266 critic_loss=72809057836911824.0000 entropy=18.0139 approx_kl=0.0225 kl_stop=1 intervention_rate=0.1003 front_blocked=0
|
|
[Eval 113600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-572576.8 mean_steps=13.5
|
|
[Episode 113610] reward=-116736424.5 actor_loss=0.3122 critic_loss=126151505733.8182 entropy=18.0301 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 113620] reward=-3012654834.0 actor_loss=0.2775 critic_loss=10150668846771586.0000 entropy=18.0313 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 113620] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-667873.7 mean_steps=12.3
|
|
[Episode 113630] reward=-1126949994.7 actor_loss=0.3183 critic_loss=2619705307448388.5000 entropy=18.0556 approx_kl=-0.0024 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 113640] reward=-885615090.8 actor_loss=0.2905 critic_loss=1556081677231929.7500 entropy=18.0558 approx_kl=0.0042 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 113640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430292.0 mean_steps=15.3
|
|
[Episode 113650] reward=-127555674.0 actor_loss=0.3562 critic_loss=128955421491.2000 entropy=18.0558 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 113660] reward=-2707239322.6 actor_loss=0.2698 critic_loss=14264005483380372.0000 entropy=18.0598 approx_kl=-0.0018 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 113660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439928.2 mean_steps=14.8
|
|
[Episode 113670] reward=-423853975.8 actor_loss=0.3034 critic_loss=246265445250599.3750 entropy=18.0730 approx_kl=0.0030 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 113680] reward=-531579208.1 actor_loss=0.2168 critic_loss=515655388644420.2500 entropy=18.0716 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 113680] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-327376.1 mean_steps=17.6
|
|
[Episode 113690] reward=-117771188.2 actor_loss=0.4245 critic_loss=116848755029.3333 entropy=18.0846 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 113700] reward=-585827789.3 actor_loss=0.3005 critic_loss=643304465990360.1250 entropy=18.0808 approx_kl=0.0011 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 113700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437042.4 mean_steps=15.5
|
|
[Episode 113710] reward=-119236615.2 actor_loss=0.3145 critic_loss=129620221579.6364 entropy=18.0842 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 113720] reward=-2618943122.4 actor_loss=27.7856 critic_loss=13455294256195538.0000 entropy=18.1001 approx_kl=-0.0005 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 113720] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-352425.5 mean_steps=16.9
|
|
[Episode 113730] reward=-122660952.5 actor_loss=0.2718 critic_loss=142563892805.1892 entropy=18.1156 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 113740] reward=-3528869857.2 actor_loss=0.2855 critic_loss=21378836301576056.0000 entropy=18.1088 approx_kl=-0.0013 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 113740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544765.0 mean_steps=13.4
|
|
[Episode 113750] reward=-2978255485.6 actor_loss=9.7615 critic_loss=11884310861313184.0000 entropy=18.1222 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1042 front_blocked=0
|
|
[Episode 113760] reward=-167671613.6 actor_loss=17.3716 critic_loss=5296834483905.4219 entropy=18.1159 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 113760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511343.8 mean_steps=14.2
|
|
[Episode 113770] reward=-5743487449.8 actor_loss=0.1891 critic_loss=21570698405216256.0000 entropy=18.1009 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Episode 113780] reward=-1615236686.5 actor_loss=0.2272 critic_loss=5760834283311377.0000 entropy=18.1068 approx_kl=0.0007 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 113780] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-648513.5 mean_steps=11.4
|
|
[Episode 113790] reward=-5695987188.6 actor_loss=0.2577 critic_loss=26938773089946464.0000 entropy=18.1295 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1074 front_blocked=0
|
|
[Episode 113800] reward=-129479460.7 actor_loss=0.2570 critic_loss=201676864599.7714 entropy=18.1296 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 113800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-612750.0 mean_steps=12.8
|
|
[Episode 113810] reward=-127730477.5 actor_loss=0.2639 critic_loss=151050980920.8889 entropy=18.1324 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 113820] reward=-818032733.8 actor_loss=0.2385 critic_loss=1249667795980561.0000 entropy=18.1368 approx_kl=-0.0020 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 113820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553812.3 mean_steps=14.4
|
|
[Episode 113830] reward=-298610930.9 actor_loss=0.1963 critic_loss=97376235613934.9375 entropy=18.1442 approx_kl=0.0008 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 113840] reward=-1690893320.6 actor_loss=0.2710 critic_loss=6347849785555172.0000 entropy=18.1507 approx_kl=-0.0017 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 113840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440292.5 mean_steps=14.7
|
|
[Episode 113850] reward=-126878307.0 actor_loss=0.2698 critic_loss=129522535628.8000 entropy=18.1314 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 113860] reward=-125270420.0 actor_loss=0.2799 critic_loss=191343480690.7586 entropy=18.1174 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 113860] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-304991.4 mean_steps=16.7
|
|
[Episode 113870] reward=-118220317.3 actor_loss=0.3213 critic_loss=118494511703.4146 entropy=18.1185 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 113880] reward=-125730436.8 actor_loss=0.1484 critic_loss=120184260608.0000 entropy=18.1374 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 113880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-471349.5 mean_steps=15.8
|
|
[Episode 113890] reward=-114397580.5 actor_loss=0.4923 critic_loss=113086718156.8000 entropy=18.1239 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 113900] reward=-124449712.9 actor_loss=0.2859 critic_loss=124417250645.3333 entropy=18.1106 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 113900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512444.6 mean_steps=13.9
|
|
[Episode 113910] reward=-117836892.8 actor_loss=0.2666 critic_loss=112125315657.1429 entropy=18.0977 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 113920] reward=-129792443.3 actor_loss=0.3728 critic_loss=286724506563.7647 entropy=18.1014 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 113920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426984.6 mean_steps=15.3
|
|
[Episode 113930] reward=-123614154.2 actor_loss=0.2798 critic_loss=120072775044.4138 entropy=18.0923 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 113940] reward=-126627048.8 actor_loss=0.2862 critic_loss=119933965233.2308 entropy=18.0809 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 113940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559281.5 mean_steps=12.7
|
|
[Episode 113950] reward=-119548770.5 actor_loss=0.3016 critic_loss=114422710818.1333 entropy=18.0672 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 113960] reward=-122190757.0 actor_loss=0.4118 critic_loss=117233641244.4444 entropy=18.0617 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 113960] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-619183.8 mean_steps=11.9
|
|
[Episode 113970] reward=-114895166.8 actor_loss=0.4475 critic_loss=113159062784.0000 entropy=18.0450 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 113980] reward=-117826891.8 actor_loss=0.3781 critic_loss=105823016004.2667 entropy=18.0276 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 113980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-546948.2 mean_steps=12.1
|
|
[Episode 113990] reward=-124247862.9 actor_loss=0.2505 critic_loss=133083694148.2667 entropy=18.0316 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 114000] reward=-126604506.9 actor_loss=0.2564 critic_loss=131300467216.5161 entropy=18.0150 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 114000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428057.5 mean_steps=15.5
|
|
[Episode 114010] reward=-127964318.6 actor_loss=0.2558 critic_loss=119077674160.5517 entropy=18.0010 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 114020] reward=-121722166.9 actor_loss=0.3164 critic_loss=111597463324.4444 entropy=17.9851 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 114020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501020.0 mean_steps=14.1
|
|
[Episode 114030] reward=-124100514.2 actor_loss=0.3720 critic_loss=121127780352.0000 entropy=17.9765 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 114040] reward=-120018575.8 actor_loss=0.2555 critic_loss=121107559424.0000 entropy=17.9713 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 114040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413508.7 mean_steps=15.6
|
|
[Episode 114050] reward=-126030021.6 actor_loss=0.3153 critic_loss=119143576086.2609 entropy=17.9601 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 114060] reward=-2671394094.6 actor_loss=4.8273 critic_loss=8504863593157973.0000 entropy=17.9527 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 114060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505872.0 mean_steps=14.2
|
|
[Episode 114070] reward=-123704096.0 actor_loss=0.2106 critic_loss=121570607896.7742 entropy=17.9478 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 114080] reward=-123226608.0 actor_loss=0.2734 critic_loss=123194934737.4545 entropy=17.9388 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 114080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540829.5 mean_steps=13.3
|
|
[Episode 114090] reward=-121334839.3 actor_loss=0.2365 critic_loss=109024508495.6444 entropy=17.9175 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 114100] reward=-1120224704.8 actor_loss=0.2524 critic_loss=2371461139934595.0000 entropy=17.9099 approx_kl=-0.0010 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 114100] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-614275.5 mean_steps=11.8
|
|
[Episode 114110] reward=-121841686.0 actor_loss=0.2103 critic_loss=136403747726.2222 entropy=17.9193 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 114120] reward=-271584482.8 actor_loss=0.3361 critic_loss=79094675721420.7969 entropy=17.9367 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 114120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427317.9 mean_steps=14.4
|
|
[Episode 114130] reward=-122472146.2 actor_loss=0.4192 critic_loss=117434348157.1555 entropy=17.9219 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 114140] reward=-121304049.8 actor_loss=0.2837 critic_loss=113924705302.7556 entropy=17.9203 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 114140] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-298962.1 mean_steps=17.5
|
|
[Episode 114150] reward=-123008818.6 actor_loss=0.2442 critic_loss=116944471877.8182 entropy=17.9222 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 114160] reward=-1642772320.5 actor_loss=0.2218 critic_loss=2927021488783724.0000 entropy=17.9296 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 114160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-356290.7 mean_steps=16.2
|
|
[Episode 114170] reward=-119382607.8 actor_loss=0.3457 critic_loss=112397214866.2857 entropy=17.9018 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 114180] reward=-114344591.6 actor_loss=0.2830 critic_loss=108263353548.8000 entropy=17.9064 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 114180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557688.1 mean_steps=13.5
|
|
[Episode 114190] reward=-122165567.8 actor_loss=0.2295 critic_loss=119970185216.0000 entropy=17.9022 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 114200] reward=-120884983.3 actor_loss=0.3483 critic_loss=112130611200.0000 entropy=17.8980 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 114200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418527.1 mean_steps=15.6
|
|
[Episode 114210] reward=-123221612.0 actor_loss=0.3370 critic_loss=117159886416.8421 entropy=17.9198 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 114220] reward=-2539285622.4 actor_loss=0.3117 critic_loss=14263837228036552.0000 entropy=17.9295 approx_kl=-0.0000 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 114220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-449579.8 mean_steps=14.0
|
|
[Episode 114230] reward=-125292923.7 actor_loss=0.2930 critic_loss=119006899479.2727 entropy=17.9326 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 114240] reward=-125925825.3 actor_loss=0.2283 critic_loss=125096776424.7273 entropy=17.9255 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 114240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-634117.5 mean_steps=12.1
|
|
[Episode 114250] reward=-120730905.6 actor_loss=0.2471 critic_loss=114215059456.0000 entropy=17.9153 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 114260] reward=-503204938.0 actor_loss=0.3857 critic_loss=258647631541043.1875 entropy=17.8984 approx_kl=0.0019 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 114260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442843.2 mean_steps=14.6
|
|
[Episode 114270] reward=-124642605.4 actor_loss=0.2410 critic_loss=121945781733.0526 entropy=17.9019 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 114280] reward=-116492755.0 actor_loss=0.3868 critic_loss=108057193494.7556 entropy=17.9064 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 114280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491345.0 mean_steps=13.7
|
|
[Episode 114290] reward=-125313420.1 actor_loss=0.3001 critic_loss=124897574229.3333 entropy=17.9169 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 114300] reward=-121605616.3 actor_loss=0.3804 critic_loss=117559238109.8667 entropy=17.9047 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 114300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-389358.6 mean_steps=15.2
|
|
[Episode 114310] reward=-117217113.1 actor_loss=0.2110 critic_loss=115946106880.0000 entropy=17.8967 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 114320] reward=-1322963848.9 actor_loss=0.3517 critic_loss=3562658717103263.5000 entropy=17.8936 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 114320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474279.5 mean_steps=13.9
|
|
[Episode 114330] reward=-352954365.7 actor_loss=0.3486 critic_loss=102479056245737.2500 entropy=17.9076 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 114340] reward=-118734847.7 actor_loss=0.3998 critic_loss=114849197260.8000 entropy=17.9117 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 114340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-444022.6 mean_steps=13.2
|
|
[Episode 114350] reward=-3585554625.7 actor_loss=4.6439 critic_loss=12224065907597858.0000 entropy=17.9145 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1003 front_blocked=0
|
|
[Episode 114360] reward=-845133323.0 actor_loss=7.3215 critic_loss=907783194553730.8750 entropy=17.9319 approx_kl=0.0007 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 114360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480202.2 mean_steps=14.8
|
|
[Episode 114370] reward=-120908205.4 actor_loss=0.2411 critic_loss=125136415584.7111 entropy=17.9254 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 114380] reward=-116551460.8 actor_loss=0.3433 critic_loss=116611302636.3077 entropy=17.9306 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 114380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-608476.6 mean_steps=14.1
|
|
[Episode 114390] reward=-122916377.4 actor_loss=0.2825 critic_loss=123075624478.1176 entropy=17.9292 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 114400] reward=-124846068.4 actor_loss=0.2623 critic_loss=136805845492.6222 entropy=17.9303 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 114400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613474.4 mean_steps=12.8
|
|
[Episode 114410] reward=-1483032719.9 actor_loss=0.3282 critic_loss=4820736475124349.0000 entropy=17.9665 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 114420] reward=-754881293.2 actor_loss=8.3178 critic_loss=1133382504913305.5000 entropy=17.9781 approx_kl=0.0030 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 114420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424265.9 mean_steps=14.8
|
|
[Episode 114430] reward=-1068396920.3 actor_loss=0.3064 critic_loss=2238273664232106.7500 entropy=18.0158 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 114440] reward=-1645042021.7 actor_loss=0.2748 critic_loss=6050691682076080.0000 entropy=18.0053 approx_kl=0.0005 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 114440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474382.9 mean_steps=14.4
|
|
[Episode 114450] reward=-120163215.7 actor_loss=0.2588 critic_loss=115508296960.0000 entropy=17.9934 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 114460] reward=-125612138.0 actor_loss=0.2369 critic_loss=116843519089.7778 entropy=17.9927 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 114460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422940.2 mean_steps=15.2
|
|
[Episode 114470] reward=-124039844.8 actor_loss=0.1810 critic_loss=131712901982.3158 entropy=17.9766 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 114480] reward=-2713450226.3 actor_loss=0.2752 critic_loss=15691718134682966.0000 entropy=17.9859 approx_kl=-0.0013 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 114480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-535771.2 mean_steps=12.2
|
|
[Episode 114490] reward=-118589773.5 actor_loss=0.2521 critic_loss=114845326579.8095 entropy=17.9920 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 114500] reward=-125988781.2 actor_loss=0.2508 critic_loss=146195430711.6522 entropy=17.9789 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 114500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563664.0 mean_steps=12.7
|
|
[Episode 114510] reward=-127118738.6 actor_loss=0.2337 critic_loss=126745791146.6667 entropy=17.9588 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 114520] reward=-883091911.5 actor_loss=7.3030 critic_loss=1590370645585029.5000 entropy=17.9465 approx_kl=0.0019 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 114520] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-312645.6 mean_steps=17.1
|
|
[Episode 114530] reward=-125465548.7 actor_loss=0.2523 critic_loss=119440764451.7209 entropy=17.9444 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 114540] reward=-124709755.9 actor_loss=0.2781 critic_loss=125878793116.9032 entropy=17.9479 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 114540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-379058.4 mean_steps=15.1
|
|
[Episode 114550] reward=-773826628.6 actor_loss=0.2750 critic_loss=668904181604898.1250 entropy=17.9670 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 114560] reward=-122844666.4 actor_loss=0.2704 critic_loss=119899529762.1333 entropy=17.9726 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 114560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459843.7 mean_steps=15.8
|
|
[Episode 114570] reward=-123661443.4 actor_loss=0.2379 critic_loss=126117939976.8276 entropy=17.9637 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 114580] reward=-118705589.7 actor_loss=0.2283 critic_loss=109374845201.0667 entropy=17.9653 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 114580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-609894.4 mean_steps=13.8
|
|
[Episode 114590] reward=-126654013.6 actor_loss=0.1578 critic_loss=123267478323.2000 entropy=17.9608 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 114600] reward=-121315398.0 actor_loss=0.2549 critic_loss=123268640137.8462 entropy=17.9728 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 114600] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-364582.2 mean_steps=16.6
|
|
[Episode 114610] reward=-121687884.2 actor_loss=0.2738 critic_loss=138538557124.9231 entropy=17.9766 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 114620] reward=-578904525.2 actor_loss=11.4304 critic_loss=586164125827072.0000 entropy=17.9802 approx_kl=0.0032 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 114620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-374477.2 mean_steps=15.1
|
|
[Episode 114630] reward=-119653913.7 actor_loss=0.2773 critic_loss=110712694556.4444 entropy=17.9825 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 114640] reward=-122836510.2 actor_loss=0.3582 critic_loss=122912928768.0000 entropy=17.9958 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 114640] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-624951.3 mean_steps=10.9
|
|
[Episode 114650] reward=-124212118.9 actor_loss=0.2996 critic_loss=122258976182.8571 entropy=17.9945 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 114660] reward=-1246019707.7 actor_loss=0.3071 critic_loss=3019701524015786.5000 entropy=18.0061 approx_kl=0.0033 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 114660] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-241825.6 mean_steps=18.9
|
|
[Episode 114670] reward=-117655487.3 actor_loss=0.3326 critic_loss=112895393063.8222 entropy=18.0078 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 114680] reward=-126735985.0 actor_loss=0.2016 critic_loss=139534009230.2222 entropy=18.0059 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 114680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592434.3 mean_steps=13.8
|
|
[Episode 114690] reward=-132042599.8 actor_loss=0.2192 critic_loss=166438025443.5555 entropy=17.9968 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 114700] reward=-114878031.3 actor_loss=0.3335 critic_loss=113601558784.0000 entropy=18.0170 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 114700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-620195.0 mean_steps=12.7
|
|
[Episode 114710] reward=-701923513.4 actor_loss=0.3727 critic_loss=906028607952213.3750 entropy=18.0417 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 114720] reward=-126470756.2 actor_loss=0.2527 critic_loss=128859193344.0000 entropy=18.0331 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 114720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-97183860.6 mean_steps=19.9
|
|
[Episode 114730] reward=-808255340.6 actor_loss=0.2698 critic_loss=1004829006626816.0000 entropy=18.0823 approx_kl=-0.0015 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 114740] reward=-125133482.1 actor_loss=0.2322 critic_loss=131715729544.5333 entropy=18.0784 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 114740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526225.6 mean_steps=14.2
|
|
[Episode 114750] reward=-131493767.5 actor_loss=0.1960 critic_loss=154607159978.6667 entropy=18.0595 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 114760] reward=-118760366.3 actor_loss=0.3358 critic_loss=114350220174.2222 entropy=18.0614 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 114760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488664.2 mean_steps=14.9
|
|
[Episode 114770] reward=-120679241.5 actor_loss=0.3304 critic_loss=113066625342.5778 entropy=18.0520 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 114780] reward=-120836136.6 actor_loss=0.3460 critic_loss=117058991263.2889 entropy=18.0294 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 114780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423227.7 mean_steps=15.1
|
|
[Episode 114790] reward=-123705653.6 actor_loss=0.2910 critic_loss=121239459521.4222 entropy=18.0141 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 114800] reward=-121578232.6 actor_loss=0.2417 critic_loss=119062422960.3556 entropy=17.9950 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 114800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438724.2 mean_steps=14.5
|
|
[Episode 114810] reward=-119673268.3 actor_loss=0.2932 critic_loss=113308824735.2889 entropy=17.9794 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 114820] reward=-122133688.1 actor_loss=0.2769 critic_loss=119124411278.2222 entropy=17.9668 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 114820] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-675646.6 mean_steps=11.3
|
|
[Episode 114830] reward=-123471859.2 actor_loss=0.2705 critic_loss=146392497803.6364 entropy=17.9472 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 114840] reward=-124827048.7 actor_loss=0.2954 critic_loss=116065300107.6364 entropy=17.9420 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 114840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-535560.0 mean_steps=15.4
|
|
[Episode 114850] reward=-116678610.1 actor_loss=0.3919 critic_loss=114428658408.7273 entropy=17.9453 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 114860] reward=-121911274.8 actor_loss=0.3018 critic_loss=111987058501.8182 entropy=17.9740 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 114860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-642078.4 mean_steps=13.1
|
|
[Episode 114870] reward=-142710540.2 actor_loss=0.2425 critic_loss=1393051800553.2444 entropy=17.9825 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 114880] reward=-128976535.0 actor_loss=0.3336 critic_loss=126316340317.0909 entropy=17.9880 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 114880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-520406.7 mean_steps=15.1
|
|
[Episode 114890] reward=-297719280.7 actor_loss=0.2073 critic_loss=75326250071563.3750 entropy=18.0196 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 114900] reward=-126828289.7 actor_loss=0.2935 critic_loss=122641726512.7619 entropy=18.0349 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 114900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-342526.1 mean_steps=16.0
|
|
[Episode 114910] reward=-122075333.2 actor_loss=0.2803 critic_loss=120110497609.9556 entropy=18.0243 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 114920] reward=-124969572.4 actor_loss=0.1985 critic_loss=120493216017.0667 entropy=18.0071 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 114920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483413.9 mean_steps=14.2
|
|
[Episode 114930] reward=-127404762.6 actor_loss=0.2466 critic_loss=210194109235.2000 entropy=18.0027 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 114940] reward=-122735552.6 actor_loss=0.2434 critic_loss=122284849419.1304 entropy=18.0135 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 114940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430243.4 mean_steps=15.4
|
|
[Episode 114950] reward=-120776130.4 actor_loss=0.3690 critic_loss=228726208921.6000 entropy=18.0045 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 114960] reward=-123008471.8 actor_loss=0.3190 critic_loss=114460812773.0526 entropy=17.9936 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 114960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-413457.8 mean_steps=16.2
|
|
[Episode 114970] reward=-123505315.9 actor_loss=0.3102 critic_loss=117295842554.3111 entropy=17.9841 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 114980] reward=-134293041.2 actor_loss=0.3477 critic_loss=361740068864.0000 entropy=18.0055 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 114980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-454731.5 mean_steps=12.9
|
|
[Episode 114990] reward=-128421064.8 actor_loss=0.3121 critic_loss=127505284156.2353 entropy=18.0050 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 115000] reward=-171569787.4 actor_loss=0.2974 critic_loss=7428640743970.1338 entropy=17.9942 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 115000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479026.2 mean_steps=14.8
|
|
[Episode 115010] reward=-130647802.8 actor_loss=0.2621 critic_loss=132011975475.2000 entropy=17.9855 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 115020] reward=-123592438.0 actor_loss=0.1845 critic_loss=125262291126.0444 entropy=17.9901 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 115020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-377654.2 mean_steps=16.3
|
|
[Episode 115030] reward=-123922533.8 actor_loss=0.3249 critic_loss=116037965687.4667 entropy=17.9785 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 115040] reward=-126362469.5 actor_loss=0.2381 critic_loss=121482582698.6667 entropy=17.9619 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 115040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515522.4 mean_steps=13.9
|
|
[Episode 115050] reward=-121336978.0 actor_loss=0.2817 critic_loss=120341075831.4667 entropy=17.9594 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 115060] reward=-124969244.5 actor_loss=0.2850 critic_loss=118026068332.0889 entropy=17.9362 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 115060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535692.5 mean_steps=14.4
|
|
[Episode 115070] reward=-117648850.6 actor_loss=0.3546 critic_loss=109647767142.4000 entropy=17.9072 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 115080] reward=-125024474.6 actor_loss=0.3547 critic_loss=118132007215.4074 entropy=17.8972 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 115080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-519099.1 mean_steps=14.9
|
|
[Episode 115090] reward=-123353252.9 actor_loss=0.2021 critic_loss=112303564800.0000 entropy=17.8916 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 115100] reward=-121358819.0 actor_loss=0.3029 critic_loss=114334333610.6667 entropy=17.8868 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 115100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575022.8 mean_steps=13.7
|
|
[Episode 115110] reward=-120544323.2 actor_loss=0.1850 critic_loss=111456953139.2000 entropy=17.8836 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 115120] reward=-118440894.6 actor_loss=0.2611 critic_loss=109913947249.7778 entropy=17.8842 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 115120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-600803.2 mean_steps=13.5
|
|
[Episode 115130] reward=-123802077.8 actor_loss=0.2718 critic_loss=117955768775.1111 entropy=17.8884 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 115140] reward=-118652083.5 actor_loss=0.3077 critic_loss=114699000945.7778 entropy=17.8725 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 115140] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-651255.1 mean_steps=11.4
|
|
[Episode 115150] reward=-117762578.9 actor_loss=0.2172 critic_loss=111263490048.0000 entropy=17.8586 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 115160] reward=-120217910.0 actor_loss=0.3084 critic_loss=113053295902.7200 entropy=17.8284 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 115160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-395517.8 mean_steps=17.2
|
|
[Episode 115170] reward=-123419111.2 actor_loss=0.3358 critic_loss=124545390819.5556 entropy=17.8074 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 115180] reward=-122354662.2 actor_loss=0.3171 critic_loss=120457946398.7200 entropy=17.8084 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 115180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-535041.2 mean_steps=12.6
|
|
[Episode 115190] reward=-117489463.8 actor_loss=0.3232 critic_loss=103264767544.8889 entropy=17.8025 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 115200] reward=-117336841.7 actor_loss=0.3017 critic_loss=111417990030.2222 entropy=17.7998 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 115200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-606388.8 mean_steps=12.7
|
|
[Episode 115210] reward=-122705633.6 actor_loss=0.1627 critic_loss=114293558710.8571 entropy=17.7833 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 115220] reward=-112510770.5 actor_loss=0.2253 critic_loss=104562972514.4615 entropy=17.7850 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 115220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610382.3 mean_steps=12.9
|
|
[Episode 115230] reward=-120967981.8 actor_loss=0.2161 critic_loss=111991387780.7407 entropy=17.7872 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 115240] reward=-118038652.2 actor_loss=0.4351 critic_loss=110067999448.1778 entropy=17.7737 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 115240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459033.5 mean_steps=15.7
|
|
[Episode 115250] reward=-120620280.6 actor_loss=0.3049 critic_loss=115440344104.9600 entropy=17.7769 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 115260] reward=-120734802.4 actor_loss=0.3644 critic_loss=214520184832.0000 entropy=17.7677 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 115260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405701.6 mean_steps=15.2
|
|
[Episode 115270] reward=-120741681.6 actor_loss=0.3341 critic_loss=104479579447.6522 entropy=17.7672 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 115280] reward=-124580018.0 actor_loss=0.2263 critic_loss=117232240640.0000 entropy=17.7669 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 115280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-428126.2 mean_steps=16.2
|
|
[Episode 115290] reward=-121718009.3 actor_loss=0.3194 critic_loss=126416134846.1714 entropy=17.7606 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 115300] reward=-118585406.5 actor_loss=0.2740 critic_loss=103147878470.6207 entropy=17.7606 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 115300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-455769.2 mean_steps=13.5
|
|
[Episode 115310] reward=-116616422.2 actor_loss=0.3265 critic_loss=104232175674.5143 entropy=17.7565 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 115320] reward=-122209872.4 actor_loss=0.2468 critic_loss=111168599906.4615 entropy=17.7468 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 115320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-530025.1 mean_steps=12.7
|
|
[Episode 115330] reward=-116745636.5 actor_loss=0.4020 critic_loss=102989737611.6364 entropy=17.7399 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 115340] reward=-116578671.2 actor_loss=0.2765 critic_loss=103602639360.0000 entropy=17.7364 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 115340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-363155.0 mean_steps=15.9
|
|
[Episode 115350] reward=-123930425.1 actor_loss=0.1856 critic_loss=114797764061.8667 entropy=17.7409 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 115360] reward=-120172844.2 actor_loss=0.2434 critic_loss=104118972416.0000 entropy=17.7492 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 115360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-496200.7 mean_steps=15.1
|
|
[Episode 115370] reward=-119686935.8 actor_loss=0.2597 critic_loss=109958577860.9231 entropy=17.7372 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 115380] reward=-117968633.8 actor_loss=0.2932 critic_loss=108620803072.0000 entropy=17.7298 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 115380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442449.4 mean_steps=15.4
|
|
[Episode 115390] reward=-116733911.4 actor_loss=0.3700 critic_loss=110329732792.3200 entropy=17.7214 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 115400] reward=-116880017.8 actor_loss=0.2174 critic_loss=106604831961.2121 entropy=17.7175 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 115400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-612095.3 mean_steps=12.6
|
|
[Episode 115410] reward=-117910812.1 actor_loss=0.3021 critic_loss=103689042329.6000 entropy=17.7170 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 115420] reward=-123217350.8 actor_loss=0.2578 critic_loss=115668480534.2609 entropy=17.7110 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 115420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-384881.6 mean_steps=16.1
|
|
[Episode 115430] reward=-116685071.1 actor_loss=0.3331 critic_loss=104952804010.6667 entropy=17.7041 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 115440] reward=-120115364.7 actor_loss=0.2338 critic_loss=111777417443.5556 entropy=17.6956 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 115440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-632481.1 mean_steps=13.2
|
|
[Episode 115450] reward=-125172521.6 actor_loss=0.2487 critic_loss=114034869086.3158 entropy=17.6913 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 115460] reward=-116993433.4 actor_loss=0.3936 critic_loss=107166687944.3478 entropy=17.6817 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 115460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525005.2 mean_steps=13.7
|
|
[Episode 115470] reward=-123640967.3 actor_loss=0.2539 critic_loss=119364844544.0000 entropy=17.6769 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 115480] reward=-119083871.6 actor_loss=0.3207 critic_loss=112749990646.5185 entropy=17.6745 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 115480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-382593.0 mean_steps=15.8
|
|
[Episode 115490] reward=-123879412.9 actor_loss=0.2924 critic_loss=134349211490.4615 entropy=17.6590 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 115500] reward=-118038805.2 actor_loss=0.2839 critic_loss=111341425095.1111 entropy=17.6526 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 115500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-577678.5 mean_steps=11.6
|
|
[Episode 115510] reward=-142358760.3 actor_loss=0.2002 critic_loss=1845413446845.6296 entropy=17.6554 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 115520] reward=-119015058.4 actor_loss=0.3654 critic_loss=106160158720.0000 entropy=17.6489 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 115520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413491.1 mean_steps=15.8
|
|
[Episode 115530] reward=-121734950.6 actor_loss=0.2651 critic_loss=110843842418.7586 entropy=17.6713 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 115540] reward=-118107019.3 actor_loss=0.3016 critic_loss=105856365069.1282 entropy=17.6789 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 115540] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-686674.4 mean_steps=11.4
|
|
[Episode 115550] reward=-116957912.4 actor_loss=0.1996 critic_loss=111900314889.4815 entropy=17.6721 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 115560] reward=-112536574.0 actor_loss=0.2771 critic_loss=104084059841.4222 entropy=17.6526 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 115560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425951.7 mean_steps=15.6
|
|
[Episode 115570] reward=-124113559.1 actor_loss=0.2285 critic_loss=113164627694.9333 entropy=17.6583 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 115580] reward=-116238471.2 actor_loss=0.2917 critic_loss=102602692608.0000 entropy=17.6520 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 115580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554102.3 mean_steps=13.1
|
|
[Episode 115590] reward=-114268910.5 actor_loss=0.3660 critic_loss=107701417106.2857 entropy=17.6545 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 115600] reward=-121192786.4 actor_loss=0.2841 critic_loss=105749726321.7778 entropy=17.6735 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 115600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-570708.0 mean_steps=14.3
|
|
[Episode 115610] reward=-116380577.6 actor_loss=0.2387 critic_loss=109533257142.8571 entropy=17.6788 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 115620] reward=-124010454.3 actor_loss=0.3363 critic_loss=127248710314.6667 entropy=17.6829 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 115620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447962.0 mean_steps=14.5
|
|
[Episode 115630] reward=-122455468.0 actor_loss=0.3713 critic_loss=115157995683.8400 entropy=17.6856 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 115640] reward=-121456593.3 actor_loss=0.3185 critic_loss=117751522099.2000 entropy=17.6818 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 115640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577733.4 mean_steps=13.8
|
|
[Episode 115650] reward=-120530545.7 actor_loss=0.2320 critic_loss=105907192463.3600 entropy=17.6633 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 115660] reward=-118565458.8 actor_loss=0.3053 critic_loss=113001412678.6207 entropy=17.6479 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 115660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555213.4 mean_steps=13.2
|
|
[Episode 115670] reward=-117935205.1 actor_loss=0.3711 critic_loss=107077579382.1538 entropy=17.6351 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 115680] reward=-117637719.9 actor_loss=0.3058 critic_loss=107701201578.6667 entropy=17.6284 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 115680] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-411504.2 mean_steps=17.3
|
|
[Episode 115690] reward=-123018492.2 actor_loss=0.3594 critic_loss=112650991323.4286 entropy=17.6350 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 115700] reward=-120224855.2 actor_loss=0.3030 critic_loss=109248622755.8400 entropy=17.6286 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 115700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435523.8 mean_steps=15.2
|
|
[Episode 115710] reward=-118317980.3 actor_loss=0.3240 critic_loss=105160314470.4000 entropy=17.6301 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 115720] reward=-116349636.9 actor_loss=0.2545 critic_loss=102119727675.5349 entropy=17.6230 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 115720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453714.0 mean_steps=15.8
|
|
[Episode 115730] reward=-113618817.0 actor_loss=0.2667 critic_loss=103820247040.0000 entropy=17.6390 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 115740] reward=-122957016.3 actor_loss=0.4206 critic_loss=111863224027.4286 entropy=17.6380 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 115740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539989.6 mean_steps=14.2
|
|
[Episode 115750] reward=-119264930.8 actor_loss=0.2673 critic_loss=109508412562.2857 entropy=17.6420 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 115760] reward=-121203191.2 actor_loss=0.3239 critic_loss=114239189622.1538 entropy=17.6376 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 115760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592679.8 mean_steps=13.6
|
|
[Episode 115770] reward=-125351403.0 actor_loss=0.2807 critic_loss=114009189338.0741 entropy=17.6328 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 115780] reward=-119481237.1 actor_loss=0.3449 critic_loss=109082088999.3846 entropy=17.6202 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 115780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-556273.5 mean_steps=12.2
|
|
[Episode 115790] reward=-116004020.9 actor_loss=0.3468 critic_loss=110274463557.8182 entropy=17.6181 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 115800] reward=-122281006.3 actor_loss=0.2270 critic_loss=107706822259.6129 entropy=17.6080 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 115800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-641261.9 mean_steps=13.1
|
|
[Episode 115810] reward=-119668360.1 actor_loss=0.2826 critic_loss=105780838400.0000 entropy=17.6046 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 115820] reward=-121304786.7 actor_loss=0.2871 critic_loss=110438530779.4286 entropy=17.6003 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 115820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-555955.2 mean_steps=11.6
|
|
[Episode 115830] reward=-116586620.8 actor_loss=0.2589 critic_loss=105745575662.9333 entropy=17.5988 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 115840] reward=-119885869.5 actor_loss=0.3577 critic_loss=107883825272.4706 entropy=17.6014 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 115840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454790.6 mean_steps=15.7
|
|
[Episode 115850] reward=-122216999.2 actor_loss=0.3238 critic_loss=107943155525.8182 entropy=17.5916 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 115860] reward=-127846090.8 actor_loss=0.1976 critic_loss=117392155209.1429 entropy=17.6023 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 115860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563666.0 mean_steps=13.7
|
|
[Episode 115870] reward=-116956113.9 actor_loss=0.2185 critic_loss=100306589346.3415 entropy=17.5954 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 115880] reward=-116969285.0 actor_loss=0.2903 critic_loss=103739387494.4000 entropy=17.6034 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 115880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462809.7 mean_steps=14.7
|
|
[Episode 115890] reward=-117075462.8 actor_loss=0.2513 critic_loss=103070766336.0000 entropy=17.5994 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 115900] reward=-123893758.5 actor_loss=0.2997 critic_loss=112508640721.4545 entropy=17.6015 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 115900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558343.6 mean_steps=13.2
|
|
[Episode 115910] reward=-117918072.5 actor_loss=0.2216 critic_loss=103770622138.1818 entropy=17.6017 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 115920] reward=-116979705.9 actor_loss=0.3892 critic_loss=101677155032.1778 entropy=17.6143 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 115920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-470996.0 mean_steps=15.8
|
|
[Episode 115930] reward=-114004183.8 actor_loss=0.2827 critic_loss=100482670227.9111 entropy=17.6173 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 115940] reward=-115051995.3 actor_loss=0.3221 critic_loss=105614608871.6190 entropy=17.6091 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 115940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543911.5 mean_steps=13.7
|
|
[Episode 115950] reward=-114719364.5 actor_loss=0.4199 critic_loss=95290389299.2000 entropy=17.6054 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 115960] reward=-115488254.1 actor_loss=0.2632 critic_loss=102891190863.6444 entropy=17.5876 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 115960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-453879.3 mean_steps=12.7
|
|
[Episode 115970] reward=-117678631.5 actor_loss=0.3198 critic_loss=99995038773.8947 entropy=17.5730 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 115980] reward=-117280498.0 actor_loss=0.2420 critic_loss=112820554089.4118 entropy=17.5573 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 115980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-416519.6 mean_steps=14.2
|
|
[Episode 115990] reward=-116953447.3 actor_loss=0.3410 critic_loss=100942213939.2000 entropy=17.5518 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 116000] reward=-113703986.6 actor_loss=0.3270 critic_loss=103080303426.3704 entropy=17.5459 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 116000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-387167.0 mean_steps=16.1
|
|
[Episode 116010] reward=-121805168.4 actor_loss=0.3119 critic_loss=105524576968.3478 entropy=17.5414 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 116020] reward=-121891952.5 actor_loss=0.2764 critic_loss=106547233845.8947 entropy=17.5490 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 116020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495197.5 mean_steps=15.1
|
|
[Episode 116030] reward=-119768141.1 actor_loss=0.2014 critic_loss=107614732615.6800 entropy=17.5485 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 116040] reward=-119028697.6 actor_loss=0.2245 critic_loss=102271565568.0000 entropy=17.5405 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 116040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-504715.9 mean_steps=13.2
|
|
[Episode 116050] reward=-119245653.2 actor_loss=0.2068 critic_loss=101050787547.4286 entropy=17.5371 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 116060] reward=-117501721.2 actor_loss=0.2708 critic_loss=103284846832.9412 entropy=17.5401 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 116060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547460.6 mean_steps=13.2
|
|
[Episode 116070] reward=-123035589.4 actor_loss=0.3077 critic_loss=110071621924.5714 entropy=17.5506 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 116080] reward=-118292719.8 actor_loss=0.3285 critic_loss=104041509345.8824 entropy=17.5514 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 116080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-601409.3 mean_steps=12.8
|
|
[Episode 116090] reward=-108459175.9 actor_loss=0.3208 critic_loss=94164715292.4444 entropy=17.5533 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 116100] reward=-116340608.9 actor_loss=0.4033 critic_loss=98883866482.7586 entropy=17.5476 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 116100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-512475.8 mean_steps=13.5
|
|
[Episode 116110] reward=-115285411.9 actor_loss=0.2617 critic_loss=100092816725.3333 entropy=17.5606 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 116120] reward=-115857049.1 actor_loss=0.2559 critic_loss=102842831738.4348 entropy=17.5624 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 116120] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-276002.5 mean_steps=19.2
|
|
[Episode 116130] reward=-116594026.9 actor_loss=0.2921 critic_loss=110064973596.4444 entropy=17.5589 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 116140] reward=-117531808.6 actor_loss=0.3647 critic_loss=102900215644.1600 entropy=17.5507 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 116140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-527701.0 mean_steps=12.3
|
|
[Episode 116150] reward=-115989236.0 actor_loss=0.2011 critic_loss=100531723195.7333 entropy=17.5552 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 116160] reward=-114602579.8 actor_loss=0.2801 critic_loss=111109349072.5926 entropy=17.5368 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 116160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-395918.4 mean_steps=14.2
|
|
[Episode 116170] reward=-125008045.6 actor_loss=0.3389 critic_loss=107285500099.0476 entropy=17.5267 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 116180] reward=-114643456.2 actor_loss=0.2465 critic_loss=100313834564.2667 entropy=17.5311 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 116180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501186.0 mean_steps=13.2
|
|
[Episode 116190] reward=-111045357.7 actor_loss=0.3453 critic_loss=93737784843.3778 entropy=17.5356 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 116200] reward=-113300673.8 actor_loss=0.2936 critic_loss=98082833294.2222 entropy=17.5335 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 116200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-650221.8 mean_steps=12.0
|
|
[Episode 116210] reward=-114723558.9 actor_loss=0.3326 critic_loss=96786825697.8824 entropy=17.5349 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 116220] reward=-118044910.0 actor_loss=0.3751 critic_loss=104132998609.4545 entropy=17.5244 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 116220] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-599112.9 mean_steps=11.9
|
|
[Episode 116230] reward=-124268583.1 actor_loss=0.2312 critic_loss=110670855021.7143 entropy=17.5191 approx_kl=0.0115 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 116240] reward=-115305073.0 actor_loss=0.2468 critic_loss=96468219845.4857 entropy=17.4793 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 116240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-395035.2 mean_steps=15.4
|
|
[Episode 116250] reward=-108086930.9 actor_loss=0.3681 critic_loss=93038282752.0000 entropy=17.4649 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 116260] reward=-111862006.6 actor_loss=0.3155 critic_loss=107186177780.8696 entropy=17.4755 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 116260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445947.0 mean_steps=14.4
|
|
[Episode 116270] reward=-118641357.3 actor_loss=0.3491 critic_loss=100415866798.0800 entropy=17.4850 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 116280] reward=-112122430.0 actor_loss=0.3773 critic_loss=129339743573.3333 entropy=17.4887 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 116280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467619.3 mean_steps=13.8
|
|
[Episode 116290] reward=-118903166.3 actor_loss=0.3458 critic_loss=103114473472.0000 entropy=17.4985 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 116300] reward=-118600076.9 actor_loss=0.2499 critic_loss=105476720054.8571 entropy=17.5072 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 116300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478765.4 mean_steps=14.9
|
|
[Episode 116310] reward=-113537397.8 actor_loss=0.3461 critic_loss=98509146316.8000 entropy=17.5043 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 116320] reward=-113518093.0 actor_loss=0.2983 critic_loss=104803440932.5714 entropy=17.5005 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 116320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614418.3 mean_steps=12.6
|
|
[Episode 116330] reward=-115509526.8 actor_loss=0.3082 critic_loss=130614385001.4118 entropy=17.5015 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 116340] reward=-133657699.3 actor_loss=0.3427 critic_loss=1433971471155.2000 entropy=17.5040 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 116340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402060.8 mean_steps=16.2
|
|
[Episode 116350] reward=-114429880.5 actor_loss=0.4490 critic_loss=105227357277.0909 entropy=17.5101 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 116360] reward=-122962494.1 actor_loss=0.3075 critic_loss=111574361624.3810 entropy=17.5157 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 116360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-527926.5 mean_steps=13.4
|
|
[Episode 116370] reward=-123469991.4 actor_loss=0.2709 critic_loss=324379283671.5789 entropy=17.5293 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 116380] reward=-122164480.6 actor_loss=0.2623 critic_loss=110356708147.2000 entropy=17.5456 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 116380] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-360730.9 mean_steps=16.8
|
|
[Episode 116390] reward=-118492794.0 actor_loss=0.2602 critic_loss=106862481192.4211 entropy=17.5433 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 116400] reward=-118655110.8 actor_loss=0.2190 critic_loss=103225915759.5897 entropy=17.5539 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 116400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425297.9 mean_steps=15.9
|
|
[Episode 116410] reward=-121444020.7 actor_loss=0.2659 critic_loss=117808473047.0400 entropy=17.5414 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 116420] reward=-125659721.8 actor_loss=0.1881 critic_loss=113815837696.0000 entropy=17.5477 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 116420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495534.7 mean_steps=14.1
|
|
[Episode 116430] reward=-118631714.5 actor_loss=0.3302 critic_loss=103828855386.3529 entropy=17.5523 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 116440] reward=-121853321.3 actor_loss=0.2394 critic_loss=134535640529.4545 entropy=17.5726 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 116440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552789.9 mean_steps=13.8
|
|
[Episode 116450] reward=-119855537.3 actor_loss=0.2487 critic_loss=106400709080.6154 entropy=17.5932 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 116460] reward=-117509305.9 actor_loss=0.2778 critic_loss=104613020012.0889 entropy=17.5912 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 116460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390409.2 mean_steps=15.2
|
|
[Episode 116470] reward=-117840026.7 actor_loss=0.2578 critic_loss=100251010480.3556 entropy=17.6041 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 116480] reward=-119858908.7 actor_loss=0.3875 critic_loss=107517899207.1111 entropy=17.6201 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 116480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536014.3 mean_steps=13.6
|
|
[Episode 116490] reward=-118946122.7 actor_loss=0.1524 critic_loss=105117629235.2000 entropy=17.6107 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 116500] reward=-158837704.4 actor_loss=0.2933 critic_loss=6451677888512.0000 entropy=17.6013 approx_kl=0.0016 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 116500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602320.6 mean_steps=12.9
|
|
[Episode 116510] reward=-119566276.9 actor_loss=0.3064 critic_loss=109754641248.7111 entropy=17.6049 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 116520] reward=-117343615.3 actor_loss=0.1957 critic_loss=102282441887.2889 entropy=17.5993 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 116520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-301611.8 mean_steps=16.9
|
|
[Episode 116530] reward=-122092767.1 actor_loss=0.3437 critic_loss=109262198101.3333 entropy=17.5902 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 116540] reward=-122001563.4 actor_loss=0.3141 critic_loss=107949153280.0000 entropy=17.5933 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 116540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447894.2 mean_steps=15.0
|
|
[Episode 116550] reward=-117329234.4 actor_loss=0.2660 critic_loss=102213062941.7674 entropy=17.5902 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 116560] reward=-116503572.2 actor_loss=0.3631 critic_loss=104515599655.8222 entropy=17.5852 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 116560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479227.7 mean_steps=14.9
|
|
[Episode 116570] reward=-121554916.4 actor_loss=0.2470 critic_loss=111506705749.3333 entropy=17.5995 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 116580] reward=-123533577.9 actor_loss=0.2024 critic_loss=111106585395.2000 entropy=17.6005 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 116580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391315.8 mean_steps=16.1
|
|
[Episode 116590] reward=-115554634.7 actor_loss=0.4164 critic_loss=105344950784.0000 entropy=17.5987 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 116600] reward=-116821930.8 actor_loss=0.3327 critic_loss=101579670928.6956 entropy=17.5893 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 116600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516647.3 mean_steps=14.3
|
|
[Episode 116610] reward=-117765544.8 actor_loss=0.3211 critic_loss=101415133440.0000 entropy=17.5809 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 116620] reward=-116296946.2 actor_loss=0.2924 critic_loss=102020916565.3333 entropy=17.5804 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 116620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-473462.7 mean_steps=16.0
|
|
[Episode 116630] reward=-117261366.9 actor_loss=0.3669 critic_loss=104767546307.7647 entropy=17.5695 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 116640] reward=-114451685.5 actor_loss=0.4198 critic_loss=98918846756.5714 entropy=17.5738 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 116640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-583761.5 mean_steps=13.8
|
|
[Episode 116650] reward=-117386394.2 actor_loss=0.2757 critic_loss=102673816663.7714 entropy=17.5808 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 116660] reward=-119607194.4 actor_loss=0.2962 critic_loss=102402658304.0000 entropy=17.5800 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 116660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497451.4 mean_steps=15.2
|
|
[Episode 116670] reward=-118040589.7 actor_loss=0.3443 critic_loss=107864813171.6129 entropy=17.5755 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 116680] reward=-124263038.0 actor_loss=0.3374 critic_loss=113844604712.4211 entropy=17.5852 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 116680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-580992.9 mean_steps=13.4
|
|
[Episode 116690] reward=-120219956.4 actor_loss=0.2994 critic_loss=111523606118.4000 entropy=17.6039 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 116700] reward=-811171694.4 actor_loss=0.4821 critic_loss=1374983721572883.0000 entropy=17.5956 approx_kl=0.0036 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 116700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473030.5 mean_steps=13.9
|
|
[Episode 116710] reward=-119593766.2 actor_loss=0.2461 critic_loss=106273477254.7368 entropy=17.6034 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 116720] reward=-141245556.0 actor_loss=0.2893 critic_loss=2034894837077.3333 entropy=17.6031 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 116720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433018.8 mean_steps=15.8
|
|
[Episode 116730] reward=-120805186.7 actor_loss=0.4151 critic_loss=109604571054.0800 entropy=17.6124 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 116740] reward=-1083663575.9 actor_loss=0.4074 critic_loss=2596699282318586.5000 entropy=17.6111 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 116740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557200.0 mean_steps=13.6
|
|
[Episode 116750] reward=-118319558.9 actor_loss=0.3412 critic_loss=119018378293.8947 entropy=17.6237 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 116760] reward=-121767836.3 actor_loss=0.1892 critic_loss=110108992034.1333 entropy=17.6224 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 116760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475140.7 mean_steps=15.1
|
|
[Episode 116770] reward=-122375367.8 actor_loss=0.2792 critic_loss=112982366328.4706 entropy=17.6149 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 116780] reward=-121842040.7 actor_loss=0.3394 critic_loss=113959611452.2353 entropy=17.6084 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 116780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518953.6 mean_steps=13.4
|
|
[Episode 116790] reward=-118893135.9 actor_loss=0.3187 critic_loss=105623209756.4444 entropy=17.6050 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 116800] reward=-135342708.3 actor_loss=0.3639 critic_loss=2041993080057.0811 entropy=17.5954 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 116800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-418653.3 mean_steps=14.3
|
|
[Episode 116810] reward=-120532267.6 actor_loss=0.3160 critic_loss=109580410333.8667 entropy=17.5814 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 116820] reward=-119200044.8 actor_loss=0.2475 critic_loss=105133040006.0952 entropy=17.5751 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 116820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467501.3 mean_steps=13.7
|
|
[Episode 116830] reward=-119660647.3 actor_loss=0.2158 critic_loss=119280050888.3478 entropy=17.5792 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 116840] reward=-119764375.3 actor_loss=0.3705 critic_loss=117460527706.3529 entropy=17.5890 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 116840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-466386.4 mean_steps=16.6
|
|
[Episode 116850] reward=-111390429.2 actor_loss=0.3740 critic_loss=97479167867.8710 entropy=17.5903 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 116860] reward=-119059146.4 actor_loss=0.2874 critic_loss=104892639232.0000 entropy=17.5863 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 116860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478205.5 mean_steps=14.8
|
|
[Episode 116870] reward=-116523797.8 actor_loss=0.2544 critic_loss=102278709248.0000 entropy=17.5742 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 116880] reward=-113318398.6 actor_loss=0.2833 critic_loss=102593704937.2444 entropy=17.5702 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 116880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509552.7 mean_steps=13.8
|
|
[Episode 116890] reward=-124679898.9 actor_loss=0.3891 critic_loss=121236617352.5333 entropy=17.5731 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 116900] reward=-118495582.8 actor_loss=0.3097 critic_loss=111063318528.0000 entropy=17.5607 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 116900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559105.2 mean_steps=12.8
|
|
[Episode 116910] reward=-118156536.1 actor_loss=0.3194 critic_loss=104160571019.6364 entropy=17.5329 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 116920] reward=-118060341.6 actor_loss=0.3053 critic_loss=102682119021.7143 entropy=17.5341 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 116920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545608.7 mean_steps=13.2
|
|
[Episode 116930] reward=-114596535.1 actor_loss=0.2714 critic_loss=102030180966.4000 entropy=17.5398 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 116940] reward=-115742895.6 actor_loss=0.2469 critic_loss=98704067424.7111 entropy=17.5341 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 116940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489129.5 mean_steps=15.2
|
|
[Episode 116950] reward=-110999611.3 actor_loss=0.3524 critic_loss=99269075646.1714 entropy=17.5207 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 116960] reward=-113957254.2 actor_loss=0.3664 critic_loss=97535306228.6222 entropy=17.5141 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 116960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-561536.0 mean_steps=12.8
|
|
[Episode 116970] reward=-114773054.4 actor_loss=0.3199 critic_loss=101646282114.8445 entropy=17.5249 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 116980] reward=-115330030.1 actor_loss=0.3710 critic_loss=105103031520.7805 entropy=17.5218 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 116980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-376094.4 mean_steps=16.3
|
|
[Episode 116990] reward=-1154677853.2 actor_loss=0.2742 critic_loss=2891103337297627.5000 entropy=17.5238 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 117000] reward=-115102285.5 actor_loss=0.2634 critic_loss=102128713363.9111 entropy=17.5147 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 117000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498979.1 mean_steps=15.2
|
|
[Episode 117010] reward=-118980103.9 actor_loss=0.3574 critic_loss=101136953180.1600 entropy=17.5083 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 117020] reward=-115023118.9 actor_loss=0.3316 critic_loss=101605020383.1795 entropy=17.5117 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 117020] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-630738.3 mean_steps=11.0
|
|
[Episode 117030] reward=-970522502.5 actor_loss=0.2581 critic_loss=2010483659572656.2500 entropy=17.5191 approx_kl=-0.0012 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 117040] reward=-119397668.8 actor_loss=0.2951 critic_loss=158879501715.3940 entropy=17.5368 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 117040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542253.5 mean_steps=14.7
|
|
[Episode 117050] reward=-119525860.3 actor_loss=0.3066 critic_loss=176567039590.4000 entropy=17.5432 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 117060] reward=-115682268.6 actor_loss=0.2391 critic_loss=101380559909.9259 entropy=17.5430 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 117060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-525864.8 mean_steps=15.7
|
|
[Episode 117070] reward=-115876183.0 actor_loss=0.3509 critic_loss=100720473039.2381 entropy=17.5448 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 117080] reward=-862888829.0 actor_loss=0.2883 critic_loss=1613372535836217.0000 entropy=17.5569 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 117080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581193.1 mean_steps=13.1
|
|
[Episode 117090] reward=-118583264.2 actor_loss=0.3081 critic_loss=102774830967.4667 entropy=17.5700 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 117100] reward=-118100632.7 actor_loss=0.2908 critic_loss=102028097945.6000 entropy=17.5561 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 117100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-622653.9 mean_steps=12.8
|
|
[Episode 117110] reward=-114115847.8 actor_loss=0.2144 critic_loss=96514978065.0667 entropy=17.5388 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 117120] reward=-119635462.6 actor_loss=0.2914 critic_loss=220928877454.2222 entropy=17.5571 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 117120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458059.2 mean_steps=14.9
|
|
[Episode 117130] reward=-116456140.6 actor_loss=0.2913 critic_loss=100880604752.8421 entropy=17.5546 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 117140] reward=-3269105425.2 actor_loss=0.2753 critic_loss=17745770233311324.0000 entropy=17.5567 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 117140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-508096.2 mean_steps=15.3
|
|
[Episode 117150] reward=-111254311.8 actor_loss=0.2255 critic_loss=113554411402.9714 entropy=17.5551 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 117160] reward=-122004509.4 actor_loss=0.2481 critic_loss=146904665702.4000 entropy=17.5978 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 117160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-636321.5 mean_steps=12.1
|
|
[Episode 117170] reward=-118332891.0 actor_loss=0.3136 critic_loss=109968465510.4000 entropy=17.5942 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 117180] reward=-111735508.5 actor_loss=0.3651 critic_loss=97484708431.6444 entropy=17.6198 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 117180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428403.9 mean_steps=15.8
|
|
[Episode 117190] reward=-118731614.6 actor_loss=0.2507 critic_loss=103870400609.5238 entropy=17.6179 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 117200] reward=-757089726.1 actor_loss=0.3702 critic_loss=1198094505744065.5000 entropy=17.6271 approx_kl=0.0009 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 117200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-583712.7 mean_steps=13.6
|
|
[Episode 117210] reward=-123127357.7 actor_loss=0.2918 critic_loss=113663932229.8182 entropy=17.6392 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 117220] reward=-930154641.6 actor_loss=0.2970 critic_loss=1852894513591455.2500 entropy=17.6566 approx_kl=0.0013 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 117220] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-280777.1 mean_steps=17.6
|
|
[Episode 117230] reward=-4327909336.6 actor_loss=12.6018 critic_loss=22584496799690028.0000 entropy=17.6753 approx_kl=0.0031 kl_stop=1 intervention_rate=0.1087 front_blocked=0
|
|
[Episode 117240] reward=-118979322.1 actor_loss=0.2812 critic_loss=108159351714.9091 entropy=17.6800 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 117240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410359.1 mean_steps=15.5
|
|
[Episode 117250] reward=-118901031.0 actor_loss=0.3815 critic_loss=114707218000.8421 entropy=17.6761 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 117260] reward=-118776386.8 actor_loss=0.2236 critic_loss=106560976281.6000 entropy=17.6768 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 117260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479074.7 mean_steps=14.8
|
|
[Episode 117270] reward=-3640226322.8 actor_loss=0.3959 critic_loss=35636628876153468.0000 entropy=17.6730 approx_kl=0.0009 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 117280] reward=-115108986.9 actor_loss=0.2996 critic_loss=106202248169.2444 entropy=17.6681 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 117280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483460.3 mean_steps=15.2
|
|
[Episode 117290] reward=-112704444.0 actor_loss=0.3572 critic_loss=106018092869.8182 entropy=17.6663 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 117300] reward=-115060194.2 actor_loss=0.2826 critic_loss=101363480130.7826 entropy=17.6630 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 117300] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-663497.6 mean_steps=11.6
|
|
[Episode 117310] reward=-120825271.6 actor_loss=0.2051 critic_loss=107996108390.4000 entropy=17.6590 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 117320] reward=-524172921.1 actor_loss=0.1483 critic_loss=589751720025201.7500 entropy=17.6551 approx_kl=0.0004 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 117320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-559980.1 mean_steps=15.1
|
|
[Episode 117330] reward=-120688456.9 actor_loss=0.2884 critic_loss=110980296899.0476 entropy=17.6562 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 117340] reward=-118517589.2 actor_loss=0.2937 critic_loss=145065028741.5652 entropy=17.6618 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 117340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-397804.4 mean_steps=16.2
|
|
[Episode 117350] reward=-156038728.5 actor_loss=0.2755 critic_loss=5414604778882.8447 entropy=17.6683 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 117360] reward=-113813867.5 actor_loss=0.2988 critic_loss=99453964492.8000 entropy=17.6635 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 117360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-392909.7 mean_steps=15.3
|
|
[Episode 117370] reward=-112790308.9 actor_loss=0.4173 critic_loss=103432359695.0588 entropy=17.6591 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 117380] reward=-115756686.1 actor_loss=0.2903 critic_loss=108696324096.0000 entropy=17.6490 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 117380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-472352.5 mean_steps=14.2
|
|
[Episode 117390] reward=-118940362.4 actor_loss=0.2299 critic_loss=114064006712.8889 entropy=17.6524 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 117400] reward=-116404499.3 actor_loss=0.3054 critic_loss=109027583646.8965 entropy=17.6549 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 117400] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-622872.3 mean_steps=11.2
|
|
[Episode 117410] reward=-121369987.3 actor_loss=0.3873 critic_loss=127417567124.2105 entropy=17.6545 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 117420] reward=-119248536.1 actor_loss=0.3895 critic_loss=109811328286.7200 entropy=17.6564 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 117420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-421743.5 mean_steps=16.5
|
|
[Episode 117430] reward=-117835400.2 actor_loss=0.3404 critic_loss=103376923594.1053 entropy=17.6684 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 117440] reward=-117289379.0 actor_loss=0.2355 critic_loss=110490974149.4857 entropy=17.6766 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 117440] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-299114.5 mean_steps=16.5
|
|
[Episode 117450] reward=-117135504.7 actor_loss=0.3052 critic_loss=106633908854.1538 entropy=17.6767 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 117460] reward=-123306136.6 actor_loss=0.3154 critic_loss=114434314391.7037 entropy=17.6655 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 117460] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-339136.8 mean_steps=17.1
|
|
[Episode 117470] reward=-122415948.1 actor_loss=0.3144 critic_loss=110826445704.9302 entropy=17.6599 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 117480] reward=-121027478.2 actor_loss=0.2428 critic_loss=113066062064.9412 entropy=17.6489 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 117480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570791.9 mean_steps=13.9
|
|
[Episode 117490] reward=-121419603.0 actor_loss=0.2134 critic_loss=113517056215.5789 entropy=17.6430 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 117500] reward=-121852938.0 actor_loss=0.2894 critic_loss=109752707147.8519 entropy=17.6377 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 117500] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-326000.0 mean_steps=16.2
|
|
[Episode 117510] reward=-118761835.0 actor_loss=0.2417 critic_loss=105329558232.1778 entropy=17.6458 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 117520] reward=-113888572.5 actor_loss=0.3536 critic_loss=99166463590.4000 entropy=17.6244 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 117520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461098.7 mean_steps=14.4
|
|
[Episode 117530] reward=-118862530.0 actor_loss=0.2443 critic_loss=107012490152.2286 entropy=17.6109 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 117540] reward=-118213940.5 actor_loss=0.2349 critic_loss=105651086998.5882 entropy=17.6010 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 117540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453822.6 mean_steps=15.9
|
|
[Episode 117550] reward=-122290038.5 actor_loss=0.2577 critic_loss=109991583744.0000 entropy=17.5950 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 117560] reward=-121278711.9 actor_loss=0.2347 critic_loss=106607601891.5556 entropy=17.5944 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 117560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565609.6 mean_steps=13.4
|
|
[Episode 117570] reward=-114389940.5 actor_loss=0.3104 critic_loss=210757121755.4286 entropy=17.6024 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 117580] reward=-120941763.6 actor_loss=0.2281 critic_loss=105710754816.0000 entropy=17.6053 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 117580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416435.0 mean_steps=15.2
|
|
[Episode 117590] reward=-116657463.4 actor_loss=0.3578 critic_loss=103459054861.4737 entropy=17.6085 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 117600] reward=-119112200.8 actor_loss=0.3992 critic_loss=104503258377.4815 entropy=17.5976 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 117600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-502936.3 mean_steps=12.8
|
|
[Episode 117610] reward=-122202492.6 actor_loss=0.3195 critic_loss=108521216409.6000 entropy=17.5920 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 117620] reward=-120462316.2 actor_loss=0.2975 critic_loss=122454521173.3333 entropy=17.6005 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 117620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-401178.4 mean_steps=15.4
|
|
[Episode 117630] reward=-115668284.4 actor_loss=0.2643 critic_loss=101113426505.1429 entropy=17.5974 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 117640] reward=-120763920.6 actor_loss=0.2933 critic_loss=106993347788.8000 entropy=17.6146 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 117640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-470573.9 mean_steps=13.0
|
|
[Episode 117650] reward=-114650852.3 actor_loss=0.3660 critic_loss=98216790624.8649 entropy=17.6033 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 117660] reward=-118744582.7 actor_loss=0.2015 critic_loss=99402711598.5455 entropy=17.6053 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 117660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492744.3 mean_steps=13.9
|
|
[Episode 117670] reward=-120239297.5 actor_loss=0.2194 critic_loss=105965398874.8387 entropy=17.5981 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 117680] reward=-109404637.9 actor_loss=0.3071 critic_loss=94401823084.0889 entropy=17.6138 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 117680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462747.5 mean_steps=13.6
|
|
[Episode 117690] reward=-112856326.7 actor_loss=0.2006 critic_loss=100529238774.5185 entropy=17.6065 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 117700] reward=-114234365.5 actor_loss=0.3860 critic_loss=105403835112.7273 entropy=17.6058 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 117700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-389957.2 mean_steps=17.1
|
|
[Episode 117710] reward=-123712122.1 actor_loss=0.2588 critic_loss=120324143380.7568 entropy=17.5949 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 117720] reward=-127507843.4 actor_loss=0.2543 critic_loss=245173028540.6316 entropy=17.5950 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 117720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-611980.0 mean_steps=12.7
|
|
[Episode 117730] reward=-136921078.9 actor_loss=0.3539 critic_loss=1695674249307.0222 entropy=17.5922 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 117740] reward=-113861923.7 actor_loss=0.3371 critic_loss=101528087130.3529 entropy=17.5802 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 117740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519336.1 mean_steps=14.3
|
|
[Episode 117750] reward=-120917563.7 actor_loss=0.2765 critic_loss=112766688051.2000 entropy=17.5775 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 117760] reward=-116432980.3 actor_loss=0.2997 critic_loss=103129856591.6444 entropy=17.5758 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 117760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528870.4 mean_steps=13.4
|
|
[Episode 117770] reward=-118513597.8 actor_loss=0.2860 critic_loss=106884997939.2000 entropy=17.5672 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 117780] reward=-118085785.1 actor_loss=0.2419 critic_loss=111081412135.3846 entropy=17.5764 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 117780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512821.4 mean_steps=14.0
|
|
[Episode 117790] reward=-115256784.6 actor_loss=0.3084 critic_loss=106020531931.4286 entropy=17.5773 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 117800] reward=-115623668.4 actor_loss=0.3630 critic_loss=103919467633.7778 entropy=17.5802 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 117800] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-311046.8 mean_steps=16.6
|
|
[Episode 117810] reward=-114441934.0 actor_loss=0.2229 critic_loss=104076300196.9778 entropy=17.5742 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 117820] reward=-116007085.8 actor_loss=0.2534 critic_loss=108127534011.7333 entropy=17.5617 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 117820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-586374.6 mean_steps=11.7
|
|
[Episode 117830] reward=-112821831.7 actor_loss=0.4394 critic_loss=108745821570.8445 entropy=17.5696 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 117840] reward=-115523601.7 actor_loss=0.2775 critic_loss=95556074259.6923 entropy=17.5607 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 117840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471140.0 mean_steps=15.2
|
|
[Episode 117850] reward=-118437033.4 actor_loss=0.2777 critic_loss=105573366533.6889 entropy=17.5588 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 117860] reward=-117719332.9 actor_loss=0.2619 critic_loss=104840697901.5111 entropy=17.5513 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 117860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458512.5 mean_steps=15.8
|
|
[Episode 117870] reward=-121999047.9 actor_loss=0.2450 critic_loss=107407034140.4444 entropy=17.5513 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 117880] reward=-120347627.9 actor_loss=0.3118 critic_loss=108996744533.3333 entropy=17.5496 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 117880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563384.5 mean_steps=13.7
|
|
[Episode 117890] reward=-115654642.9 actor_loss=0.2804 critic_loss=101552834332.4444 entropy=17.5552 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 117900] reward=-122422775.5 actor_loss=0.3096 critic_loss=284526135796.6222 entropy=17.5486 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 117900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474658.1 mean_steps=14.9
|
|
[Episode 117910] reward=-117056140.6 actor_loss=0.3056 critic_loss=102445955845.6889 entropy=17.5411 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 117920] reward=-112861282.8 actor_loss=0.2913 critic_loss=99802194648.1778 entropy=17.5386 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 117920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409644.6 mean_steps=16.1
|
|
[Episode 117930] reward=-110851291.0 actor_loss=0.3815 critic_loss=99041084666.3111 entropy=17.5507 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 117940] reward=-111970892.1 actor_loss=0.3491 critic_loss=93786050418.7586 entropy=17.5340 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 117940] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-304501.8 mean_steps=16.7
|
|
[Episode 117950] reward=-115385307.1 actor_loss=0.3015 critic_loss=95563039266.1333 entropy=17.5483 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 117960] reward=-120290967.9 actor_loss=0.2174 critic_loss=114545894058.6667 entropy=17.5526 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 117960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513583.7 mean_steps=14.2
|
|
[Episode 117970] reward=-120467152.8 actor_loss=0.3106 critic_loss=108479358126.8293 entropy=17.5811 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 117980] reward=-132378517.4 actor_loss=0.4913 critic_loss=2382137574339.7646 entropy=17.5900 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 117980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472465.6 mean_steps=14.6
|
|
[Episode 117990] reward=-119498029.4 actor_loss=0.2885 critic_loss=107263081840.6400 entropy=17.5809 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 118000] reward=-113683268.0 actor_loss=0.2551 critic_loss=103420314563.7647 entropy=17.5821 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 118000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507562.7 mean_steps=14.4
|
|
[Episode 118010] reward=-122458954.4 actor_loss=0.2755 critic_loss=117369710910.5778 entropy=17.5795 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 118020] reward=-109026363.6 actor_loss=0.4235 critic_loss=94699251630.0800 entropy=17.5752 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 118020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-320691.2 mean_steps=16.9
|
|
[Episode 118030] reward=-119352081.0 actor_loss=0.2358 critic_loss=102434997043.2000 entropy=17.5632 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 118040] reward=-117566070.1 actor_loss=0.3327 critic_loss=103937467392.0000 entropy=17.5686 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 118040] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-578998.0 mean_steps=11.4
|
|
[Episode 118050] reward=-112878791.2 actor_loss=0.2690 critic_loss=105657400779.0345 entropy=17.5714 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 118060] reward=-116707337.0 actor_loss=0.3017 critic_loss=102927614537.1429 entropy=17.5683 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 118060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529759.3 mean_steps=13.2
|
|
[Episode 118070] reward=-114017244.6 actor_loss=0.4205 critic_loss=101264039745.4884 entropy=17.5761 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 118080] reward=-110892312.8 actor_loss=0.3376 critic_loss=102460930093.5111 entropy=17.5790 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 118080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-642026.1 mean_steps=12.3
|
|
[Episode 118090] reward=-119495005.9 actor_loss=0.3101 critic_loss=104590338366.5778 entropy=17.5676 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 118100] reward=-119115419.8 actor_loss=0.2935 critic_loss=104671746457.6000 entropy=17.5644 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 118100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386299.9 mean_steps=16.3
|
|
[Episode 118110] reward=-119450888.0 actor_loss=0.2736 critic_loss=107070394094.9333 entropy=17.5801 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 118120] reward=-113045577.2 actor_loss=0.2521 critic_loss=99215541733.0526 entropy=17.5842 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 118120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398615.1 mean_steps=16.2
|
|
[Episode 118130] reward=-118612662.7 actor_loss=0.2475 critic_loss=132657203645.2174 entropy=17.5889 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 118140] reward=-115604179.1 actor_loss=0.3571 critic_loss=106403580090.1818 entropy=17.5863 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 118140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386085.3 mean_steps=15.9
|
|
[Episode 118150] reward=-112970044.3 actor_loss=0.2710 critic_loss=100301489766.4000 entropy=17.5979 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 118160] reward=-117930043.4 actor_loss=0.2524 critic_loss=104996762783.2889 entropy=17.5904 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 118160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447940.8 mean_steps=15.4
|
|
[Episode 118170] reward=-114045421.7 actor_loss=0.3200 critic_loss=100629282542.9333 entropy=17.5780 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 118180] reward=-114648329.3 actor_loss=0.2945 critic_loss=101260303384.9756 entropy=17.5751 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 118180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471902.2 mean_steps=14.0
|
|
[Episode 118190] reward=-115071661.2 actor_loss=0.3544 critic_loss=105940941649.1707 entropy=17.5820 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 118200] reward=-113966577.6 actor_loss=0.3021 critic_loss=98614743131.0222 entropy=17.5853 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 118200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435336.7 mean_steps=14.4
|
|
[Episode 118210] reward=-118309572.5 actor_loss=0.3656 critic_loss=105453750590.5778 entropy=17.5947 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 118220] reward=-123814245.7 actor_loss=0.2467 critic_loss=116405572221.1555 entropy=17.5962 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 118220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-516109.7 mean_steps=15.2
|
|
[Episode 118230] reward=-121823987.2 actor_loss=0.2471 critic_loss=112503753932.8000 entropy=17.5911 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 118240] reward=-116191273.0 actor_loss=0.2834 critic_loss=110873761319.3846 entropy=17.5991 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 118240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-609537.4 mean_steps=13.8
|
|
[Episode 118250] reward=-114311808.5 actor_loss=0.3109 critic_loss=102396697330.5263 entropy=17.5899 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 118260] reward=-2833460858.3 actor_loss=0.3255 critic_loss=17761090119185932.0000 entropy=17.6087 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 118260] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-303466.9 mean_steps=16.4
|
|
[Episode 118270] reward=-1193506425.8 actor_loss=0.4835 critic_loss=3086483781850817.5000 entropy=17.6098 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 118280] reward=-116846249.0 actor_loss=0.3293 critic_loss=112770315059.2000 entropy=17.6307 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 118280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574177.8 mean_steps=13.8
|
|
[Episode 118290] reward=-118510812.8 actor_loss=0.3039 critic_loss=109115183650.1333 entropy=17.6604 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 118300] reward=-1655082051.6 actor_loss=0.2297 critic_loss=4671027975238451.0000 entropy=17.6629 approx_kl=-0.0017 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Eval 118300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-415589.9 mean_steps=14.7
|
|
[Episode 118310] reward=-118493544.5 actor_loss=0.3417 critic_loss=290993262006.8571 entropy=17.6705 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 118320] reward=-2398803864.9 actor_loss=0.2242 critic_loss=12656551719687510.0000 entropy=17.6731 approx_kl=-0.0016 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 118320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-174643594.6 mean_steps=19.8
|
|
[Episode 118330] reward=-4963619337.5 actor_loss=0.3168 critic_loss=30010069318637248.0000 entropy=17.6828 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1094 front_blocked=0
|
|
[Episode 118340] reward=-4055003605.3 actor_loss=0.2478 critic_loss=35188124410871080.0000 entropy=17.6831 approx_kl=-0.0020 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Eval 118340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-150849994.3 mean_steps=22.9
|
|
[Episode 118350] reward=-3561037907.2 actor_loss=0.3234 critic_loss=20299324236735740.0000 entropy=17.6919 approx_kl=-0.0009 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 118360] reward=-1065007199.2 actor_loss=0.1621 critic_loss=2415621889280500.5000 entropy=17.7087 approx_kl=-0.0007 kl_stop=0 intervention_rate=0.1146 front_blocked=0
|
|
[Eval 118360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-158085828.4 mean_steps=21.8
|
|
[Episode 118370] reward=-3233025754.1 actor_loss=0.1938 critic_loss=18600028872842352.0000 entropy=17.7314 approx_kl=-0.0009 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Episode 118380] reward=-639306347.0 actor_loss=15.2769 critic_loss=789596581116859.7500 entropy=17.7345 approx_kl=-0.0003 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 118380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-575159.7 mean_steps=14.2
|
|
[Episode 118390] reward=-1459662619.0 actor_loss=0.1861 critic_loss=4465635116985731.0000 entropy=17.7477 approx_kl=-0.0013 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Episode 118400] reward=-4705706660.5 actor_loss=0.1967 critic_loss=22777784065589248.0000 entropy=17.7459 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1055 front_blocked=0
|
|
[Eval 118400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517423.7 mean_steps=15.2
|
|
[Episode 118410] reward=-9063159919.0 actor_loss=0.1830 critic_loss=75133709935752720.0000 entropy=17.7615 approx_kl=-0.0003 kl_stop=0 intervention_rate=0.0996 front_blocked=0
|
|
[Episode 118420] reward=-8760042423.7 actor_loss=0.1316 critic_loss=54741208363987400.0000 entropy=17.7662 approx_kl=0.0058 kl_stop=0 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 118420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-443950.7 mean_steps=16.4
|
|
[Episode 118430] reward=-17103239106.9 actor_loss=17.7625 critic_loss=155353539804834464.0000 entropy=17.7916 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 118440] reward=-824701403.2 actor_loss=0.2538 critic_loss=1391138256296345.5000 entropy=17.7955 approx_kl=-0.0014 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 118440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459000.7 mean_steps=15.7
|
|
[Episode 118450] reward=-2455333234.1 actor_loss=0.2489 critic_loss=13201871082898500.0000 entropy=17.8031 approx_kl=0.0001 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 118460] reward=-127392240.3 actor_loss=0.3283 critic_loss=241116282880.0000 entropy=17.8257 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 118460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-531339.0 mean_steps=15.2
|
|
[Episode 118470] reward=-124724321.4 actor_loss=0.2736 critic_loss=160496200125.2174 entropy=17.8325 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 118480] reward=-125849933.5 actor_loss=0.2305 critic_loss=130375056384.0000 entropy=17.8352 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 118480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-497764.8 mean_steps=15.7
|
|
[Episode 118490] reward=-120475685.1 actor_loss=0.2785 critic_loss=112419878684.4444 entropy=17.8416 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 118500] reward=-117866502.2 actor_loss=0.2439 critic_loss=109297855365.1200 entropy=17.8426 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 118500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482795.6 mean_steps=15.1
|
|
[Episode 118510] reward=-2485426694.9 actor_loss=0.2592 critic_loss=13490887425657696.0000 entropy=17.8319 approx_kl=0.0001 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 118520] reward=-1236180923.8 actor_loss=0.2843 critic_loss=3586814776396276.5000 entropy=17.8300 approx_kl=-0.0015 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 118520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-635752.9 mean_steps=13.2
|
|
[Episode 118530] reward=-124854801.9 actor_loss=0.2949 critic_loss=127532886016.0000 entropy=17.8602 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 118540] reward=-1093395155.0 actor_loss=0.2534 critic_loss=2579825748694357.5000 entropy=17.8751 approx_kl=0.0016 kl_stop=1 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 118540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523713.3 mean_steps=14.1
|
|
[Episode 118550] reward=-113415061.2 actor_loss=0.3806 critic_loss=105765176173.7143 entropy=17.8823 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 118560] reward=-116262047.0 actor_loss=0.3235 critic_loss=106601210486.1538 entropy=17.8821 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 118560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-456635.5 mean_steps=15.9
|
|
[Episode 118570] reward=-113531521.0 actor_loss=0.2698 critic_loss=103854842148.5714 entropy=17.8984 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 118580] reward=-122419275.3 actor_loss=0.2402 critic_loss=120663948449.6842 entropy=17.9136 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 118580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-381303.7 mean_steps=15.4
|
|
[Episode 118590] reward=-120268643.5 actor_loss=0.4000 critic_loss=108816027166.1176 entropy=17.9167 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 118600] reward=-121224507.2 actor_loss=0.3605 critic_loss=111630158324.6222 entropy=17.9305 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 118600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467562.2 mean_steps=14.8
|
|
[Episode 118610] reward=-118156539.9 actor_loss=0.2998 critic_loss=114938208256.0000 entropy=17.9308 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 118620] reward=-123485834.0 actor_loss=0.2353 critic_loss=111014523252.3636 entropy=17.9331 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 118620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517555.8 mean_steps=13.3
|
|
[Episode 118630] reward=-119819509.9 actor_loss=0.3804 critic_loss=113200248077.4737 entropy=17.9376 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 118640] reward=-125554064.4 actor_loss=0.2100 critic_loss=112418883642.5143 entropy=17.9362 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 118640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458543.2 mean_steps=14.4
|
|
[Episode 118650] reward=-120979363.8 actor_loss=0.1932 critic_loss=135026500567.0400 entropy=17.9288 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 118660] reward=-126210099.4 actor_loss=0.1927 critic_loss=122440932010.6667 entropy=17.9287 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 118660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-439855.1 mean_steps=16.4
|
|
[Episode 118670] reward=-123998296.6 actor_loss=0.2723 critic_loss=177526750369.6842 entropy=17.9281 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 118680] reward=-123793486.5 actor_loss=0.2848 critic_loss=114019847281.7778 entropy=17.9221 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 118680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383800.9 mean_steps=16.0
|
|
[Episode 118690] reward=-119167819.0 actor_loss=0.3976 critic_loss=116085681555.3939 entropy=17.9208 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 118700] reward=-121014175.7 actor_loss=0.3414 critic_loss=113449980276.3636 entropy=17.9320 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 118700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401933.3 mean_steps=16.4
|
|
[Episode 118710] reward=-118380180.6 actor_loss=0.2950 critic_loss=104098944934.9565 entropy=17.9345 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 118720] reward=-119397645.7 actor_loss=0.3467 critic_loss=111144369447.8222 entropy=17.9221 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 118720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483012.4 mean_steps=15.2
|
|
[Episode 118730] reward=-120528498.1 actor_loss=0.2553 critic_loss=110524694186.6667 entropy=17.9191 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 118740] reward=-125230896.3 actor_loss=0.3328 critic_loss=116697098012.4444 entropy=17.9178 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 118740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-540625.4 mean_steps=15.1
|
|
[Episode 118750] reward=-124043683.2 actor_loss=0.2990 critic_loss=118105700433.9200 entropy=17.9065 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 118760] reward=-115424356.7 actor_loss=0.2179 critic_loss=111127437676.0889 entropy=17.8977 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 118760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537391.9 mean_steps=13.2
|
|
[Episode 118770] reward=-119396376.8 actor_loss=0.2910 critic_loss=107683314748.2353 entropy=17.8881 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 118780] reward=-116158056.9 actor_loss=0.3604 critic_loss=107331790740.2105 entropy=17.8831 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 118780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469921.9 mean_steps=14.7
|
|
[Episode 118790] reward=-119205359.7 actor_loss=0.2824 critic_loss=105896597731.5556 entropy=17.8724 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 118800] reward=-118913773.2 actor_loss=0.2750 critic_loss=109287550789.8182 entropy=17.8650 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 118800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534943.4 mean_steps=13.2
|
|
[Episode 118810] reward=-121483054.4 actor_loss=0.2249 critic_loss=107326816886.1538 entropy=17.8568 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 118820] reward=-120488016.8 actor_loss=0.3349 critic_loss=104539711988.6222 entropy=17.8515 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 118820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603728.6 mean_steps=13.1
|
|
[Episode 118830] reward=-123488507.0 actor_loss=0.2140 critic_loss=108303973800.5854 entropy=17.8423 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 118840] reward=-119375155.8 actor_loss=0.2754 critic_loss=104454591953.4545 entropy=17.8593 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 118840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-409262.0 mean_steps=14.4
|
|
[Episode 118850] reward=-121123578.0 actor_loss=0.3294 critic_loss=125212903785.4118 entropy=17.8345 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 118860] reward=-119664533.3 actor_loss=0.2265 critic_loss=122015760998.4000 entropy=17.8291 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 118860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444796.4 mean_steps=15.4
|
|
[Episode 118870] reward=-122402271.1 actor_loss=0.3044 critic_loss=115624717146.8387 entropy=17.8095 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 118880] reward=-119829455.8 actor_loss=0.2756 critic_loss=115200796747.8519 entropy=17.8094 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 118880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501001.8 mean_steps=14.0
|
|
[Episode 118890] reward=-119058688.2 actor_loss=0.3619 critic_loss=108252172288.0000 entropy=17.8085 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 118900] reward=-122177838.1 actor_loss=0.3016 critic_loss=107839590985.1429 entropy=17.7975 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 118900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-572847.9 mean_steps=13.4
|
|
[Episode 118910] reward=-174296619.5 actor_loss=23.0400 critic_loss=10498383343856.9414 entropy=17.8004 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 118920] reward=-118591167.4 actor_loss=0.2611 critic_loss=126732338244.2667 entropy=17.8086 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 118920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-364561.0 mean_steps=15.1
|
|
[Episode 118930] reward=-120125783.3 actor_loss=0.3371 critic_loss=110598572393.4118 entropy=17.7980 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 118940] reward=-119160541.0 actor_loss=0.2278 critic_loss=112382669775.2381 entropy=17.8085 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 118940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-362350.7 mean_steps=15.9
|
|
[Episode 118950] reward=-120653851.6 actor_loss=0.2847 critic_loss=108806552689.7778 entropy=17.8109 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 118960] reward=-122652692.8 actor_loss=0.2350 critic_loss=159580170563.3684 entropy=17.7925 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 118960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484662.4 mean_steps=13.9
|
|
[Episode 118970] reward=-117747516.5 actor_loss=0.2896 critic_loss=102499757442.8445 entropy=17.7883 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 118980] reward=-124539632.2 actor_loss=0.2350 critic_loss=120083010796.3077 entropy=17.7844 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 118980] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-254284.3 mean_steps=18.2
|
|
[Episode 118990] reward=-124053878.6 actor_loss=0.2424 critic_loss=120482811630.9333 entropy=17.7705 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 119000] reward=-119051546.5 actor_loss=0.2999 critic_loss=113169706734.9333 entropy=17.7639 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 119000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511017.8 mean_steps=14.2
|
|
[Episode 119010] reward=-119982377.1 actor_loss=0.3009 critic_loss=111414720999.6190 entropy=17.7609 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 119020] reward=-119904895.8 actor_loss=0.2463 critic_loss=106022793052.1600 entropy=17.7641 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 119020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-464436.7 mean_steps=15.4
|
|
[Episode 119030] reward=-123332607.1 actor_loss=0.2977 critic_loss=114882137656.8889 entropy=17.7709 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 119040] reward=-119548681.9 actor_loss=0.2824 critic_loss=106982333395.4783 entropy=17.7633 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 119040] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-688039.7 mean_steps=11.6
|
|
[Episode 119050] reward=-123829650.9 actor_loss=0.2586 critic_loss=109401812101.5652 entropy=17.7541 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 119060] reward=-125884955.9 actor_loss=0.1988 critic_loss=122691006976.0000 entropy=17.7596 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 119060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553754.8 mean_steps=14.0
|
|
[Episode 119070] reward=-118907355.7 actor_loss=0.3109 critic_loss=104506418614.8571 entropy=17.7554 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 119080] reward=-114859588.6 actor_loss=0.3487 critic_loss=106918328687.5897 entropy=17.7621 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 119080] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-216917.4 mean_steps=17.7
|
|
[Episode 119090] reward=-121123142.4 actor_loss=0.1977 critic_loss=119801092029.9355 entropy=17.7740 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 119100] reward=-119389570.3 actor_loss=0.3668 critic_loss=104322488897.6410 entropy=17.7855 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 119100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-564067.9 mean_steps=14.4
|
|
[Episode 119110] reward=-117420228.8 actor_loss=0.2533 critic_loss=114048506083.5556 entropy=17.7882 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 119120] reward=-122487058.6 actor_loss=0.2796 critic_loss=114421676681.3659 entropy=17.7902 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 119120] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-353946.8 mean_steps=17.0
|
|
[Episode 119130] reward=-118457840.7 actor_loss=0.3071 critic_loss=107889256261.8182 entropy=17.7933 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 119140] reward=-114311749.8 actor_loss=0.3446 critic_loss=103676650216.7273 entropy=17.8038 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 119140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-514055.5 mean_steps=15.8
|
|
[Episode 119150] reward=-121841792.5 actor_loss=0.2341 critic_loss=109877259170.9091 entropy=17.8025 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 119160] reward=-121788259.3 actor_loss=0.2495 critic_loss=112110878485.9429 entropy=17.8045 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 119160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-690892.8 mean_steps=12.6
|
|
[Episode 119170] reward=-122255641.9 actor_loss=0.3350 critic_loss=164728796610.5600 entropy=17.8161 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 119180] reward=-119690958.2 actor_loss=0.3142 critic_loss=120575187899.7333 entropy=17.8174 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 119180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578841.7 mean_steps=12.8
|
|
[Episode 119190] reward=-157240848.6 actor_loss=0.2779 critic_loss=4602705098979.5557 entropy=17.8359 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 119200] reward=-120963530.5 actor_loss=0.2368 critic_loss=117354660584.7273 entropy=17.8425 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 119200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501509.8 mean_steps=13.8
|
|
[Episode 119210] reward=-630297990.6 actor_loss=0.4102 critic_loss=736538687712642.8750 entropy=17.8434 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 119220] reward=-978684783.9 actor_loss=0.2907 critic_loss=2431206547958442.5000 entropy=17.8509 approx_kl=-0.0016 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 119220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490152.8 mean_steps=14.8
|
|
[Episode 119230] reward=-123992864.5 actor_loss=0.2763 critic_loss=121135020819.6923 entropy=17.8481 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 119240] reward=-121813731.8 actor_loss=0.2614 critic_loss=114137913753.6000 entropy=17.8529 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 119240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554068.4 mean_steps=13.5
|
|
[Episode 119250] reward=-122704900.3 actor_loss=0.2169 critic_loss=124362323373.4194 entropy=17.8552 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 119260] reward=-2653797813.5 actor_loss=0.2898 critic_loss=14878109117681208.0000 entropy=17.8679 approx_kl=-0.0010 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Eval 119260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481561.1 mean_steps=13.9
|
|
[Episode 119270] reward=-118445589.2 actor_loss=0.3863 critic_loss=110238013763.3684 entropy=17.8586 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 119280] reward=-120931160.1 actor_loss=0.2374 critic_loss=108015620870.9189 entropy=17.8615 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 119280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570538.0 mean_steps=13.7
|
|
[Episode 119290] reward=-124082054.3 actor_loss=0.3634 critic_loss=123695838339.2821 entropy=17.8600 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 119300] reward=-118320833.8 actor_loss=0.3268 critic_loss=108534393332.6222 entropy=17.8541 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 119300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463437.8 mean_steps=15.4
|
|
[Episode 119310] reward=-119044747.3 actor_loss=0.3327 critic_loss=173798777742.2222 entropy=17.8541 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 119320] reward=-649057104.9 actor_loss=0.4558 critic_loss=778721160208930.1250 entropy=17.8615 approx_kl=-0.0026 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 119320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-508027.3 mean_steps=13.2
|
|
[Episode 119330] reward=-116278661.0 actor_loss=0.4462 critic_loss=110794060595.2000 entropy=17.8595 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 119340] reward=-115586110.2 actor_loss=0.2910 critic_loss=114662377683.8621 entropy=17.8596 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 119340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484490.2 mean_steps=14.1
|
|
[Episode 119350] reward=-117517635.8 actor_loss=0.3356 critic_loss=106098243265.4222 entropy=17.8569 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 119360] reward=-120658612.4 actor_loss=0.2762 critic_loss=106011959296.0000 entropy=17.8419 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 119360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-566580.0 mean_steps=11.5
|
|
[Episode 119370] reward=-117345029.3 actor_loss=0.2984 critic_loss=108397202157.2683 entropy=17.8327 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 119380] reward=-978070007.8 actor_loss=0.3957 critic_loss=2062396191081904.2500 entropy=17.8309 approx_kl=-0.0002 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 119380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-365349.5 mean_steps=16.0
|
|
[Episode 119390] reward=-2327346924.1 actor_loss=0.2889 critic_loss=11941748162860010.0000 entropy=17.8459 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 119400] reward=-120136409.2 actor_loss=0.2855 critic_loss=110129833445.0526 entropy=17.8266 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 119400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513858.6 mean_steps=12.8
|
|
[Episode 119410] reward=-109877943.2 actor_loss=0.3218 critic_loss=97093406264.8889 entropy=17.8394 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 119420] reward=-1521952074.5 actor_loss=2.5605 critic_loss=4924113298653184.0000 entropy=17.8388 approx_kl=0.0042 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 119420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395416.7 mean_steps=16.4
|
|
[Episode 119430] reward=-1089553534.1 actor_loss=0.2894 critic_loss=3239759198388952.0000 entropy=17.8408 approx_kl=0.0005 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 119440] reward=-121291008.0 actor_loss=0.3308 critic_loss=117589775889.6552 entropy=17.8435 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 119440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515034.1 mean_steps=14.1
|
|
[Episode 119450] reward=-112140134.9 actor_loss=0.3799 critic_loss=104539229967.0588 entropy=17.8404 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 119460] reward=-118216392.3 actor_loss=0.2203 critic_loss=105818924828.4444 entropy=17.8695 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 119460] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-367197.1 mean_steps=17.1
|
|
[Episode 119470] reward=-120222964.0 actor_loss=0.2463 critic_loss=108564975491.8788 entropy=17.8670 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 119480] reward=-121413278.4 actor_loss=0.3205 critic_loss=112420828046.2222 entropy=17.8572 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 119480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-366905.1 mean_steps=16.0
|
|
[Episode 119490] reward=-120594342.8 actor_loss=0.2749 critic_loss=109656321462.8571 entropy=17.8399 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 119500] reward=-117249129.0 actor_loss=0.3006 critic_loss=112085575207.3846 entropy=17.8391 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 119500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455710.7 mean_steps=14.8
|
|
[Episode 119510] reward=-118503000.1 actor_loss=0.3055 critic_loss=113213362267.0222 entropy=17.8611 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 119520] reward=-121715518.2 actor_loss=0.1995 critic_loss=108881427023.6444 entropy=17.8745 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 119520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496949.3 mean_steps=14.2
|
|
[Episode 119530] reward=-121143092.1 actor_loss=0.3902 critic_loss=116389442998.8571 entropy=17.8673 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 119540] reward=-1211925193.4 actor_loss=0.3566 critic_loss=3226931950471668.5000 entropy=17.8859 approx_kl=-0.0006 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 119540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-644514.2 mean_steps=13.2
|
|
[Episode 119550] reward=-114400106.5 actor_loss=0.3039 critic_loss=100707887058.4889 entropy=17.8803 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 119560] reward=-118707820.2 actor_loss=0.3116 critic_loss=105212458077.0909 entropy=17.8766 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 119560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480756.2 mean_steps=15.0
|
|
[Episode 119570] reward=-117820620.3 actor_loss=0.2998 critic_loss=105831785540.2667 entropy=17.8690 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 119580] reward=-475067071.3 actor_loss=0.3922 critic_loss=404975492530176.0000 entropy=17.8367 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 119580] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-261269.0 mean_steps=18.6
|
|
[Episode 119590] reward=-116795092.8 actor_loss=0.3348 critic_loss=108983515818.6667 entropy=17.8374 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 119600] reward=-120864160.6 actor_loss=0.3422 critic_loss=127937183516.4444 entropy=17.8624 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 119600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513047.6 mean_steps=13.5
|
|
[Episode 119610] reward=-118020534.2 actor_loss=0.2883 critic_loss=113308089457.7778 entropy=17.8679 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 119620] reward=-120127727.8 actor_loss=0.2806 critic_loss=113459948566.7556 entropy=17.8754 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 119620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-524025.7 mean_steps=15.3
|
|
[Episode 119630] reward=-119570829.9 actor_loss=0.2248 critic_loss=114663534318.9333 entropy=17.8873 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 119640] reward=-117736903.7 actor_loss=0.2994 critic_loss=110677858167.4667 entropy=17.8878 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 119640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589249.0 mean_steps=12.4
|
|
[Episode 119650] reward=-118148495.4 actor_loss=0.2078 critic_loss=113192774899.8095 entropy=17.8793 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 119660] reward=-123671069.8 actor_loss=0.2741 critic_loss=122291552958.1714 entropy=17.8725 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 119660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-629102.8 mean_steps=12.8
|
|
[Episode 119670] reward=-116080716.8 actor_loss=0.2922 critic_loss=105109218072.7742 entropy=17.8871 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 119680] reward=-5967678612.3 actor_loss=0.2304 critic_loss=40298725276012456.0000 entropy=17.8946 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1068 front_blocked=0
|
|
[Eval 119680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476920.6 mean_steps=14.1
|
|
[Episode 119690] reward=-117533700.8 actor_loss=0.2827 critic_loss=106255947483.4286 entropy=17.8913 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 119700] reward=-123369422.1 actor_loss=0.3961 critic_loss=117553986861.1765 entropy=17.8689 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 119700] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-359597.4 mean_steps=17.5
|
|
[Episode 119710] reward=-115876998.1 actor_loss=0.3279 critic_loss=106370482176.0000 entropy=17.8727 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 119720] reward=-113964952.4 actor_loss=0.3266 critic_loss=105279649837.5111 entropy=17.8576 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 119720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-598941.3 mean_steps=12.7
|
|
[Episode 119730] reward=-119759586.5 actor_loss=0.3483 critic_loss=110609154594.1333 entropy=17.8522 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 119740] reward=-122543426.2 actor_loss=0.2716 critic_loss=111641936808.2286 entropy=17.8668 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 119740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-561149.1 mean_steps=14.4
|
|
[Episode 119750] reward=-119019123.8 actor_loss=0.2142 critic_loss=110907087471.3044 entropy=17.8544 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 119760] reward=-122838030.0 actor_loss=0.2543 critic_loss=111600739998.8965 entropy=17.8487 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 119760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592067.4 mean_steps=13.6
|
|
[Episode 119770] reward=-120245089.7 actor_loss=0.3753 critic_loss=119225566822.4000 entropy=17.8368 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 119780] reward=-118674787.1 actor_loss=0.3004 critic_loss=107552394146.9091 entropy=17.8378 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 119780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471935.6 mean_steps=14.9
|
|
[Episode 119790] reward=-126889301.1 actor_loss=0.2725 critic_loss=167667664896.0000 entropy=17.8321 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 119800] reward=-117679522.1 actor_loss=0.3599 critic_loss=105512706560.0000 entropy=17.8236 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 119800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562540.5 mean_steps=12.2
|
|
[Episode 119810] reward=-122771985.0 actor_loss=0.2385 critic_loss=115015386794.6667 entropy=17.8309 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 119820] reward=-917462869.6 actor_loss=0.3055 critic_loss=2070172294988868.2500 entropy=17.8308 approx_kl=-0.0008 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 119820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486009.8 mean_steps=14.0
|
|
[Episode 119830] reward=-3365802597.1 actor_loss=0.3984 critic_loss=24302004142883908.0000 entropy=17.8540 approx_kl=-0.0008 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 119840] reward=-115601190.8 actor_loss=0.3152 critic_loss=110096902531.4595 entropy=17.8800 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 119840] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-693536.8 mean_steps=11.3
|
|
[Episode 119850] reward=-3548694745.9 actor_loss=0.2010 critic_loss=26982176072843628.0000 entropy=17.8869 approx_kl=-0.0013 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 119860] reward=-3894184331.9 actor_loss=0.2537 critic_loss=18097030793084564.0000 entropy=17.9148 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1094 front_blocked=0
|
|
[Eval 119860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438074.7 mean_steps=14.1
|
|
[Episode 119870] reward=-114284391.0 actor_loss=0.3519 critic_loss=108810217579.7895 entropy=17.9224 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 119880] reward=-4994793374.7 actor_loss=0.3457 critic_loss=52026209784992744.0000 entropy=17.9236 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 119880] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641007.3 mean_steps=12.1
|
|
[Episode 119890] reward=-119693430.5 actor_loss=0.2110 critic_loss=121280692464.9412 entropy=17.9239 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 119900] reward=-467899153.6 actor_loss=0.3360 critic_loss=339165794294533.6875 entropy=17.9613 approx_kl=-0.0002 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 119900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539267.3 mean_steps=13.8
|
|
[Episode 119910] reward=-773270010.4 actor_loss=0.1962 critic_loss=1214669005252926.5000 entropy=17.9708 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 119920] reward=-1262089663.8 actor_loss=0.3820 critic_loss=3467295243720021.5000 entropy=17.9663 approx_kl=-0.0004 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 119920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544042.2 mean_steps=14.2
|
|
[Episode 119930] reward=-2357173711.0 actor_loss=0.2753 critic_loss=12207381689570282.0000 entropy=17.9599 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 119940] reward=-117362399.6 actor_loss=0.2028 critic_loss=108853674465.8824 entropy=17.9632 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 119940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371675.9 mean_steps=15.8
|
|
[Episode 119950] reward=-812050439.4 actor_loss=0.3042 critic_loss=1350004253917184.0000 entropy=17.9799 approx_kl=-0.0018 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 119960] reward=-120191433.4 actor_loss=0.2738 critic_loss=108951649226.1053 entropy=17.9958 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 119960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497826.6 mean_steps=14.0
|
|
[Episode 119970] reward=-2549610364.2 actor_loss=0.2716 critic_loss=14244099813201966.0000 entropy=17.9864 approx_kl=0.0003 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 119980] reward=-120698025.2 actor_loss=0.2824 critic_loss=119337858692.7407 entropy=17.9817 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 119980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553771.7 mean_steps=14.2
|
|
[Episode 119990] reward=-116412000.3 actor_loss=0.3071 critic_loss=120677511987.2000 entropy=17.9722 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 120000] reward=-120390684.0 actor_loss=0.2506 critic_loss=124169294080.0000 entropy=17.9741 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 120000] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-343058.2 mean_steps=16.9
|
|
[Episode 120010] reward=-2007205701.7 actor_loss=0.3280 critic_loss=8928117560949692.0000 entropy=17.9845 approx_kl=-0.0003 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 120020] reward=-2002772782.2 actor_loss=0.2707 critic_loss=8813243804919034.0000 entropy=17.9934 approx_kl=-0.0013 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 120020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517757.5 mean_steps=15.1
|
|
[Episode 120030] reward=-7861104863.6 actor_loss=0.1821 critic_loss=71108608606193440.0000 entropy=17.9959 approx_kl=0.0005 kl_stop=0 intervention_rate=0.1048 front_blocked=0
|
|
[Episode 120040] reward=-127074043.9 actor_loss=0.1649 critic_loss=131743741713.8605 entropy=18.0083 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 120040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410446.4 mean_steps=14.9
|
|
[Episode 120050] reward=-3249191075.5 actor_loss=0.1726 critic_loss=22652217630561256.0000 entropy=18.0209 approx_kl=-0.0008 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Episode 120060] reward=-121266012.4 actor_loss=0.3224 critic_loss=121422477312.0000 entropy=18.0264 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 120060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-651147.8 mean_steps=12.4
|
|
[Episode 120070] reward=-119713130.5 actor_loss=0.3111 critic_loss=113930383360.0000 entropy=18.0403 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 120080] reward=-122160588.3 actor_loss=0.2768 critic_loss=123872966701.5111 entropy=18.0243 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 120080] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-305093.0 mean_steps=17.9
|
|
[Episode 120090] reward=-121310382.5 actor_loss=0.2404 critic_loss=119003596337.5484 entropy=18.0249 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 120100] reward=-120732463.8 actor_loss=0.2863 critic_loss=113332954180.2667 entropy=17.9958 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 120100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520670.1 mean_steps=14.2
|
|
[Episode 120110] reward=-2951460770.5 actor_loss=0.2796 critic_loss=19030388962451640.0000 entropy=17.9591 approx_kl=0.0002 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 120120] reward=-122876732.8 actor_loss=0.2751 critic_loss=115766827727.5676 entropy=17.9590 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 120120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434161.1 mean_steps=15.7
|
|
[Episode 120130] reward=-119815919.2 actor_loss=0.2788 critic_loss=115217551459.9024 entropy=17.9621 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 120140] reward=-119415108.1 actor_loss=0.2545 critic_loss=114541188881.8605 entropy=17.9516 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 120140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-476067.4 mean_steps=13.2
|
|
[Episode 120150] reward=-120862181.5 actor_loss=0.2418 critic_loss=113160746253.4737 entropy=17.9475 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 120160] reward=-118939400.2 actor_loss=0.2818 critic_loss=115474667074.7826 entropy=17.9688 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 120160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-635778.6 mean_steps=12.1
|
|
[Episode 120170] reward=-2185689154.7 actor_loss=0.2847 critic_loss=10586146890993572.0000 entropy=17.9786 approx_kl=-0.0001 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 120180] reward=-119733078.4 actor_loss=0.2324 critic_loss=115603822525.9355 entropy=17.9831 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 120180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480370.8 mean_steps=13.7
|
|
[Episode 120190] reward=-129441137.3 actor_loss=0.1604 critic_loss=326904964055.0400 entropy=17.9848 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 120200] reward=-143475953.3 actor_loss=0.3006 critic_loss=2156410592109.7144 entropy=17.9716 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 120200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-564322.1 mean_steps=12.8
|
|
[Episode 120210] reward=-116986085.7 actor_loss=0.2712 critic_loss=110516679293.1555 entropy=17.9710 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 120220] reward=-121867539.2 actor_loss=0.2761 critic_loss=115269893120.0000 entropy=17.9528 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 120220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536514.0 mean_steps=14.2
|
|
[Episode 120230] reward=-121060863.5 actor_loss=0.2612 critic_loss=124500756070.4000 entropy=17.9278 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 120240] reward=-118372333.0 actor_loss=0.3455 critic_loss=111657777561.6000 entropy=17.9257 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 120240] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-287911.9 mean_steps=17.1
|
|
[Episode 120250] reward=-122424674.9 actor_loss=0.2990 critic_loss=123158827281.0667 entropy=17.9538 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 120260] reward=-119301770.3 actor_loss=0.1706 critic_loss=108653166405.8182 entropy=17.9435 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 120260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486958.7 mean_steps=14.8
|
|
[Episode 120270] reward=-113493738.1 actor_loss=0.2313 critic_loss=102628544420.9778 entropy=17.9269 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 120280] reward=-117749103.1 actor_loss=0.2829 critic_loss=108598427940.5714 entropy=17.9227 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 120280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-674277.7 mean_steps=13.6
|
|
[Episode 120290] reward=-115253605.4 actor_loss=0.2981 critic_loss=98435837496.8889 entropy=17.9086 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 120300] reward=-121513573.9 actor_loss=0.2335 critic_loss=114560070815.2889 entropy=17.8964 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 120300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454015.4 mean_steps=15.4
|
|
[Episode 120310] reward=-114459654.1 actor_loss=0.3661 critic_loss=102518745406.5778 entropy=17.8387 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 120320] reward=-119937314.5 actor_loss=0.3229 critic_loss=108553772509.8667 entropy=17.8064 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 120320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474096.0 mean_steps=14.7
|
|
[Episode 120330] reward=-119534995.0 actor_loss=0.2610 critic_loss=104730565563.7333 entropy=17.7828 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 120340] reward=-116447801.0 actor_loss=0.3163 critic_loss=107573510326.0444 entropy=17.7663 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 120340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-667746.3 mean_steps=12.9
|
|
[Episode 120350] reward=-114861029.7 actor_loss=0.3386 critic_loss=102461592105.5135 entropy=17.7579 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 120360] reward=-123812126.4 actor_loss=0.2361 critic_loss=113991928242.4242 entropy=17.7619 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 120360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446422.0 mean_steps=14.3
|
|
[Episode 120370] reward=-118366244.0 actor_loss=0.3997 critic_loss=106012756755.6923 entropy=17.7720 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 120380] reward=-118955826.0 actor_loss=0.2166 critic_loss=105535289441.5238 entropy=17.7744 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 120380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465921.2 mean_steps=14.2
|
|
[Episode 120390] reward=-112025346.8 actor_loss=0.4784 critic_loss=107079909922.1333 entropy=17.7636 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 120400] reward=-847343269.7 actor_loss=0.3588 critic_loss=1514544014073674.0000 entropy=17.7717 approx_kl=-0.0003 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 120400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562142.4 mean_steps=12.4
|
|
[Episode 120410] reward=-119312065.3 actor_loss=0.2570 critic_loss=106942772645.6471 entropy=17.7796 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 120420] reward=-121628822.2 actor_loss=0.3168 critic_loss=106867110229.3333 entropy=17.7879 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 120420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-476845.9 mean_steps=12.6
|
|
[Episode 120430] reward=-114501163.9 actor_loss=0.4005 critic_loss=107518002790.4000 entropy=17.7834 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 120440] reward=-117604386.2 actor_loss=0.2626 critic_loss=108432085170.0870 entropy=17.7885 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 120440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-404067.3 mean_steps=16.1
|
|
[Episode 120450] reward=-348131539.9 actor_loss=0.3725 critic_loss=162656425532269.7188 entropy=17.8063 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 120460] reward=-872427841.2 actor_loss=0.2313 critic_loss=1590424272051313.7500 entropy=17.8094 approx_kl=0.0007 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 120460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-594375.4 mean_steps=11.8
|
|
[Episode 120470] reward=-123766830.5 actor_loss=0.2066 critic_loss=118360993011.8095 entropy=17.8226 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 120480] reward=-115816958.6 actor_loss=0.3307 critic_loss=113969018743.4667 entropy=17.8316 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 120480] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-399292.8 mean_steps=16.7
|
|
[Episode 120490] reward=-119200035.6 actor_loss=0.2610 critic_loss=110877198848.0000 entropy=17.8413 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 120500] reward=-118526755.1 actor_loss=0.3163 critic_loss=115422887424.0000 entropy=17.8405 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 120500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-670599.9 mean_steps=12.2
|
|
[Episode 120510] reward=-119102936.2 actor_loss=0.2730 critic_loss=108676362435.0476 entropy=17.8481 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 120520] reward=-119317580.8 actor_loss=0.2851 critic_loss=105082020386.1333 entropy=17.8431 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 120520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533688.2 mean_steps=13.2
|
|
[Episode 120530] reward=-116260846.7 actor_loss=0.2716 critic_loss=111323808836.2667 entropy=17.8354 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 120540] reward=-122977951.3 actor_loss=0.2062 critic_loss=113354089540.2667 entropy=17.8311 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 120540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553173.3 mean_steps=14.5
|
|
[Episode 120550] reward=-119719222.9 actor_loss=0.2470 critic_loss=109730501330.8235 entropy=17.8337 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 120560] reward=-120272525.3 actor_loss=0.2982 critic_loss=104874530061.4737 entropy=17.8282 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 120560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510260.3 mean_steps=13.2
|
|
[Episode 120570] reward=-122634862.8 actor_loss=0.2272 critic_loss=112921705585.7778 entropy=17.8285 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 120580] reward=-120166648.3 actor_loss=0.2864 critic_loss=111893093841.4545 entropy=17.7934 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 120580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523577.0 mean_steps=14.1
|
|
[Episode 120590] reward=-122240713.1 actor_loss=0.3219 critic_loss=108578592888.4706 entropy=17.7985 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 120600] reward=-121030087.6 actor_loss=0.2595 critic_loss=112794693001.8462 entropy=17.7900 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 120600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425132.6 mean_steps=15.3
|
|
[Episode 120610] reward=-129226399.9 actor_loss=0.2732 critic_loss=683642734268.6316 entropy=17.7789 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 120620] reward=-112067497.0 actor_loss=0.3677 critic_loss=102862556851.8919 entropy=17.7606 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 120620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440118.2 mean_steps=14.4
|
|
[Episode 120630] reward=-120800573.7 actor_loss=0.4008 critic_loss=111118810820.9231 entropy=17.7492 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 120640] reward=-117396172.3 actor_loss=0.2352 critic_loss=101087923996.4444 entropy=17.7406 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 120640] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379244.5 mean_steps=15.8
|
|
[Episode 120650] reward=-122073178.0 actor_loss=0.3670 critic_loss=112893012179.8621 entropy=17.7548 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 120660] reward=-120880257.1 actor_loss=0.2519 critic_loss=105961821846.5882 entropy=17.7377 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 120660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567356.9 mean_steps=13.6
|
|
[Episode 120670] reward=-120776513.0 actor_loss=0.2489 critic_loss=104257001517.5111 entropy=17.7339 approx_kl=0.0106 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 120680] reward=-117105483.5 actor_loss=0.2931 critic_loss=112878407920.9412 entropy=17.7429 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 120680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486164.2 mean_steps=13.8
|
|
[Episode 120690] reward=-116358247.2 actor_loss=0.4552 critic_loss=119246652757.3333 entropy=17.7580 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 120700] reward=-121881598.4 actor_loss=0.2371 critic_loss=110286550964.1481 entropy=17.7523 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 120700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600798.7 mean_steps=12.6
|
|
[Episode 120710] reward=-116285562.6 actor_loss=0.3685 critic_loss=109343843072.0000 entropy=17.7458 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 120720] reward=-116118135.1 actor_loss=0.3256 critic_loss=104619036572.0976 entropy=17.7497 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 120720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587283.3 mean_steps=13.4
|
|
[Episode 120730] reward=-135925833.6 actor_loss=0.3168 critic_loss=1907824027238.3999 entropy=17.7506 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 120740] reward=-116924434.0 actor_loss=0.2765 critic_loss=102925274659.7209 entropy=17.7632 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 120740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-57177914.4 mean_steps=19.3
|
|
[Episode 120750] reward=-114154271.7 actor_loss=0.2750 critic_loss=114921676800.0000 entropy=17.7608 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 120760] reward=-117337949.9 actor_loss=0.3311 critic_loss=105930748723.2000 entropy=17.7643 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 120760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562852.8 mean_steps=13.5
|
|
[Episode 120770] reward=-115907366.3 actor_loss=0.3603 critic_loss=104141718150.7368 entropy=17.7675 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 120780] reward=-118308241.2 actor_loss=0.2440 critic_loss=107518455697.2973 entropy=17.7730 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 120780] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395914.5 mean_steps=16.1
|
|
[Episode 120790] reward=-121307966.1 actor_loss=0.2503 critic_loss=112228825673.1429 entropy=17.7651 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 120800] reward=-119946814.6 actor_loss=0.3076 critic_loss=116162343367.1111 entropy=17.7529 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 120800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-359315.4 mean_steps=15.7
|
|
[Episode 120810] reward=-115257799.0 actor_loss=0.3368 critic_loss=104976682263.2727 entropy=17.7583 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 120820] reward=-215839866.9 actor_loss=0.2573 critic_loss=33203790888595.9102 entropy=17.7621 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 120820] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-644328.2 mean_steps=11.4
|
|
[Episode 120830] reward=-420631877.3 actor_loss=0.2311 critic_loss=220090419397700.2812 entropy=17.8206 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 120840] reward=-115912072.2 actor_loss=0.3555 critic_loss=103827553211.7333 entropy=17.8236 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 120840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-383569.1 mean_steps=15.1
|
|
[Episode 120850] reward=-117642029.2 actor_loss=0.2599 critic_loss=100354469888.0000 entropy=17.8340 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 120860] reward=-116820104.8 actor_loss=0.3479 critic_loss=104636076557.1282 entropy=17.8247 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 120860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-495043.8 mean_steps=12.2
|
|
[Episode 120870] reward=-118562208.7 actor_loss=0.2002 critic_loss=128534354569.3659 entropy=17.8049 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 120880] reward=-119160662.6 actor_loss=0.3296 critic_loss=114052775936.0000 entropy=17.7921 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 120880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445047.3 mean_steps=14.6
|
|
[Episode 120890] reward=-115887156.6 actor_loss=0.3236 critic_loss=104196660049.1707 entropy=17.7933 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 120900] reward=-112708504.9 actor_loss=0.3069 critic_loss=109943117141.3333 entropy=17.8136 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 120900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-362970.4 mean_steps=14.9
|
|
[Episode 120910] reward=-117913082.2 actor_loss=0.3298 critic_loss=113522564729.9048 entropy=17.8010 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 120920] reward=-121638346.4 actor_loss=0.2737 critic_loss=111515500544.0000 entropy=17.7957 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 120920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419048.5 mean_steps=15.4
|
|
[Episode 120930] reward=-120726634.7 actor_loss=0.2856 critic_loss=112833933403.0222 entropy=17.7832 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 120940] reward=-119182907.1 actor_loss=0.2897 critic_loss=110292265779.2000 entropy=17.7796 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 120940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-626833.0 mean_steps=13.8
|
|
[Episode 120950] reward=-114640714.9 actor_loss=0.2935 critic_loss=95901202113.4222 entropy=17.7958 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 120960] reward=-122286125.7 actor_loss=0.2571 critic_loss=118850043221.3333 entropy=17.7731 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 120960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547915.1 mean_steps=13.6
|
|
[Episode 120970] reward=-123293754.6 actor_loss=0.2625 critic_loss=114761144477.5385 entropy=17.7677 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 120980] reward=-114265542.0 actor_loss=0.3388 critic_loss=98909212672.0000 entropy=17.7516 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 120980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535443.9 mean_steps=12.9
|
|
[Episode 120990] reward=-123902587.1 actor_loss=0.2302 critic_loss=109746527292.2353 entropy=17.7432 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 121000] reward=-117751121.5 actor_loss=0.2503 critic_loss=103046503719.8222 entropy=17.7433 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 121000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502994.2 mean_steps=14.7
|
|
[Episode 121010] reward=-125057610.0 actor_loss=0.1933 critic_loss=108255487590.4000 entropy=17.7411 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 121020] reward=-122839142.7 actor_loss=0.3265 critic_loss=105315390259.2000 entropy=17.7352 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 121020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-412515.4 mean_steps=13.8
|
|
[Episode 121030] reward=-119391240.6 actor_loss=0.2068 critic_loss=110354701312.0000 entropy=17.7294 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 121040] reward=-117559477.5 actor_loss=0.3887 critic_loss=103844036864.0000 entropy=17.7250 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 121040] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-332011.6 mean_steps=16.3
|
|
[Episode 121050] reward=-118195585.0 actor_loss=0.2900 critic_loss=103567781432.8889 entropy=17.7249 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 121060] reward=-117022167.6 actor_loss=0.2797 critic_loss=98300314874.3111 entropy=17.7320 approx_kl=0.0103 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 121060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-639195.3 mean_steps=12.0
|
|
[Episode 121070] reward=-123349647.4 actor_loss=0.2170 critic_loss=110906622316.0889 entropy=17.7422 approx_kl=0.0114 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 121080] reward=-122083982.3 actor_loss=0.1393 critic_loss=108931583836.1600 entropy=17.7394 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 121080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-506329.5 mean_steps=14.7
|
|
[Episode 121090] reward=-122273797.5 actor_loss=0.3423 critic_loss=105756878848.0000 entropy=17.7503 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 121100] reward=-119237739.0 actor_loss=0.4271 critic_loss=106844012171.6364 entropy=17.7497 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 121100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-411100.9 mean_steps=15.0
|
|
[Episode 121110] reward=-116069939.5 actor_loss=0.2901 critic_loss=106793112613.9259 entropy=17.7536 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 121120] reward=-119254439.2 actor_loss=0.2441 critic_loss=107865283539.4783 entropy=17.7481 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 121120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447253.7 mean_steps=14.4
|
|
[Episode 121130] reward=-119497063.9 actor_loss=0.2184 critic_loss=102729199757.2414 entropy=17.7557 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 121140] reward=-124023663.0 actor_loss=0.2637 critic_loss=120376703348.3636 entropy=17.7528 approx_kl=0.0114 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 121140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-422088.0 mean_steps=15.8
|
|
[Episode 121150] reward=-122056038.7 actor_loss=0.3773 critic_loss=109729997064.2581 entropy=17.7517 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 121160] reward=-121707738.4 actor_loss=0.2523 critic_loss=109560310988.8000 entropy=17.7444 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 121160] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-744834.1 mean_steps=11.4
|
|
[Episode 121170] reward=-119940708.2 actor_loss=0.3603 critic_loss=109525274624.0000 entropy=17.7395 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 121180] reward=-117694896.4 actor_loss=0.2050 critic_loss=104449535024.7619 entropy=17.7291 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 121180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386524.2 mean_steps=16.0
|
|
[Episode 121190] reward=-124802776.8 actor_loss=0.3143 critic_loss=121346748220.9524 entropy=17.7283 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 121200] reward=-124078304.6 actor_loss=0.1936 critic_loss=108619862194.0870 entropy=17.7277 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 121200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443484.8 mean_steps=15.1
|
|
[Episode 121210] reward=-123590144.4 actor_loss=0.3347 critic_loss=114140908412.7179 entropy=17.7296 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 121220] reward=-121880140.2 actor_loss=0.2376 critic_loss=111529531205.8182 entropy=17.7299 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 121220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-364102.2 mean_steps=15.9
|
|
[Episode 121230] reward=-123742903.8 actor_loss=0.3122 critic_loss=113276311961.6000 entropy=17.7306 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 121240] reward=-116915584.9 actor_loss=0.3030 critic_loss=103712031744.0000 entropy=17.7351 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 121240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467005.5 mean_steps=13.6
|
|
[Episode 121250] reward=-119687684.1 actor_loss=0.2910 critic_loss=105854480657.0667 entropy=17.7327 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 121260] reward=-118571598.7 actor_loss=0.1885 critic_loss=109880757657.6000 entropy=17.7617 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 121260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543476.0 mean_steps=13.3
|
|
[Episode 121270] reward=-117329118.7 actor_loss=0.3062 critic_loss=119464861485.9487 entropy=17.7581 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 121280] reward=-120438686.5 actor_loss=0.2767 critic_loss=110997322319.6444 entropy=17.7672 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 121280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-561333.3 mean_steps=12.6
|
|
[Episode 121290] reward=-122930376.5 actor_loss=0.2887 critic_loss=114033380556.8000 entropy=17.7796 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 121300] reward=-120012970.3 actor_loss=0.3293 critic_loss=131262000696.8889 entropy=17.7877 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 121300] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-359724.5 mean_steps=16.4
|
|
[Episode 121310] reward=-122336399.8 actor_loss=0.3200 critic_loss=139038880450.2069 entropy=17.7878 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 121320] reward=-112667990.1 actor_loss=0.2748 critic_loss=100330600675.5556 entropy=17.7827 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 121320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503299.8 mean_steps=13.9
|
|
[Episode 121330] reward=-121002707.3 actor_loss=0.2931 critic_loss=108402635510.5185 entropy=17.7858 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 121340] reward=-119301138.6 actor_loss=0.1959 critic_loss=109407612654.9333 entropy=17.7990 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 121340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371726.7 mean_steps=15.9
|
|
[Episode 121350] reward=-122029153.4 actor_loss=0.2849 critic_loss=105924796416.0000 entropy=17.8118 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 121360] reward=-124137308.2 actor_loss=0.2949 critic_loss=113587214745.6000 entropy=17.8142 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 121360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484415.8 mean_steps=14.4
|
|
[Episode 121370] reward=-118632001.4 actor_loss=0.3310 critic_loss=108927280196.2667 entropy=17.8192 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 121380] reward=-119027149.5 actor_loss=0.3302 critic_loss=106020338222.5455 entropy=17.8195 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 121380] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-289183.0 mean_steps=19.1
|
|
[Episode 121390] reward=-113024476.5 actor_loss=0.3732 critic_loss=106921025536.0000 entropy=17.8200 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 121400] reward=-116761868.8 actor_loss=0.3057 critic_loss=103893868261.5172 entropy=17.7997 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 121400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477047.2 mean_steps=14.1
|
|
[Episode 121410] reward=-121326327.7 actor_loss=0.2310 critic_loss=112037938365.6296 entropy=17.7906 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 121420] reward=-119367341.6 actor_loss=0.3889 critic_loss=112700680484.5714 entropy=17.7930 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 121420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-427798.3 mean_steps=16.4
|
|
[Episode 121430] reward=-122592106.0 actor_loss=0.2777 critic_loss=110919654130.5263 entropy=17.7983 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 121440] reward=-120426145.7 actor_loss=0.2264 critic_loss=115439110553.6000 entropy=17.7924 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 121440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-615940.2 mean_steps=14.8
|
|
[Episode 121450] reward=-118787962.6 actor_loss=0.3426 critic_loss=103899998260.5128 entropy=17.7869 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 121460] reward=-114154212.7 actor_loss=0.4282 critic_loss=101338273642.1463 entropy=17.8020 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 121460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560632.0 mean_steps=13.2
|
|
[Episode 121470] reward=-121326100.9 actor_loss=0.2608 critic_loss=111501681623.0400 entropy=17.7930 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 121480] reward=-118090259.6 actor_loss=0.2231 critic_loss=107043369779.2000 entropy=17.7824 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 121480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566640.5 mean_steps=13.3
|
|
[Episode 121490] reward=-120270578.4 actor_loss=0.3080 critic_loss=106002809679.4483 entropy=17.7741 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 121500] reward=-124136235.3 actor_loss=0.2190 critic_loss=112950311852.9730 entropy=17.7820 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 121500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-385623.5 mean_steps=15.3
|
|
[Episode 121510] reward=-118690256.3 actor_loss=0.2542 critic_loss=101159865685.3333 entropy=17.7776 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 121520] reward=-122443864.5 actor_loss=0.3545 critic_loss=108831982660.2667 entropy=17.7658 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 121520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-435868.7 mean_steps=16.8
|
|
[Episode 121530] reward=-116988577.0 actor_loss=0.3634 critic_loss=107559304551.7838 entropy=17.7668 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 121540] reward=-120716857.1 actor_loss=0.2823 critic_loss=110572334648.8889 entropy=17.7561 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 121540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578884.8 mean_steps=13.6
|
|
[Episode 121550] reward=-121441778.3 actor_loss=0.2245 critic_loss=108299381097.4118 entropy=17.7609 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 121560] reward=-121720691.4 actor_loss=0.2955 critic_loss=106597230884.5714 entropy=17.7519 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 121560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597216.9 mean_steps=13.1
|
|
[Episode 121570] reward=-120653850.9 actor_loss=0.2789 critic_loss=113774045184.0000 entropy=17.7555 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 121580] reward=-123617329.5 actor_loss=0.2457 critic_loss=109441361237.3333 entropy=17.7622 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 121580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-689696.9 mean_steps=12.2
|
|
[Episode 121590] reward=-121241626.1 actor_loss=0.2690 critic_loss=107634766002.0870 entropy=17.7518 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 121600] reward=-123494444.9 actor_loss=0.2360 critic_loss=110760626858.6667 entropy=17.7460 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 121600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487273.3 mean_steps=13.7
|
|
[Episode 121610] reward=-117602249.8 actor_loss=0.3184 critic_loss=108103728014.2222 entropy=17.7530 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 121620] reward=-119488810.6 actor_loss=0.3067 critic_loss=108210528256.0000 entropy=17.7449 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 121620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531356.3 mean_steps=14.1
|
|
[Episode 121630] reward=-124992156.8 actor_loss=0.2853 critic_loss=115937759641.6000 entropy=17.7428 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 121640] reward=-123814928.9 actor_loss=0.2760 critic_loss=110480861304.4706 entropy=17.7283 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 121640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498880.0 mean_steps=14.2
|
|
[Episode 121650] reward=-120389182.6 actor_loss=0.1985 critic_loss=108428756871.5294 entropy=17.7217 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 121660] reward=-114215695.1 actor_loss=0.3161 critic_loss=104208947662.4516 entropy=17.6924 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 121660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518576.9 mean_steps=15.1
|
|
[Episode 121670] reward=-123187975.2 actor_loss=0.2579 critic_loss=109252668464.7619 entropy=17.6824 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 121680] reward=-120015860.3 actor_loss=0.2590 critic_loss=108766382155.8519 entropy=17.6825 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 121680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402959.0 mean_steps=14.9
|
|
[Episode 121690] reward=-115160490.1 actor_loss=0.2675 critic_loss=104644080733.0909 entropy=17.6808 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 121700] reward=-119519338.0 actor_loss=0.3071 critic_loss=109817335222.8571 entropy=17.6809 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 121700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-420389.2 mean_steps=16.6
|
|
[Episode 121710] reward=-114856639.6 actor_loss=0.3938 critic_loss=99885412625.0667 entropy=17.6964 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 121720] reward=-117296056.1 actor_loss=0.3386 critic_loss=99150036992.0000 entropy=17.7003 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 121720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-601282.8 mean_steps=12.2
|
|
[Episode 121730] reward=-114025787.2 actor_loss=0.3872 critic_loss=121074920789.3333 entropy=17.6988 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 121740] reward=-116498061.3 actor_loss=0.3970 critic_loss=104479645331.9111 entropy=17.6882 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 121740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544670.0 mean_steps=13.3
|
|
[Episode 121750] reward=-115420401.7 actor_loss=0.2941 critic_loss=102623394254.4516 entropy=17.6828 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 121760] reward=-120710654.0 actor_loss=0.2573 critic_loss=106830029857.0323 entropy=17.6732 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 121760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434243.2 mean_steps=15.3
|
|
[Episode 121770] reward=-119799561.3 actor_loss=0.3521 critic_loss=117194217233.8605 entropy=17.6778 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 121780] reward=-120830783.9 actor_loss=0.3550 critic_loss=109972648997.9259 entropy=17.6692 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 121780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433964.0 mean_steps=15.6
|
|
[Episode 121790] reward=-118140375.0 actor_loss=0.2084 critic_loss=96202392535.0400 entropy=17.6815 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 121800] reward=-117460189.2 actor_loss=0.3437 critic_loss=102178579395.7647 entropy=17.6892 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 121800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385238.3 mean_steps=16.2
|
|
[Episode 121810] reward=-118859541.6 actor_loss=0.3225 critic_loss=105367198539.2941 entropy=17.7049 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 121820] reward=-116614515.1 actor_loss=0.3769 critic_loss=107714902539.9070 entropy=17.7110 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 121820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-611041.4 mean_steps=14.0
|
|
[Episode 121830] reward=-118556485.1 actor_loss=0.3345 critic_loss=102494823960.3810 entropy=17.7232 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 121840] reward=-123475440.1 actor_loss=0.1558 critic_loss=115531825516.0889 entropy=17.7182 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 121840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-574530.0 mean_steps=12.9
|
|
[Episode 121850] reward=-114571305.2 actor_loss=0.2465 critic_loss=161194813440.0000 entropy=17.7116 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 121860] reward=-119644056.5 actor_loss=0.2895 critic_loss=107708386508.8000 entropy=17.7136 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 121860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-644021.4 mean_steps=13.1
|
|
[Episode 121870] reward=-116710491.6 actor_loss=0.2518 critic_loss=104571716494.2222 entropy=17.7130 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 121880] reward=-123476222.7 actor_loss=0.2328 critic_loss=108042850304.0000 entropy=17.7051 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 121880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528690.3 mean_steps=14.4
|
|
[Episode 121890] reward=-119583885.7 actor_loss=0.3114 critic_loss=104381587671.5789 entropy=17.7106 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 121900] reward=-115502161.3 actor_loss=0.3576 critic_loss=100854154035.2000 entropy=17.7083 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 121900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-432540.9 mean_steps=16.3
|
|
[Episode 121910] reward=-113289342.2 actor_loss=0.2507 critic_loss=97826140672.0000 entropy=17.7062 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 121920] reward=-119868905.7 actor_loss=0.3070 critic_loss=103284311654.4000 entropy=17.6998 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 121920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-644589.7 mean_steps=12.2
|
|
[Episode 121930] reward=-121806935.3 actor_loss=0.2055 critic_loss=110666506035.2000 entropy=17.7036 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 121940] reward=-122694855.8 actor_loss=0.2096 critic_loss=106873915234.4615 entropy=17.7124 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 121940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497293.3 mean_steps=14.0
|
|
[Episode 121950] reward=-116101345.6 actor_loss=0.2858 critic_loss=102865188454.4000 entropy=17.6994 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 121960] reward=-112860344.3 actor_loss=0.3692 critic_loss=98058611175.6190 entropy=17.6970 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 121960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496750.4 mean_steps=13.9
|
|
[Episode 121970] reward=-120251897.3 actor_loss=0.3232 critic_loss=102744835218.2857 entropy=17.7094 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 121980] reward=-118097068.5 actor_loss=0.3699 critic_loss=104942903842.1333 entropy=17.6982 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 121980] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-270467.6 mean_steps=18.1
|
|
[Episode 121990] reward=-115902265.4 actor_loss=0.2239 critic_loss=98966232905.9556 entropy=17.6964 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 122000] reward=-116491456.7 actor_loss=0.3140 critic_loss=99911153527.4667 entropy=17.6890 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 122000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576112.2 mean_steps=13.3
|
|
[Episode 122010] reward=-116403377.2 actor_loss=0.2600 critic_loss=100812411335.1111 entropy=17.6973 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 122020] reward=-120622823.3 actor_loss=0.2529 critic_loss=106449801489.0667 entropy=17.6897 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 122020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440066.0 mean_steps=14.7
|
|
[Episode 122030] reward=-124069983.2 actor_loss=0.2441 critic_loss=106172284042.3784 entropy=17.6596 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 122040] reward=-119224256.5 actor_loss=0.4205 critic_loss=106302405199.6444 entropy=17.6617 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 122040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-588397.4 mean_steps=12.8
|
|
[Episode 122050] reward=-117469673.0 actor_loss=0.3091 critic_loss=103304750762.6667 entropy=17.6624 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 122060] reward=-118457367.7 actor_loss=0.2820 critic_loss=99757449216.0000 entropy=17.6655 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 122060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-430495.3 mean_steps=16.4
|
|
[Episode 122070] reward=-120012653.6 actor_loss=0.2487 critic_loss=106592514180.1290 entropy=17.6735 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 122080] reward=-124223883.5 actor_loss=0.3521 critic_loss=120705646592.0000 entropy=17.6655 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 122080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491553.6 mean_steps=14.0
|
|
[Episode 122090] reward=-119424808.5 actor_loss=0.2486 critic_loss=120451247010.9091 entropy=17.6687 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 122100] reward=-122793179.9 actor_loss=0.3293 critic_loss=108780191744.0000 entropy=17.6693 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 122100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458253.5 mean_steps=14.3
|
|
[Episode 122110] reward=-120350988.8 actor_loss=0.3537 critic_loss=107146793728.0000 entropy=17.6639 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 122120] reward=-117280408.8 actor_loss=0.3163 critic_loss=100947880995.3103 entropy=17.6531 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 122120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480462.7 mean_steps=15.2
|
|
[Episode 122130] reward=-119579976.9 actor_loss=0.2918 critic_loss=103911859541.3333 entropy=17.6700 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 122140] reward=-113184930.1 actor_loss=0.4084 critic_loss=101426586510.2222 entropy=17.6571 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 122140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-483233.4 mean_steps=13.2
|
|
[Episode 122150] reward=-116612672.9 actor_loss=0.3432 critic_loss=100022961814.5882 entropy=17.6611 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 122160] reward=-116684839.2 actor_loss=0.2455 critic_loss=109554145325.5111 entropy=17.6785 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 122160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-516754.8 mean_steps=15.1
|
|
[Episode 122170] reward=-122410187.8 actor_loss=0.2196 critic_loss=107708055259.4286 entropy=17.6673 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 122180] reward=-120243390.3 actor_loss=0.2529 critic_loss=106483281464.8889 entropy=17.6577 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 122180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-588943.5 mean_steps=12.8
|
|
[Episode 122190] reward=-118210160.5 actor_loss=0.3452 critic_loss=99878515689.2444 entropy=17.6601 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 122200] reward=-117437350.7 actor_loss=0.3108 critic_loss=102724892535.4667 entropy=17.6560 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 122200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545039.6 mean_steps=13.7
|
|
[Episode 122210] reward=-121370941.5 actor_loss=0.2063 critic_loss=105599915845.8182 entropy=17.6577 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 122220] reward=-118223349.8 actor_loss=0.2631 critic_loss=103739045329.4545 entropy=17.6490 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 122220] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-389410.3 mean_steps=16.8
|
|
[Episode 122230] reward=-119146521.0 actor_loss=0.3153 critic_loss=108641358358.2609 entropy=17.6478 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 122240] reward=-119628484.7 actor_loss=0.2682 critic_loss=102954761329.7778 entropy=17.6582 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 122240] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-346076.9 mean_steps=16.9
|
|
[Episode 122250] reward=-119131220.7 actor_loss=0.3389 critic_loss=104569865485.4737 entropy=17.6562 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 122260] reward=-115223937.2 actor_loss=0.2729 critic_loss=104915029138.2857 entropy=17.6577 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 122260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454418.9 mean_steps=14.9
|
|
[Episode 122270] reward=-114447617.9 actor_loss=0.2833 critic_loss=101870430315.7895 entropy=17.6512 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 122280] reward=-123538691.9 actor_loss=0.2668 critic_loss=108744333204.2105 entropy=17.6517 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 122280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-590703.5 mean_steps=11.5
|
|
[Episode 122290] reward=-120061945.5 actor_loss=0.3213 critic_loss=100407027778.0645 entropy=17.6507 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 122300] reward=-110376371.8 actor_loss=0.3028 critic_loss=95330660136.4211 entropy=17.6626 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 122300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454408.2 mean_steps=15.4
|
|
[Episode 122310] reward=-119128484.7 actor_loss=0.3301 critic_loss=103483833958.4000 entropy=17.6767 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 122320] reward=-119832399.5 actor_loss=0.3758 critic_loss=110378385017.9048 entropy=17.6738 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 122320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-424005.3 mean_steps=16.6
|
|
[Episode 122330] reward=-120031054.9 actor_loss=0.2954 critic_loss=102969514721.2800 entropy=17.6893 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 122340] reward=-115563848.3 actor_loss=0.3887 critic_loss=104589988841.2444 entropy=17.6760 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 122340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467469.5 mean_steps=13.8
|
|
[Episode 122350] reward=-119488317.9 actor_loss=0.2077 critic_loss=102945584810.6667 entropy=17.6841 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 122360] reward=-111882899.3 actor_loss=0.3242 critic_loss=98849408269.4737 entropy=17.6860 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 122360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435899.4 mean_steps=15.8
|
|
[Episode 122370] reward=-117098527.8 actor_loss=0.3587 critic_loss=107335766016.0000 entropy=17.6864 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 122380] reward=-122551913.1 actor_loss=0.1922 critic_loss=102400426166.0444 entropy=17.6920 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 122380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-672873.0 mean_steps=13.2
|
|
[Episode 122390] reward=-122580582.3 actor_loss=0.2102 critic_loss=143700809045.3333 entropy=17.7034 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 122400] reward=-115743755.6 actor_loss=0.2686 critic_loss=104954692403.2000 entropy=17.7023 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 122400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577015.0 mean_steps=13.0
|
|
[Episode 122410] reward=-120216245.7 actor_loss=0.2498 critic_loss=103382537701.0526 entropy=17.6817 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 122420] reward=-119225901.2 actor_loss=0.3201 critic_loss=101630238720.0000 entropy=17.6816 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 122420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-614957.0 mean_steps=11.9
|
|
[Episode 122430] reward=-123409580.5 actor_loss=0.2564 critic_loss=108102755760.3556 entropy=17.6891 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 122440] reward=-110839874.4 actor_loss=0.4339 critic_loss=101620479638.5882 entropy=17.6940 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 122440] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-648983.5 mean_steps=12.6
|
|
[Episode 122450] reward=-121971796.9 actor_loss=0.3036 critic_loss=108323477991.6190 entropy=17.7023 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 122460] reward=-120738850.2 actor_loss=0.3205 critic_loss=103857789415.6190 entropy=17.6982 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 122460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547616.2 mean_steps=14.3
|
|
[Episode 122470] reward=-126040515.1 actor_loss=0.2946 critic_loss=118088818141.8667 entropy=17.6776 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 122480] reward=-121503571.1 actor_loss=0.2963 critic_loss=107960129156.7407 entropy=17.6630 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 122480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-615707.7 mean_steps=13.0
|
|
[Episode 122490] reward=-125978575.8 actor_loss=0.2492 critic_loss=111038987059.2000 entropy=17.6761 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 122500] reward=-116157669.3 actor_loss=0.3609 critic_loss=108947169462.0444 entropy=17.6793 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 122500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459211.7 mean_steps=15.4
|
|
[Episode 122510] reward=-111601683.3 actor_loss=0.3644 critic_loss=100583349058.3704 entropy=17.6713 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 122520] reward=-114773788.6 actor_loss=0.3588 critic_loss=97392256432.3556 entropy=17.6674 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 122520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499778.0 mean_steps=14.0
|
|
[Episode 122530] reward=-120305629.7 actor_loss=0.2687 critic_loss=108171464066.8445 entropy=17.6618 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 122540] reward=-121789465.0 actor_loss=0.3243 critic_loss=106257268371.9111 entropy=17.6334 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 122540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463462.4 mean_steps=13.8
|
|
[Episode 122550] reward=-117966139.8 actor_loss=0.2922 critic_loss=102050016856.2759 entropy=17.6326 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 122560] reward=-118848130.3 actor_loss=0.3297 critic_loss=104456794020.9778 entropy=17.6309 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 122560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446275.3 mean_steps=14.6
|
|
[Episode 122570] reward=-118494427.5 actor_loss=0.2371 critic_loss=104612999076.9778 entropy=17.6414 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 122580] reward=-115777841.3 actor_loss=0.3368 critic_loss=102629639782.4000 entropy=17.6470 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 122580] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-358336.1 mean_steps=17.1
|
|
[Episode 122590] reward=-122327338.7 actor_loss=0.2848 critic_loss=108877909852.1600 entropy=17.6451 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 122600] reward=-114367324.9 actor_loss=0.4112 critic_loss=98238012893.8667 entropy=17.6238 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 122600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-464689.6 mean_steps=13.7
|
|
[Episode 122610] reward=-122068300.9 actor_loss=0.1910 critic_loss=102449863793.7778 entropy=17.6302 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 122620] reward=-118158187.6 actor_loss=0.3213 critic_loss=94956836181.3333 entropy=17.6008 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 122620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545551.2 mean_steps=12.8
|
|
[Episode 122630] reward=-115827926.8 actor_loss=0.3640 critic_loss=100331151724.0889 entropy=17.5819 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 122640] reward=-118937280.8 actor_loss=0.3008 critic_loss=100124747933.5385 entropy=17.5888 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 122640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-600532.7 mean_steps=14.4
|
|
[Episode 122650] reward=-117190103.4 actor_loss=0.3154 critic_loss=98627272899.0476 entropy=17.5855 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 122660] reward=-117497287.5 actor_loss=0.2811 critic_loss=95800098269.8667 entropy=17.5829 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 122660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-426006.2 mean_steps=16.1
|
|
[Episode 122670] reward=-121581060.2 actor_loss=0.3028 critic_loss=103449824460.8000 entropy=17.5859 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 122680] reward=-121417059.2 actor_loss=0.2670 critic_loss=101772693048.8889 entropy=17.5771 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 122680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418631.2 mean_steps=15.4
|
|
[Episode 122690] reward=-123456706.5 actor_loss=0.1476 critic_loss=110210833591.7949 entropy=17.5659 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 122700] reward=-118596587.4 actor_loss=0.2233 critic_loss=100368386184.5333 entropy=17.5680 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 122700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536049.9 mean_steps=13.3
|
|
[Episode 122710] reward=-116710298.9 actor_loss=0.3537 critic_loss=110394586368.0000 entropy=17.5698 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 122720] reward=-118894375.6 actor_loss=0.1800 critic_loss=105977609511.8222 entropy=17.5599 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 122720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-570170.1 mean_steps=12.6
|
|
[Episode 122730] reward=-123810205.4 actor_loss=0.3222 critic_loss=112563510886.4000 entropy=17.5647 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 122740] reward=-121414681.8 actor_loss=0.2811 critic_loss=105498701101.1765 entropy=17.5727 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 122740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481906.3 mean_steps=13.6
|
|
[Episode 122750] reward=-116250459.9 actor_loss=0.2703 critic_loss=104439318300.4444 entropy=17.5637 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 122760] reward=-118432264.7 actor_loss=0.2054 critic_loss=98672052906.6667 entropy=17.5576 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 122760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479015.3 mean_steps=14.6
|
|
[Episode 122770] reward=-119324776.6 actor_loss=0.3613 critic_loss=105023580521.4118 entropy=17.5449 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 122780] reward=-114598965.9 actor_loss=0.4336 critic_loss=97480679606.0444 entropy=17.5333 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 122780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481305.5 mean_steps=13.8
|
|
[Episode 122790] reward=-117811266.2 actor_loss=0.2906 critic_loss=101723511239.1111 entropy=17.5401 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 122800] reward=-120453634.3 actor_loss=0.2739 critic_loss=105696371507.2000 entropy=17.5438 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 122800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477538.4 mean_steps=13.7
|
|
[Episode 122810] reward=-114330962.3 actor_loss=0.3267 critic_loss=101984117480.7273 entropy=17.5476 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 122820] reward=-120567965.8 actor_loss=0.1917 critic_loss=102659896115.2000 entropy=17.5515 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 122820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-570429.1 mean_steps=12.8
|
|
[Episode 122830] reward=-115340107.5 actor_loss=0.2859 critic_loss=106270470235.0222 entropy=17.5486 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 122840] reward=-115731975.2 actor_loss=0.3514 critic_loss=96283389860.9778 entropy=17.5571 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 122840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545949.4 mean_steps=13.4
|
|
[Episode 122850] reward=-123324193.9 actor_loss=0.2551 critic_loss=110545450910.4762 entropy=17.5730 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 122860] reward=-121218291.3 actor_loss=0.3099 critic_loss=107074624458.1053 entropy=17.5663 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 122860] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-411394.7 mean_steps=16.9
|
|
[Episode 122870] reward=-120922670.4 actor_loss=0.3034 critic_loss=101426922700.8000 entropy=17.5520 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 122880] reward=-115131709.6 actor_loss=0.2381 critic_loss=100459188679.1111 entropy=17.5612 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 122880] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-360291.1 mean_steps=16.8
|
|
[Episode 122890] reward=-121913242.9 actor_loss=0.2649 critic_loss=101323862211.0476 entropy=17.5673 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 122900] reward=-122112115.5 actor_loss=0.2098 critic_loss=125527740631.5789 entropy=17.5686 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 122900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-520066.9 mean_steps=14.8
|
|
[Episode 122910] reward=-115136302.3 actor_loss=0.2676 critic_loss=99574895411.2000 entropy=17.5770 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 122920] reward=-118125044.3 actor_loss=0.2653 critic_loss=99666542376.4211 entropy=17.5784 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 122920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590726.1 mean_steps=13.8
|
|
[Episode 122930] reward=-120442269.7 actor_loss=0.2689 critic_loss=100230027384.4706 entropy=17.5781 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 122940] reward=-115119688.6 actor_loss=0.3519 critic_loss=96464906558.5778 entropy=17.5650 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 122940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423390.3 mean_steps=15.3
|
|
[Episode 122950] reward=-121461451.4 actor_loss=0.3114 critic_loss=108433602560.0000 entropy=17.5695 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 122960] reward=-123528953.4 actor_loss=0.2350 critic_loss=108199422901.0732 entropy=17.5645 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 122960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536958.3 mean_steps=13.6
|
|
[Episode 122970] reward=-118365952.8 actor_loss=0.3091 critic_loss=98794146201.6000 entropy=17.5606 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 122980] reward=-117718553.0 actor_loss=0.2738 critic_loss=101013962296.8889 entropy=17.5532 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 122980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-461089.9 mean_steps=15.3
|
|
[Episode 122990] reward=-120537898.5 actor_loss=0.2795 critic_loss=101883470045.4054 entropy=17.5509 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 123000] reward=-117563223.9 actor_loss=0.1615 critic_loss=99788783433.9556 entropy=17.5566 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 123000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-416901.6 mean_steps=14.4
|
|
[Episode 123010] reward=-117531686.0 actor_loss=0.2676 critic_loss=102756636514.4615 entropy=17.5473 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 123020] reward=-124789173.0 actor_loss=0.2340 critic_loss=105107176472.9756 entropy=17.5417 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 123020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-448523.7 mean_steps=16.1
|
|
[Episode 123030] reward=-121140121.1 actor_loss=0.3447 critic_loss=105624384000.0000 entropy=17.5454 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 123040] reward=-114760752.8 actor_loss=0.2949 critic_loss=94943182848.0000 entropy=17.5385 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 123040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533464.3 mean_steps=14.4
|
|
[Episode 123050] reward=-121184840.4 actor_loss=0.2366 critic_loss=101264181760.0000 entropy=17.5514 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 123060] reward=-117613363.8 actor_loss=0.3029 critic_loss=97425861956.6829 entropy=17.5588 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 123060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487181.8 mean_steps=13.6
|
|
[Episode 123070] reward=-117869234.5 actor_loss=0.2984 critic_loss=102642668339.2000 entropy=17.5612 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 123080] reward=-118864335.0 actor_loss=0.3103 critic_loss=104058085093.5172 entropy=17.5635 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 123080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-415149.6 mean_steps=13.9
|
|
[Episode 123090] reward=-112034673.3 actor_loss=0.4028 critic_loss=98462720762.0465 entropy=17.5661 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 123100] reward=-117226955.6 actor_loss=0.2109 critic_loss=99449233408.0000 entropy=17.5586 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 123100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-455324.5 mean_steps=13.8
|
|
[Episode 123110] reward=-112377607.0 actor_loss=0.3048 critic_loss=95026602374.0952 entropy=17.5771 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 123120] reward=-122288825.4 actor_loss=0.2404 critic_loss=101778890205.8667 entropy=17.5710 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 123120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-520622.4 mean_steps=13.0
|
|
[Episode 123130] reward=-116011199.0 actor_loss=0.2671 critic_loss=109195052568.3810 entropy=17.5504 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 123140] reward=-117674522.6 actor_loss=0.3654 critic_loss=102142886912.0000 entropy=17.5507 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 123140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428222.4 mean_steps=15.2
|
|
[Episode 123150] reward=-118423984.9 actor_loss=0.3210 critic_loss=108080347545.6000 entropy=17.5328 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 123160] reward=-120977273.6 actor_loss=0.2426 critic_loss=105258378273.0323 entropy=17.5129 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 123160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-387523.4 mean_steps=16.0
|
|
[Episode 123170] reward=-122107576.4 actor_loss=0.2903 critic_loss=104659862381.7143 entropy=17.5109 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 123180] reward=-121515117.9 actor_loss=0.2782 critic_loss=112637244355.7647 entropy=17.5123 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 123180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-611988.9 mean_steps=12.8
|
|
[Episode 123190] reward=-119665572.3 actor_loss=0.3462 critic_loss=107049510588.6316 entropy=17.5101 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 123200] reward=-122130663.7 actor_loss=0.2666 critic_loss=108465662753.3913 entropy=17.5109 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 123200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477782.6 mean_steps=13.7
|
|
[Episode 123210] reward=-119526050.0 actor_loss=0.2875 critic_loss=110776440149.3333 entropy=17.5201 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 123220] reward=-117050860.6 actor_loss=0.2471 critic_loss=102659971439.5897 entropy=17.5118 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 123220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544643.8 mean_steps=12.9
|
|
[Episode 123230] reward=-113153397.1 actor_loss=0.3464 critic_loss=95494148486.0952 entropy=17.4923 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 123240] reward=-117965528.5 actor_loss=0.4065 critic_loss=102959850746.3111 entropy=17.5033 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 123240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-560067.3 mean_steps=13.8
|
|
[Episode 123250] reward=-118526015.9 actor_loss=0.1979 critic_loss=107551341889.8286 entropy=17.4883 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 123260] reward=-112781183.4 actor_loss=0.2566 critic_loss=95013036288.0000 entropy=17.4895 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 123260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-505325.1 mean_steps=15.5
|
|
[Episode 123270] reward=-120801480.4 actor_loss=0.2378 critic_loss=108289499418.4828 entropy=17.4900 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 123280] reward=-113140909.3 actor_loss=0.2845 critic_loss=100726773917.5385 entropy=17.4924 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 123280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541960.6 mean_steps=14.0
|
|
[Episode 123290] reward=-114730492.7 actor_loss=0.3222 critic_loss=98885325724.9032 entropy=17.4986 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 123300] reward=-118215681.4 actor_loss=0.3203 critic_loss=113393608021.3333 entropy=17.5011 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 123300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452713.3 mean_steps=15.6
|
|
[Episode 123310] reward=-116303094.8 actor_loss=0.3838 critic_loss=108984115764.9655 entropy=17.5014 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 123320] reward=-119653485.0 actor_loss=0.2636 critic_loss=105239743977.7391 entropy=17.5112 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 123320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453361.5 mean_steps=14.6
|
|
[Episode 123330] reward=-119405081.1 actor_loss=0.2974 critic_loss=98639309161.4118 entropy=17.5106 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 123340] reward=-120699211.4 actor_loss=0.2378 critic_loss=106150097305.6000 entropy=17.5155 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 123340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440990.8 mean_steps=14.8
|
|
[Episode 123350] reward=-114726270.8 actor_loss=0.3081 critic_loss=95940615306.3784 entropy=17.5190 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 123360] reward=-117581124.4 actor_loss=0.2541 critic_loss=103783344810.6667 entropy=17.5149 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 123360] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-250875.0 mean_steps=17.4
|
|
[Episode 123370] reward=-113565465.7 actor_loss=0.4141 critic_loss=96944326428.4444 entropy=17.5197 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 123380] reward=-113188729.3 actor_loss=0.2736 critic_loss=96264818509.9130 entropy=17.5160 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 123380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469116.2 mean_steps=14.2
|
|
[Episode 123390] reward=-115707759.2 actor_loss=0.3245 critic_loss=96925444840.7273 entropy=17.5085 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 123400] reward=-122620259.3 actor_loss=0.2635 critic_loss=111713323008.0000 entropy=17.5067 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 123400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459229.1 mean_steps=14.1
|
|
[Episode 123410] reward=-119387368.9 actor_loss=0.3428 critic_loss=158140832000.0000 entropy=17.5254 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 123420] reward=-117755800.3 actor_loss=0.3458 critic_loss=100659774025.1429 entropy=17.5358 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 123420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417502.6 mean_steps=14.9
|
|
[Episode 123430] reward=-123744067.1 actor_loss=0.2027 critic_loss=154348027588.9231 entropy=17.5361 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 123440] reward=-118020866.4 actor_loss=0.4253 critic_loss=100649876058.3529 entropy=17.5281 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 123440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537746.0 mean_steps=13.8
|
|
[Episode 123450] reward=-122445127.6 actor_loss=0.2335 critic_loss=103002773396.2105 entropy=17.5231 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 123460] reward=-147106183.4 actor_loss=0.3225 critic_loss=3156406265931.8521 entropy=17.5122 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 123460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-353656.6 mean_steps=15.6
|
|
[Episode 123470] reward=-183458713.1 actor_loss=0.2414 critic_loss=16200706818048.0000 entropy=17.5150 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 123480] reward=-124965087.6 actor_loss=0.3432 critic_loss=110612482906.8387 entropy=17.4982 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 123480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454628.8 mean_steps=14.2
|
|
[Episode 123490] reward=-120200996.2 actor_loss=0.2724 critic_loss=106546619460.2667 entropy=17.4926 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 123500] reward=-115039798.9 actor_loss=0.3118 critic_loss=95868694983.1111 entropy=17.4951 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 123500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575125.2 mean_steps=13.5
|
|
[Episode 123510] reward=-4565633442.9 actor_loss=0.2569 critic_loss=43893099176953992.0000 entropy=17.5035 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 123520] reward=-116862462.9 actor_loss=0.3239 critic_loss=102163556925.4400 entropy=17.5133 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 123520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500676.1 mean_steps=14.0
|
|
[Episode 123530] reward=-116789128.1 actor_loss=0.3637 critic_loss=96452331333.8182 entropy=17.5045 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 123540] reward=-120576044.3 actor_loss=0.2863 critic_loss=104075342002.0870 entropy=17.5179 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 123540] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-275382.8 mean_steps=17.0
|
|
[Episode 123550] reward=-117405619.7 actor_loss=0.3875 critic_loss=99082178692.1290 entropy=17.4941 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 123560] reward=-120482560.3 actor_loss=0.2155 critic_loss=109232472064.0000 entropy=17.4841 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 123560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545660.2 mean_steps=13.2
|
|
[Episode 123570] reward=-121617362.4 actor_loss=0.2085 critic_loss=98558381494.8571 entropy=17.4637 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 123580] reward=-118927462.8 actor_loss=0.2917 critic_loss=103241460796.2353 entropy=17.4918 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 123580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-621000.6 mean_steps=11.6
|
|
[Episode 123590] reward=-112747517.8 actor_loss=0.3042 critic_loss=98488306164.6222 entropy=17.5000 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 123600] reward=-119567857.3 actor_loss=0.2616 critic_loss=104029098147.8400 entropy=17.5112 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 123600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-350427.8 mean_steps=15.7
|
|
[Episode 123610] reward=-117490136.6 actor_loss=0.3326 critic_loss=101211696696.8889 entropy=17.5070 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 123620] reward=-114964393.9 actor_loss=0.3535 critic_loss=97258539326.5778 entropy=17.5015 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 123620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462790.7 mean_steps=15.6
|
|
[Episode 123630] reward=-122330732.5 actor_loss=0.2982 critic_loss=106311726858.2400 entropy=17.4991 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 123640] reward=-119003525.4 actor_loss=0.3303 critic_loss=100839446755.5556 entropy=17.5057 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 123640] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-334136.8 mean_steps=17.2
|
|
[Episode 123650] reward=-113628263.8 actor_loss=0.3194 critic_loss=98419480803.5556 entropy=17.4973 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 123660] reward=-117380906.2 actor_loss=0.3261 critic_loss=103023797452.8000 entropy=17.5185 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 123660] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-757250.9 mean_steps=11.2
|
|
[Episode 123670] reward=-116395482.3 actor_loss=0.2863 critic_loss=100133982822.4000 entropy=17.5121 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 123680] reward=-118833817.2 actor_loss=0.3591 critic_loss=97959069013.3333 entropy=17.5210 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 123680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-508145.1 mean_steps=12.1
|
|
[Episode 123690] reward=-118649997.4 actor_loss=0.3241 critic_loss=101523933138.4889 entropy=17.5199 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 123700] reward=-114964278.7 actor_loss=0.3557 critic_loss=97410768464.8421 entropy=17.5185 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 123700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584845.0 mean_steps=12.5
|
|
[Episode 123710] reward=-117658406.7 actor_loss=0.3286 critic_loss=98698491221.3333 entropy=17.5140 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 123720] reward=-113693072.8 actor_loss=0.3521 critic_loss=94091646976.0000 entropy=17.5207 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 123720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482954.7 mean_steps=13.7
|
|
[Episode 123730] reward=-116057337.3 actor_loss=0.3030 critic_loss=100266789309.2174 entropy=17.5229 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 123740] reward=-119419879.9 actor_loss=0.2638 critic_loss=100926611456.0000 entropy=17.5210 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 123740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-418427.6 mean_steps=14.1
|
|
[Episode 123750] reward=-122995737.6 actor_loss=0.2868 critic_loss=106677104640.0000 entropy=17.5251 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 123760] reward=-121864304.6 actor_loss=0.2845 critic_loss=105922156544.0000 entropy=17.5288 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 123760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505000.3 mean_steps=13.9
|
|
[Episode 123770] reward=-119656554.6 actor_loss=0.3103 critic_loss=104931011717.5652 entropy=17.5184 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 123780] reward=-115310184.4 actor_loss=0.3819 critic_loss=96582648937.9310 entropy=17.5164 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 123780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445449.4 mean_steps=15.6
|
|
[Episode 123790] reward=-114326092.4 actor_loss=0.3337 critic_loss=96850569546.3226 entropy=17.5159 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 123800] reward=-115154356.7 actor_loss=0.2945 critic_loss=93230180579.5556 entropy=17.4969 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 123800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553597.3 mean_steps=13.3
|
|
[Episode 123810] reward=-118602718.7 actor_loss=0.3332 critic_loss=99568139507.8095 entropy=17.4867 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 123820] reward=-108527285.3 actor_loss=0.2875 critic_loss=96696127488.0000 entropy=17.4936 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 123820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-634756.3 mean_steps=13.1
|
|
[Episode 123830] reward=-112987519.4 actor_loss=0.4538 critic_loss=95137707277.4737 entropy=17.4887 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 123840] reward=-118532503.0 actor_loss=0.2892 critic_loss=102886188583.3846 entropy=17.4907 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 123840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-563207.9 mean_steps=14.6
|
|
[Episode 123850] reward=-116035393.8 actor_loss=0.3246 critic_loss=101493240342.2609 entropy=17.4810 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 123860] reward=-116969388.0 actor_loss=0.2296 critic_loss=95408091880.7273 entropy=17.4706 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 123860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551049.8 mean_steps=13.3
|
|
[Episode 123870] reward=-114151571.2 actor_loss=0.2595 critic_loss=94441840324.9231 entropy=17.4599 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 123880] reward=-113188237.3 actor_loss=0.3185 critic_loss=95095235505.2308 entropy=17.4678 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 123880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-601841.4 mean_steps=13.8
|
|
[Episode 123890] reward=-114447171.6 actor_loss=0.2464 critic_loss=103127920896.0000 entropy=17.4663 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 123900] reward=-119326732.3 actor_loss=0.2528 critic_loss=99909479459.3103 entropy=17.4658 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 123900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-416255.0 mean_steps=14.6
|
|
[Episode 123910] reward=-116016774.5 actor_loss=0.4146 critic_loss=94576495729.7778 entropy=17.4648 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 123920] reward=-114937989.5 actor_loss=0.3956 critic_loss=96517703369.6970 entropy=17.4591 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 123920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-410652.5 mean_steps=16.1
|
|
[Episode 123930] reward=-116605433.9 actor_loss=0.3305 critic_loss=97853648523.6364 entropy=17.4505 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 123940] reward=-116806076.9 actor_loss=0.3873 critic_loss=99579779481.6000 entropy=17.4663 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 123940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458785.2 mean_steps=15.5
|
|
[Episode 123950] reward=-112421544.1 actor_loss=0.3127 critic_loss=90927116834.1333 entropy=17.4608 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 123960] reward=-122434928.0 actor_loss=0.2631 critic_loss=110367756800.0000 entropy=17.4680 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 123960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489034.4 mean_steps=13.8
|
|
[Episode 123970] reward=-116064030.5 actor_loss=0.3323 critic_loss=99191133135.2381 entropy=17.4595 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 123980] reward=-118008569.3 actor_loss=0.2869 critic_loss=99013747756.5217 entropy=17.4532 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 123980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541761.6 mean_steps=14.4
|
|
[Episode 123990] reward=-118741679.0 actor_loss=0.2572 critic_loss=96821175016.7273 entropy=17.4625 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 124000] reward=-114188012.6 actor_loss=0.2487 critic_loss=95992252825.6000 entropy=17.4807 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 124000] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-261075.5 mean_steps=17.7
|
|
[Episode 124010] reward=-116530097.4 actor_loss=0.3050 critic_loss=108002855019.7895 entropy=17.4691 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 124020] reward=-115676814.7 actor_loss=0.3993 critic_loss=94995808802.1333 entropy=17.4682 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 124020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-557593.4 mean_steps=12.3
|
|
[Episode 124030] reward=-121340029.9 actor_loss=0.2348 critic_loss=103196266919.7241 entropy=17.4595 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 124040] reward=-119505838.3 actor_loss=0.3065 critic_loss=103435814684.4444 entropy=17.4751 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 124040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424399.9 mean_steps=14.4
|
|
[Episode 124050] reward=-117151891.7 actor_loss=0.2059 critic_loss=96556637262.7692 entropy=17.4802 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 124060] reward=-119485187.2 actor_loss=0.4327 critic_loss=103184900915.2000 entropy=17.4831 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 124060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489021.9 mean_steps=15.0
|
|
[Episode 124070] reward=-115715578.7 actor_loss=0.2752 critic_loss=97767718912.0000 entropy=17.4981 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 124080] reward=-119613516.7 actor_loss=0.3705 critic_loss=101788594898.8235 entropy=17.5004 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 124080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563599.3 mean_steps=12.6
|
|
[Episode 124090] reward=-114564918.1 actor_loss=0.2529 critic_loss=103583348053.3333 entropy=17.5060 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 124100] reward=-119512453.8 actor_loss=0.4052 critic_loss=106249664391.5294 entropy=17.5134 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 124100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-498004.3 mean_steps=12.8
|
|
[Episode 124110] reward=-117041977.9 actor_loss=0.2506 critic_loss=99062489391.4074 entropy=17.5144 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 124120] reward=-113984556.2 actor_loss=0.4055 critic_loss=98119125333.3333 entropy=17.5189 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 124120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517354.3 mean_steps=14.1
|
|
[Episode 124130] reward=-113120375.6 actor_loss=0.3511 critic_loss=96696531854.2222 entropy=17.5234 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 124140] reward=-117285527.5 actor_loss=0.3759 critic_loss=98174102641.7778 entropy=17.5111 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 124140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555931.6 mean_steps=13.3
|
|
[Episode 124150] reward=-121747878.9 actor_loss=0.2285 critic_loss=100779999232.0000 entropy=17.5431 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 124160] reward=-115892059.3 actor_loss=0.2960 critic_loss=100557648418.1333 entropy=17.5380 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 124160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507571.7 mean_steps=13.2
|
|
[Episode 124170] reward=-115644509.9 actor_loss=0.2994 critic_loss=100550487912.2963 entropy=17.5447 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 124180] reward=-115282916.9 actor_loss=0.3122 critic_loss=102014477653.3333 entropy=17.5483 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 124180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-432820.4 mean_steps=14.2
|
|
[Episode 124190] reward=-112975585.6 actor_loss=0.3614 critic_loss=101261150538.3226 entropy=17.5442 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 124200] reward=-119616595.8 actor_loss=0.2747 critic_loss=99390275584.0000 entropy=17.5554 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 124200] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-626511.5 mean_steps=11.1
|
|
[Episode 124210] reward=-108533970.9 actor_loss=0.4014 critic_loss=89383592448.0000 entropy=17.5510 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 124220] reward=-117811247.7 actor_loss=0.3569 critic_loss=93352957602.3415 entropy=17.5544 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 124220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541549.5 mean_steps=14.1
|
|
[Episode 124230] reward=-120408616.2 actor_loss=0.2777 critic_loss=102162947303.2258 entropy=17.5576 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 124240] reward=-116885224.6 actor_loss=0.2928 critic_loss=102664723660.8000 entropy=17.5527 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 124240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-460883.8 mean_steps=13.6
|
|
[Episode 124250] reward=-118144183.8 actor_loss=0.2944 critic_loss=100470536419.5556 entropy=17.5633 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 124260] reward=-117227661.9 actor_loss=0.2827 critic_loss=97026057502.7200 entropy=17.5545 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 124260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589700.9 mean_steps=12.7
|
|
[Episode 124270] reward=-118725090.7 actor_loss=0.3254 critic_loss=110051804956.4444 entropy=17.5627 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 124280] reward=-115212737.6 actor_loss=0.3048 critic_loss=90385241156.2667 entropy=17.5590 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 124280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452025.4 mean_steps=14.4
|
|
[Episode 124290] reward=-118520293.6 actor_loss=0.3248 critic_loss=107332612587.5200 entropy=17.5752 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 124300] reward=-121988337.8 actor_loss=0.2620 critic_loss=113068157155.5556 entropy=17.5657 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 124300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-400986.3 mean_steps=14.1
|
|
[Episode 124310] reward=-108664723.8 actor_loss=0.3797 critic_loss=99043521629.0909 entropy=17.5716 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 124320] reward=-112905728.0 actor_loss=0.4368 critic_loss=98965923309.0370 entropy=17.5692 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 124320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-460238.3 mean_steps=16.1
|
|
[Episode 124330] reward=-123259609.9 actor_loss=0.4156 critic_loss=119716547098.9474 entropy=17.5763 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Episode 124340] reward=-117425929.4 actor_loss=0.2665 critic_loss=102458374436.5714 entropy=17.5755 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 124340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-473244.1 mean_steps=15.4
|
|
[Episode 124350] reward=-118509190.8 actor_loss=0.3846 critic_loss=98483261618.0870 entropy=17.5648 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 124360] reward=-115691166.3 actor_loss=0.2377 critic_loss=96985009766.4000 entropy=17.5606 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 124360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435675.4 mean_steps=14.9
|
|
[Episode 124370] reward=-118502464.7 actor_loss=0.4733 critic_loss=103182487552.0000 entropy=17.5672 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 124380] reward=-118085819.2 actor_loss=0.3389 critic_loss=100749607458.1333 entropy=17.5846 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 124380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529238.9 mean_steps=14.2
|
|
[Episode 124390] reward=-122398391.4 actor_loss=0.3193 critic_loss=112782152757.8947 entropy=17.5913 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 124400] reward=-118716465.0 actor_loss=0.2755 critic_loss=101923002368.0000 entropy=17.5922 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 124400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-382860.7 mean_steps=14.9
|
|
[Episode 124410] reward=-118880139.8 actor_loss=0.3104 critic_loss=100766720682.6667 entropy=17.6017 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 124420] reward=-115607242.5 actor_loss=0.3303 critic_loss=102718878606.2222 entropy=17.6004 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 124420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-461436.0 mean_steps=15.2
|
|
[Episode 124430] reward=-117709764.6 actor_loss=0.2891 critic_loss=101895208231.8222 entropy=17.5946 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 124440] reward=-118273235.9 actor_loss=0.3356 critic_loss=97108728993.6842 entropy=17.5843 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 124440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-651326.0 mean_steps=13.6
|
|
[Episode 124450] reward=-119283919.9 actor_loss=0.3664 critic_loss=99347244958.4762 entropy=17.5807 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 124460] reward=-116787826.2 actor_loss=0.3551 critic_loss=99101575070.4762 entropy=17.5869 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 124460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554634.3 mean_steps=13.3
|
|
[Episode 124470] reward=-120219182.4 actor_loss=0.2608 critic_loss=103785089706.6667 entropy=17.5791 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 124480] reward=-117155702.5 actor_loss=0.2994 critic_loss=101969878818.5946 entropy=17.5791 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 124480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-434767.3 mean_steps=16.1
|
|
[Episode 124490] reward=-122934726.1 actor_loss=0.1828 critic_loss=107003462724.2667 entropy=17.5866 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 124500] reward=-117310587.1 actor_loss=0.3142 critic_loss=102058352640.0000 entropy=17.5791 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 124500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447548.1 mean_steps=14.7
|
|
[Episode 124510] reward=-115665758.5 actor_loss=0.2867 critic_loss=96794414457.2632 entropy=17.5690 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 124520] reward=-115975674.5 actor_loss=0.4354 critic_loss=96779502636.5217 entropy=17.5633 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 124520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-503538.2 mean_steps=12.9
|
|
[Episode 124530] reward=-119554677.5 actor_loss=0.3546 critic_loss=99456229102.9333 entropy=17.5652 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 124540] reward=-112359572.1 actor_loss=0.3385 critic_loss=95677974016.0000 entropy=17.5565 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 124540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-372878.6 mean_steps=15.7
|
|
[Episode 124550] reward=-111196168.8 actor_loss=0.2563 critic_loss=90077041911.1724 entropy=17.5564 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 124560] reward=-112232567.4 actor_loss=0.3401 critic_loss=93493604227.8788 entropy=17.5641 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 124560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-579706.6 mean_steps=13.3
|
|
[Episode 124570] reward=-112039545.4 actor_loss=0.3220 critic_loss=98586634240.0000 entropy=17.5549 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 124580] reward=-114883683.8 actor_loss=0.3989 critic_loss=95380583765.3333 entropy=17.5495 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 124580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577731.2 mean_steps=13.2
|
|
[Episode 124590] reward=-118414588.9 actor_loss=0.3376 critic_loss=99939861913.6000 entropy=17.5555 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 124600] reward=-116474858.4 actor_loss=0.3354 critic_loss=98446652757.3333 entropy=17.5577 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 124600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446580.1 mean_steps=15.2
|
|
[Episode 124610] reward=-113315231.8 actor_loss=0.2030 critic_loss=94372137642.6667 entropy=17.5657 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 124620] reward=-115687144.1 actor_loss=0.2556 critic_loss=95815626328.2759 entropy=17.5554 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 124620] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-623592.0 mean_steps=10.9
|
|
[Episode 124630] reward=-117299249.7 actor_loss=0.3607 critic_loss=97287506944.0000 entropy=17.5632 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 124640] reward=-117702758.6 actor_loss=0.2400 critic_loss=101461260138.1463 entropy=17.5584 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 124640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523795.4 mean_steps=13.1
|
|
[Episode 124650] reward=-118761411.6 actor_loss=0.1764 critic_loss=99965313570.1333 entropy=17.5570 approx_kl=0.0103 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 124660] reward=-123177596.5 actor_loss=0.3329 critic_loss=104628776329.8462 entropy=17.5586 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 124660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-566929.5 mean_steps=11.7
|
|
[Episode 124670] reward=-116268717.6 actor_loss=0.2319 critic_loss=107206873088.0000 entropy=17.5622 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 124680] reward=-115711879.8 actor_loss=0.3631 critic_loss=98447535932.9524 entropy=17.5652 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 124680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-688245.8 mean_steps=12.2
|
|
[Episode 124690] reward=-114748895.7 actor_loss=0.2532 critic_loss=99083276561.0667 entropy=17.5618 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 124700] reward=-117582087.3 actor_loss=0.3171 critic_loss=98586744293.0526 entropy=17.5628 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 124700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-616762.3 mean_steps=11.8
|
|
[Episode 124710] reward=-116850647.0 actor_loss=0.3576 critic_loss=100129000301.7143 entropy=17.5734 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 124720] reward=-121413810.1 actor_loss=0.3661 critic_loss=109810526162.4889 entropy=17.5689 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 124720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-508467.7 mean_steps=14.6
|
|
[Episode 124730] reward=-112387307.6 actor_loss=0.3160 critic_loss=95384627694.3448 entropy=17.5647 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 124740] reward=-114518871.7 actor_loss=0.3176 critic_loss=101863756751.2381 entropy=17.5541 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 124740] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-641355.1 mean_steps=11.3
|
|
[Episode 124750] reward=-125618157.5 actor_loss=0.3342 critic_loss=114704956958.1176 entropy=17.5540 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 124760] reward=-121787807.3 actor_loss=0.2455 critic_loss=107261432217.6000 entropy=17.5480 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 124760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-632573.4 mean_steps=12.3
|
|
[Episode 124770] reward=-115878722.8 actor_loss=0.3652 critic_loss=98564837060.9231 entropy=17.5636 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 124780] reward=-117377282.1 actor_loss=0.3991 critic_loss=99953905531.8710 entropy=17.5698 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 124780] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-331960.9 mean_steps=16.5
|
|
[Episode 124790] reward=-114906038.5 actor_loss=0.2651 critic_loss=96300144932.5714 entropy=17.5650 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 124800] reward=-119532232.8 actor_loss=0.2341 critic_loss=101886512936.4211 entropy=17.5718 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 124800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-557941.2 mean_steps=12.6
|
|
[Episode 124810] reward=-113204438.2 actor_loss=0.3467 critic_loss=97364138276.5714 entropy=17.5683 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 124820] reward=-118694484.3 actor_loss=0.2503 critic_loss=103515983462.4000 entropy=17.5607 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 124820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571171.9 mean_steps=12.2
|
|
[Episode 124830] reward=-118609143.3 actor_loss=0.3457 critic_loss=103142571349.3333 entropy=17.5734 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 124840] reward=-115654687.2 actor_loss=0.4187 critic_loss=98725776952.8889 entropy=17.5662 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 124840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-436732.2 mean_steps=14.3
|
|
[Episode 124850] reward=-114022073.8 actor_loss=0.3081 critic_loss=100503200591.4483 entropy=17.5507 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 124860] reward=-116321127.4 actor_loss=0.3096 critic_loss=94537839502.2222 entropy=17.5677 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 124860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-505364.4 mean_steps=14.7
|
|
[Episode 124870] reward=-121122866.6 actor_loss=0.2512 critic_loss=99987814058.6667 entropy=17.5699 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 124880] reward=-115273416.7 actor_loss=0.4028 critic_loss=95431242865.7778 entropy=17.5569 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 124880] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-385232.5 mean_steps=16.6
|
|
[Episode 124890] reward=-118541292.3 actor_loss=0.3028 critic_loss=100905515963.7333 entropy=17.5635 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 124900] reward=-120351725.5 actor_loss=0.3222 critic_loss=109668626139.4286 entropy=17.5605 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 124900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535895.8 mean_steps=14.2
|
|
[Episode 124910] reward=-119953568.4 actor_loss=0.3321 critic_loss=104760395245.0370 entropy=17.5737 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 124920] reward=-115963821.8 actor_loss=0.2497 critic_loss=99046899712.0000 entropy=17.5733 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 124920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-393890.4 mean_steps=14.4
|
|
[Episode 124930] reward=-113422707.2 actor_loss=0.2661 critic_loss=95813760159.2889 entropy=17.5738 approx_kl=0.0108 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 124940] reward=-107846195.4 actor_loss=0.4081 critic_loss=91228767027.2000 entropy=17.5786 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 124940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470056.7 mean_steps=14.6
|
|
[Episode 124950] reward=-118835844.4 actor_loss=0.3616 critic_loss=99571840000.0000 entropy=17.5790 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 124960] reward=-121573800.0 actor_loss=0.2317 critic_loss=100024617642.6667 entropy=17.5769 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 124960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-545917.5 mean_steps=14.7
|
|
[Episode 124970] reward=-114278142.0 actor_loss=0.4568 critic_loss=96132341248.0000 entropy=17.5736 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1523 front_blocked=0
|
|
[Episode 124980] reward=-119466588.1 actor_loss=0.2422 critic_loss=102676204657.7778 entropy=17.5653 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 124980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-627722.8 mean_steps=13.8
|
|
[Episode 124990] reward=-123200269.9 actor_loss=0.2569 critic_loss=162680996470.1538 entropy=17.5677 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 125000] reward=-116860712.7 actor_loss=0.3230 critic_loss=100801189068.8000 entropy=17.5814 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 125000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537671.2 mean_steps=13.3
|
|
[Episode 125010] reward=-116384710.2 actor_loss=0.3270 critic_loss=98871606178.9091 entropy=17.5713 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 125020] reward=-118669238.5 actor_loss=0.3227 critic_loss=100950202449.9200 entropy=17.5794 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 125020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434648.8 mean_steps=14.2
|
|
[Episode 125030] reward=-118947477.0 actor_loss=0.2570 critic_loss=104799995904.0000 entropy=17.5787 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 125040] reward=-114465206.0 actor_loss=0.2569 critic_loss=94608556812.1905 entropy=17.5825 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 125040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493117.1 mean_steps=14.6
|
|
[Episode 125050] reward=-115186860.4 actor_loss=0.3178 critic_loss=96402877741.1765 entropy=17.5757 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 125060] reward=-125904086.7 actor_loss=0.1835 critic_loss=109328674600.4211 entropy=17.5825 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 125060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575256.0 mean_steps=13.7
|
|
[Episode 125070] reward=-122424678.0 actor_loss=0.1939 critic_loss=107104969303.4146 entropy=17.5983 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 125080] reward=-116424581.8 actor_loss=0.2777 critic_loss=97624111910.7879 entropy=17.5932 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 125080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463642.9 mean_steps=14.7
|
|
[Episode 125090] reward=-118093698.0 actor_loss=0.3123 critic_loss=108201024716.8000 entropy=17.5922 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 125100] reward=-120190171.6 actor_loss=0.2052 critic_loss=99962968145.9200 entropy=17.6031 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 125100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558513.3 mean_steps=13.5
|
|
[Episode 125110] reward=-120855389.2 actor_loss=0.2249 critic_loss=100873753122.1333 entropy=17.5925 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 125120] reward=-121469752.2 actor_loss=0.2168 critic_loss=128772487577.6000 entropy=17.5896 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 125120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-381470.1 mean_steps=15.7
|
|
[Episode 125130] reward=-122118812.1 actor_loss=0.3527 critic_loss=102722983168.0000 entropy=17.5980 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 125140] reward=-122772264.2 actor_loss=0.2941 critic_loss=101496304071.1111 entropy=17.5977 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 125140] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-576832.6 mean_steps=11.4
|
|
[Episode 125150] reward=-115255666.9 actor_loss=0.3257 critic_loss=96395460251.8261 entropy=17.6057 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 125160] reward=-119941917.1 actor_loss=0.3142 critic_loss=98563107288.6154 entropy=17.6063 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 125160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399765.7 mean_steps=15.2
|
|
[Episode 125170] reward=-117977341.8 actor_loss=0.3131 critic_loss=99371527606.8571 entropy=17.6212 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 125180] reward=-116558721.0 actor_loss=0.3209 critic_loss=96142661789.5385 entropy=17.6217 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 125180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-499571.1 mean_steps=14.8
|
|
[Episode 125190] reward=-118454847.5 actor_loss=0.2520 critic_loss=103656736190.3590 entropy=17.6215 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 125200] reward=-116884151.7 actor_loss=0.4045 critic_loss=96649519816.3478 entropy=17.6189 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 125200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470582.8 mean_steps=13.8
|
|
[Episode 125210] reward=-114599959.6 actor_loss=0.2205 critic_loss=97422273682.2857 entropy=17.6234 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 125220] reward=-118076179.5 actor_loss=0.2496 critic_loss=101399206972.2353 entropy=17.6357 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 125220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503962.2 mean_steps=13.8
|
|
[Episode 125230] reward=-124084845.9 actor_loss=0.2464 critic_loss=107862212111.5152 entropy=17.6246 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 125240] reward=-118749965.9 actor_loss=0.2075 critic_loss=103044081371.4286 entropy=17.6291 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 125240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539826.7 mean_steps=12.9
|
|
[Episode 125250] reward=-122893625.9 actor_loss=0.2922 critic_loss=108133524626.2857 entropy=17.6187 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 125260] reward=-120137614.5 actor_loss=0.3162 critic_loss=102252275939.5556 entropy=17.6187 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 125260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-459640.1 mean_steps=13.6
|
|
[Episode 125270] reward=-114942666.5 actor_loss=0.3070 critic_loss=96311562825.1429 entropy=17.6066 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 125280] reward=-120459320.5 actor_loss=0.2796 critic_loss=103585845052.9524 entropy=17.5976 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 125280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-465754.1 mean_steps=12.8
|
|
[Episode 125290] reward=-116573506.0 actor_loss=0.3333 critic_loss=94797470319.3044 entropy=17.5873 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 125300] reward=-117068277.6 actor_loss=0.3862 critic_loss=99668305812.2105 entropy=17.5872 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 125300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-523784.5 mean_steps=15.0
|
|
[Episode 125310] reward=-116716428.6 actor_loss=0.2795 critic_loss=105555894272.0000 entropy=17.5844 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 125320] reward=-112477305.5 actor_loss=0.2559 critic_loss=95183309062.5641 entropy=17.5916 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 125320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489439.6 mean_steps=13.8
|
|
[Episode 125330] reward=-119373862.4 actor_loss=0.2438 critic_loss=99338537642.6667 entropy=17.5910 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 125340] reward=-118547478.5 actor_loss=0.2025 critic_loss=103226225757.0909 entropy=17.5908 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 125340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-612854.4 mean_steps=12.6
|
|
[Episode 125350] reward=-121505225.9 actor_loss=0.2843 critic_loss=108077786180.2667 entropy=17.5848 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 125360] reward=-116539437.4 actor_loss=0.3351 critic_loss=101516416614.4000 entropy=17.5815 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 125360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-578874.4 mean_steps=11.7
|
|
[Episode 125370] reward=-111040218.5 actor_loss=0.3393 critic_loss=101106259148.8000 entropy=17.5779 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 125380] reward=-112054541.5 actor_loss=0.2939 critic_loss=93331848912.5926 entropy=17.5773 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 125380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453652.3 mean_steps=14.2
|
|
[Episode 125390] reward=-125261557.6 actor_loss=0.2636 critic_loss=111667839249.0667 entropy=17.5718 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 125400] reward=-116767620.0 actor_loss=0.3148 critic_loss=97042857487.5152 entropy=17.5775 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 125400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-414888.4 mean_steps=16.3
|
|
[Episode 125410] reward=-119559746.0 actor_loss=0.2938 critic_loss=102987076446.3158 entropy=17.5789 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 125420] reward=-115405880.6 actor_loss=0.2517 critic_loss=96162465319.3846 entropy=17.5753 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 125420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410638.1 mean_steps=15.6
|
|
[Episode 125430] reward=-114952104.4 actor_loss=0.4277 critic_loss=103680040656.5926 entropy=17.5703 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 125440] reward=-122535570.3 actor_loss=0.1930 critic_loss=107548177671.3143 entropy=17.5728 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 125440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486894.4 mean_steps=14.8
|
|
[Episode 125450] reward=-117776506.0 actor_loss=0.1928 critic_loss=98890290661.0526 entropy=17.5739 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 125460] reward=-116584078.8 actor_loss=0.3936 critic_loss=129965654298.4828 entropy=17.5862 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 125460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481701.7 mean_steps=14.8
|
|
[Episode 125470] reward=-116338968.2 actor_loss=0.4691 critic_loss=118325434413.5111 entropy=17.5889 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 125480] reward=-114223674.1 actor_loss=0.2905 critic_loss=98294922960.5926 entropy=17.5848 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 125480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508218.5 mean_steps=14.3
|
|
[Episode 125490] reward=-118427855.1 actor_loss=0.3753 critic_loss=98640570906.9474 entropy=17.5813 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 125500] reward=-115344241.4 actor_loss=0.2994 critic_loss=99711016960.0000 entropy=17.5700 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 125500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462856.6 mean_steps=14.5
|
|
[Episode 125510] reward=-113028697.5 actor_loss=0.2796 critic_loss=94134971890.8718 entropy=17.5757 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 125520] reward=-118289887.0 actor_loss=0.2927 critic_loss=100106852194.4615 entropy=17.5836 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 125520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-543655.4 mean_steps=12.2
|
|
[Episode 125530] reward=-121714797.2 actor_loss=0.2467 critic_loss=105099794659.5556 entropy=17.5711 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 125540] reward=-116437502.3 actor_loss=0.2728 critic_loss=98542637966.2222 entropy=17.5756 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 125540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549178.5 mean_steps=13.4
|
|
[Episode 125550] reward=-140760237.6 actor_loss=0.2818 critic_loss=1774659971754.6667 entropy=17.5735 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 125560] reward=-114055139.2 actor_loss=0.2840 critic_loss=88752089889.3913 entropy=17.5775 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 125560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-652350.3 mean_steps=14.2
|
|
[Episode 125570] reward=-117610874.0 actor_loss=0.2551 critic_loss=100821125120.0000 entropy=17.5704 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 125580] reward=-115616861.8 actor_loss=0.3081 critic_loss=101296027420.4444 entropy=17.5642 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 125580] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-383990.8 mean_steps=16.9
|
|
[Episode 125590] reward=-111925224.2 actor_loss=0.3709 critic_loss=94836056064.0000 entropy=17.5629 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 125600] reward=-113641262.9 actor_loss=0.2254 critic_loss=95884403624.2286 entropy=17.5650 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 125600] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-626578.2 mean_steps=10.9
|
|
[Episode 125610] reward=-119370642.3 actor_loss=0.1947 critic_loss=96129925832.3478 entropy=17.5637 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 125620] reward=-115329287.4 actor_loss=0.3062 critic_loss=106326051108.5714 entropy=17.5492 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 125620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436760.8 mean_steps=14.9
|
|
[Episode 125630] reward=-119846283.9 actor_loss=0.2050 critic_loss=104770586396.4444 entropy=17.5486 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 125640] reward=-118694189.8 actor_loss=0.2862 critic_loss=100351824860.6897 entropy=17.5322 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 125640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397322.7 mean_steps=14.8
|
|
[Episode 125650] reward=-120718957.9 actor_loss=0.6024 critic_loss=163261561241.6000 entropy=17.5325 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 125660] reward=-120884649.5 actor_loss=0.3634 critic_loss=102967802013.5385 entropy=17.5459 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 125660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533675.7 mean_steps=14.2
|
|
[Episode 125670] reward=-121893844.0 actor_loss=0.3088 critic_loss=103760685899.2941 entropy=17.5279 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 125680] reward=-116727601.3 actor_loss=0.3561 critic_loss=96937765102.1395 entropy=17.5309 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 125680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-520163.4 mean_steps=14.8
|
|
[Episode 125690] reward=-117051489.2 actor_loss=0.2238 critic_loss=99361848069.6889 entropy=17.5183 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 125700] reward=-528871287.4 actor_loss=17.1662 critic_loss=516778679179673.6250 entropy=17.5159 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 125700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-441880.6 mean_steps=15.8
|
|
[Episode 125710] reward=-118454784.9 actor_loss=0.2869 critic_loss=101060776680.7273 entropy=17.5081 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 125720] reward=-109070697.1 actor_loss=0.4095 critic_loss=93335041445.6471 entropy=17.4971 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 125720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446849.7 mean_steps=14.7
|
|
[Episode 125730] reward=-115342704.0 actor_loss=0.3482 critic_loss=95067470879.0303 entropy=17.4901 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 125740] reward=-118403155.3 actor_loss=0.2798 critic_loss=97981487513.6000 entropy=17.4941 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 125740] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-658376.7 mean_steps=10.4
|
|
[Episode 125750] reward=-114755440.6 actor_loss=0.3202 critic_loss=92526695765.3333 entropy=17.4993 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 125760] reward=-118957463.7 actor_loss=0.2749 critic_loss=101337370214.4000 entropy=17.5068 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 125760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563788.4 mean_steps=12.6
|
|
[Episode 125770] reward=-119113955.8 actor_loss=0.2716 critic_loss=100772996528.3556 entropy=17.5043 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 125780] reward=-115031634.5 actor_loss=0.3187 critic_loss=100782795798.7556 entropy=17.4972 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 125780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463314.8 mean_steps=15.8
|
|
[Episode 125790] reward=-114376781.2 actor_loss=0.2883 critic_loss=93186610705.6552 entropy=17.4895 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 125800] reward=-118607565.6 actor_loss=0.2439 critic_loss=97132418692.7407 entropy=17.4874 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 125800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463235.8 mean_steps=15.6
|
|
[Episode 125810] reward=-113458186.1 actor_loss=0.2653 critic_loss=94503111338.6667 entropy=17.5004 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 125820] reward=-117542243.7 actor_loss=0.2345 critic_loss=100029878695.7241 entropy=17.5023 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 125820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471719.8 mean_steps=14.9
|
|
[Episode 125830] reward=-116225869.9 actor_loss=0.3510 critic_loss=98422065081.3793 entropy=17.5172 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 125840] reward=-122054641.9 actor_loss=0.3123 critic_loss=105741482963.4783 entropy=17.5168 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 125840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-398863.6 mean_steps=14.7
|
|
[Episode 125850] reward=-228527584.3 actor_loss=0.9185 critic_loss=47635976758418.2891 entropy=17.5209 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 125860] reward=-119042002.0 actor_loss=0.3103 critic_loss=97561991213.5111 entropy=17.5286 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 125860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-397476.6 mean_steps=15.8
|
|
[Episode 125870] reward=-121933137.3 actor_loss=0.2944 critic_loss=102378324399.1579 entropy=17.5245 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 125880] reward=-118331476.6 actor_loss=0.3438 critic_loss=96915508272.7619 entropy=17.4953 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 125880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537102.9 mean_steps=13.8
|
|
[Episode 125890] reward=-119794810.2 actor_loss=0.3211 critic_loss=98812156495.6444 entropy=17.5078 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 125900] reward=-114920783.5 actor_loss=0.2965 critic_loss=98637046052.5714 entropy=17.4883 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 125900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526119.1 mean_steps=13.1
|
|
[Episode 125910] reward=-114824327.7 actor_loss=0.2782 critic_loss=91106755407.4483 entropy=17.4843 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 125920] reward=-122744130.3 actor_loss=0.1515 critic_loss=157340819456.0000 entropy=17.4974 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 125920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481372.2 mean_steps=14.0
|
|
[Episode 125930] reward=-123018432.5 actor_loss=0.2385 critic_loss=107815948603.0769 entropy=17.5020 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 125940] reward=-162342301.4 actor_loss=0.4539 critic_loss=8334171844235.6367 entropy=17.4956 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 125940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530524.3 mean_steps=13.4
|
|
[Episode 125950] reward=-115402435.3 actor_loss=0.3104 critic_loss=105768092392.7273 entropy=17.4979 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 125960] reward=-118181916.5 actor_loss=0.3584 critic_loss=98565458784.7111 entropy=17.4994 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 125960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478898.4 mean_steps=14.0
|
|
[Episode 125970] reward=-118585108.8 actor_loss=0.2650 critic_loss=93743739699.2000 entropy=17.4952 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 125980] reward=-112847453.6 actor_loss=0.3689 critic_loss=92820165245.1555 entropy=17.4900 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 125980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-409837.8 mean_steps=13.4
|
|
[Episode 125990] reward=-117425283.1 actor_loss=0.3290 critic_loss=102728810678.0444 entropy=17.5048 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 126000] reward=-117557286.2 actor_loss=0.2629 critic_loss=97738766745.6000 entropy=17.5034 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 126000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-585541.3 mean_steps=13.4
|
|
[Episode 126010] reward=-114182150.4 actor_loss=0.4114 critic_loss=92222257561.6000 entropy=17.5125 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 126020] reward=-118556982.4 actor_loss=0.2610 critic_loss=95247128646.6207 entropy=17.5310 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 126020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557686.6 mean_steps=13.6
|
|
[Episode 126030] reward=-119800753.9 actor_loss=0.3076 critic_loss=105095038098.2857 entropy=17.5333 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 126040] reward=-118197926.5 actor_loss=0.2576 critic_loss=96559136768.0000 entropy=17.5326 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 126040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-552077.0 mean_steps=12.6
|
|
[Episode 126050] reward=-121106428.8 actor_loss=0.2888 critic_loss=100421012945.4545 entropy=17.5389 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 126060] reward=-117375373.0 actor_loss=0.2852 critic_loss=102031542954.6667 entropy=17.5330 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 126060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378100.5 mean_steps=16.1
|
|
[Episode 126070] reward=-114379407.8 actor_loss=0.3202 critic_loss=92097093632.0000 entropy=17.5225 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 126080] reward=-118043055.2 actor_loss=0.3214 critic_loss=103336406861.9130 entropy=17.5155 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 126080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480141.4 mean_steps=13.9
|
|
[Episode 126090] reward=-116041875.3 actor_loss=0.2923 critic_loss=99040059192.1951 entropy=17.5122 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 126100] reward=-125230108.8 actor_loss=0.2775 critic_loss=222585256345.6000 entropy=17.5029 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 126100] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-630980.3 mean_steps=11.7
|
|
[Episode 126110] reward=-120013304.6 actor_loss=0.2999 critic_loss=100003340470.0444 entropy=17.5044 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 126120] reward=-225258616.1 actor_loss=0.2395 critic_loss=37628288265238.7578 entropy=17.5006 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 126120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390255.7 mean_steps=15.1
|
|
[Episode 126130] reward=-116534361.7 actor_loss=0.3154 critic_loss=103517860522.6667 entropy=17.4946 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 126140] reward=-119754263.4 actor_loss=0.2955 critic_loss=103962125000.3478 entropy=17.5011 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 126140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536094.4 mean_steps=13.3
|
|
[Episode 126150] reward=-117875481.5 actor_loss=0.3181 critic_loss=102339188209.3714 entropy=17.5000 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 126160] reward=-115827327.8 actor_loss=0.3186 critic_loss=103359566097.0667 entropy=17.4902 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 126160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-548615.5 mean_steps=12.4
|
|
[Episode 126170] reward=-116885308.3 actor_loss=0.1823 critic_loss=110532749312.0000 entropy=17.5012 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 126180] reward=-114188680.0 actor_loss=0.3015 critic_loss=107708485106.8718 entropy=17.5269 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 126180] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-547805.9 mean_steps=11.2
|
|
[Episode 126190] reward=-117427851.5 actor_loss=0.3276 critic_loss=93569886344.5333 entropy=17.5256 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 126200] reward=-656642752.4 actor_loss=0.8459 critic_loss=824812725542712.2500 entropy=17.5412 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 126200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545977.9 mean_steps=13.4
|
|
[Episode 126210] reward=-123188804.7 actor_loss=0.1849 critic_loss=115688406608.8421 entropy=17.5437 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 126220] reward=-113906345.0 actor_loss=0.2696 critic_loss=97763117145.0435 entropy=17.5368 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 126220] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-393901.5 mean_steps=16.9
|
|
[Episode 126230] reward=-112521709.0 actor_loss=0.2719 critic_loss=95379816448.0000 entropy=17.5413 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 126240] reward=-118463743.3 actor_loss=0.2744 critic_loss=103463549114.1818 entropy=17.5601 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 126240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519625.4 mean_steps=14.1
|
|
[Episode 126250] reward=-111246852.6 actor_loss=0.2807 critic_loss=108634008371.2000 entropy=17.5577 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 126260] reward=-114828377.8 actor_loss=0.2951 critic_loss=96851732532.5128 entropy=17.5735 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 126260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-624527.8 mean_steps=13.4
|
|
[Episode 126270] reward=-120402108.2 actor_loss=0.2061 critic_loss=109049865452.3077 entropy=17.5755 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 126280] reward=-126476346.3 actor_loss=0.6647 critic_loss=564241875688.7273 entropy=17.5926 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 126280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551610.9 mean_steps=13.2
|
|
[Episode 126290] reward=-119280016.8 actor_loss=0.3314 critic_loss=106581106232.8889 entropy=17.5703 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 126300] reward=-137456781.5 actor_loss=0.3684 critic_loss=2106447473322.6667 entropy=17.5896 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 126300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-459048.3 mean_steps=13.3
|
|
[Episode 126310] reward=-113031617.7 actor_loss=0.2145 critic_loss=100007829248.0000 entropy=17.5986 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 126320] reward=-880783126.8 actor_loss=0.5583 critic_loss=1642811609958172.5000 entropy=17.6171 approx_kl=0.0023 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 126320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-509465.4 mean_steps=13.2
|
|
[Episode 126330] reward=-151508316.3 actor_loss=0.6107 critic_loss=5638782521617.0664 entropy=17.6172 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 126340] reward=-408577757.4 actor_loss=0.3544 critic_loss=211223956020701.8750 entropy=17.6190 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 126340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553981.2 mean_steps=14.1
|
|
[Episode 126350] reward=-116767085.0 actor_loss=0.2246 critic_loss=120044400230.4000 entropy=17.6205 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 126360] reward=-112804484.6 actor_loss=0.2608 critic_loss=98902314908.0976 entropy=17.6288 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 126360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-375508.3 mean_steps=15.9
|
|
[Episode 126370] reward=-115799915.0 actor_loss=0.2510 critic_loss=98205373599.2889 entropy=17.6267 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 126380] reward=-115942354.0 actor_loss=0.3119 critic_loss=97733647473.7778 entropy=17.6235 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 126380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-586051.4 mean_steps=13.3
|
|
[Episode 126390] reward=-110984923.4 actor_loss=0.3396 critic_loss=93517412882.9630 entropy=17.6251 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 126400] reward=-116966300.1 actor_loss=0.2509 critic_loss=101046912978.4889 entropy=17.6131 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 126400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-556038.2 mean_steps=12.4
|
|
[Episode 126410] reward=-118930882.8 actor_loss=0.2292 critic_loss=107058809696.7111 entropy=17.6126 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 126420] reward=-120045764.1 actor_loss=0.3180 critic_loss=110627290018.9091 entropy=17.6087 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 126420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551173.9 mean_steps=13.2
|
|
[Episode 126430] reward=-120840244.1 actor_loss=0.2751 critic_loss=105010391147.7895 entropy=17.6093 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 126440] reward=-112110805.9 actor_loss=0.3853 critic_loss=122201672265.1429 entropy=17.6165 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 126440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505036.4 mean_steps=14.2
|
|
[Episode 126450] reward=-108997132.8 actor_loss=0.3788 critic_loss=117877660110.4516 entropy=17.6211 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 126460] reward=-116820543.2 actor_loss=0.2606 critic_loss=99989522350.0800 entropy=17.6252 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 126460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456495.9 mean_steps=14.7
|
|
[Episode 126470] reward=-119649695.7 actor_loss=0.2995 critic_loss=104148870758.4000 entropy=17.6333 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 126480] reward=-117221040.2 actor_loss=0.2246 critic_loss=100443952287.2889 entropy=17.6174 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 126480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567310.6 mean_steps=12.6
|
|
[Episode 126490] reward=-116624817.4 actor_loss=0.2462 critic_loss=105388824037.0526 entropy=17.6190 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 126500] reward=-182403925.6 actor_loss=0.4379 critic_loss=18697582154874.8789 entropy=17.6231 approx_kl=0.0039 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 126500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499922.0 mean_steps=14.3
|
|
[Episode 126510] reward=-111275117.2 actor_loss=0.3210 critic_loss=92869681479.6800 entropy=17.6215 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 126520] reward=-115819368.4 actor_loss=0.3314 critic_loss=101322741805.5111 entropy=17.6492 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 126520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-374098.6 mean_steps=16.1
|
|
[Episode 126530] reward=-202618858.1 actor_loss=0.4191 critic_loss=24802991538176.0000 entropy=17.6414 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 126540] reward=-115248745.2 actor_loss=0.2897 critic_loss=107778029940.3636 entropy=17.6479 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 126540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510050.5 mean_steps=13.2
|
|
[Episode 126550] reward=-114975810.8 actor_loss=0.2313 critic_loss=100172832103.7838 entropy=17.6527 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 126560] reward=-113927992.6 actor_loss=0.2508 critic_loss=95497105740.1081 entropy=17.6693 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 126560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493799.4 mean_steps=14.7
|
|
[Episode 126570] reward=-118131159.7 actor_loss=0.2591 critic_loss=96795339854.7692 entropy=17.6493 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 126580] reward=-118249282.4 actor_loss=0.3331 critic_loss=101157608015.6444 entropy=17.6685 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 126580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551358.7 mean_steps=13.7
|
|
[Episode 126590] reward=-112648545.8 actor_loss=0.2799 critic_loss=93293548157.1555 entropy=17.6754 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 126600] reward=-119007542.6 actor_loss=0.3037 critic_loss=106429752088.7742 entropy=17.6835 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 126600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-420355.4 mean_steps=14.1
|
|
[Episode 126610] reward=-116534878.8 actor_loss=0.3488 critic_loss=100459986944.0000 entropy=17.6827 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 126620] reward=-115205656.4 actor_loss=0.1694 critic_loss=95112057357.1282 entropy=17.6891 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 126620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-401438.0 mean_steps=15.2
|
|
[Episode 126630] reward=-115596390.6 actor_loss=0.2299 critic_loss=103023520901.5652 entropy=17.6947 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 126640] reward=-119953163.9 actor_loss=0.2814 critic_loss=106387013947.0769 entropy=17.6938 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 126640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-635681.3 mean_steps=12.4
|
|
[Episode 126650] reward=-124288778.0 actor_loss=0.3385 critic_loss=108034454394.4348 entropy=17.7009 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 126660] reward=-119066403.7 actor_loss=0.2829 critic_loss=106565668955.0222 entropy=17.6970 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 126660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-512263.1 mean_steps=15.3
|
|
[Episode 126670] reward=-118387035.8 actor_loss=0.2854 critic_loss=104549142186.6667 entropy=17.7107 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 126680] reward=-117266010.2 actor_loss=0.2546 critic_loss=95416361275.0769 entropy=17.7166 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 126680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397470.0 mean_steps=15.3
|
|
[Episode 126690] reward=-115494330.8 actor_loss=0.3252 critic_loss=96447759616.0000 entropy=17.7010 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 126700] reward=-118970581.1 actor_loss=0.3218 critic_loss=99203774084.7407 entropy=17.6992 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 126700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511118.3 mean_steps=14.0
|
|
[Episode 126710] reward=-120001162.7 actor_loss=0.2616 critic_loss=105575386112.0000 entropy=17.6934 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 126720] reward=-339210489.2 actor_loss=0.6077 critic_loss=159426543056759.4688 entropy=17.6987 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 126720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544439.2 mean_steps=13.2
|
|
[Episode 126730] reward=-117980128.1 actor_loss=0.2860 critic_loss=106547012148.9655 entropy=17.7170 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 126740] reward=-118204297.8 actor_loss=0.3218 critic_loss=101903013205.3333 entropy=17.7199 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 126740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-372321.0 mean_steps=15.7
|
|
[Episode 126750] reward=-250877216.7 actor_loss=0.2554 critic_loss=57818849086304.7109 entropy=17.7372 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 126760] reward=-137690010.4 actor_loss=0.1919 critic_loss=1690972940190.4761 entropy=17.7519 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 126760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499626.8 mean_steps=13.8
|
|
[Episode 126770] reward=-117223672.3 actor_loss=0.3179 critic_loss=104706053059.7647 entropy=17.7715 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 126780] reward=-129166318.9 actor_loss=0.2432 critic_loss=1115046712898.7827 entropy=17.7678 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 126780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-339905.9 mean_steps=14.8
|
|
[Episode 126790] reward=-119447814.4 actor_loss=0.2780 critic_loss=107321006307.5556 entropy=17.7701 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 126800] reward=-121361857.4 actor_loss=0.2804 critic_loss=110489123225.6000 entropy=17.7867 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 126800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-2080691.4 mean_steps=15.2
|
|
[Episode 126810] reward=-114884382.8 actor_loss=0.2580 critic_loss=100319143377.4545 entropy=17.7958 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 126820] reward=-144384560.6 actor_loss=0.5700 critic_loss=3681836204032.0000 entropy=17.8004 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 126820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-404704.9 mean_steps=15.5
|
|
[Episode 126830] reward=-111159108.7 actor_loss=0.3029 critic_loss=90501624081.0667 entropy=17.8123 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 126840] reward=-117552754.3 actor_loss=0.3197 critic_loss=102281714136.6154 entropy=17.7910 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 126840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462452.4 mean_steps=15.3
|
|
[Episode 126850] reward=-109457040.7 actor_loss=0.3724 critic_loss=96723832194.8445 entropy=17.7971 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 126860] reward=-115172905.7 actor_loss=0.3348 critic_loss=93253778181.6889 entropy=17.7780 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 126860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580666.5 mean_steps=12.6
|
|
[Episode 126870] reward=-119517068.3 actor_loss=0.3046 critic_loss=99506724499.9111 entropy=17.7700 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 126880] reward=-112427012.7 actor_loss=0.3847 critic_loss=94713593656.1951 entropy=17.7498 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 126880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559532.0 mean_steps=12.4
|
|
[Episode 126890] reward=-119962243.9 actor_loss=0.2824 critic_loss=99428074200.1778 entropy=17.7741 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 126900] reward=-119424328.2 actor_loss=0.2727 critic_loss=100864056251.7333 entropy=17.7713 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 126900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368037.9 mean_steps=15.6
|
|
[Episode 126910] reward=-115407848.9 actor_loss=0.2351 critic_loss=95897589467.4286 entropy=17.7615 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 126920] reward=-121455782.2 actor_loss=0.1934 critic_loss=103592129331.2000 entropy=17.7419 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 126920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-541424.8 mean_steps=14.9
|
|
[Episode 126930] reward=-115369686.1 actor_loss=0.2622 critic_loss=99424994613.5814 entropy=17.7314 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 126940] reward=-117100934.1 actor_loss=0.3090 critic_loss=103436254321.7778 entropy=17.7056 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 126940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528309.2 mean_steps=12.9
|
|
[Episode 126950] reward=-119300537.7 actor_loss=0.5503 critic_loss=105188668881.4545 entropy=17.7057 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 126960] reward=-122768768.2 actor_loss=0.3191 critic_loss=334338206026.3226 entropy=17.7058 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 126960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-475982.6 mean_steps=15.3
|
|
[Episode 126970] reward=-118770138.1 actor_loss=0.3761 critic_loss=107570298515.9111 entropy=17.7011 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 126980] reward=-119886467.5 actor_loss=0.2432 critic_loss=104692992434.4242 entropy=17.6946 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 126980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414204.0 mean_steps=15.7
|
|
[Episode 126990] reward=-113760259.8 actor_loss=0.3750 critic_loss=102957889308.4444 entropy=17.6957 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 127000] reward=-117784093.4 actor_loss=0.2481 critic_loss=104369338231.4667 entropy=17.6878 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 127000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574961.8 mean_steps=13.2
|
|
[Episode 127010] reward=-120796459.9 actor_loss=0.2236 critic_loss=106267446112.7111 entropy=17.6961 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 127020] reward=-118845313.6 actor_loss=0.3667 critic_loss=103194303419.7333 entropy=17.6812 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 127020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419905.5 mean_steps=15.2
|
|
[Episode 127030] reward=-117789312.4 actor_loss=0.3091 critic_loss=101747629351.8222 entropy=17.6702 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 127040] reward=-116985953.7 actor_loss=0.2683 critic_loss=97066497911.4667 entropy=17.6725 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 127040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-460462.7 mean_steps=15.4
|
|
[Episode 127050] reward=-118354888.7 actor_loss=0.2616 critic_loss=98650800609.8824 entropy=17.6742 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 127060] reward=-115861389.4 actor_loss=0.2499 critic_loss=99637348682.3226 entropy=17.6667 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 127060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600201.5 mean_steps=12.6
|
|
[Episode 127070] reward=-114794857.6 actor_loss=0.2739 critic_loss=97269461447.1111 entropy=17.6549 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 127080] reward=-115409223.7 actor_loss=0.2784 critic_loss=93261420407.4667 entropy=17.6373 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 127080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444429.1 mean_steps=15.2
|
|
[Episode 127090] reward=-131919457.1 actor_loss=0.2640 critic_loss=1080170763605.3334 entropy=17.6495 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 127100] reward=-124934164.2 actor_loss=0.1794 critic_loss=105606782785.4884 entropy=17.6446 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 127100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-512338.8 mean_steps=14.7
|
|
[Episode 127110] reward=-117511613.4 actor_loss=0.3137 critic_loss=103328374784.0000 entropy=17.6463 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 127120] reward=-118545099.5 actor_loss=0.3025 critic_loss=106454994124.8000 entropy=17.6346 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 127120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427014.8 mean_steps=15.2
|
|
[Episode 127130] reward=-124033470.1 actor_loss=0.1921 critic_loss=110198915800.1778 entropy=17.6013 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 127140] reward=-114467995.7 actor_loss=0.2357 critic_loss=110570501461.3333 entropy=17.5996 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 127140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-456271.1 mean_steps=13.6
|
|
[Episode 127150] reward=-114250359.0 actor_loss=0.3233 critic_loss=105751411461.6889 entropy=17.5901 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 127160] reward=-116138079.5 actor_loss=0.3372 critic_loss=103773667958.1538 entropy=17.5841 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 127160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548654.1 mean_steps=13.4
|
|
[Episode 127170] reward=-117953027.8 actor_loss=0.2975 critic_loss=93687375644.4444 entropy=17.5907 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 127180] reward=-1064512611.2 actor_loss=0.3109 critic_loss=1957186597633684.0000 entropy=17.5820 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 127180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540725.3 mean_steps=13.3
|
|
[Episode 127190] reward=-121368426.1 actor_loss=0.2868 critic_loss=124280662698.6667 entropy=17.5831 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 127200] reward=-236879945.1 actor_loss=0.2118 critic_loss=49355477291281.0703 entropy=17.5842 approx_kl=0.0013 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Eval 127200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500270.8 mean_steps=14.8
|
|
[Episode 127210] reward=-133776411.0 actor_loss=0.2723 critic_loss=1681787530444.8000 entropy=17.5935 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 127220] reward=-122439385.7 actor_loss=0.3605 critic_loss=343601137152.0000 entropy=17.6166 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 127220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510587.5 mean_steps=13.4
|
|
[Episode 127230] reward=-118120388.4 actor_loss=0.1708 critic_loss=100730516457.2444 entropy=17.6290 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 127240] reward=-114369684.6 actor_loss=0.3354 critic_loss=95438783785.2903 entropy=17.6260 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 127240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-407156.0 mean_steps=15.2
|
|
[Episode 127250] reward=-119045052.7 actor_loss=0.2365 critic_loss=99717407857.7778 entropy=17.6168 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 127260] reward=-113597809.1 actor_loss=0.2575 critic_loss=90180290696.5333 entropy=17.6121 approx_kl=0.0107 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 127260] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-266898.4 mean_steps=17.6
|
|
[Episode 127270] reward=-116486229.1 actor_loss=0.2768 critic_loss=101550885156.5714 entropy=17.6137 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 127280] reward=-115635368.8 actor_loss=0.2770 critic_loss=99875804610.5600 entropy=17.6156 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 127280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-589931.9 mean_steps=13.4
|
|
[Episode 127290] reward=-115529872.6 actor_loss=0.3530 critic_loss=107643262293.3333 entropy=17.6009 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 127300] reward=-116622824.6 actor_loss=0.3578 critic_loss=103654702011.7333 entropy=17.5929 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 127300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511324.6 mean_steps=14.1
|
|
[Episode 127310] reward=-117923736.7 actor_loss=0.3072 critic_loss=97480972697.6000 entropy=17.5753 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 127320] reward=-114775813.0 actor_loss=0.3127 critic_loss=94396392474.2564 entropy=17.5826 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 127320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-650016.9 mean_steps=11.8
|
|
[Episode 127330] reward=-122105637.8 actor_loss=0.3544 critic_loss=108724397852.4444 entropy=17.5801 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 127340] reward=-118386094.1 actor_loss=0.2673 critic_loss=104528585432.1778 entropy=17.5764 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 127340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-512119.2 mean_steps=15.0
|
|
[Episode 127350] reward=-125115977.1 actor_loss=0.2410 critic_loss=108603573862.4000 entropy=17.5642 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 127360] reward=-121970829.2 actor_loss=0.2494 critic_loss=104217203545.3023 entropy=17.5640 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 127360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450955.1 mean_steps=14.2
|
|
[Episode 127370] reward=-126295429.9 actor_loss=0.3429 critic_loss=469767044156.2353 entropy=17.5962 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 127380] reward=-120539125.2 actor_loss=0.2943 critic_loss=101067969247.1795 entropy=17.5862 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 127380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-418934.4 mean_steps=14.2
|
|
[Episode 127390] reward=-119433476.6 actor_loss=0.1883 critic_loss=97933977372.4444 entropy=17.5865 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 127400] reward=-124843782.5 actor_loss=0.2064 critic_loss=110619020567.2727 entropy=17.5848 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 127400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-519589.6 mean_steps=12.8
|
|
[Episode 127410] reward=-116929865.2 actor_loss=0.2536 critic_loss=98708785470.5778 entropy=17.5887 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 127420] reward=-304250926.6 actor_loss=0.3419 critic_loss=108443524818534.4062 entropy=17.5995 approx_kl=0.0015 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 127420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495908.5 mean_steps=14.1
|
|
[Episode 127430] reward=-120225068.7 actor_loss=0.2525 critic_loss=103568686882.5946 entropy=17.5932 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 127440] reward=-296992698.2 actor_loss=0.5210 critic_loss=119141600720031.2812 entropy=17.5981 approx_kl=0.0016 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 127440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504310.1 mean_steps=13.9
|
|
[Episode 127450] reward=-117602764.3 actor_loss=0.2995 critic_loss=98467278392.8889 entropy=17.6108 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 127460] reward=-118913062.0 actor_loss=0.3019 critic_loss=99543067488.7111 entropy=17.6249 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 127460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446948.7 mean_steps=15.4
|
|
[Episode 127470] reward=-122591737.8 actor_loss=0.2146 critic_loss=104278886772.3636 entropy=17.6191 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 127480] reward=-118820043.0 actor_loss=0.1775 critic_loss=97775035460.2667 entropy=17.6031 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 127480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466034.3 mean_steps=14.8
|
|
[Episode 127490] reward=-120377698.9 actor_loss=0.3159 critic_loss=105171979543.2727 entropy=17.6087 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 127500] reward=-116549643.5 actor_loss=0.2945 critic_loss=98164016560.3556 entropy=17.5969 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 127500] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-715387.5 mean_steps=10.4
|
|
[Episode 127510] reward=-119233086.1 actor_loss=0.3044 critic_loss=101046672615.2258 entropy=17.6049 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 127520] reward=-119194070.2 actor_loss=0.2473 critic_loss=103251632128.0000 entropy=17.6062 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 127520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444627.5 mean_steps=15.2
|
|
[Episode 127530] reward=-115550361.1 actor_loss=0.1829 critic_loss=94095739790.2222 entropy=17.5990 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 127540] reward=-119851199.7 actor_loss=0.3774 critic_loss=102980310501.0526 entropy=17.6097 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 127540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513836.5 mean_steps=14.2
|
|
[Episode 127550] reward=-164115152.0 actor_loss=0.2669 critic_loss=8350662283172.9775 entropy=17.6213 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 127560] reward=-160517069.7 actor_loss=0.3041 critic_loss=6422746348475.7334 entropy=17.6280 approx_kl=0.0001 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 127560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479985.1 mean_steps=14.4
|
|
[Episode 127570] reward=-306815186.0 actor_loss=0.2304 critic_loss=86953992126464.0000 entropy=17.6497 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 127580] reward=-112641610.5 actor_loss=0.2576 critic_loss=96233126765.7143 entropy=17.6747 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 127580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570343.7 mean_steps=13.2
|
|
[Episode 127590] reward=-110536444.5 actor_loss=0.2751 critic_loss=97520404616.5333 entropy=17.6612 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 127600] reward=-120900540.0 actor_loss=0.2106 critic_loss=106873219754.6667 entropy=17.6543 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 127600] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-603013.8 mean_steps=11.7
|
|
[Episode 127610] reward=-137115927.5 actor_loss=0.3000 critic_loss=2592235183581.8667 entropy=17.6516 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 127620] reward=-115153480.5 actor_loss=0.3562 critic_loss=96234735752.5333 entropy=17.6430 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 127620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-492101.3 mean_steps=12.1
|
|
[Episode 127630] reward=-181935611.7 actor_loss=0.2399 critic_loss=10173753828072.7266 entropy=17.6465 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 127640] reward=-112028945.1 actor_loss=0.2890 critic_loss=97119832382.5778 entropy=17.6494 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 127640] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-644602.8 mean_steps=12.3
|
|
[Episode 127650] reward=-379987234.7 actor_loss=0.2879 critic_loss=99428028173516.7969 entropy=17.6617 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 127660] reward=-146984795.1 actor_loss=0.3140 critic_loss=5136543942792.5332 entropy=17.6669 approx_kl=-0.0005 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 127660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445165.4 mean_steps=14.4
|
|
[Episode 127670] reward=-112215481.9 actor_loss=0.3715 critic_loss=100938386636.8000 entropy=17.6607 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 127680] reward=-1790408646.9 actor_loss=0.2655 critic_loss=3920684930074760.5000 entropy=17.6793 approx_kl=0.0011 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 127680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437209.0 mean_steps=14.3
|
|
[Episode 127690] reward=-120651262.0 actor_loss=0.2810 critic_loss=101934654896.3556 entropy=17.6757 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 127700] reward=-118989584.9 actor_loss=0.2655 critic_loss=100530265565.8667 entropy=17.6635 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 127700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-534950.0 mean_steps=14.8
|
|
[Episode 127710] reward=-117449295.1 actor_loss=0.2668 critic_loss=100232733286.4000 entropy=17.6493 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 127720] reward=-228169744.4 actor_loss=0.5693 critic_loss=46780880956074.6641 entropy=17.6567 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 127720] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-702275.3 mean_steps=11.6
|
|
[Episode 127730] reward=-123328824.1 actor_loss=0.1642 critic_loss=105750763617.5238 entropy=17.6566 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 127740] reward=-114543794.5 actor_loss=0.2949 critic_loss=102052130169.2632 entropy=17.6585 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 127740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588912.8 mean_steps=13.6
|
|
[Episode 127750] reward=-114562820.9 actor_loss=0.2465 critic_loss=98509326745.6000 entropy=17.6457 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 127760] reward=-113200585.2 actor_loss=0.2278 critic_loss=93836585283.3684 entropy=17.6392 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 127760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521811.6 mean_steps=14.0
|
|
[Episode 127770] reward=-119061857.9 actor_loss=0.2542 critic_loss=95963407155.2000 entropy=17.6372 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 127780] reward=-118005995.7 actor_loss=0.2627 critic_loss=123309987750.9565 entropy=17.6470 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 127780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456221.6 mean_steps=14.5
|
|
[Episode 127790] reward=-117125939.4 actor_loss=0.3831 critic_loss=106683270485.3333 entropy=17.6526 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 127800] reward=-119007864.1 actor_loss=0.2858 critic_loss=100202090868.3636 entropy=17.6609 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 127800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502392.7 mean_steps=13.9
|
|
[Episode 127810] reward=-119638028.6 actor_loss=0.2878 critic_loss=110063023991.4667 entropy=17.6632 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 127820] reward=-126796059.5 actor_loss=0.3507 critic_loss=371234235278.2222 entropy=17.6733 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 127820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524173.9 mean_steps=14.2
|
|
[Episode 127830] reward=-310147107.0 actor_loss=0.2418 critic_loss=52344684351761.0703 entropy=17.6716 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 127840] reward=-588613382.0 actor_loss=0.9365 critic_loss=531147278478774.8750 entropy=17.6735 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 127840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477031.8 mean_steps=14.7
|
|
[Episode 127850] reward=-311262905.1 actor_loss=0.2306 critic_loss=77719595617848.8906 entropy=17.6850 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 127860] reward=-524724242.7 actor_loss=0.2018 critic_loss=223458460795608.1875 entropy=17.6957 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1087 front_blocked=0
|
|
[Eval 127860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575752.1 mean_steps=12.6
|
|
[Episode 127870] reward=-118489490.2 actor_loss=0.2396 critic_loss=105997651330.8445 entropy=17.7163 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 127880] reward=-136015331.3 actor_loss=0.1919 critic_loss=1246919376896.0000 entropy=17.7270 approx_kl=0.0045 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 127880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507729.8 mean_steps=14.0
|
|
[Episode 127890] reward=-116470674.4 actor_loss=0.3624 critic_loss=96884233375.2889 entropy=17.7122 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 127900] reward=-116453801.1 actor_loss=0.3407 critic_loss=99564367598.9333 entropy=17.7137 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 127900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514844.6 mean_steps=14.0
|
|
[Episode 127910] reward=-120108266.1 actor_loss=0.3405 critic_loss=101206514999.6522 entropy=17.7065 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 127920] reward=-117388309.0 actor_loss=0.3119 critic_loss=100928994508.8000 entropy=17.7162 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 127920] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-346771.9 mean_steps=16.9
|
|
[Episode 127930] reward=-120175018.9 actor_loss=0.2456 critic_loss=101805226120.5333 entropy=17.7238 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 127940] reward=-114827588.5 actor_loss=0.2813 critic_loss=96441442304.0000 entropy=17.7332 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 127940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-636787.5 mean_steps=12.5
|
|
[Episode 127950] reward=-115409256.4 actor_loss=0.3236 critic_loss=99720897058.1333 entropy=17.7309 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 127960] reward=-114523920.0 actor_loss=0.3006 critic_loss=98585545022.5778 entropy=17.7109 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 127960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-395730.9 mean_steps=14.2
|
|
[Episode 127970] reward=-116513576.2 actor_loss=0.3527 critic_loss=99348552089.6000 entropy=17.7304 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 127980] reward=-119477542.9 actor_loss=0.2629 critic_loss=105820991666.0870 entropy=17.7435 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 127980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533796.4 mean_steps=13.1
|
|
[Episode 127990] reward=-115774131.0 actor_loss=0.3514 critic_loss=116273031395.5556 entropy=17.7538 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 128000] reward=-119956453.0 actor_loss=0.2569 critic_loss=103408937728.0000 entropy=17.7519 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 128000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-363813.9 mean_steps=15.7
|
|
[Episode 128010] reward=-117704192.5 actor_loss=0.2372 critic_loss=103022592546.1333 entropy=17.7466 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 128020] reward=-260412069.6 actor_loss=0.3357 critic_loss=66603249281082.5156 entropy=17.7402 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 128020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-405382.9 mean_steps=17.1
|
|
[Episode 128030] reward=-118049971.3 actor_loss=0.2499 critic_loss=97464811884.0889 entropy=17.7335 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 128040] reward=-119893720.8 actor_loss=0.2126 critic_loss=102137538452.2105 entropy=17.7357 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 128040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410462.4 mean_steps=15.2
|
|
[Episode 128050] reward=-120516528.4 actor_loss=0.3465 critic_loss=105193381705.9556 entropy=17.7452 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 128060] reward=-120805230.3 actor_loss=0.3006 critic_loss=106068877129.9556 entropy=17.7423 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 128060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503440.5 mean_steps=13.8
|
|
[Episode 128070] reward=-116473481.0 actor_loss=0.2900 critic_loss=99755996253.0909 entropy=17.7420 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 128080] reward=-120414263.7 actor_loss=0.3096 critic_loss=101148019916.8000 entropy=17.7386 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 128080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-484645.2 mean_steps=15.7
|
|
[Episode 128090] reward=-118441572.5 actor_loss=0.4079 critic_loss=101683931272.5333 entropy=17.7271 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 128100] reward=-118896661.1 actor_loss=0.3492 critic_loss=105298697875.9111 entropy=17.7351 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 128100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-369868.3 mean_steps=15.8
|
|
[Episode 128110] reward=-118376841.0 actor_loss=0.3308 critic_loss=109518734336.0000 entropy=17.7465 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 128120] reward=-112015746.6 actor_loss=0.2733 critic_loss=96519774838.1538 entropy=17.7395 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 128120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563274.2 mean_steps=13.8
|
|
[Episode 128130] reward=-124272397.6 actor_loss=0.2552 critic_loss=106836322036.8696 entropy=17.7492 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 128140] reward=-114562382.2 actor_loss=0.2875 critic_loss=98319797270.7556 entropy=17.7533 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 128140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578612.4 mean_steps=13.7
|
|
[Episode 128150] reward=-114813202.8 actor_loss=0.3578 critic_loss=100808633668.6829 entropy=17.7273 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 128160] reward=-116393681.7 actor_loss=0.2479 critic_loss=101159384951.4667 entropy=17.7283 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 128160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568273.9 mean_steps=13.3
|
|
[Episode 128170] reward=-158521001.4 actor_loss=2.9052 critic_loss=7001665776941.1768 entropy=17.7518 approx_kl=0.0036 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 128180] reward=-167064334.8 actor_loss=0.8862 critic_loss=9326068603289.5996 entropy=17.7778 approx_kl=0.0027 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 128180] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-375395.9 mean_steps=16.7
|
|
[Episode 128190] reward=-121464609.6 actor_loss=0.2510 critic_loss=104999327744.0000 entropy=17.7758 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 128200] reward=-118016226.3 actor_loss=0.2837 critic_loss=108562419065.2632 entropy=17.7785 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 128200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-458336.8 mean_steps=13.6
|
|
[Episode 128210] reward=-215285106.6 actor_loss=0.2864 critic_loss=32747948850107.7344 entropy=17.7770 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 128220] reward=-121065378.6 actor_loss=0.3048 critic_loss=117631122090.6667 entropy=17.7740 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 128220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-387959.5 mean_steps=14.8
|
|
[Episode 128230] reward=-121285857.0 actor_loss=0.2972 critic_loss=107314794711.5789 entropy=17.7572 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 128240] reward=-126438546.5 actor_loss=0.4209 critic_loss=809194681895.3846 entropy=17.7506 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 128240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-570569.9 mean_steps=12.2
|
|
[Episode 128250] reward=-114152585.8 actor_loss=0.3770 critic_loss=130172744817.7778 entropy=17.7662 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 128260] reward=-119699259.2 actor_loss=0.2703 critic_loss=136235404765.8667 entropy=17.7848 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 128260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-457966.8 mean_steps=13.8
|
|
[Episode 128270] reward=-195683698.1 actor_loss=0.2237 critic_loss=18956099768957.1562 entropy=17.7664 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 128280] reward=-117120524.2 actor_loss=0.3682 critic_loss=105707590217.1429 entropy=17.7500 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 128280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471029.4 mean_steps=14.8
|
|
[Episode 128290] reward=-116244786.0 actor_loss=0.3425 critic_loss=103141657804.8000 entropy=17.7513 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 128300] reward=-119266918.3 actor_loss=0.1773 critic_loss=103782372147.2000 entropy=17.7503 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 128300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-810915.7 mean_steps=17.2
|
|
[Episode 128310] reward=-121778725.8 actor_loss=0.2901 critic_loss=99961826508.8000 entropy=17.7540 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 128320] reward=-118004820.4 actor_loss=0.2993 critic_loss=100478203949.5111 entropy=17.7651 approx_kl=0.0122 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 128320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-410934.7 mean_steps=14.6
|
|
[Episode 128330] reward=-114437175.4 actor_loss=0.2469 critic_loss=97106519008.9697 entropy=17.7688 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 128340] reward=-121986872.9 actor_loss=0.2369 critic_loss=108578227086.2222 entropy=17.7668 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 128340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-505409.0 mean_steps=14.9
|
|
[Episode 128350] reward=-120998293.8 actor_loss=0.2037 critic_loss=111300046574.9333 entropy=17.7737 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 128360] reward=-120000202.6 actor_loss=0.2875 critic_loss=107064185323.5200 entropy=17.7903 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 128360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-610736.3 mean_steps=13.7
|
|
[Episode 128370] reward=-118203742.2 actor_loss=0.3671 critic_loss=102044518536.5333 entropy=17.7758 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 128380] reward=-124916463.3 actor_loss=0.2483 critic_loss=106800122197.3333 entropy=17.7949 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 128380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437471.7 mean_steps=14.3
|
|
[Episode 128390] reward=-116294232.0 actor_loss=0.3619 critic_loss=100522773248.0000 entropy=17.7981 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 128400] reward=-121230235.4 actor_loss=0.2812 critic_loss=105297220403.2000 entropy=17.7842 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 128400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-596188.3 mean_steps=12.8
|
|
[Episode 128410] reward=-120811357.9 actor_loss=0.3215 critic_loss=103845675736.1778 entropy=17.7908 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 128420] reward=-115137771.0 actor_loss=0.2902 critic_loss=95378984504.8889 entropy=17.7893 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 128420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571498.7 mean_steps=12.7
|
|
[Episode 128430] reward=-116308170.3 actor_loss=0.3589 critic_loss=105457751381.3333 entropy=17.7727 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 128440] reward=-389549614.2 actor_loss=40.4779 critic_loss=165491364034423.4688 entropy=17.7819 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 128440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531332.0 mean_steps=14.0
|
|
[Episode 128450] reward=-117710782.0 actor_loss=0.3371 critic_loss=105011260549.5652 entropy=17.8210 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 128460] reward=-119256761.0 actor_loss=0.2771 critic_loss=121245232670.1176 entropy=17.8188 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 128460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-474934.5 mean_steps=13.2
|
|
[Episode 128470] reward=-119392866.1 actor_loss=0.3056 critic_loss=103754682823.1111 entropy=17.8280 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 128480] reward=-119671798.4 actor_loss=0.3026 critic_loss=124926690725.6471 entropy=17.8286 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 128480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500828.2 mean_steps=15.0
|
|
[Episode 128490] reward=-117881311.1 actor_loss=0.3058 critic_loss=102793964748.8000 entropy=17.8270 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 128500] reward=-117094426.3 actor_loss=0.3143 critic_loss=104415612836.9778 entropy=17.8256 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 128500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398228.4 mean_steps=16.1
|
|
[Episode 128510] reward=-125241852.6 actor_loss=10.8237 critic_loss=232828514304.0000 entropy=17.8192 approx_kl=0.0044 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 128520] reward=-117792710.8 actor_loss=0.3667 critic_loss=109733805884.9524 entropy=17.8281 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 128520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504092.2 mean_steps=14.2
|
|
[Episode 128530] reward=-123187682.5 actor_loss=0.3407 critic_loss=110905076320.8649 entropy=17.8297 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 128540] reward=-354833961.7 actor_loss=25.2090 critic_loss=150704565996020.6250 entropy=17.8488 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 128540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578402.6 mean_steps=12.8
|
|
[Episode 128550] reward=-146878963.1 actor_loss=0.3980 critic_loss=3390566977444.9775 entropy=17.8533 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 128560] reward=-121995159.4 actor_loss=0.1891 critic_loss=108826957482.6667 entropy=17.8444 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 128560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435936.4 mean_steps=15.2
|
|
[Episode 128570] reward=-122895420.5 actor_loss=0.2083 critic_loss=103949996347.0769 entropy=17.8398 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 128580] reward=-119546254.4 actor_loss=0.3344 critic_loss=108303391061.3333 entropy=17.8174 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 128580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-521534.5 mean_steps=13.3
|
|
[Episode 128590] reward=-125609833.5 actor_loss=0.2631 critic_loss=239880225167.6097 entropy=17.8209 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 128600] reward=-118640743.5 actor_loss=0.3398 critic_loss=106849742574.9333 entropy=17.8306 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 128600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-365492.4 mean_steps=15.8
|
|
[Episode 128610] reward=-122835959.8 actor_loss=0.2645 critic_loss=117633136453.8182 entropy=17.8446 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 128620] reward=-120268786.9 actor_loss=0.3364 critic_loss=115555288860.4444 entropy=17.8362 approx_kl=0.0105 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 128620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613234.8 mean_steps=12.8
|
|
[Episode 128630] reward=-116840037.0 actor_loss=0.3471 critic_loss=98206238128.3556 entropy=17.8362 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 128640] reward=-119612325.0 actor_loss=0.1997 critic_loss=106062009829.0526 entropy=17.8362 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 128640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-372235.7 mean_steps=16.9
|
|
[Episode 128650] reward=-122120121.5 actor_loss=0.2654 critic_loss=104149318466.3704 entropy=17.8315 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 128660] reward=-119686675.5 actor_loss=0.2939 critic_loss=104337756160.0000 entropy=17.8349 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 128660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452669.9 mean_steps=14.7
|
|
[Episode 128670] reward=-119350114.7 actor_loss=0.1898 critic_loss=107787495803.2593 entropy=17.8412 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 128680] reward=-113565238.9 actor_loss=0.3167 critic_loss=97836160551.3846 entropy=17.8344 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 128680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616599.3 mean_steps=13.1
|
|
[Episode 128690] reward=-121571360.5 actor_loss=0.1729 critic_loss=110671395407.6444 entropy=17.8360 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 128700] reward=-114535656.7 actor_loss=0.2885 critic_loss=103591560015.4483 entropy=17.8246 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 128700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-396605.3 mean_steps=16.5
|
|
[Episode 128710] reward=-114877199.1 actor_loss=0.2693 critic_loss=101429866332.1600 entropy=17.8321 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 128720] reward=-120414789.5 actor_loss=0.2187 critic_loss=100169366186.6667 entropy=17.8283 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 128720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-573759.9 mean_steps=12.4
|
|
[Episode 128730] reward=-114628593.1 actor_loss=0.3160 critic_loss=101591692970.6667 entropy=17.8225 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 128740] reward=-113627293.5 actor_loss=0.3408 critic_loss=106805655306.2400 entropy=17.8121 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 128740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394423.4 mean_steps=15.9
|
|
[Episode 128750] reward=-115766898.2 actor_loss=0.2390 critic_loss=97576939520.0000 entropy=17.8128 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 128760] reward=-113937637.4 actor_loss=0.3783 critic_loss=98001980854.8571 entropy=17.8081 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 128760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378038.1 mean_steps=15.9
|
|
[Episode 128770] reward=-120819511.1 actor_loss=0.3210 critic_loss=112608851418.5366 entropy=17.8018 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 128780] reward=-116902518.1 actor_loss=0.3101 critic_loss=102712756381.5385 entropy=17.8010 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 128780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-472442.9 mean_steps=15.8
|
|
[Episode 128790] reward=-120085410.8 actor_loss=0.2537 critic_loss=100981395000.8889 entropy=17.7918 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 128800] reward=-175944042.9 actor_loss=0.7364 critic_loss=15731197535118.2227 entropy=17.7876 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 128800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509337.8 mean_steps=14.2
|
|
[Episode 128810] reward=-123937832.8 actor_loss=0.2928 critic_loss=108293748235.3778 entropy=17.7823 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 128820] reward=-118548010.7 actor_loss=0.3366 critic_loss=96860593629.8667 entropy=17.7936 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 128820] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-257569.9 mean_steps=17.1
|
|
[Episode 128830] reward=-121539857.7 actor_loss=0.3803 critic_loss=313556599512.1778 entropy=17.7947 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 128840] reward=-116533878.8 actor_loss=0.3275 critic_loss=100793414004.3636 entropy=17.7976 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 128840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-608402.1 mean_steps=12.6
|
|
[Episode 128850] reward=-146068117.7 actor_loss=0.1997 critic_loss=2549530688079.6445 entropy=17.8058 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 128860] reward=-130675131.1 actor_loss=0.2218 critic_loss=983190827463.1111 entropy=17.8083 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 128860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-589828.8 mean_steps=12.2
|
|
[Episode 128870] reward=-325035157.7 actor_loss=35.7875 critic_loss=141157912805376.0000 entropy=17.8408 approx_kl=0.0149 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 128880] reward=-120554020.6 actor_loss=0.2810 critic_loss=106960224737.8824 entropy=17.8589 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 128880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-527640.2 mean_steps=13.4
|
|
[Episode 128890] reward=-120822139.3 actor_loss=0.3216 critic_loss=104536996417.6410 entropy=17.8681 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 128900] reward=-122469014.7 actor_loss=0.2200 critic_loss=175776120832.0000 entropy=17.8536 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 128900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-376472.7 mean_steps=15.9
|
|
[Episode 128910] reward=-122876129.9 actor_loss=0.2630 critic_loss=111473968154.2564 entropy=17.8642 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 128920] reward=-151871772.1 actor_loss=0.3292 critic_loss=4283773846633.9312 entropy=17.8757 approx_kl=0.0027 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 128920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-629536.6 mean_steps=12.1
|
|
[Episode 128930] reward=-123309457.5 actor_loss=0.1966 critic_loss=110830344929.2800 entropy=17.8750 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 128940] reward=-118470901.7 actor_loss=0.2706 critic_loss=108092514304.0000 entropy=17.8759 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 128940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-470544.0 mean_steps=16.6
|
|
[Episode 128950] reward=-122505875.6 actor_loss=0.3162 critic_loss=105819667251.2000 entropy=17.8936 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 128960] reward=-117796324.3 actor_loss=0.3483 critic_loss=102809878155.6364 entropy=17.8870 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 128960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-543028.8 mean_steps=14.2
|
|
[Episode 128970] reward=-235051625.4 actor_loss=0.1534 critic_loss=41598190197145.6016 entropy=17.8999 approx_kl=0.0004 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 128980] reward=-123298237.5 actor_loss=0.2413 critic_loss=109604396600.8889 entropy=17.9102 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 128980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386581.9 mean_steps=15.9
|
|
[Episode 128990] reward=-119611370.9 actor_loss=0.2554 critic_loss=100269561329.3714 entropy=17.9101 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 129000] reward=-120850320.6 actor_loss=0.3066 critic_loss=113858063928.8889 entropy=17.9053 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 129000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453909.4 mean_steps=15.4
|
|
[Episode 129010] reward=-119957568.2 actor_loss=0.3085 critic_loss=102485553425.0667 entropy=17.8962 approx_kl=0.0117 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 129020] reward=-124076071.2 actor_loss=0.2223 critic_loss=111938058922.6667 entropy=17.8908 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 129020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443937.9 mean_steps=15.3
|
|
[Episode 129030] reward=-124196119.6 actor_loss=0.2428 critic_loss=118605366461.6296 entropy=17.8809 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 129040] reward=-130143644.8 actor_loss=0.2258 critic_loss=433416412901.5172 entropy=17.8740 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 129040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496101.5 mean_steps=13.9
|
|
[Episode 129050] reward=-123475215.9 actor_loss=0.2079 critic_loss=108721962635.6364 entropy=17.8865 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 129060] reward=-121012879.0 actor_loss=0.2171 critic_loss=103358356411.7333 entropy=17.8760 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 129060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-393880.8 mean_steps=16.4
|
|
[Episode 129070] reward=-121937483.7 actor_loss=0.3159 critic_loss=106383443382.8571 entropy=17.8781 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 129080] reward=-118665314.7 actor_loss=0.2996 critic_loss=114847463765.3333 entropy=17.8652 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 129080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549612.5 mean_steps=14.7
|
|
[Episode 129090] reward=-119817882.7 actor_loss=0.2204 critic_loss=104731519840.7111 entropy=17.8644 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 129100] reward=-125454508.6 actor_loss=0.2833 critic_loss=105721469337.6000 entropy=17.8571 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 129100] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-640774.9 mean_steps=11.6
|
|
[Episode 129110] reward=-117608326.4 actor_loss=0.2737 critic_loss=103425771209.6970 entropy=17.8554 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 129120] reward=-123342789.4 actor_loss=0.3107 critic_loss=106986477206.5882 entropy=17.8490 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 129120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424636.4 mean_steps=14.2
|
|
[Episode 129130] reward=-123211353.9 actor_loss=0.4715 critic_loss=676151961554.4889 entropy=17.8676 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 129140] reward=-120917030.9 actor_loss=0.2871 critic_loss=105162348468.1481 entropy=17.8457 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 129140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398483.2 mean_steps=16.3
|
|
[Episode 129150] reward=-124144334.3 actor_loss=0.2523 critic_loss=115566714233.2632 entropy=17.8374 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 129160] reward=-118744929.1 actor_loss=0.2261 critic_loss=103315380633.6000 entropy=17.8362 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 129160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458069.7 mean_steps=14.4
|
|
[Episode 129170] reward=-119367816.6 actor_loss=0.2754 critic_loss=106406498171.8710 entropy=17.8354 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 129180] reward=-121807002.4 actor_loss=0.3389 critic_loss=108904688459.2941 entropy=17.8354 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 129180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-540467.1 mean_steps=16.2
|
|
[Episode 129190] reward=-117387427.0 actor_loss=0.2871 critic_loss=106781318257.7778 entropy=17.8388 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 129200] reward=-120729422.3 actor_loss=0.2165 critic_loss=134569395968.0000 entropy=17.8501 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 129200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-635934.6 mean_steps=12.1
|
|
[Episode 129210] reward=-124394558.3 actor_loss=0.2508 critic_loss=112525984707.7647 entropy=17.8361 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 129220] reward=-117249081.3 actor_loss=0.2519 critic_loss=101575637841.1707 entropy=17.8315 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 129220] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-670747.0 mean_steps=11.6
|
|
[Episode 129230] reward=-122032118.6 actor_loss=0.3324 critic_loss=110668013568.0000 entropy=17.8432 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 129240] reward=-119819620.1 actor_loss=0.4358 critic_loss=131133493130.9714 entropy=17.8486 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 129240] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-369612.4 mean_steps=18.1
|
|
[Episode 129250] reward=-129960591.8 actor_loss=0.2486 critic_loss=635724165435.0769 entropy=17.8938 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 129260] reward=-123122087.6 actor_loss=0.2981 critic_loss=128948820172.8000 entropy=17.9012 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 129260] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-353810.0 mean_steps=16.9
|
|
[Episode 129270] reward=-121365286.1 actor_loss=0.1935 critic_loss=143030837690.8108 entropy=17.9067 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 129280] reward=-122906392.6 actor_loss=0.2944 critic_loss=111790486949.6471 entropy=17.9131 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 129280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-375812.2 mean_steps=15.9
|
|
[Episode 129290] reward=-120539305.1 actor_loss=0.2759 critic_loss=102318961641.2444 entropy=17.9156 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 129300] reward=-122031459.7 actor_loss=0.2429 critic_loss=104995718103.0400 entropy=17.9315 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 129300] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-729538.1 mean_steps=11.6
|
|
[Episode 129310] reward=-120232385.2 actor_loss=0.2515 critic_loss=107914366520.8889 entropy=17.9326 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 129320] reward=-120246924.5 actor_loss=0.2911 critic_loss=101691073448.2286 entropy=17.9315 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 129320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-577577.9 mean_steps=14.6
|
|
[Episode 129330] reward=-114738930.2 actor_loss=0.2828 critic_loss=104673093534.4762 entropy=17.9385 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 129340] reward=-119665176.8 actor_loss=0.3388 critic_loss=110730584792.1778 entropy=17.9411 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 129340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401072.1 mean_steps=16.1
|
|
[Episode 129350] reward=-116800339.5 actor_loss=0.3548 critic_loss=114296481018.3111 entropy=17.9280 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 129360] reward=-119971759.5 actor_loss=0.2551 critic_loss=103338910351.3600 entropy=17.9335 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 129360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434166.9 mean_steps=14.5
|
|
[Episode 129370] reward=-136711258.6 actor_loss=0.2606 critic_loss=1457826877535.2559 entropy=17.9108 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 129380] reward=-119673601.8 actor_loss=0.2186 critic_loss=109600923283.9111 entropy=17.9151 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 129380] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-368133.4 mean_steps=17.0
|
|
[Episode 129390] reward=-120495818.6 actor_loss=0.2912 critic_loss=108242217096.5333 entropy=17.9176 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 129400] reward=-123044141.0 actor_loss=0.3633 critic_loss=130896601088.0000 entropy=17.9204 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 129400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485976.0 mean_steps=14.2
|
|
[Episode 129410] reward=-124037942.4 actor_loss=0.1923 critic_loss=111004474063.5676 entropy=17.9187 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 129420] reward=-122574440.2 actor_loss=0.2339 critic_loss=108046824369.2308 entropy=17.9020 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 129420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-512184.8 mean_steps=13.5
|
|
[Episode 129430] reward=-118650762.0 actor_loss=0.2694 critic_loss=103480067868.4444 entropy=17.9096 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 129440] reward=-121030339.3 actor_loss=0.2493 critic_loss=111852160438.8571 entropy=17.9182 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 129440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602064.4 mean_steps=12.9
|
|
[Episode 129450] reward=-459335499.2 actor_loss=32.0571 critic_loss=297710508028219.0625 entropy=17.9200 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 129460] reward=-125273955.6 actor_loss=0.2521 critic_loss=136896944730.3529 entropy=17.9168 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 129460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537610.2 mean_steps=13.2
|
|
[Episode 129470] reward=-123927603.6 actor_loss=0.2625 critic_loss=115182393519.5429 entropy=17.9173 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 129480] reward=-122324064.6 actor_loss=0.1887 critic_loss=105626331322.1818 entropy=17.9212 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 129480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-438112.9 mean_steps=16.4
|
|
[Episode 129490] reward=-121113972.1 actor_loss=0.2222 critic_loss=108106260695.5789 entropy=17.9152 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 129500] reward=-124099356.1 actor_loss=0.1728 critic_loss=117448331355.0222 entropy=17.9207 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 129500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-560260.4 mean_steps=14.7
|
|
[Episode 129510] reward=-128342073.4 actor_loss=0.2043 critic_loss=158938087424.0000 entropy=17.9190 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 129520] reward=-119209149.1 actor_loss=0.3763 critic_loss=112033606170.9474 entropy=17.9019 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 129520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424980.3 mean_steps=14.6
|
|
[Episode 129530] reward=-229323569.2 actor_loss=0.2512 critic_loss=32676727005001.9570 entropy=17.8921 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 129540] reward=-121622718.0 actor_loss=0.2170 critic_loss=104002686464.0000 entropy=17.8844 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 129540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-377260.3 mean_steps=16.1
|
|
[Episode 129550] reward=-117821675.2 actor_loss=0.2879 critic_loss=108209059405.5758 entropy=17.8834 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 129560] reward=-295544810.7 actor_loss=0.2056 critic_loss=88882901715080.5312 entropy=17.8726 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 129560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-626651.7 mean_steps=13.2
|
|
[Episode 129570] reward=-122050324.7 actor_loss=0.2048 critic_loss=114662378402.9091 entropy=17.8817 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 129580] reward=-121535841.8 actor_loss=0.3574 critic_loss=118654413092.5714 entropy=17.8851 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 129580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-399966.1 mean_steps=16.4
|
|
[Episode 129590] reward=-119186385.2 actor_loss=0.2625 critic_loss=110927377749.3333 entropy=17.8949 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 129600] reward=-125164569.6 actor_loss=0.2254 critic_loss=210839624434.5263 entropy=17.8947 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 129600] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-272294.9 mean_steps=16.8
|
|
[Episode 129610] reward=-119114858.7 actor_loss=0.2687 critic_loss=101354232945.7778 entropy=17.8894 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 129620] reward=-120221815.0 actor_loss=0.2725 critic_loss=102249542367.1795 entropy=17.8862 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 129620] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-698100.6 mean_steps=11.7
|
|
[Episode 129630] reward=-119636600.7 actor_loss=0.3441 critic_loss=107089014875.0222 entropy=17.8830 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 129640] reward=-122230591.0 actor_loss=0.3256 critic_loss=108927918808.1778 entropy=17.8816 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 129640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-390508.5 mean_steps=17.6
|
|
[Episode 129650] reward=-119491500.2 actor_loss=0.2828 critic_loss=102401764374.7556 entropy=17.8749 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 129660] reward=-121071430.2 actor_loss=0.2717 critic_loss=109531154108.6316 entropy=17.8699 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 129660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548133.1 mean_steps=13.6
|
|
[Episode 129670] reward=-446216013.7 actor_loss=0.2212 critic_loss=180626891060955.4375 entropy=17.8729 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 129680] reward=-124790316.5 actor_loss=0.2427 critic_loss=118834744920.2759 entropy=17.8692 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 129680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509251.7 mean_steps=13.9
|
|
[Episode 129690] reward=-117671786.2 actor_loss=0.3176 critic_loss=100527022899.2000 entropy=17.8711 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 129700] reward=-120195755.3 actor_loss=0.2756 critic_loss=105330666030.5455 entropy=17.8728 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 129700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371507.4 mean_steps=16.4
|
|
[Episode 129710] reward=-121424407.3 actor_loss=0.3393 critic_loss=114533029888.0000 entropy=17.8783 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 129720] reward=-114808157.9 actor_loss=0.3536 critic_loss=102685598476.1905 entropy=17.8636 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 129720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-331611.1 mean_steps=15.7
|
|
[Episode 129730] reward=-119585400.5 actor_loss=0.2731 critic_loss=103131829452.8000 entropy=17.8537 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 129740] reward=-113529732.2 actor_loss=0.3176 critic_loss=95595468642.4615 entropy=17.8307 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 129740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504887.3 mean_steps=14.4
|
|
[Episode 129750] reward=-118551041.3 actor_loss=0.2699 critic_loss=109882916278.8571 entropy=17.8210 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 129760] reward=-117403945.4 actor_loss=0.2846 critic_loss=100348005210.8387 entropy=17.8202 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 129760] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-723788.6 mean_steps=10.6
|
|
[Episode 129770] reward=-122213098.9 actor_loss=0.2905 critic_loss=101641037050.3111 entropy=17.8101 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 129780] reward=-119086151.1 actor_loss=0.2996 critic_loss=98966435157.3333 entropy=17.8021 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 129780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-525287.7 mean_steps=12.6
|
|
[Episode 129790] reward=-117993369.4 actor_loss=0.2447 critic_loss=99321852359.1111 entropy=17.7965 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 129800] reward=-121196508.1 actor_loss=0.2718 critic_loss=102367988212.6222 entropy=17.7856 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 129800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-478082.9 mean_steps=13.3
|
|
[Episode 129810] reward=-117084706.6 actor_loss=0.2840 critic_loss=98580109622.3030 entropy=17.7679 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 129820] reward=-115515330.2 actor_loss=0.2999 critic_loss=97726834096.3556 entropy=17.7693 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 129820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-401545.6 mean_steps=14.8
|
|
[Episode 129830] reward=-119115248.9 actor_loss=0.3024 critic_loss=99019862878.3158 entropy=17.7681 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 129840] reward=-124017839.2 actor_loss=0.2903 critic_loss=105249574912.0000 entropy=17.7598 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 129840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-393241.3 mean_steps=15.5
|
|
[Episode 129850] reward=-124243827.5 actor_loss=0.2314 critic_loss=132196227229.5385 entropy=17.7592 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 129860] reward=-112502648.6 actor_loss=0.3991 critic_loss=102377986145.5238 entropy=17.7847 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 129860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-574008.8 mean_steps=14.1
|
|
[Episode 129870] reward=-129933453.2 actor_loss=0.3006 critic_loss=587702341632.0000 entropy=17.7870 approx_kl=0.0043 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 129880] reward=-124063116.8 actor_loss=0.2283 critic_loss=271013034771.6923 entropy=17.8000 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 129880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503414.6 mean_steps=14.4
|
|
[Episode 129890] reward=-113648938.1 actor_loss=0.2680 critic_loss=93059867534.2222 entropy=17.7854 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 129900] reward=-118866847.1 actor_loss=0.4194 critic_loss=95535543247.2381 entropy=17.7849 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 129900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483876.2 mean_steps=14.6
|
|
[Episode 129910] reward=-120100602.2 actor_loss=0.3230 critic_loss=105989598230.7556 entropy=17.7780 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 129920] reward=-118610366.8 actor_loss=0.2538 critic_loss=102227629943.4667 entropy=17.7732 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 129920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477300.7 mean_steps=14.3
|
|
[Episode 129930] reward=-117734411.9 actor_loss=0.3158 critic_loss=104063581297.7778 entropy=17.7829 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 129940] reward=-115907212.6 actor_loss=0.3878 critic_loss=106676348183.2727 entropy=17.7795 approx_kl=0.0115 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 129940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531753.3 mean_steps=14.4
|
|
[Episode 129950] reward=-114497802.2 actor_loss=0.3787 critic_loss=94796076600.8889 entropy=17.7763 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 129960] reward=-121069190.3 actor_loss=0.3162 critic_loss=102719702016.0000 entropy=17.7700 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 129960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-522237.5 mean_steps=13.6
|
|
[Episode 129970] reward=-117115197.1 actor_loss=0.2794 critic_loss=100841778380.8000 entropy=17.7656 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 129980] reward=-120328863.2 actor_loss=0.3522 critic_loss=104939416877.1765 entropy=17.7774 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 129980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-554082.1 mean_steps=14.8
|
|
[Episode 129990] reward=-120386069.6 actor_loss=0.2207 critic_loss=108303321946.8387 entropy=17.7748 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 130000] reward=-117205639.6 actor_loss=0.3515 critic_loss=103799417434.3529 entropy=17.7795 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 130000] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-359470.7 mean_steps=17.3
|
|
[Episode 130010] reward=-114602432.5 actor_loss=0.3694 critic_loss=94171652096.0000 entropy=17.7841 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 130020] reward=-115323883.6 actor_loss=0.2835 critic_loss=106145053263.6444 entropy=17.7803 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 130020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524264.3 mean_steps=13.6
|
|
[Episode 130030] reward=-122986955.1 actor_loss=0.3863 critic_loss=110820228375.2727 entropy=17.7690 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 130040] reward=-115846823.3 actor_loss=0.2444 critic_loss=93009017036.8000 entropy=17.7553 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 130040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-367937.7 mean_steps=16.2
|
|
[Episode 130050] reward=-121400219.3 actor_loss=0.2710 critic_loss=105827840215.5789 entropy=17.7629 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 130060] reward=-116409606.7 actor_loss=0.3059 critic_loss=100362596111.0588 entropy=17.7486 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 130060] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-664635.8 mean_steps=11.6
|
|
[Episode 130070] reward=-119218020.9 actor_loss=0.2449 critic_loss=100396912081.4545 entropy=17.7467 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 130080] reward=-124310866.8 actor_loss=0.2405 critic_loss=187600157230.5454 entropy=17.7314 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 130080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-674839.5 mean_steps=12.7
|
|
[Episode 130090] reward=-118868794.1 actor_loss=0.3014 critic_loss=93870948937.1429 entropy=17.7444 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 130100] reward=-120784306.6 actor_loss=0.1828 critic_loss=105143871750.5641 entropy=17.7406 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 130100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543158.0 mean_steps=13.9
|
|
[Episode 130110] reward=-122843033.6 actor_loss=0.2727 critic_loss=123604751701.3333 entropy=17.7459 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 130120] reward=-117447274.4 actor_loss=0.3192 critic_loss=102389549131.8519 entropy=17.7309 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 130120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470486.1 mean_steps=14.8
|
|
[Episode 130130] reward=-124230543.8 actor_loss=0.3360 critic_loss=113932406632.2963 entropy=17.7300 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 130140] reward=-124146528.5 actor_loss=0.2242 critic_loss=108779645987.3103 entropy=17.7284 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 130140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-486536.9 mean_steps=13.1
|
|
[Episode 130150] reward=-118103463.0 actor_loss=0.2819 critic_loss=93642793984.0000 entropy=17.7339 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 130160] reward=-123918026.6 actor_loss=0.3484 critic_loss=107215813095.6190 entropy=17.7323 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 130160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540422.0 mean_steps=13.7
|
|
[Episode 130170] reward=-119252671.8 actor_loss=0.3262 critic_loss=111099787384.4706 entropy=17.7264 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 130180] reward=-117447899.6 actor_loss=0.3202 critic_loss=102365946624.0000 entropy=17.7171 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 130180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521726.7 mean_steps=14.4
|
|
[Episode 130190] reward=-125827885.7 actor_loss=0.2977 critic_loss=112874042982.4000 entropy=17.7134 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 130200] reward=-117945710.8 actor_loss=0.2990 critic_loss=101754131078.7368 entropy=17.7189 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 130200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515603.5 mean_steps=14.2
|
|
[Episode 130210] reward=-248958942.1 actor_loss=0.3115 critic_loss=46794621262961.7812 entropy=17.7252 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 130220] reward=-120600188.5 actor_loss=0.1946 critic_loss=110058452231.3143 entropy=17.7121 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 130220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555492.3 mean_steps=13.2
|
|
[Episode 130230] reward=-119164809.5 actor_loss=0.3491 critic_loss=101567710720.0000 entropy=17.7223 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 130240] reward=-126257379.8 actor_loss=0.2154 critic_loss=262272979666.8235 entropy=17.7277 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 130240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562345.3 mean_steps=12.6
|
|
[Episode 130250] reward=-117225822.4 actor_loss=0.3164 critic_loss=101355369267.2000 entropy=17.7289 approx_kl=0.0110 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 130260] reward=-115094816.7 actor_loss=0.3451 critic_loss=96293247886.2222 entropy=17.7263 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 130260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-461685.7 mean_steps=15.6
|
|
[Episode 130270] reward=-122480858.3 actor_loss=0.2876 critic_loss=101510524635.4286 entropy=17.7244 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 130280] reward=-115796196.9 actor_loss=0.2261 critic_loss=96065576082.2857 entropy=17.7240 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 130280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-382655.1 mean_steps=16.2
|
|
[Episode 130290] reward=-118177244.2 actor_loss=0.2586 critic_loss=103497258507.3778 entropy=17.7194 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 130300] reward=-118499421.6 actor_loss=0.2595 critic_loss=104882656938.6667 entropy=17.7081 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 130300] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-642448.5 mean_steps=12.2
|
|
[Episode 130310] reward=-120066119.1 actor_loss=0.2920 critic_loss=108890951884.8000 entropy=17.7023 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 130320] reward=-117210851.3 actor_loss=0.3742 critic_loss=98429337600.0000 entropy=17.6978 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 130320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460884.5 mean_steps=15.2
|
|
[Episode 130330] reward=-114729437.7 actor_loss=0.3500 critic_loss=99142267699.2000 entropy=17.6955 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 130340] reward=-120873138.7 actor_loss=0.2774 critic_loss=99805929687.5789 entropy=17.6929 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 130340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-380117.1 mean_steps=15.0
|
|
[Episode 130350] reward=-113209868.1 actor_loss=0.3983 critic_loss=100302255786.6667 entropy=17.6973 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 130360] reward=-113597991.1 actor_loss=0.3881 critic_loss=99551321234.2857 entropy=17.6745 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 130360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483506.4 mean_steps=15.1
|
|
[Episode 130370] reward=-124632337.3 actor_loss=0.2427 critic_loss=105335397436.2353 entropy=17.6744 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 130380] reward=-186599365.3 actor_loss=0.3065 critic_loss=15290807743647.2891 entropy=17.6841 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 130380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433347.2 mean_steps=15.6
|
|
[Episode 130390] reward=-122396309.4 actor_loss=0.3332 critic_loss=97323783372.8000 entropy=17.6854 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 130400] reward=-120323338.9 actor_loss=0.2420 critic_loss=104184529338.8108 entropy=17.6838 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 130400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491013.7 mean_steps=14.0
|
|
[Episode 130410] reward=-118797951.0 actor_loss=0.3569 critic_loss=99807266492.6316 entropy=17.6862 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 130420] reward=-124857851.6 actor_loss=0.2639 critic_loss=110192841159.1111 entropy=17.6799 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 130420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474080.3 mean_steps=14.8
|
|
[Episode 130430] reward=-119478101.9 actor_loss=0.3803 critic_loss=103040843448.3200 entropy=17.6796 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 130440] reward=-118582004.2 actor_loss=0.3153 critic_loss=101047780291.7647 entropy=17.6877 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 130440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-473914.0 mean_steps=16.0
|
|
[Episode 130450] reward=-123200503.3 actor_loss=0.2993 critic_loss=100642906697.1429 entropy=17.6844 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 130460] reward=-123704976.2 actor_loss=0.2569 critic_loss=125981249295.0588 entropy=17.6950 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 130460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-492492.8 mean_steps=13.3
|
|
[Episode 130470] reward=-122934827.0 actor_loss=0.2710 critic_loss=126296269783.0400 entropy=17.7017 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 130480] reward=-118751006.5 actor_loss=0.2330 critic_loss=102931874056.2581 entropy=17.6893 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 130480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416681.3 mean_steps=15.6
|
|
[Episode 130490] reward=-121803698.5 actor_loss=0.2811 critic_loss=104166562702.2222 entropy=17.7135 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 130500] reward=-113338946.6 actor_loss=0.2905 critic_loss=92041220187.0222 entropy=17.7140 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 130500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442216.5 mean_steps=15.2
|
|
[Episode 130510] reward=-120787992.8 actor_loss=0.3000 critic_loss=141395619624.4211 entropy=17.7210 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 130520] reward=-118753440.2 actor_loss=0.2227 critic_loss=98172666985.9310 entropy=17.7209 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 130520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476002.3 mean_steps=14.7
|
|
[Episode 130530] reward=-115706456.0 actor_loss=0.3662 critic_loss=98337000470.7556 entropy=17.7152 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 130540] reward=-122033139.3 actor_loss=0.2604 critic_loss=102832235042.1333 entropy=17.7015 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 130540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442706.6 mean_steps=15.8
|
|
[Episode 130550] reward=-118527862.4 actor_loss=0.2868 critic_loss=97035601001.9310 entropy=17.7117 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 130560] reward=-117410793.8 actor_loss=0.3610 critic_loss=96713919273.6744 entropy=17.6993 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 130560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-466055.5 mean_steps=16.2
|
|
[Episode 130570] reward=-119659366.8 actor_loss=0.2376 critic_loss=101276405122.8445 entropy=17.6968 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 130580] reward=-116574615.1 actor_loss=0.2415 critic_loss=94970791886.0488 entropy=17.6870 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 130580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544853.2 mean_steps=13.2
|
|
[Episode 130590] reward=-115934399.1 actor_loss=0.3098 critic_loss=96352326269.1555 entropy=17.6918 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 130600] reward=-117984214.0 actor_loss=0.1883 critic_loss=98349692063.2889 entropy=17.7135 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 130600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-389305.0 mean_steps=16.1
|
|
[Episode 130610] reward=-118452541.5 actor_loss=0.3160 critic_loss=99199817409.4222 entropy=17.6961 approx_kl=0.0106 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 130620] reward=-121706774.9 actor_loss=0.3018 critic_loss=115563622491.0222 entropy=17.7045 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 130620] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-588657.8 mean_steps=11.6
|
|
[Episode 130630] reward=-118159659.4 actor_loss=0.2063 critic_loss=103360693979.4286 entropy=17.7142 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 130640] reward=-116020995.5 actor_loss=0.2501 critic_loss=101792922848.7805 entropy=17.7093 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 130640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590324.1 mean_steps=12.8
|
|
[Episode 130650] reward=-118383343.8 actor_loss=0.4218 critic_loss=105759250841.6000 entropy=17.7130 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 130660] reward=-116514607.2 actor_loss=0.2589 critic_loss=101146789660.4444 entropy=17.7237 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 130660] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-344872.3 mean_steps=16.4
|
|
[Episode 130670] reward=-122541906.9 actor_loss=0.2892 critic_loss=109083496903.1111 entropy=17.7261 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 130680] reward=-122351369.5 actor_loss=0.2895 critic_loss=111225004760.1778 entropy=17.7222 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 130680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547742.4 mean_steps=13.2
|
|
[Episode 130690] reward=-120934855.4 actor_loss=0.3059 critic_loss=102951766516.6222 entropy=17.7429 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 130700] reward=-119080859.0 actor_loss=0.2651 critic_loss=108924554808.8889 entropy=17.7364 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 130700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-667336.3 mean_steps=12.4
|
|
[Episode 130710] reward=-119466205.5 actor_loss=0.3323 critic_loss=101992080725.3333 entropy=17.7340 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 130720] reward=-119235582.6 actor_loss=0.3419 critic_loss=103741824393.8462 entropy=17.7179 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 130720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-633359.7 mean_steps=11.7
|
|
[Episode 130730] reward=-121965147.1 actor_loss=0.2185 critic_loss=105112543954.8235 entropy=17.7276 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 130740] reward=-125981278.1 actor_loss=0.3480 critic_loss=114910057813.3333 entropy=17.7154 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 130740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-466043.1 mean_steps=13.8
|
|
[Episode 130750] reward=-125413838.1 actor_loss=0.2155 critic_loss=107010664220.4444 entropy=17.7093 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 130760] reward=-121858581.2 actor_loss=0.2723 critic_loss=105777654811.6757 entropy=17.7144 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 130760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-591704.2 mean_steps=11.7
|
|
[Episode 130770] reward=-120927862.1 actor_loss=0.3198 critic_loss=121612138346.1463 entropy=17.7088 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 130780] reward=-119463517.1 actor_loss=0.2530 critic_loss=104301277814.1538 entropy=17.7129 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 130780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552576.9 mean_steps=13.2
|
|
[Episode 130790] reward=-119232143.6 actor_loss=0.3111 critic_loss=98950554328.1778 entropy=17.7096 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 130800] reward=-121207925.0 actor_loss=0.2750 critic_loss=105804343068.4444 entropy=17.7020 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 130800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-682202.1 mean_steps=13.1
|
|
[Episode 130810] reward=-124246409.5 actor_loss=0.3474 critic_loss=106352231862.8571 entropy=17.6882 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 130820] reward=-117706807.9 actor_loss=0.2997 critic_loss=105883640172.0889 entropy=17.7150 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 130820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-645474.0 mean_steps=11.8
|
|
[Episode 130830] reward=-118841706.8 actor_loss=0.2887 critic_loss=103534678331.0769 entropy=17.7178 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 130840] reward=-119612678.6 actor_loss=0.3874 critic_loss=97067854086.5641 entropy=17.7160 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 130840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579206.4 mean_steps=13.2
|
|
[Episode 130850] reward=-119484741.4 actor_loss=0.3359 critic_loss=100028179725.4737 entropy=17.7150 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 130860] reward=-120251849.3 actor_loss=0.3376 critic_loss=114938997782.7556 entropy=17.7068 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 130860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443830.1 mean_steps=15.8
|
|
[Episode 130870] reward=-122524686.1 actor_loss=0.2322 critic_loss=106639224923.0222 entropy=17.7070 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 130880] reward=-118687340.9 actor_loss=0.2635 critic_loss=99389514010.4828 entropy=17.7134 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 130880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-415067.5 mean_steps=15.4
|
|
[Episode 130890] reward=-120541492.9 actor_loss=0.3498 critic_loss=100597752172.0889 entropy=17.7172 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 130900] reward=-453804260.3 actor_loss=0.5418 critic_loss=198064415021465.5938 entropy=17.7133 approx_kl=0.0028 kl_stop=1 intervention_rate=0.1185 front_blocked=0
|
|
[Eval 130900] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-250179.6 mean_steps=18.4
|
|
[Episode 130910] reward=-117763572.0 actor_loss=0.3442 critic_loss=96448108854.3030 entropy=17.7104 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 130920] reward=-123098622.7 actor_loss=0.2971 critic_loss=110905463808.0000 entropy=17.7280 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 130920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578453.1 mean_steps=12.8
|
|
[Episode 130930] reward=-121367953.7 actor_loss=0.2011 critic_loss=101539210661.6471 entropy=17.7374 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 130940] reward=-119611540.1 actor_loss=0.3045 critic_loss=99276189696.0000 entropy=17.7561 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 130940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514179.0 mean_steps=14.2
|
|
[Episode 130950] reward=-116595950.0 actor_loss=0.3573 critic_loss=114747412935.1111 entropy=17.7819 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 130960] reward=-121565023.0 actor_loss=0.3662 critic_loss=105213683388.6316 entropy=17.7836 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 130960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-400736.2 mean_steps=16.9
|
|
[Episode 130970] reward=-123063271.6 actor_loss=0.2553 critic_loss=105542318125.5111 entropy=17.7648 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 130980] reward=-1329229103.0 actor_loss=0.3583 critic_loss=3385591197993824.5000 entropy=17.7620 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 130980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539293.4 mean_steps=13.6
|
|
[Episode 130990] reward=-121938991.1 actor_loss=0.2719 critic_loss=106837626788.9778 entropy=17.7674 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 131000] reward=-3968944434.3 actor_loss=0.4070 critic_loss=18134792360299360.0000 entropy=17.7662 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Eval 131000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-123707917.8 mean_steps=24.0
|
|
[Episode 131010] reward=-121345631.6 actor_loss=0.2537 critic_loss=120998956418.8445 entropy=17.7803 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 131020] reward=-280347235.6 actor_loss=0.1970 critic_loss=101812839056816.3594 entropy=17.7850 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 131020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-654075.3 mean_steps=13.3
|
|
[Episode 131030] reward=-115907502.2 actor_loss=0.3463 critic_loss=98047989077.3333 entropy=17.7949 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 131040] reward=-115787508.4 actor_loss=0.2285 critic_loss=97076357888.0000 entropy=17.7984 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 131040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613307.0 mean_steps=12.9
|
|
[Episode 131050] reward=-724307428.0 actor_loss=0.2884 critic_loss=1067812288340514.1250 entropy=17.7947 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 131060] reward=-2651961839.7 actor_loss=0.3326 critic_loss=6309185893433344.0000 entropy=17.7941 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1061 front_blocked=0
|
|
[Eval 131060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-617918.1 mean_steps=11.8
|
|
[Episode 131070] reward=-974123330.2 actor_loss=0.2370 critic_loss=1748438239511256.2500 entropy=17.7953 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 131080] reward=-122036496.1 actor_loss=0.3484 critic_loss=105855056827.7333 entropy=17.8113 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 131080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-403313.9 mean_steps=15.4
|
|
[Episode 131090] reward=-319332426.0 actor_loss=0.2997 critic_loss=120512604798976.0000 entropy=17.8381 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 131100] reward=-118922359.3 actor_loss=0.1855 critic_loss=104217736715.3778 entropy=17.8346 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 131100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407876.5 mean_steps=16.4
|
|
[Episode 131110] reward=-119118083.4 actor_loss=0.1983 critic_loss=99613328998.4000 entropy=17.8273 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 131120] reward=-121901495.0 actor_loss=0.3494 critic_loss=106376558273.4222 entropy=17.8227 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 131120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-583151.9 mean_steps=12.7
|
|
[Episode 131130] reward=-2646073850.2 actor_loss=0.2443 critic_loss=10282521634543298.0000 entropy=17.8213 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Episode 131140] reward=-117016476.5 actor_loss=0.2926 critic_loss=110718697472.0000 entropy=17.8299 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 131140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-410082.7 mean_steps=16.2
|
|
[Episode 131150] reward=-115110795.1 actor_loss=0.2779 critic_loss=102281674752.0000 entropy=17.8483 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 131160] reward=-1240013201.0 actor_loss=0.2481 critic_loss=1894304276731949.5000 entropy=17.8436 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Eval 131160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475215.3 mean_steps=14.6
|
|
[Episode 131170] reward=-806269860.9 actor_loss=1.4047 critic_loss=1254697064698934.0000 entropy=17.8568 approx_kl=0.0020 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 131180] reward=-120369009.4 actor_loss=0.3618 critic_loss=104014448687.6279 entropy=17.8616 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 131180] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-357441.5 mean_steps=16.8
|
|
[Episode 131190] reward=-121938172.4 actor_loss=0.2931 critic_loss=125621184414.4762 entropy=17.8696 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 131200] reward=-118883564.7 actor_loss=0.3622 critic_loss=108150882121.9556 entropy=17.9014 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 131200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530900.0 mean_steps=13.7
|
|
[Episode 131210] reward=-1074891003.0 actor_loss=0.3273 critic_loss=1766087992157558.7500 entropy=17.9138 approx_kl=0.0018 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 131220] reward=-124610977.0 actor_loss=0.2999 critic_loss=109626150912.0000 entropy=17.9191 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 131220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-372267.7 mean_steps=16.2
|
|
[Episode 131230] reward=-2272138220.6 actor_loss=67.9141 critic_loss=5051904822019504.0000 entropy=17.9516 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1061 front_blocked=0
|
|
[Episode 131240] reward=-861157486.6 actor_loss=0.2985 critic_loss=1128173987634289.7500 entropy=17.9642 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 131240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550842.8 mean_steps=13.6
|
|
[Episode 131250] reward=-122433532.8 actor_loss=0.2864 critic_loss=123681760870.4000 entropy=17.9511 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 131260] reward=-866244944.2 actor_loss=0.3564 critic_loss=1204724376832136.5000 entropy=17.9461 approx_kl=-0.0003 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 131260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-509164.0 mean_steps=14.9
|
|
[Episode 131270] reward=-774411555.6 actor_loss=0.2170 critic_loss=650380074898773.3750 entropy=17.9636 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 131280] reward=-193845166.1 actor_loss=0.1879 critic_loss=16105487728640.0000 entropy=17.9553 approx_kl=0.0004 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Eval 131280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-353287.6 mean_steps=16.6
|
|
[Episode 131290] reward=-489438932.6 actor_loss=6.1888 critic_loss=402180512782745.6250 entropy=17.9689 approx_kl=0.0035 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 131300] reward=-123978611.1 actor_loss=0.2004 critic_loss=138726569779.2000 entropy=17.9744 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 131300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-398945.2 mean_steps=15.4
|
|
[Episode 131310] reward=-569247257.6 actor_loss=0.3943 critic_loss=625030129901021.8750 entropy=17.9949 approx_kl=-0.0010 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 131320] reward=-125686257.8 actor_loss=0.3073 critic_loss=112392296568.4706 entropy=17.9964 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 131320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497115.0 mean_steps=15.2
|
|
[Episode 131330] reward=-406520495.9 actor_loss=0.2291 critic_loss=254237890564733.1562 entropy=18.0146 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 131340] reward=-120379666.0 actor_loss=0.2024 critic_loss=105617204040.2051 entropy=18.0271 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 131340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469262.1 mean_steps=14.8
|
|
[Episode 131350] reward=-122380286.7 actor_loss=0.3653 critic_loss=112113923884.1379 entropy=18.0293 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 131360] reward=-148063345.0 actor_loss=0.2887 critic_loss=2254474505966.9331 entropy=18.0306 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 131360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-514673.2 mean_steps=13.1
|
|
[Episode 131370] reward=-117575420.5 actor_loss=0.2825 critic_loss=107121235103.2889 entropy=18.0265 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 131380] reward=-130824550.0 actor_loss=0.3142 critic_loss=387394834610.0870 entropy=18.0374 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 131380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584496.5 mean_steps=12.7
|
|
[Episode 131390] reward=-131815528.3 actor_loss=0.2061 critic_loss=344718628961.5238 entropy=18.0335 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 131400] reward=-123421401.2 actor_loss=0.3593 critic_loss=110664374135.4667 entropy=18.0233 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 131400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478171.6 mean_steps=15.1
|
|
[Episode 131410] reward=-123191821.2 actor_loss=0.3134 critic_loss=108536609255.6190 entropy=18.0153 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 131420] reward=-122482581.4 actor_loss=0.2639 critic_loss=106026214368.9697 entropy=18.0000 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 131420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447254.6 mean_steps=15.7
|
|
[Episode 131430] reward=-117612535.4 actor_loss=0.2962 critic_loss=97743360091.0222 entropy=17.9834 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 131440] reward=-125673078.4 actor_loss=0.2271 critic_loss=114045435904.0000 entropy=17.9798 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 131440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424472.8 mean_steps=15.4
|
|
[Episode 131450] reward=-122747927.5 actor_loss=0.3217 critic_loss=109561476437.3333 entropy=17.9765 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 131460] reward=-120953708.3 actor_loss=0.2760 critic_loss=106251220127.2889 entropy=17.9601 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 131460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-574997.1 mean_steps=14.8
|
|
[Episode 131470] reward=-122928430.7 actor_loss=0.3139 critic_loss=112616407040.0000 entropy=17.9477 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 131480] reward=-122101395.5 actor_loss=0.1248 critic_loss=105935018569.1429 entropy=17.9405 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 131480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585613.7 mean_steps=12.8
|
|
[Episode 131490] reward=-119629417.7 actor_loss=0.3494 critic_loss=103245825117.0909 entropy=17.9373 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 131500] reward=-120847788.8 actor_loss=0.2051 critic_loss=103398024948.8696 entropy=17.9306 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 131500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-620909.7 mean_steps=13.3
|
|
[Episode 131510] reward=-118232006.1 actor_loss=0.2831 critic_loss=104691983183.4483 entropy=17.9208 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 131520] reward=-122531222.0 actor_loss=0.2558 critic_loss=110622972416.0000 entropy=17.9339 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 131520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471654.2 mean_steps=15.2
|
|
[Episode 131530] reward=-117739323.4 actor_loss=0.3301 critic_loss=105733517312.0000 entropy=17.9509 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 131540] reward=-162054700.8 actor_loss=0.3036 critic_loss=7983183933755.0771 entropy=17.9459 approx_kl=0.0031 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 131540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-498565.4 mean_steps=15.8
|
|
[Episode 131550] reward=-157791289.1 actor_loss=0.2984 critic_loss=6278295951769.5996 entropy=17.9420 approx_kl=0.0033 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 131560] reward=-116641542.6 actor_loss=0.2811 critic_loss=107684310799.0588 entropy=17.9399 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 131560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517695.4 mean_steps=14.0
|
|
[Episode 131570] reward=-120403050.3 actor_loss=0.3809 critic_loss=106270606586.3111 entropy=17.9377 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 131580] reward=-116573277.8 actor_loss=0.4231 critic_loss=106376347332.9231 entropy=17.9417 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 131580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-490132.3 mean_steps=16.2
|
|
[Episode 131590] reward=-125344597.8 actor_loss=0.2592 critic_loss=107061017258.6667 entropy=17.9380 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 131600] reward=-120712043.3 actor_loss=0.3047 critic_loss=105464489392.3556 entropy=17.9324 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 131600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-414228.0 mean_steps=16.5
|
|
[Episode 131610] reward=-122542037.2 actor_loss=0.2173 critic_loss=125271891022.7692 entropy=17.9422 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 131620] reward=-116570474.0 actor_loss=0.3355 critic_loss=110044897865.1429 entropy=17.9547 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 131620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480370.1 mean_steps=14.3
|
|
[Episode 131630] reward=-122802955.5 actor_loss=0.2475 critic_loss=112836714222.9333 entropy=17.9522 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 131640] reward=-145408736.7 actor_loss=0.3360 critic_loss=2453652961871.6445 entropy=17.9756 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 131640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526217.3 mean_steps=13.2
|
|
[Episode 131650] reward=-120948691.6 actor_loss=0.3485 critic_loss=109086798461.1555 entropy=17.9656 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 131660] reward=-120556554.5 actor_loss=0.3475 critic_loss=104911824486.4000 entropy=17.9685 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 131660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532766.3 mean_steps=13.1
|
|
[Episode 131670] reward=-124031237.0 actor_loss=0.2415 critic_loss=153885307252.3636 entropy=17.9412 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 131680] reward=-604165151.8 actor_loss=0.1997 critic_loss=664918319227790.2500 entropy=17.9451 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 131680] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-320298.0 mean_steps=17.6
|
|
[Episode 131690] reward=-121929011.2 actor_loss=0.3187 critic_loss=110688103992.8889 entropy=17.9505 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 131700] reward=-307828753.9 actor_loss=0.2267 critic_loss=84275287803260.3438 entropy=17.9627 approx_kl=0.0035 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 131700] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-641247.9 mean_steps=11.3
|
|
[Episode 131710] reward=-350936676.5 actor_loss=2.7584 critic_loss=110937638880492.3125 entropy=17.9633 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 131720] reward=-558552708.7 actor_loss=0.2921 critic_loss=432811305365959.1250 entropy=17.9406 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 131720] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-370896.3 mean_steps=16.9
|
|
[Episode 131730] reward=-126317958.3 actor_loss=0.3645 critic_loss=115664794335.1795 entropy=17.9949 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 131740] reward=-389288539.7 actor_loss=0.3213 critic_loss=230188073178453.3438 entropy=17.9984 approx_kl=0.0022 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 131740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500117.7 mean_steps=13.8
|
|
[Episode 131750] reward=-585661862.5 actor_loss=0.2564 critic_loss=631922242669772.7500 entropy=18.0117 approx_kl=-0.0004 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 131760] reward=-153749821.0 actor_loss=0.3281 critic_loss=4490905727795.2002 entropy=18.0379 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 131760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541236.7 mean_steps=13.4
|
|
[Episode 131770] reward=-507239170.4 actor_loss=0.2332 critic_loss=326668173559580.4375 entropy=18.0406 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 131780] reward=-232258631.8 actor_loss=0.3387 critic_loss=39946707324108.7969 entropy=18.0454 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 131780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-398929.9 mean_steps=15.8
|
|
[Episode 131790] reward=-125238097.0 actor_loss=0.2631 critic_loss=115739048527.6444 entropy=18.0723 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 131800] reward=-117464916.1 actor_loss=0.3472 critic_loss=102146497697.6842 entropy=18.0488 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 131800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517532.8 mean_steps=14.8
|
|
[Episode 131810] reward=-147605608.1 actor_loss=0.2572 critic_loss=1760774324224.0000 entropy=18.0431 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 131820] reward=-121378231.6 actor_loss=0.2852 critic_loss=106170246263.0698 entropy=18.0500 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 131820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613192.8 mean_steps=12.9
|
|
[Episode 131830] reward=-138507424.3 actor_loss=0.2763 critic_loss=1281380117006.6287 entropy=18.0417 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 131840] reward=-123241282.8 actor_loss=0.2651 critic_loss=111052365824.0000 entropy=18.0282 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 131840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-617101.2 mean_steps=11.7
|
|
[Episode 131850] reward=-123732511.7 actor_loss=0.3714 critic_loss=127059756100.2667 entropy=18.0191 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 131860] reward=-124345982.9 actor_loss=0.3905 critic_loss=128427805354.6667 entropy=18.0128 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 131860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-404791.1 mean_steps=15.0
|
|
[Episode 131870] reward=-127297442.0 actor_loss=0.3288 critic_loss=124623417571.5556 entropy=18.0160 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 131880] reward=-123673135.2 actor_loss=0.3205 critic_loss=110401300616.5333 entropy=18.0242 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 131880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526764.9 mean_steps=14.2
|
|
[Episode 131890] reward=-760393971.5 actor_loss=0.6954 critic_loss=1170722360772380.5000 entropy=18.0184 approx_kl=0.0045 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 131900] reward=-125840429.6 actor_loss=0.2203 critic_loss=112132371176.7273 entropy=18.0001 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 131900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437168.1 mean_steps=15.3
|
|
[Episode 131910] reward=-125202386.7 actor_loss=0.2691 critic_loss=111258261731.5556 entropy=17.9722 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 131920] reward=-122941613.5 actor_loss=0.2707 critic_loss=106887964899.5556 entropy=17.9837 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 131920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409944.7 mean_steps=15.8
|
|
[Episode 131930] reward=-126135639.3 actor_loss=0.2644 critic_loss=221163228364.8000 entropy=17.9774 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 131940] reward=-121651106.8 actor_loss=0.2246 critic_loss=110280191836.1600 entropy=17.9772 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 131940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-536779.1 mean_steps=12.1
|
|
[Episode 131950] reward=-122573362.2 actor_loss=0.2789 critic_loss=116987761664.0000 entropy=17.9915 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 131960] reward=-123159744.5 actor_loss=0.2913 critic_loss=105819433067.7895 entropy=17.9976 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 131960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-396568.6 mean_steps=17.0
|
|
[Episode 131970] reward=-123976250.2 actor_loss=0.2785 critic_loss=108175356626.8235 entropy=18.0060 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 131980] reward=-121786912.9 actor_loss=0.3440 critic_loss=103216578286.9333 entropy=18.0115 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 131980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473754.2 mean_steps=14.3
|
|
[Episode 131990] reward=-120582945.3 actor_loss=0.2028 critic_loss=109873011618.9091 entropy=18.0153 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 132000] reward=-126309334.9 actor_loss=0.2478 critic_loss=114163372566.2609 entropy=18.0093 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 132000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-578969.0 mean_steps=14.4
|
|
[Episode 132010] reward=-125732417.3 actor_loss=0.3519 critic_loss=117446975115.6364 entropy=18.0178 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 132020] reward=-120049636.6 actor_loss=0.4110 critic_loss=103745144627.2000 entropy=18.0064 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 132020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-524132.2 mean_steps=14.8
|
|
[Episode 132030] reward=-118148785.1 actor_loss=0.3163 critic_loss=100408671883.6364 entropy=18.0157 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 132040] reward=-121285848.8 actor_loss=0.2774 critic_loss=112152144554.6667 entropy=18.0289 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 132040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-543997.3 mean_steps=14.0
|
|
[Episode 132050] reward=-125402120.9 actor_loss=0.2272 critic_loss=110366683331.0476 entropy=18.0209 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 132060] reward=-113319940.3 actor_loss=0.3132 critic_loss=102284480420.9778 entropy=18.0188 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 132060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-353772.5 mean_steps=16.6
|
|
[Episode 132070] reward=-197622304.3 actor_loss=0.3220 critic_loss=21066998134283.3789 entropy=18.0174 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 132080] reward=-125791505.0 actor_loss=0.3524 critic_loss=197765665353.1429 entropy=18.0183 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 132080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470359.1 mean_steps=14.6
|
|
[Episode 132090] reward=-124700658.1 actor_loss=0.2993 critic_loss=115560869432.8889 entropy=18.0071 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 132100] reward=-169517376.2 actor_loss=0.2901 critic_loss=8195316261410.1338 entropy=18.0002 approx_kl=0.0002 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 132100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479522.3 mean_steps=13.8
|
|
[Episode 132110] reward=-126343650.4 actor_loss=0.1975 critic_loss=112102191559.1111 entropy=18.0137 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 132120] reward=-117852814.0 actor_loss=0.2411 critic_loss=109973947628.3077 entropy=17.9827 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 132120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541911.6 mean_steps=14.2
|
|
[Episode 132130] reward=-121311899.9 actor_loss=0.2850 critic_loss=103005521317.6471 entropy=17.9751 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 132140] reward=-121089423.7 actor_loss=0.2747 critic_loss=107822329651.2000 entropy=17.9562 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 132140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467177.2 mean_steps=13.8
|
|
[Episode 132150] reward=-121515741.5 actor_loss=0.2489 critic_loss=109611695672.8889 entropy=17.9805 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 132160] reward=-304114498.6 actor_loss=0.3955 critic_loss=113499518982189.5156 entropy=17.9846 approx_kl=-0.0000 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 132160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410634.0 mean_steps=15.2
|
|
[Episode 132170] reward=-121378119.8 actor_loss=0.2822 critic_loss=102764402915.5556 entropy=17.9764 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 132180] reward=-125744490.8 actor_loss=0.3290 critic_loss=110215567041.4222 entropy=17.9672 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 132180] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-648001.3 mean_steps=12.3
|
|
[Episode 132190] reward=-119618256.4 actor_loss=0.2398 critic_loss=110646711637.3333 entropy=17.9584 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 132200] reward=-274861933.0 actor_loss=17.0667 critic_loss=62504179043896.8906 entropy=17.9594 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 132200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-609746.5 mean_steps=12.8
|
|
[Episode 132210] reward=-125215541.3 actor_loss=0.3231 critic_loss=112096278869.3333 entropy=17.9423 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 132220] reward=-123927154.8 actor_loss=0.1959 critic_loss=168226005174.0444 entropy=17.9546 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 132220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-382303.6 mean_steps=15.7
|
|
[Episode 132230] reward=-121978530.9 actor_loss=0.4226 critic_loss=108822736622.9333 entropy=17.9651 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 132240] reward=-1979764767.9 actor_loss=0.3190 critic_loss=4388191595303913.0000 entropy=17.9805 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 132240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-399969.0 mean_steps=16.2
|
|
[Episode 132250] reward=-121008686.5 actor_loss=0.2540 critic_loss=110447543532.3077 entropy=17.9904 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 132260] reward=-1846352262.7 actor_loss=0.6795 critic_loss=7325993302869788.0000 entropy=18.0128 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 132260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560909.7 mean_steps=13.3
|
|
[Episode 132270] reward=-123674792.2 actor_loss=0.2291 critic_loss=106620114625.4222 entropy=17.9927 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 132280] reward=-145731463.3 actor_loss=0.2338 critic_loss=2332743565312.0000 entropy=17.9763 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 132280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497612.3 mean_steps=13.9
|
|
[Episode 132290] reward=-124376231.4 actor_loss=0.2896 critic_loss=108727907381.8947 entropy=17.9700 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 132300] reward=-123746922.9 actor_loss=0.2509 critic_loss=106530173873.2308 entropy=17.9728 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 132300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-583600.7 mean_steps=14.2
|
|
[Episode 132310] reward=-119221505.7 actor_loss=0.3140 critic_loss=101511254016.0000 entropy=17.9795 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 132320] reward=-123656707.2 actor_loss=0.2527 critic_loss=112130297088.0000 entropy=17.9723 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 132320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-464898.8 mean_steps=13.8
|
|
[Episode 132330] reward=-121109205.3 actor_loss=0.2353 critic_loss=135652015399.8222 entropy=17.9770 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 132340] reward=-120772354.3 actor_loss=0.3150 critic_loss=112619454464.0000 entropy=17.9620 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 132340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497053.5 mean_steps=13.6
|
|
[Episode 132350] reward=-122300390.2 actor_loss=0.3235 critic_loss=106041582660.2667 entropy=17.9637 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 132360] reward=-134957494.2 actor_loss=0.3762 critic_loss=1387156723029.3333 entropy=17.9658 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 132360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-596244.9 mean_steps=13.7
|
|
[Episode 132370] reward=-119678348.8 actor_loss=0.3048 critic_loss=101293549626.5143 entropy=17.9690 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 132380] reward=-130681969.1 actor_loss=0.2761 critic_loss=306242628266.6667 entropy=17.9671 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 132380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-421756.1 mean_steps=16.5
|
|
[Episode 132390] reward=-119241543.7 actor_loss=0.2601 critic_loss=107629975051.3778 entropy=17.9528 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 132400] reward=-172878773.3 actor_loss=0.2636 critic_loss=11226235954790.4004 entropy=17.9546 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 132400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488934.6 mean_steps=14.7
|
|
[Episode 132410] reward=-2081813397.1 actor_loss=0.2583 critic_loss=8972217736343643.0000 entropy=17.9782 approx_kl=-0.0001 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 132420] reward=-119844991.4 actor_loss=0.3807 critic_loss=102023211066.5143 entropy=17.9911 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 132420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-404100.3 mean_steps=16.2
|
|
[Episode 132430] reward=-538042938.0 actor_loss=0.2689 critic_loss=326313570322204.4375 entropy=17.9920 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 132440] reward=-179397292.8 actor_loss=0.3053 critic_loss=12266027905479.1113 entropy=18.0037 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 132440] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-361597.9 mean_steps=16.9
|
|
[Episode 132450] reward=-196633188.6 actor_loss=0.2510 critic_loss=20620874705257.4102 entropy=18.0292 approx_kl=0.0030 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 132460] reward=-127395584.3 actor_loss=0.3071 critic_loss=109187996178.9630 entropy=18.0124 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 132460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-389163.2 mean_steps=15.1
|
|
[Episode 132470] reward=-117506638.2 actor_loss=0.3432 critic_loss=98002435465.8462 entropy=17.9917 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 132480] reward=-128324239.3 actor_loss=0.2899 critic_loss=366387371821.9487 entropy=17.9825 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 132480] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-362697.7 mean_steps=16.9
|
|
[Episode 132490] reward=-117189115.1 actor_loss=0.3228 critic_loss=99600909744.3556 entropy=17.9886 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 132500] reward=-385536111.3 actor_loss=0.2282 critic_loss=173678013660403.8125 entropy=17.9883 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 132500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492279.3 mean_steps=14.6
|
|
[Episode 132510] reward=-123615570.9 actor_loss=0.3412 critic_loss=105882678977.4222 entropy=17.9926 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 132520] reward=-130058715.3 actor_loss=0.2582 critic_loss=122247151616.0000 entropy=17.9997 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 132520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457201.8 mean_steps=15.6
|
|
[Episode 132530] reward=-122591569.6 actor_loss=0.2912 critic_loss=109437563335.1111 entropy=17.9877 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 132540] reward=-117711146.1 actor_loss=0.3890 critic_loss=104208763926.7556 entropy=17.9718 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 132540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-583893.3 mean_steps=11.8
|
|
[Episode 132550] reward=-123702080.8 actor_loss=0.2912 critic_loss=120721716929.4222 entropy=17.9901 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 132560] reward=-2879163466.2 actor_loss=0.2305 critic_loss=8409307260830060.0000 entropy=17.9891 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1035 front_blocked=0
|
|
[Eval 132560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-360653.0 mean_steps=16.1
|
|
[Episode 132570] reward=-120896320.3 actor_loss=0.3438 critic_loss=106520129536.0000 entropy=17.9753 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 132580] reward=-751626246.1 actor_loss=0.2829 critic_loss=859003846160839.1250 entropy=17.9951 approx_kl=0.0008 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 132580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437402.5 mean_steps=15.4
|
|
[Episode 132590] reward=-118771794.6 actor_loss=0.2743 critic_loss=106692166860.8000 entropy=17.9919 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 132600] reward=-125137684.9 actor_loss=0.1162 critic_loss=109857280091.0222 entropy=17.9775 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 132600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394990.6 mean_steps=16.0
|
|
[Episode 132610] reward=-119788163.5 actor_loss=0.2821 critic_loss=101666451671.5789 entropy=17.9842 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 132620] reward=-126068020.8 actor_loss=0.1675 critic_loss=109741023459.5556 entropy=17.9780 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 132620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536703.8 mean_steps=14.6
|
|
[Episode 132630] reward=-1605098607.2 actor_loss=0.3734 critic_loss=5779187627039130.0000 entropy=17.9743 approx_kl=-0.0004 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 132640] reward=-265272799.9 actor_loss=0.3764 critic_loss=59343706667235.5547 entropy=17.9921 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 132640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519676.9 mean_steps=14.4
|
|
[Episode 132650] reward=-513717721.3 actor_loss=0.3187 critic_loss=453790036213395.9375 entropy=17.9903 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 132660] reward=-118944833.2 actor_loss=0.2230 critic_loss=105599640020.1143 entropy=17.9891 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 132660] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-377893.0 mean_steps=17.2
|
|
[Episode 132670] reward=-254474720.4 actor_loss=0.3244 critic_loss=54148973781902.2188 entropy=17.9903 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 132680] reward=-536687810.8 actor_loss=0.2921 critic_loss=379777853755665.0625 entropy=17.9813 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 132680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-449124.0 mean_steps=14.2
|
|
[Episode 132690] reward=-121089326.2 actor_loss=0.3759 critic_loss=106119625477.6889 entropy=17.9911 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 132700] reward=-114479548.7 actor_loss=0.3254 critic_loss=113003985851.7333 entropy=17.9796 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 132700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-390927.2 mean_steps=16.3
|
|
[Episode 132710] reward=-122714139.5 actor_loss=0.3026 critic_loss=101654031701.3333 entropy=17.9823 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 132720] reward=-122794199.8 actor_loss=0.3267 critic_loss=108855255880.2051 entropy=17.9847 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 132720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-607649.1 mean_steps=12.8
|
|
[Episode 132730] reward=-120568823.2 actor_loss=0.3127 critic_loss=99579234258.4889 entropy=17.9607 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 132740] reward=-291315683.3 actor_loss=0.3345 critic_loss=94087212573218.1406 entropy=17.9623 approx_kl=-0.0006 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 132740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-631937.6 mean_steps=11.8
|
|
[Episode 132750] reward=-123627824.6 actor_loss=0.1901 critic_loss=105910777068.3077 entropy=17.9737 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 132760] reward=-125604683.2 actor_loss=0.2403 critic_loss=136205521237.3333 entropy=17.9856 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 132760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415862.0 mean_steps=16.2
|
|
[Episode 132770] reward=-119918175.7 actor_loss=0.2827 critic_loss=107949766061.4194 entropy=17.9799 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 132780] reward=-578977825.5 actor_loss=0.1709 critic_loss=564903956994821.7500 entropy=17.9988 approx_kl=-0.0016 kl_stop=0 intervention_rate=0.1146 front_blocked=0
|
|
[Eval 132780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453567.2 mean_steps=15.7
|
|
[Episode 132790] reward=-325958549.8 actor_loss=0.2569 critic_loss=78921528887068.4375 entropy=18.0415 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 132800] reward=-121945508.9 actor_loss=0.2679 critic_loss=108111775516.4444 entropy=18.0654 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 132800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506783.3 mean_steps=14.1
|
|
[Episode 132810] reward=-123799564.4 actor_loss=0.2280 critic_loss=113768774419.6923 entropy=18.0607 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 132820] reward=-122323958.9 actor_loss=0.3262 critic_loss=112971304049.7778 entropy=18.0619 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 132820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515896.3 mean_steps=14.4
|
|
[Episode 132830] reward=-119242140.2 actor_loss=0.2143 critic_loss=110483016557.7143 entropy=18.0659 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 132840] reward=-127527651.0 actor_loss=0.2468 critic_loss=113317387806.1176 entropy=18.0813 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 132840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-412522.2 mean_steps=16.4
|
|
[Episode 132850] reward=-121730801.6 actor_loss=0.3514 critic_loss=103754277774.2222 entropy=18.0596 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 132860] reward=-236974203.2 actor_loss=0.3128 critic_loss=49555821344267.3750 entropy=18.0782 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 132860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429297.2 mean_steps=15.7
|
|
[Episode 132870] reward=-119552830.3 actor_loss=0.4450 critic_loss=242633580826.4828 entropy=18.0791 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 132880] reward=-122006080.6 actor_loss=0.2284 critic_loss=103638975010.1333 entropy=18.0675 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 132880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409812.4 mean_steps=15.8
|
|
[Episode 132890] reward=-120540064.9 actor_loss=0.3169 critic_loss=106314134262.5185 entropy=18.0686 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 132900] reward=-121047000.7 actor_loss=0.2774 critic_loss=103281002723.5556 entropy=18.0532 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 132900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578063.8 mean_steps=12.8
|
|
[Episode 132910] reward=-148773572.8 actor_loss=0.4034 critic_loss=3932141923714.8442 entropy=18.0600 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 132920] reward=-124324170.8 actor_loss=0.2632 critic_loss=110837250821.6889 entropy=18.0666 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 132920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-396385.2 mean_steps=15.7
|
|
[Episode 132930] reward=-123935281.9 actor_loss=0.3477 critic_loss=104860121861.6889 entropy=18.0662 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 132940] reward=-121818180.2 actor_loss=0.2925 critic_loss=105529610513.0667 entropy=18.0644 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 132940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-389529.9 mean_steps=16.4
|
|
[Episode 132950] reward=-123390577.2 actor_loss=0.2485 critic_loss=106456575362.8445 entropy=18.0262 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 132960] reward=-119191970.0 actor_loss=0.3359 critic_loss=116487177830.4000 entropy=18.0177 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 132960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417600.6 mean_steps=15.2
|
|
[Episode 132970] reward=-142599198.2 actor_loss=0.2932 critic_loss=2067439611904.0000 entropy=18.0226 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 132980] reward=-137419955.8 actor_loss=0.3283 critic_loss=1320864378606.9333 entropy=18.0151 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 132980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518021.7 mean_steps=12.9
|
|
[Episode 132990] reward=-115567583.6 actor_loss=0.2974 critic_loss=100079240078.2222 entropy=18.0078 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 133000] reward=-120315024.1 actor_loss=0.2540 critic_loss=106288490123.6364 entropy=18.0082 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 133000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-634564.4 mean_steps=12.9
|
|
[Episode 133010] reward=-127083338.5 actor_loss=0.3016 critic_loss=112002616072.8276 entropy=17.9986 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 133020] reward=-122905678.1 actor_loss=0.2570 critic_loss=105473678222.2222 entropy=17.9826 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 133020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535601.0 mean_steps=13.8
|
|
[Episode 133030] reward=-114130531.8 actor_loss=0.3197 critic_loss=93216371213.1282 entropy=17.9642 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 133040] reward=-119606135.3 actor_loss=0.3264 critic_loss=104936365357.1765 entropy=17.9750 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 133040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495553.0 mean_steps=15.1
|
|
[Episode 133050] reward=-120901059.6 actor_loss=0.2339 critic_loss=103231923996.4444 entropy=17.9775 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 133060] reward=-128060369.0 actor_loss=0.2389 critic_loss=110579108219.2593 entropy=17.9733 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 133060] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-347351.1 mean_steps=17.9
|
|
[Episode 133070] reward=-116691837.4 actor_loss=0.3184 critic_loss=101656415982.9333 entropy=17.9628 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 133080] reward=-115519978.0 actor_loss=0.3596 critic_loss=101588936658.4889 entropy=17.9778 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 133080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-508504.3 mean_steps=14.9
|
|
[Episode 133090] reward=-117410414.5 actor_loss=0.3082 critic_loss=100032818107.7333 entropy=17.9666 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 133100] reward=-121131607.2 actor_loss=0.3976 critic_loss=102746705464.8889 entropy=17.9696 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 133100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447692.0 mean_steps=15.5
|
|
[Episode 133110] reward=-122913006.7 actor_loss=0.2750 critic_loss=149707861469.8667 entropy=17.9761 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 133120] reward=-126643202.2 actor_loss=0.2256 critic_loss=110032013914.3529 entropy=17.9786 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 133120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479632.6 mean_steps=14.7
|
|
[Episode 133130] reward=-119861905.6 actor_loss=0.3181 critic_loss=104642013138.4889 entropy=17.9697 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 133140] reward=-120544629.2 actor_loss=0.3117 critic_loss=110150021766.7368 entropy=17.9647 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 133140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532278.0 mean_steps=13.3
|
|
[Episode 133150] reward=-126327887.8 actor_loss=0.2541 critic_loss=110224790272.0000 entropy=17.9651 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 133160] reward=-126550671.5 actor_loss=0.2121 critic_loss=111284901569.4222 entropy=17.9608 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 133160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-455670.1 mean_steps=13.6
|
|
[Episode 133170] reward=-118644817.6 actor_loss=0.2890 critic_loss=102947057026.8445 entropy=17.9468 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 133180] reward=-118729244.4 actor_loss=0.2842 critic_loss=96389488855.5789 entropy=17.9526 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 133180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500802.4 mean_steps=13.9
|
|
[Episode 133190] reward=-113498919.0 actor_loss=0.2854 critic_loss=95564245219.5556 entropy=17.9419 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 133200] reward=-122629790.7 actor_loss=0.2632 critic_loss=106482721731.7647 entropy=17.9385 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 133200] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-335760.8 mean_steps=16.8
|
|
[Episode 133210] reward=-124074456.7 actor_loss=0.2069 critic_loss=107702706607.1579 entropy=17.9538 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 133220] reward=-125008701.8 actor_loss=0.2273 critic_loss=120709772083.2000 entropy=17.9429 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 133220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-441153.0 mean_steps=13.4
|
|
[Episode 133230] reward=-117477870.3 actor_loss=0.3407 critic_loss=111526295885.3954 entropy=17.9368 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 133240] reward=-117856494.0 actor_loss=0.3053 critic_loss=100682349772.8000 entropy=17.9342 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 133240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-595141.3 mean_steps=12.4
|
|
[Episode 133250] reward=-119956734.5 actor_loss=0.3397 critic_loss=101813368695.4667 entropy=17.9152 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 133260] reward=-120486875.3 actor_loss=0.3491 critic_loss=102128299163.1515 entropy=17.9162 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 133260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-608074.1 mean_steps=13.7
|
|
[Episode 133270] reward=-120437545.9 actor_loss=0.3715 critic_loss=103715564809.4815 entropy=17.9154 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 133280] reward=-118855673.1 actor_loss=0.2721 critic_loss=102920069775.3600 entropy=17.9109 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 133280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-421659.0 mean_steps=16.8
|
|
[Episode 133290] reward=-126875199.0 actor_loss=0.3015 critic_loss=289326566253.7143 entropy=17.9190 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 133300] reward=-123319851.5 actor_loss=0.2425 critic_loss=103686015514.9474 entropy=17.9144 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 133300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-603404.4 mean_steps=13.8
|
|
[Episode 133310] reward=-112144439.7 actor_loss=0.3581 critic_loss=96989998713.9048 entropy=17.9114 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 133320] reward=-116992570.6 actor_loss=0.2700 critic_loss=110269279254.7556 entropy=17.9010 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 133320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452827.6 mean_steps=14.8
|
|
[Episode 133330] reward=-120603834.7 actor_loss=0.3257 critic_loss=103587863893.3333 entropy=17.8777 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 133340] reward=-124411397.7 actor_loss=0.2388 critic_loss=130801617578.6667 entropy=17.8987 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 133340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446380.7 mean_steps=14.6
|
|
[Episode 133350] reward=-121578307.2 actor_loss=0.2894 critic_loss=108073871304.6487 entropy=17.9027 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 133360] reward=-123432630.1 actor_loss=0.2256 critic_loss=105789623395.0968 entropy=17.8988 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 133360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535451.7 mean_steps=14.2
|
|
[Episode 133370] reward=-122506021.5 actor_loss=0.3606 critic_loss=105600553233.0667 entropy=17.8852 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 133380] reward=-117748334.9 actor_loss=0.2847 critic_loss=102316944822.8571 entropy=17.8754 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 133380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-525336.9 mean_steps=15.1
|
|
[Episode 133390] reward=-121172852.7 actor_loss=0.2731 critic_loss=103285442206.8965 entropy=17.8645 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 133400] reward=-117552822.3 actor_loss=0.3548 critic_loss=96691575125.3333 entropy=17.8615 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 133400] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-364474.0 mean_steps=17.8
|
|
[Episode 133410] reward=-118705265.9 actor_loss=0.2260 critic_loss=105260094885.6471 entropy=17.8507 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 133420] reward=-119104286.4 actor_loss=0.3221 critic_loss=108438419823.5897 entropy=17.8457 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 133420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-533822.3 mean_steps=15.5
|
|
[Episode 133430] reward=-331465647.7 actor_loss=0.6313 critic_loss=78641386196536.8906 entropy=17.8402 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 133440] reward=-122490477.7 actor_loss=0.3070 critic_loss=107612239371.3778 entropy=17.8352 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 133440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508383.8 mean_steps=14.3
|
|
[Episode 133450] reward=-112404836.0 actor_loss=0.3302 critic_loss=95519006355.9111 entropy=17.8418 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 133460] reward=-207744563.2 actor_loss=0.3696 critic_loss=25355592581484.0898 entropy=17.8449 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 133460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559895.8 mean_steps=13.2
|
|
[Episode 133470] reward=-118605852.8 actor_loss=0.3004 critic_loss=116994132104.5333 entropy=17.8447 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 133480] reward=-113341658.6 actor_loss=0.1969 critic_loss=94560721016.4706 entropy=17.8390 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 133480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553027.7 mean_steps=14.3
|
|
[Episode 133490] reward=-115428056.6 actor_loss=0.2332 critic_loss=94588885530.9474 entropy=17.8414 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 133500] reward=-114071029.9 actor_loss=0.3146 critic_loss=98416212335.5897 entropy=17.8511 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 133500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-516554.2 mean_steps=15.1
|
|
[Episode 133510] reward=-122528151.4 actor_loss=0.1822 critic_loss=103002302054.4000 entropy=17.8534 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 133520] reward=-119857255.1 actor_loss=0.2817 critic_loss=105028111018.6667 entropy=17.8312 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 133520] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-595171.9 mean_steps=11.9
|
|
[Episode 133530] reward=-120569888.5 actor_loss=0.2800 critic_loss=103837346237.2174 entropy=17.8289 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 133540] reward=-122497015.7 actor_loss=0.2600 critic_loss=129023317926.9565 entropy=17.8180 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 133540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445899.3 mean_steps=15.7
|
|
[Episode 133550] reward=-120419186.2 actor_loss=0.2853 critic_loss=110835046497.5238 entropy=17.8129 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 133560] reward=-120174892.4 actor_loss=0.2112 critic_loss=101469884237.9130 entropy=17.8016 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 133560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483219.6 mean_steps=13.9
|
|
[Episode 133570] reward=-114694775.3 actor_loss=0.2575 critic_loss=96577584059.7333 entropy=17.8133 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 133580] reward=-118067496.7 actor_loss=0.3227 critic_loss=120360990782.0606 entropy=17.8176 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 133580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530351.2 mean_steps=13.3
|
|
[Episode 133590] reward=-418873572.5 actor_loss=0.4431 critic_loss=290791730871591.8125 entropy=17.8091 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 133600] reward=-127800655.7 actor_loss=0.3105 critic_loss=396319395254.8571 entropy=17.8218 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 133600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540079.1 mean_steps=13.2
|
|
[Episode 133610] reward=-135619318.8 actor_loss=0.4195 critic_loss=1510675851509.7600 entropy=17.8296 approx_kl=0.0037 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 133620] reward=-363705496.3 actor_loss=0.3738 critic_loss=196011458123093.3438 entropy=17.8376 approx_kl=-0.0004 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 133620] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-302238.7 mean_steps=17.4
|
|
[Episode 133630] reward=-120807188.9 actor_loss=0.2195 critic_loss=104648356139.7073 entropy=17.8304 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 133640] reward=-114892590.5 actor_loss=0.3021 critic_loss=97142412468.7059 entropy=17.8223 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 133640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413099.0 mean_steps=15.2
|
|
[Episode 133650] reward=-116708592.6 actor_loss=0.2669 critic_loss=118475301470.8148 entropy=17.8269 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 133660] reward=-115104613.6 actor_loss=0.4175 critic_loss=101490157704.5333 entropy=17.8231 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 133660] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-672726.6 mean_steps=11.2
|
|
[Episode 133670] reward=-124299591.1 actor_loss=0.2713 critic_loss=103279728394.2400 entropy=17.8201 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 133680] reward=-120783003.4 actor_loss=0.2700 critic_loss=104056320819.2000 entropy=17.8268 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 133680] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-381200.7 mean_steps=17.1
|
|
[Episode 133690] reward=-122195636.1 actor_loss=0.2609 critic_loss=103870251229.4054 entropy=17.8224 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 133700] reward=-121393267.8 actor_loss=0.1956 critic_loss=98666835752.4211 entropy=17.8371 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 133700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432390.1 mean_steps=15.4
|
|
[Episode 133710] reward=-118461344.8 actor_loss=0.3053 critic_loss=104706467281.4545 entropy=17.8452 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 133720] reward=-121710064.3 actor_loss=0.2885 critic_loss=107236981473.2800 entropy=17.8289 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 133720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-484261.1 mean_steps=13.6
|
|
[Episode 133730] reward=-117173287.2 actor_loss=0.3060 critic_loss=94750011136.0000 entropy=17.8270 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 133740] reward=-115068157.6 actor_loss=0.3065 critic_loss=99135818956.8000 entropy=17.8101 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 133740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-622152.1 mean_steps=11.9
|
|
[Episode 133750] reward=-123354957.4 actor_loss=0.3028 critic_loss=112174685360.5517 entropy=17.7977 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 133760] reward=-121184349.1 actor_loss=0.2389 critic_loss=101385579069.4400 entropy=17.7947 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 133760] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-703212.8 mean_steps=9.8
|
|
[Episode 133770] reward=-515331503.8 actor_loss=0.2761 critic_loss=444434974075926.7500 entropy=17.7991 approx_kl=0.0013 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 133780] reward=-1258523745.0 actor_loss=5.6095 critic_loss=3428905489816325.5000 entropy=17.8000 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 133780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472154.2 mean_steps=14.8
|
|
[Episode 133790] reward=-124528663.2 actor_loss=0.2985 critic_loss=105666385822.4762 entropy=17.7867 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 133800] reward=-119028633.4 actor_loss=0.3251 critic_loss=106346428513.5238 entropy=17.7760 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 133800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443593.4 mean_steps=15.2
|
|
[Episode 133810] reward=-115306672.5 actor_loss=0.2753 critic_loss=97729260643.0968 entropy=17.7694 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 133820] reward=-123326995.3 actor_loss=0.1963 critic_loss=108775918013.2174 entropy=17.7718 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 133820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579687.4 mean_steps=12.4
|
|
[Episode 133830] reward=-448228800.4 actor_loss=0.1586 critic_loss=257499710561393.7812 entropy=17.7826 approx_kl=-0.0011 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 133840] reward=-122391117.3 actor_loss=0.2799 critic_loss=104140869632.0000 entropy=17.7671 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 133840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-521165.8 mean_steps=13.0
|
|
[Episode 133850] reward=-118367182.1 actor_loss=0.3321 critic_loss=101556122419.2000 entropy=17.7592 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 133860] reward=-119152778.4 actor_loss=0.3200 critic_loss=103301573973.3333 entropy=17.7536 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 133860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434345.5 mean_steps=15.3
|
|
[Episode 133870] reward=-120143023.0 actor_loss=0.2344 critic_loss=108810758046.4762 entropy=17.7547 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 133880] reward=-119114378.8 actor_loss=0.3237 critic_loss=99135888743.7838 entropy=17.7605 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 133880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-417188.6 mean_steps=16.5
|
|
[Episode 133890] reward=-122173151.6 actor_loss=0.2558 critic_loss=102675176143.5676 entropy=17.7598 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 133900] reward=-118586669.9 actor_loss=0.2278 critic_loss=105595292206.5455 entropy=17.7562 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 133900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-496816.6 mean_steps=14.9
|
|
[Episode 133910] reward=-123075030.1 actor_loss=0.2971 critic_loss=103451051544.3810 entropy=17.7454 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 133920] reward=-116528120.0 actor_loss=0.2837 critic_loss=100416329227.3778 entropy=17.7448 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 133920] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-270521.0 mean_steps=17.2
|
|
[Episode 133930] reward=-121413520.9 actor_loss=0.2897 critic_loss=103187885260.8000 entropy=17.7378 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 133940] reward=-124986796.2 actor_loss=0.3467 critic_loss=558643075169.5238 entropy=17.7347 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 133940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-406086.3 mean_steps=15.1
|
|
[Episode 133950] reward=-112375845.3 actor_loss=0.3016 critic_loss=91319910999.4146 entropy=17.7446 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 133960] reward=-122009618.3 actor_loss=0.2193 critic_loss=105012605300.3636 entropy=17.7387 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 133960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-602440.0 mean_steps=13.7
|
|
[Episode 133970] reward=-113692281.1 actor_loss=0.4296 critic_loss=95517833898.6667 entropy=17.7357 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 133980] reward=-120462604.6 actor_loss=0.2633 critic_loss=102130776064.0000 entropy=17.7227 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 133980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-591367.8 mean_steps=12.7
|
|
[Episode 133990] reward=-115541824.9 actor_loss=0.4052 critic_loss=99892885124.7407 entropy=17.7200 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 134000] reward=-118268064.0 actor_loss=0.2911 critic_loss=102123723629.7143 entropy=17.7283 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 134000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591010.7 mean_steps=13.6
|
|
[Episode 134010] reward=-117172427.9 actor_loss=0.3428 critic_loss=98368095482.3111 entropy=17.7240 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 134020] reward=-156726245.7 actor_loss=0.2404 critic_loss=5685461664836.2666 entropy=17.7310 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 134020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-506420.4 mean_steps=13.3
|
|
[Episode 134030] reward=-117957997.9 actor_loss=0.4121 critic_loss=96611985505.5238 entropy=17.7470 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 134040] reward=-122563129.6 actor_loss=0.1991 critic_loss=107323398642.1622 entropy=17.7671 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 134040] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-301953.5 mean_steps=16.7
|
|
[Episode 134050] reward=-119076730.3 actor_loss=0.3293 critic_loss=104063324532.3636 entropy=17.7714 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 134060] reward=-117944791.9 actor_loss=0.2710 critic_loss=101104679032.4706 entropy=17.7745 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 134060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437072.1 mean_steps=15.1
|
|
[Episode 134070] reward=-116594861.7 actor_loss=0.3231 critic_loss=95285898808.8889 entropy=17.7761 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 134080] reward=-122903638.9 actor_loss=0.2423 critic_loss=125576936960.0000 entropy=17.7743 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 134080] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-714043.9 mean_steps=10.8
|
|
[Episode 134090] reward=-114738339.9 actor_loss=0.2720 critic_loss=90732760268.8000 entropy=17.7790 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 134100] reward=-126560123.8 actor_loss=0.1812 critic_loss=106203469981.5385 entropy=17.7928 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 134100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-629020.8 mean_steps=14.1
|
|
[Episode 134110] reward=-113557397.7 actor_loss=0.3177 critic_loss=91401063992.8889 entropy=17.7679 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 134120] reward=-121596420.4 actor_loss=0.2272 critic_loss=98817129221.6889 entropy=17.7578 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 134120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515316.0 mean_steps=13.8
|
|
[Episode 134130] reward=-118357937.7 actor_loss=0.2323 critic_loss=99475871744.0000 entropy=17.7533 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 134140] reward=-121752126.2 actor_loss=0.2771 critic_loss=101734010606.9333 entropy=17.7401 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 134140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537497.4 mean_steps=13.1
|
|
[Episode 134150] reward=-116749782.7 actor_loss=0.3907 critic_loss=95305039207.7838 entropy=17.7258 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 134160] reward=-121734717.6 actor_loss=0.2718 critic_loss=105084393285.8182 entropy=17.7287 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 134160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-520172.2 mean_steps=14.8
|
|
[Episode 134170] reward=-225650615.5 actor_loss=0.3516 critic_loss=39250337050715.0234 entropy=17.7406 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 134180] reward=-119942019.8 actor_loss=0.2454 critic_loss=103800249275.7333 entropy=17.7360 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 134180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543592.5 mean_steps=13.2
|
|
[Episode 134190] reward=-116887087.2 actor_loss=0.3039 critic_loss=95337662464.0000 entropy=17.7298 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 134200] reward=-119218466.8 actor_loss=0.2597 critic_loss=100538562717.5385 entropy=17.7089 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 134200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530585.0 mean_steps=14.4
|
|
[Episode 134210] reward=-119767102.2 actor_loss=0.2966 critic_loss=100691236945.9200 entropy=17.6978 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 134220] reward=-113346721.7 actor_loss=0.3485 critic_loss=93070701545.2444 entropy=17.7055 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 134220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-478519.2 mean_steps=15.6
|
|
[Episode 134230] reward=-122705739.7 actor_loss=0.3021 critic_loss=115303726694.4000 entropy=17.7123 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 134240] reward=-120000735.0 actor_loss=0.2615 critic_loss=104802958189.7143 entropy=17.7128 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 134240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566241.8 mean_steps=13.9
|
|
[Episode 134250] reward=-111071537.7 actor_loss=0.3131 critic_loss=97791053923.9024 entropy=17.7013 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 134260] reward=-118870777.4 actor_loss=0.3466 critic_loss=92848971776.0000 entropy=17.6929 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 134260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505027.7 mean_steps=12.9
|
|
[Episode 134270] reward=-188365283.7 actor_loss=0.1856 critic_loss=12310275261144.1777 entropy=17.6901 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 134280] reward=-122123638.2 actor_loss=0.2384 critic_loss=103604820195.5556 entropy=17.6955 approx_kl=0.0106 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 134280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422426.7 mean_steps=15.4
|
|
[Episode 134290] reward=-111607359.5 actor_loss=0.3849 critic_loss=105280003896.1951 entropy=17.6950 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 134300] reward=-120900164.8 actor_loss=0.2409 critic_loss=107669327754.9714 entropy=17.6961 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 134300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619855.8 mean_steps=12.7
|
|
[Episode 134310] reward=-122678144.9 actor_loss=0.2625 critic_loss=114375459180.0889 entropy=17.6974 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 134320] reward=-114834061.3 actor_loss=0.2743 critic_loss=99609612379.0222 entropy=17.6857 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 134320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-640719.9 mean_steps=12.3
|
|
[Episode 134330] reward=-115225249.8 actor_loss=0.3363 critic_loss=95644528548.9778 entropy=17.6719 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 134340] reward=-119435551.6 actor_loss=0.3774 critic_loss=103267356580.9778 entropy=17.6588 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 134340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479948.6 mean_steps=14.1
|
|
[Episode 134350] reward=-122304617.2 actor_loss=0.3210 critic_loss=102923013870.9333 entropy=17.6544 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 134360] reward=-118410945.5 actor_loss=0.3255 critic_loss=103389417081.9048 entropy=17.6569 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 134360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560209.1 mean_steps=13.5
|
|
[Episode 134370] reward=-122439747.4 actor_loss=0.2640 critic_loss=99723297353.1429 entropy=17.6328 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 134380] reward=-118109529.8 actor_loss=0.2854 critic_loss=99211079953.0667 entropy=17.6262 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 134380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447949.4 mean_steps=15.7
|
|
[Episode 134390] reward=-119362314.5 actor_loss=0.2682 critic_loss=98983742259.2000 entropy=17.6371 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 134400] reward=-118477940.2 actor_loss=0.2146 critic_loss=101225576946.1622 entropy=17.6397 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 134400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-519615.8 mean_steps=13.6
|
|
[Episode 134410] reward=-117668841.3 actor_loss=0.3890 critic_loss=97357645188.4138 entropy=17.6558 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 134420] reward=-113148186.2 actor_loss=0.3326 critic_loss=90669499278.2222 entropy=17.6243 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 134420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-673242.9 mean_steps=12.2
|
|
[Episode 134430] reward=-115062095.6 actor_loss=0.2499 critic_loss=93175407555.7647 entropy=17.6282 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 134440] reward=-118560504.1 actor_loss=0.2658 critic_loss=99232353484.8000 entropy=17.6315 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 134440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454297.2 mean_steps=14.5
|
|
[Episode 134450] reward=-120012271.1 actor_loss=0.2427 critic_loss=185840726550.2609 entropy=17.6313 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 134460] reward=-121170856.1 actor_loss=0.3741 critic_loss=136402269277.0909 entropy=17.6602 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 134460] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-356601.8 mean_steps=17.1
|
|
[Episode 134470] reward=-119512022.6 actor_loss=0.2249 critic_loss=100413523456.0000 entropy=17.6448 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 134480] reward=-118445573.6 actor_loss=0.3238 critic_loss=98587900495.6444 entropy=17.6527 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 134480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551180.7 mean_steps=13.5
|
|
[Episode 134490] reward=-111064233.2 actor_loss=0.2591 critic_loss=87160160000.0000 entropy=17.6682 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 134500] reward=-115756596.0 actor_loss=0.3588 critic_loss=99753291138.8445 entropy=17.6629 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 134500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506789.2 mean_steps=14.0
|
|
[Episode 134510] reward=-123545845.5 actor_loss=0.2931 critic_loss=109538438609.4545 entropy=17.6588 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 134520] reward=-119120403.5 actor_loss=0.4223 critic_loss=97121043992.3810 entropy=17.6646 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 134520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521736.3 mean_steps=15.3
|
|
[Episode 134530] reward=-120470412.8 actor_loss=0.2611 critic_loss=98504033219.7647 entropy=17.6762 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 134540] reward=-124668871.3 actor_loss=0.2826 critic_loss=102717443458.8445 entropy=17.6742 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 134540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-456659.2 mean_steps=15.9
|
|
[Episode 134550] reward=-117342976.8 actor_loss=0.2538 critic_loss=93762389934.0800 entropy=17.6816 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 134560] reward=-120388128.9 actor_loss=0.3205 critic_loss=99426633728.0000 entropy=17.6861 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 134560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490030.2 mean_steps=14.2
|
|
[Episode 134570] reward=-120281471.6 actor_loss=0.3508 critic_loss=96893076236.1905 entropy=17.6838 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 134580] reward=-119675773.7 actor_loss=0.1761 critic_loss=97143651920.8421 entropy=17.6850 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 134580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-511641.9 mean_steps=15.1
|
|
[Episode 134590] reward=-120955018.2 actor_loss=0.2608 critic_loss=99669436850.4242 entropy=17.6977 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 134600] reward=-118476629.0 actor_loss=0.2716 critic_loss=97839675922.9630 entropy=17.6874 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 134600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510427.2 mean_steps=14.2
|
|
[Episode 134610] reward=-120842017.4 actor_loss=0.3174 critic_loss=95979762119.1111 entropy=17.6930 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 134620] reward=-117914904.3 actor_loss=0.3351 critic_loss=93185017856.0000 entropy=17.6854 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 134620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484480.1 mean_steps=14.4
|
|
[Episode 134630] reward=-119093488.3 actor_loss=0.2707 critic_loss=98646273807.0588 entropy=17.6649 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 134640] reward=-116384429.3 actor_loss=0.3465 critic_loss=96868809113.6000 entropy=17.6547 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 134640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-476055.8 mean_steps=15.8
|
|
[Episode 134650] reward=-113200027.8 actor_loss=0.3132 critic_loss=90149686112.7111 entropy=17.6590 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 134660] reward=-122204884.2 actor_loss=0.2700 critic_loss=100611140266.6667 entropy=17.6739 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 134660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551199.3 mean_steps=13.7
|
|
[Episode 134670] reward=-125543528.2 actor_loss=0.2360 critic_loss=143270256158.1176 entropy=17.6610 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 134680] reward=-111337572.7 actor_loss=0.2722 critic_loss=92835797479.6190 entropy=17.6696 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 134680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463962.7 mean_steps=15.2
|
|
[Episode 134690] reward=-119079602.6 actor_loss=0.1813 critic_loss=97579314455.2727 entropy=17.6878 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 134700] reward=-121444790.5 actor_loss=0.2801 critic_loss=99573776019.9111 entropy=17.6835 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 134700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559107.2 mean_steps=13.6
|
|
[Episode 134710] reward=-127553645.7 actor_loss=0.2167 critic_loss=106664461974.5882 entropy=17.6814 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 134720] reward=-114095768.6 actor_loss=0.2461 critic_loss=90713496689.7778 entropy=17.7025 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 134720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489414.8 mean_steps=15.1
|
|
[Episode 134730] reward=-120155064.1 actor_loss=0.2870 critic_loss=97428237152.7111 entropy=17.7008 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 134740] reward=-122338926.5 actor_loss=0.3031 critic_loss=147456446268.9524 entropy=17.6961 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 134740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458806.9 mean_steps=15.1
|
|
[Episode 134750] reward=-116821080.0 actor_loss=0.3028 critic_loss=96043870890.6667 entropy=17.6956 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 134760] reward=-116951373.1 actor_loss=0.3035 critic_loss=94667505208.8889 entropy=17.6914 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 134760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438429.5 mean_steps=14.4
|
|
[Episode 134770] reward=-126422486.4 actor_loss=0.1977 critic_loss=125763798099.0270 entropy=17.7055 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 134780] reward=-118927885.8 actor_loss=0.3335 critic_loss=105317289873.2973 entropy=17.7148 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 134780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-587254.6 mean_steps=12.8
|
|
[Episode 134790] reward=-118649737.5 actor_loss=0.2980 critic_loss=99625365308.9524 entropy=17.7228 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 134800] reward=-117346683.6 actor_loss=0.3079 critic_loss=98381323819.8857 entropy=17.7268 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 134800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-617369.1 mean_steps=13.7
|
|
[Episode 134810] reward=-116023033.3 actor_loss=0.3272 critic_loss=99654824521.1429 entropy=17.7188 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 134820] reward=-123129635.9 actor_loss=0.2777 critic_loss=125339109914.9474 entropy=17.7106 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 134820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505513.3 mean_steps=13.3
|
|
[Episode 134830] reward=-118858375.0 actor_loss=0.3401 critic_loss=101020296923.4286 entropy=17.7151 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 134840] reward=-118973484.5 actor_loss=0.2873 critic_loss=100977554654.6087 entropy=17.6934 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 134840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-488807.3 mean_steps=13.1
|
|
[Episode 134850] reward=-118025130.2 actor_loss=0.2459 critic_loss=101780021854.8148 entropy=17.6841 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 134860] reward=-119552922.7 actor_loss=0.2020 critic_loss=102734404403.2000 entropy=17.6902 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 134860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453221.6 mean_steps=14.8
|
|
[Episode 134870] reward=-114093808.4 actor_loss=0.3648 critic_loss=94770592426.6667 entropy=17.6901 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 134880] reward=-119506205.3 actor_loss=0.2809 critic_loss=102937234545.7778 entropy=17.6940 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 134880] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-643465.2 mean_steps=11.4
|
|
[Episode 134890] reward=-127436205.0 actor_loss=0.3450 critic_loss=525469111989.6774 entropy=17.6667 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 134900] reward=-121396889.7 actor_loss=0.2777 critic_loss=95266234823.1111 entropy=17.6501 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 134900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482602.9 mean_steps=14.0
|
|
[Episode 134910] reward=-115279596.4 actor_loss=0.3173 critic_loss=92968488960.0000 entropy=17.6466 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 134920] reward=-119838233.0 actor_loss=0.3025 critic_loss=99151814084.4651 entropy=17.6315 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 134920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395343.9 mean_steps=16.0
|
|
[Episode 134930] reward=-118263342.0 actor_loss=0.4276 critic_loss=93577229405.0909 entropy=17.6339 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Episode 134940] reward=-121373226.5 actor_loss=0.3031 critic_loss=98499340970.6667 entropy=17.6127 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 134940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516408.0 mean_steps=14.4
|
|
[Episode 134950] reward=-121994249.7 actor_loss=0.1972 critic_loss=100236924427.3778 entropy=17.6086 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 134960] reward=-118155354.4 actor_loss=0.5404 critic_loss=96872335132.4444 entropy=17.6045 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 134960] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641338.7 mean_steps=12.2
|
|
[Episode 134970] reward=-118611565.2 actor_loss=0.3337 critic_loss=95325206685.5385 entropy=17.6060 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 134980] reward=-119725043.1 actor_loss=0.2703 critic_loss=98607467373.7143 entropy=17.6013 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 134980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521618.2 mean_steps=14.1
|
|
[Episode 134990] reward=-120314369.9 actor_loss=0.2669 critic_loss=106026648997.6471 entropy=17.6151 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 135000] reward=-120677551.4 actor_loss=0.3226 critic_loss=99376587298.1333 entropy=17.6165 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 135000] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-687766.8 mean_steps=11.6
|
|
[Episode 135010] reward=-117174919.5 actor_loss=0.2969 critic_loss=97497580397.7143 entropy=17.5963 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 135020] reward=-121070323.3 actor_loss=0.2286 critic_loss=97908387416.2759 entropy=17.6088 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 135020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474290.0 mean_steps=14.0
|
|
[Episode 135030] reward=-113497840.1 actor_loss=0.3299 critic_loss=105954117290.6667 entropy=17.6089 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 135040] reward=-125211983.7 actor_loss=0.3608 critic_loss=266249150685.4054 entropy=17.6177 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 135040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-459008.8 mean_steps=13.4
|
|
[Episode 135050] reward=-118589526.9 actor_loss=0.3062 critic_loss=106827716689.9200 entropy=17.6235 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 135060] reward=-120062894.6 actor_loss=0.3184 critic_loss=100401005040.4848 entropy=17.6241 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 135060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-390545.6 mean_steps=15.8
|
|
[Episode 135070] reward=-121657773.2 actor_loss=0.2149 critic_loss=99534198101.3333 entropy=17.6270 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 135080] reward=-113038497.0 actor_loss=0.2809 critic_loss=102723557014.5882 entropy=17.6354 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 135080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-483253.2 mean_steps=15.8
|
|
[Episode 135090] reward=-116909235.6 actor_loss=0.3224 critic_loss=97644180639.2889 entropy=17.6436 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 135100] reward=-117019516.0 actor_loss=0.3361 critic_loss=100693514295.3513 entropy=17.6574 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 135100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616164.5 mean_steps=12.8
|
|
[Episode 135110] reward=-126524134.5 actor_loss=0.2049 critic_loss=109046618112.0000 entropy=17.6628 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 135120] reward=-113154679.3 actor_loss=0.2765 critic_loss=93399809084.2353 entropy=17.6494 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 135120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-485863.7 mean_steps=15.8
|
|
[Episode 135130] reward=-116769078.0 actor_loss=0.2551 critic_loss=102256644733.1555 entropy=17.6410 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 135140] reward=-146735717.2 actor_loss=0.2444 critic_loss=1749004579999.2888 entropy=17.6545 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 135140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566583.7 mean_steps=13.4
|
|
[Episode 135150] reward=-118951554.0 actor_loss=0.2413 critic_loss=96503069354.6667 entropy=17.6726 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 135160] reward=-120083422.6 actor_loss=0.3141 critic_loss=102393216113.7778 entropy=17.6726 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 135160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558992.6 mean_steps=13.6
|
|
[Episode 135170] reward=-116869786.1 actor_loss=0.3242 critic_loss=100348789516.1905 entropy=17.6822 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 135180] reward=-117536990.1 actor_loss=0.2807 critic_loss=112186338417.7778 entropy=17.6845 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 135180] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-696785.4 mean_steps=11.7
|
|
[Episode 135190] reward=-118620567.8 actor_loss=0.2703 critic_loss=100156295936.0000 entropy=17.6882 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 135200] reward=-119841935.7 actor_loss=0.3693 critic_loss=103157208405.3333 entropy=17.6808 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 135200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510717.5 mean_steps=14.2
|
|
[Episode 135210] reward=-114448248.2 actor_loss=0.2810 critic_loss=93457134023.1111 entropy=17.6705 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 135220] reward=-113998918.1 actor_loss=0.3816 critic_loss=99486478628.5714 entropy=17.6568 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 135220] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641863.9 mean_steps=12.3
|
|
[Episode 135230] reward=-120296637.2 actor_loss=0.3465 critic_loss=98072325747.6129 entropy=17.6493 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 135240] reward=-118576299.7 actor_loss=0.2770 critic_loss=96923804672.0000 entropy=17.6516 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 135240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533690.9 mean_steps=13.3
|
|
[Episode 135250] reward=-114348586.4 actor_loss=0.2381 critic_loss=100072031846.4000 entropy=17.6276 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 135260] reward=-119312767.2 actor_loss=0.2796 critic_loss=94207881853.1555 entropy=17.6077 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 135260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565396.8 mean_steps=13.9
|
|
[Episode 135270] reward=-117828468.6 actor_loss=0.2947 critic_loss=98892807976.4211 entropy=17.6036 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 135280] reward=-119690195.7 actor_loss=0.2739 critic_loss=99749535334.4000 entropy=17.6058 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 135280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-519453.5 mean_steps=13.2
|
|
[Episode 135290] reward=-120410403.4 actor_loss=0.2879 critic_loss=98571664588.8000 entropy=17.6087 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 135300] reward=-123229795.5 actor_loss=0.2234 critic_loss=102319067331.0476 entropy=17.6032 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 135300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521808.5 mean_steps=14.5
|
|
[Episode 135310] reward=-119139282.1 actor_loss=0.2344 critic_loss=98031448974.2222 entropy=17.6040 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 135320] reward=-123589642.8 actor_loss=0.2853 critic_loss=354670512264.5333 entropy=17.6092 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 135320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473176.2 mean_steps=15.2
|
|
[Episode 135330] reward=-116880766.1 actor_loss=0.3126 critic_loss=96735253845.3333 entropy=17.6190 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 135340] reward=-115569540.5 actor_loss=0.3766 critic_loss=94271499741.8667 entropy=17.6425 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 135340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-387858.7 mean_steps=15.1
|
|
[Episode 135350] reward=-113619608.0 actor_loss=0.3421 critic_loss=94213012210.5263 entropy=17.6494 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 135360] reward=-120290465.4 actor_loss=0.2976 critic_loss=153731588681.1429 entropy=17.6584 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 135360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569139.3 mean_steps=13.6
|
|
[Episode 135370] reward=-118395573.3 actor_loss=0.2847 critic_loss=139042738176.0000 entropy=17.6635 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 135380] reward=-605819367.6 actor_loss=0.2610 critic_loss=416359657207671.4375 entropy=17.6712 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 135380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429022.1 mean_steps=15.7
|
|
[Episode 135390] reward=-116602044.1 actor_loss=0.2851 critic_loss=94805121069.5111 entropy=17.7146 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 135400] reward=-125434190.7 actor_loss=0.1560 critic_loss=146129751848.4211 entropy=17.7102 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 135400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614432.9 mean_steps=12.6
|
|
[Episode 135410] reward=-119599123.5 actor_loss=0.3579 critic_loss=107980079923.2000 entropy=17.7260 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 135420] reward=-122986333.5 actor_loss=0.3252 critic_loss=141717433912.8889 entropy=17.7580 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 135420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521879.4 mean_steps=13.9
|
|
[Episode 135430] reward=-117835409.4 actor_loss=0.3199 critic_loss=101431811185.7778 entropy=17.7580 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 135440] reward=-120291359.9 actor_loss=0.3473 critic_loss=133384375149.7143 entropy=17.7807 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 135440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576328.1 mean_steps=13.7
|
|
[Episode 135450] reward=-119312705.1 actor_loss=0.3086 critic_loss=118994779477.3333 entropy=17.7806 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 135460] reward=-119606438.8 actor_loss=0.2538 critic_loss=106507713035.9070 entropy=17.7694 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 135460] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-331501.0 mean_steps=17.5
|
|
[Episode 135470] reward=-117837648.4 actor_loss=0.2895 critic_loss=103686655836.1600 entropy=17.7749 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 135480] reward=-560741988.1 actor_loss=0.2845 critic_loss=261757287408162.1250 entropy=17.7739 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 135480] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-294491.8 mean_steps=17.1
|
|
[Episode 135490] reward=-116539539.5 actor_loss=0.3073 critic_loss=106768743173.6889 entropy=17.7976 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 135500] reward=-123988742.0 actor_loss=0.1872 critic_loss=106974943823.6444 entropy=17.7973 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 135500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588086.6 mean_steps=13.8
|
|
[Episode 135510] reward=-116022891.7 actor_loss=0.2598 critic_loss=99827712364.0889 entropy=17.8062 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 135520] reward=-177638679.9 actor_loss=0.1757 critic_loss=11959334501671.8223 entropy=17.8168 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 135520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-640290.8 mean_steps=12.8
|
|
[Episode 135530] reward=-112617001.8 actor_loss=0.3378 critic_loss=95568946244.2667 entropy=17.8282 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 135540] reward=-119590287.8 actor_loss=0.2389 critic_loss=103416148391.7241 entropy=17.8325 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 135540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-329657.9 mean_steps=15.4
|
|
[Episode 135550] reward=-120329163.3 actor_loss=0.2603 critic_loss=101297404245.3333 entropy=17.8640 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 135560] reward=-120724718.0 actor_loss=0.3817 critic_loss=103730237680.9412 entropy=17.8587 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 135560] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-707049.0 mean_steps=10.8
|
|
[Episode 135570] reward=-118946981.6 actor_loss=0.2681 critic_loss=104526944098.4615 entropy=17.8380 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 135580] reward=-124617506.2 actor_loss=0.2239 critic_loss=106873515576.8889 entropy=17.8247 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 135580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449366.9 mean_steps=14.5
|
|
[Episode 135590] reward=-120206773.3 actor_loss=0.2779 critic_loss=103902498637.9130 entropy=17.8233 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 135600] reward=-118751595.3 actor_loss=0.3182 critic_loss=101307891396.9231 entropy=17.8152 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 135600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-430462.9 mean_steps=13.5
|
|
[Episode 135610] reward=-122690220.4 actor_loss=0.3056 critic_loss=111977495552.0000 entropy=17.8210 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 135620] reward=-126041793.8 actor_loss=0.2512 critic_loss=111949567850.1463 entropy=17.8199 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 135620] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-650126.1 mean_steps=11.2
|
|
[Episode 135630] reward=-148709562.4 actor_loss=0.2191 critic_loss=4253647197525.3335 entropy=17.8067 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 135640] reward=-114659731.1 actor_loss=0.4146 critic_loss=95825597235.2000 entropy=17.8183 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 135640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-526503.9 mean_steps=14.8
|
|
[Episode 135650] reward=-123872039.9 actor_loss=0.1949 critic_loss=112069290120.5333 entropy=17.7906 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 135660] reward=-123848848.8 actor_loss=0.2704 critic_loss=104398910464.0000 entropy=17.7971 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 135660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-568174.1 mean_steps=12.5
|
|
[Episode 135670] reward=-597550887.6 actor_loss=7.4387 critic_loss=643518394361141.6250 entropy=17.7905 approx_kl=0.0042 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 135680] reward=-119562371.8 actor_loss=0.3024 critic_loss=99497513597.1555 entropy=17.8012 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 135680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-575388.6 mean_steps=14.4
|
|
[Episode 135690] reward=-118439470.0 actor_loss=0.2932 critic_loss=101709925195.2941 entropy=17.8091 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 135700] reward=-121975868.5 actor_loss=0.1673 critic_loss=101337122406.4000 entropy=17.7997 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 135700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453824.9 mean_steps=15.7
|
|
[Episode 135710] reward=-194439290.3 actor_loss=0.3136 critic_loss=13192780234283.8848 entropy=17.7933 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 135720] reward=-128686071.7 actor_loss=0.3369 critic_loss=197365948416.0000 entropy=17.8201 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 135720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553965.5 mean_steps=13.3
|
|
[Episode 135730] reward=-117384932.3 actor_loss=0.2952 critic_loss=91171133147.4286 entropy=17.8294 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 135740] reward=-123399362.8 actor_loss=0.2668 critic_loss=123509914191.6444 entropy=17.8225 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 135740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-707474.7 mean_steps=13.7
|
|
[Episode 135750] reward=-122466576.2 actor_loss=0.2104 critic_loss=136718286665.9556 entropy=17.8241 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 135760] reward=-120560904.2 actor_loss=0.2251 critic_loss=104169490568.5333 entropy=17.8291 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 135760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409465.3 mean_steps=14.9
|
|
[Episode 135770] reward=-115580062.2 actor_loss=0.3413 critic_loss=97646089193.2444 entropy=17.8249 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 135780] reward=-118494983.2 actor_loss=0.3779 critic_loss=127476442316.8000 entropy=17.8221 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 135780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579929.2 mean_steps=12.6
|
|
[Episode 135790] reward=-118839844.0 actor_loss=0.3237 critic_loss=99776908122.8387 entropy=17.8085 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 135800] reward=-117204706.1 actor_loss=0.3071 critic_loss=98537857706.6667 entropy=17.8207 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 135800] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-700471.8 mean_steps=11.6
|
|
[Episode 135810] reward=-121558710.4 actor_loss=0.2169 critic_loss=101231557632.0000 entropy=17.8188 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 135820] reward=-118699949.8 actor_loss=0.3268 critic_loss=101806424064.0000 entropy=17.8331 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 135820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477648.9 mean_steps=14.6
|
|
[Episode 135830] reward=-119339175.4 actor_loss=0.2477 critic_loss=101084295987.2000 entropy=17.8401 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 135840] reward=-120040434.5 actor_loss=0.3647 critic_loss=102979183957.3333 entropy=17.8443 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 135840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597204.7 mean_steps=12.9
|
|
[Episode 135850] reward=-123495462.8 actor_loss=0.2762 critic_loss=127863482880.0000 entropy=17.8309 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 135860] reward=-118207455.2 actor_loss=0.2859 critic_loss=104436070347.4872 entropy=17.8069 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 135860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476358.5 mean_steps=14.8
|
|
[Episode 135870] reward=-118717889.0 actor_loss=0.2081 critic_loss=94046628704.7111 entropy=17.7987 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 135880] reward=-121900964.2 actor_loss=0.2752 critic_loss=104035856699.0769 entropy=17.7987 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 135880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-520386.7 mean_steps=12.9
|
|
[Episode 135890] reward=-125749658.2 actor_loss=0.2170 critic_loss=116185207552.0000 entropy=17.7836 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 135900] reward=-120599209.5 actor_loss=0.2683 critic_loss=102043448241.2308 entropy=17.8014 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 135900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433072.8 mean_steps=15.6
|
|
[Episode 135910] reward=-122975800.6 actor_loss=0.2098 critic_loss=101656984024.6154 entropy=17.7917 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 135920] reward=-116700599.9 actor_loss=0.2648 critic_loss=106260029083.8261 entropy=17.7801 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 135920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-459920.4 mean_steps=13.7
|
|
[Episode 135930] reward=-114778837.2 actor_loss=0.3889 critic_loss=100828484822.3256 entropy=17.7822 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 135940] reward=-122920297.6 actor_loss=0.2857 critic_loss=113327388769.5238 entropy=17.7756 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 135940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597751.8 mean_steps=12.6
|
|
[Episode 135950] reward=-122950802.9 actor_loss=0.2116 critic_loss=107501795035.4286 entropy=17.7883 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 135960] reward=-120105769.8 actor_loss=0.3015 critic_loss=101193883101.8667 entropy=17.7956 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 135960] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-704371.0 mean_steps=9.4
|
|
[Episode 135970] reward=-118029403.7 actor_loss=0.3824 critic_loss=99418377056.7111 entropy=17.8009 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 135980] reward=-115614971.3 actor_loss=0.3340 critic_loss=89832627313.7778 entropy=17.7893 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 135980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-525312.1 mean_steps=14.8
|
|
[Episode 135990] reward=-120588978.7 actor_loss=0.1583 critic_loss=102804294126.3448 entropy=17.7678 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 136000] reward=-117967360.1 actor_loss=0.3113 critic_loss=134904077729.1852 entropy=17.7648 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 136000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-644139.6 mean_steps=11.9
|
|
[Episode 136010] reward=-116380860.9 actor_loss=0.2086 critic_loss=95818176632.4706 entropy=17.7586 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 136020] reward=-123251232.4 actor_loss=0.2691 critic_loss=102808378392.9756 entropy=17.7635 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 136020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496543.0 mean_steps=13.9
|
|
[Episode 136030] reward=-125859526.1 actor_loss=0.2440 critic_loss=134164217856.0000 entropy=17.7900 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 136040] reward=-121313657.1 actor_loss=0.3852 critic_loss=105754391799.1724 entropy=17.7949 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 136040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-466195.6 mean_steps=13.8
|
|
[Episode 136050] reward=-120097096.2 actor_loss=0.3270 critic_loss=99684157732.5714 entropy=17.7930 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 136060] reward=-115638157.7 actor_loss=0.3248 critic_loss=98859641608.8276 entropy=17.7868 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 136060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457194.6 mean_steps=14.8
|
|
[Episode 136070] reward=-121808634.4 actor_loss=0.3044 critic_loss=111068367360.0000 entropy=17.7880 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 136080] reward=-123148731.1 actor_loss=0.2969 critic_loss=110845532979.2000 entropy=17.7881 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 136080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495353.2 mean_steps=15.0
|
|
[Episode 136090] reward=-115409277.3 actor_loss=0.3416 critic_loss=93881253888.0000 entropy=17.7870 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 136100] reward=-124682715.6 actor_loss=0.2655 critic_loss=109924072379.7333 entropy=17.7821 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 136100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539379.5 mean_steps=14.2
|
|
[Episode 136110] reward=-118390293.0 actor_loss=0.3099 critic_loss=98438006374.4000 entropy=17.7782 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 136120] reward=-122109006.9 actor_loss=0.2836 critic_loss=104055861747.5122 entropy=17.7814 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 136120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568218.6 mean_steps=13.6
|
|
[Episode 136130] reward=-122223688.4 actor_loss=0.2840 critic_loss=139691675113.7391 entropy=17.7787 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 136140] reward=-117707233.7 actor_loss=0.3312 critic_loss=98593774055.6190 entropy=17.7742 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 136140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532675.4 mean_steps=13.9
|
|
[Episode 136150] reward=-122854721.5 actor_loss=0.2707 critic_loss=107792687104.0000 entropy=17.7647 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 136160] reward=-120067555.8 actor_loss=0.3156 critic_loss=104072343669.0286 entropy=17.7580 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 136160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414523.1 mean_steps=15.2
|
|
[Episode 136170] reward=-120053969.6 actor_loss=0.3147 critic_loss=104579055908.5714 entropy=17.7539 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 136180] reward=-127809497.2 actor_loss=0.2923 critic_loss=757924863453.8667 entropy=17.7355 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 136180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453268.9 mean_steps=15.6
|
|
[Episode 136190] reward=-121617182.3 actor_loss=0.3339 critic_loss=101988814112.8205 entropy=17.7450 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 136200] reward=-121013217.6 actor_loss=0.1850 critic_loss=115741812326.4000 entropy=17.7575 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 136200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512666.0 mean_steps=14.2
|
|
[Episode 136210] reward=-115747679.1 actor_loss=0.2837 critic_loss=95704473409.4884 entropy=17.7580 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 136220] reward=-117274125.9 actor_loss=0.2953 critic_loss=103486658605.5111 entropy=17.7618 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 136220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571668.8 mean_steps=12.4
|
|
[Episode 136230] reward=-114066513.1 actor_loss=0.2106 critic_loss=94335004398.9333 entropy=17.7834 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 136240] reward=-114915683.0 actor_loss=0.2639 critic_loss=95947858235.0769 entropy=17.8059 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 136240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-426013.7 mean_steps=16.2
|
|
[Episode 136250] reward=-116605502.7 actor_loss=0.3710 critic_loss=90386811380.6222 entropy=17.8031 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 136260] reward=-117850992.4 actor_loss=0.2604 critic_loss=94738974674.4889 entropy=17.8121 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 136260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586047.8 mean_steps=12.6
|
|
[Episode 136270] reward=-121403572.8 actor_loss=0.3486 critic_loss=108404462205.1555 entropy=17.8246 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 136280] reward=-114548352.2 actor_loss=0.3322 critic_loss=90388499324.7179 entropy=17.8025 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 136280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475755.2 mean_steps=14.7
|
|
[Episode 136290] reward=-118144429.7 actor_loss=0.3342 critic_loss=136009805948.1212 entropy=17.8008 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 136300] reward=-123000094.5 actor_loss=0.3072 critic_loss=102178627211.6364 entropy=17.7942 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 136300] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-604698.7 mean_steps=11.9
|
|
[Episode 136310] reward=-118120986.8 actor_loss=0.1528 critic_loss=94324246482.4889 entropy=17.7913 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 136320] reward=-130189404.6 actor_loss=0.2193 critic_loss=116676986321.4545 entropy=17.7935 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 136320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-584665.5 mean_steps=13.4
|
|
[Episode 136330] reward=-123928817.4 actor_loss=0.2280 critic_loss=105286427917.4737 entropy=17.7933 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 136340] reward=-122998214.6 actor_loss=0.1955 critic_loss=101759997899.4872 entropy=17.7973 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 136340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-654162.9 mean_steps=12.2
|
|
[Episode 136350] reward=-121813362.7 actor_loss=0.2780 critic_loss=102247404885.3333 entropy=17.7779 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 136360] reward=-117763011.4 actor_loss=0.2260 critic_loss=94532156165.6889 entropy=17.7677 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 136360] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-308558.0 mean_steps=17.1
|
|
[Episode 136370] reward=-120155909.1 actor_loss=0.3206 critic_loss=100300381297.7778 entropy=17.7636 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 136380] reward=-119089732.9 actor_loss=0.2495 critic_loss=103386012967.8222 entropy=17.7856 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 136380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434807.1 mean_steps=15.2
|
|
[Episode 136390] reward=-120466052.3 actor_loss=0.2652 critic_loss=96982774761.2444 entropy=17.7741 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 136400] reward=-121226654.2 actor_loss=0.2412 critic_loss=98198070886.4000 entropy=17.7657 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 136400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467426.5 mean_steps=14.6
|
|
[Episode 136410] reward=-122892274.4 actor_loss=0.2565 critic_loss=99918351928.8889 entropy=17.7530 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 136420] reward=-125109034.5 actor_loss=0.2164 critic_loss=104987504781.2414 entropy=17.7410 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 136420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613571.5 mean_steps=12.5
|
|
[Episode 136430] reward=-124526617.0 actor_loss=0.2179 critic_loss=104848500615.5294 entropy=17.7425 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 136440] reward=-120208865.4 actor_loss=0.2681 critic_loss=100793091125.8947 entropy=17.7356 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 136440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-549927.7 mean_steps=12.4
|
|
[Episode 136450] reward=-127270575.1 actor_loss=0.2367 critic_loss=107553582443.3548 entropy=17.7307 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 136460] reward=-123649670.1 actor_loss=0.2924 critic_loss=109944302627.3103 entropy=17.7265 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 136460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-512883.7 mean_steps=14.8
|
|
[Episode 136470] reward=-122840231.3 actor_loss=0.1553 critic_loss=101402380217.3793 entropy=17.7154 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 136480] reward=-121352878.4 actor_loss=0.3264 critic_loss=108995653905.0667 entropy=17.7119 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 136480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457037.8 mean_steps=14.8
|
|
[Episode 136490] reward=-114520017.4 actor_loss=0.2334 critic_loss=119226365893.4857 entropy=17.7121 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 136500] reward=-120326306.4 actor_loss=0.2598 critic_loss=99060127607.4667 entropy=17.7088 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 136500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462434.4 mean_steps=14.4
|
|
[Episode 136510] reward=-116455261.9 actor_loss=0.3016 critic_loss=102200127214.9333 entropy=17.7100 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 136520] reward=-116264538.0 actor_loss=0.2874 critic_loss=95796779300.5714 entropy=17.7117 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 136520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424593.3 mean_steps=15.6
|
|
[Episode 136530] reward=-121404194.0 actor_loss=0.2888 critic_loss=103080716970.6667 entropy=17.7134 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 136540] reward=-123417647.1 actor_loss=0.2244 critic_loss=109797401600.0000 entropy=17.7055 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 136540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548941.4 mean_steps=13.5
|
|
[Episode 136550] reward=-121316810.6 actor_loss=0.3064 critic_loss=109278738356.1481 entropy=17.7141 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 136560] reward=-124350112.9 actor_loss=0.2332 critic_loss=114039741406.9677 entropy=17.7068 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 136560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475573.0 mean_steps=13.8
|
|
[Episode 136570] reward=-119616039.9 actor_loss=0.3217 critic_loss=106386881649.7778 entropy=17.7095 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 136580] reward=-119971029.3 actor_loss=0.2867 critic_loss=100712417159.5294 entropy=17.7104 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 136580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-631748.8 mean_steps=13.8
|
|
[Episode 136590] reward=-116323823.8 actor_loss=0.4183 critic_loss=100797687808.0000 entropy=17.7072 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 136600] reward=-123583539.6 actor_loss=0.2959 critic_loss=105933740950.0690 entropy=17.7132 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 136600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-558544.9 mean_steps=14.2
|
|
[Episode 136610] reward=-119723990.1 actor_loss=0.3099 critic_loss=100700659712.0000 entropy=17.7044 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 136620] reward=-118655531.4 actor_loss=0.2298 critic_loss=102946080875.7895 entropy=17.7032 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 136620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-520376.5 mean_steps=14.9
|
|
[Episode 136630] reward=-123157009.0 actor_loss=0.1799 critic_loss=99853413034.6667 entropy=17.6958 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 136640] reward=-116031318.2 actor_loss=0.3588 critic_loss=96524902400.0000 entropy=17.6955 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 136640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487807.7 mean_steps=15.0
|
|
[Episode 136650] reward=-120089485.8 actor_loss=0.2691 critic_loss=99563665408.0000 entropy=17.6917 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 136660] reward=-120801949.4 actor_loss=0.3168 critic_loss=98929953645.7143 entropy=17.6902 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 136660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490698.4 mean_steps=14.1
|
|
[Episode 136670] reward=-122992946.5 actor_loss=0.3508 critic_loss=110584821760.0000 entropy=17.6700 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 136680] reward=-118165507.1 actor_loss=0.2765 critic_loss=98863127945.8462 entropy=17.6781 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 136680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539713.8 mean_steps=14.2
|
|
[Episode 136690] reward=-115105829.0 actor_loss=0.3196 critic_loss=95007327191.0400 entropy=17.6813 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 136700] reward=-120908401.4 actor_loss=0.3195 critic_loss=114840519448.7742 entropy=17.6854 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 136700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474420.3 mean_steps=14.8
|
|
[Episode 136710] reward=-117503413.3 actor_loss=0.2488 critic_loss=92744595389.9355 entropy=17.6779 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 136720] reward=-122746562.5 actor_loss=0.2600 critic_loss=101058225152.0000 entropy=17.6726 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 136720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510097.6 mean_steps=14.1
|
|
[Episode 136730] reward=-118476691.3 actor_loss=0.2825 critic_loss=95255880499.2000 entropy=17.6729 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 136740] reward=-123168270.8 actor_loss=0.2197 critic_loss=100344482762.1053 entropy=17.6843 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 136740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-348258.3 mean_steps=16.9
|
|
[Episode 136750] reward=-121092485.4 actor_loss=0.2787 critic_loss=101599235276.8000 entropy=17.6817 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 136760] reward=-117754282.7 actor_loss=0.3095 critic_loss=106269495751.1111 entropy=17.6815 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 136760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535220.1 mean_steps=14.3
|
|
[Episode 136770] reward=-122777360.4 actor_loss=0.2286 critic_loss=102066681669.8182 entropy=17.6811 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 136780] reward=-123566527.7 actor_loss=0.2029 critic_loss=116224733830.7368 entropy=17.6725 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 136780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585356.4 mean_steps=12.9
|
|
[Episode 136790] reward=-118353366.5 actor_loss=0.3724 critic_loss=99421299624.2286 entropy=17.6778 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 136800] reward=-122778644.5 actor_loss=0.2760 critic_loss=101831286535.7576 entropy=17.6735 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 136800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-506032.0 mean_steps=14.9
|
|
[Episode 136810] reward=-124099763.1 actor_loss=0.2235 critic_loss=102324777704.7273 entropy=17.6699 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 136820] reward=-119649956.6 actor_loss=0.3244 critic_loss=91586446586.3111 entropy=17.6682 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 136820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395325.6 mean_steps=16.1
|
|
[Episode 136830] reward=-120836572.9 actor_loss=0.3094 critic_loss=105970968064.0000 entropy=17.6652 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 136840] reward=-119950529.8 actor_loss=0.3498 critic_loss=97794953647.1579 entropy=17.6823 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 136840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-615796.4 mean_steps=13.1
|
|
[Episode 136850] reward=-123617509.7 actor_loss=0.2652 critic_loss=102057137374.6087 entropy=17.6872 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 136860] reward=-119938176.3 actor_loss=0.3764 critic_loss=96903744898.8445 entropy=17.6979 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 136860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469972.2 mean_steps=14.9
|
|
[Episode 136870] reward=-123000775.4 actor_loss=0.2767 critic_loss=104451775190.7097 entropy=17.6848 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 136880] reward=-116673739.9 actor_loss=0.4138 critic_loss=90354475855.4483 entropy=17.6910 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 136880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447198.0 mean_steps=15.6
|
|
[Episode 136890] reward=-117538280.2 actor_loss=0.3691 critic_loss=94715635239.3846 entropy=17.6940 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 136900] reward=-123817135.4 actor_loss=0.2756 critic_loss=107396913493.3333 entropy=17.6876 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 136900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418595.0 mean_steps=15.5
|
|
[Episode 136910] reward=-115439423.4 actor_loss=0.3307 critic_loss=95472226063.0588 entropy=17.6786 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 136920] reward=-119579328.6 actor_loss=0.3234 critic_loss=95675516928.0000 entropy=17.6755 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 136920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476835.4 mean_steps=14.7
|
|
[Episode 136930] reward=-119914041.9 actor_loss=0.3683 critic_loss=99678323078.0952 entropy=17.6672 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 136940] reward=-118394193.1 actor_loss=0.3415 critic_loss=99668370718.7200 entropy=17.6713 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 136940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-659158.1 mean_steps=13.2
|
|
[Episode 136950] reward=-125664219.5 actor_loss=0.3184 critic_loss=103354095957.3333 entropy=17.6701 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 136960] reward=-122506268.7 actor_loss=0.4044 critic_loss=99241598660.9231 entropy=17.6703 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 136960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-427452.7 mean_steps=16.4
|
|
[Episode 136970] reward=-119836977.0 actor_loss=0.2430 critic_loss=99696962218.6667 entropy=17.6666 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 136980] reward=-117908668.7 actor_loss=0.3026 critic_loss=94485090713.6000 entropy=17.6726 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 136980] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-332718.5 mean_steps=17.7
|
|
[Episode 136990] reward=-121198472.2 actor_loss=0.2811 critic_loss=99880489494.2609 entropy=17.6695 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 137000] reward=-119239446.0 actor_loss=0.2416 critic_loss=104336315733.3333 entropy=17.6638 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 137000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476832.5 mean_steps=14.6
|
|
[Episode 137010] reward=-123040988.2 actor_loss=0.3005 critic_loss=105827320627.2000 entropy=17.6609 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 137020] reward=-120684690.4 actor_loss=0.2893 critic_loss=99886737250.4615 entropy=17.6670 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 137020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-595712.6 mean_steps=12.7
|
|
[Episode 137030] reward=-125473148.8 actor_loss=0.3568 critic_loss=163440831829.3333 entropy=17.6651 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 137040] reward=-116067667.0 actor_loss=0.2997 critic_loss=99287974980.2667 entropy=17.6627 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 137040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432587.2 mean_steps=15.2
|
|
[Episode 137050] reward=-116666541.2 actor_loss=0.1936 critic_loss=93450600448.0000 entropy=17.6723 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 137060] reward=-119664588.7 actor_loss=0.1872 critic_loss=97961333555.2000 entropy=17.6663 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 137060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-455665.9 mean_steps=13.8
|
|
[Episode 137070] reward=-121375331.4 actor_loss=0.3029 critic_loss=98529912044.3077 entropy=17.6537 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 137080] reward=-117865431.4 actor_loss=0.2445 critic_loss=94281951232.0000 entropy=17.6551 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 137080] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-653651.7 mean_steps=11.1
|
|
[Episode 137090] reward=-123266088.2 actor_loss=0.3676 critic_loss=100016363074.7826 entropy=17.6692 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 137100] reward=-120063551.7 actor_loss=0.2661 critic_loss=97780270694.4000 entropy=17.6746 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 137100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431469.6 mean_steps=15.4
|
|
[Episode 137110] reward=-119169392.9 actor_loss=0.3143 critic_loss=94019489336.8889 entropy=17.6647 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 137120] reward=-121796396.5 actor_loss=0.3317 critic_loss=100383222579.2000 entropy=17.6678 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 137120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465548.9 mean_steps=14.9
|
|
[Episode 137130] reward=-119139315.5 actor_loss=0.3622 critic_loss=94270519351.3513 entropy=17.6685 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 137140] reward=-118350144.3 actor_loss=0.1529 critic_loss=96756297852.1212 entropy=17.6469 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 137140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619320.1 mean_steps=13.3
|
|
[Episode 137150] reward=-113927157.2 actor_loss=0.2897 critic_loss=91547792384.0000 entropy=17.6439 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 137160] reward=-118756070.2 actor_loss=0.3460 critic_loss=98440661530.9474 entropy=17.6440 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 137160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-390234.3 mean_steps=17.2
|
|
[Episode 137170] reward=-117429995.0 actor_loss=0.2478 critic_loss=96039559495.6800 entropy=17.6473 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 137180] reward=-123956308.7 actor_loss=0.2566 critic_loss=503782117961.1429 entropy=17.6441 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 137180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525254.5 mean_steps=14.2
|
|
[Episode 137190] reward=-123876025.4 actor_loss=0.2312 critic_loss=100044906496.0000 entropy=17.6538 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 137200] reward=-136447808.0 actor_loss=0.2907 critic_loss=1689957072010.3784 entropy=17.6507 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 137200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-498701.2 mean_steps=13.0
|
|
[Episode 137210] reward=-120338955.5 actor_loss=0.2542 critic_loss=96625214805.3333 entropy=17.6467 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 137220] reward=-123331602.2 actor_loss=0.2395 critic_loss=97851173707.2941 entropy=17.6389 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 137220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-464314.8 mean_steps=13.7
|
|
[Episode 137230] reward=-124702726.1 actor_loss=0.3353 critic_loss=105642018230.8571 entropy=17.6193 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 137240] reward=-116132401.1 actor_loss=0.4034 critic_loss=101336719716.1739 entropy=17.6094 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 137240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427829.8 mean_steps=14.4
|
|
[Episode 137250] reward=-119000580.2 actor_loss=0.2912 critic_loss=92411450221.7143 entropy=17.5952 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 137260] reward=-120506087.7 actor_loss=0.3463 critic_loss=97463935162.1818 entropy=17.5915 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 137260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-382221.6 mean_steps=16.0
|
|
[Episode 137270] reward=-119661005.4 actor_loss=0.2253 critic_loss=96080045982.4762 entropy=17.6007 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 137280] reward=-115577410.5 actor_loss=0.2954 critic_loss=94796727016.7273 entropy=17.6082 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 137280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547447.5 mean_steps=14.2
|
|
[Episode 137290] reward=-127817417.6 actor_loss=0.3383 critic_loss=622304805228.0889 entropy=17.6067 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 137300] reward=-116239756.3 actor_loss=0.2232 critic_loss=99623232471.0400 entropy=17.6090 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 137300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497320.8 mean_steps=14.8
|
|
[Episode 137310] reward=-120830053.8 actor_loss=0.2724 critic_loss=98718345079.4667 entropy=17.6232 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 137320] reward=-121342675.2 actor_loss=0.2570 critic_loss=105604363059.2000 entropy=17.6227 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 137320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-604892.9 mean_steps=12.7
|
|
[Episode 137330] reward=-122436088.1 actor_loss=0.2031 critic_loss=92924653056.0000 entropy=17.6307 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 137340] reward=-121190329.4 actor_loss=0.2379 critic_loss=100798741162.6667 entropy=17.6438 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 137340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-472833.2 mean_steps=15.2
|
|
[Episode 137350] reward=-118590922.2 actor_loss=0.2834 critic_loss=93844779349.3333 entropy=17.6459 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 137360] reward=-121620324.1 actor_loss=0.2266 critic_loss=97197856888.4706 entropy=17.6456 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 137360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-510821.6 mean_steps=14.9
|
|
[Episode 137370] reward=-120330567.9 actor_loss=0.2290 critic_loss=96213305555.8621 entropy=17.6589 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 137380] reward=-118465716.1 actor_loss=0.2819 critic_loss=92028090368.0000 entropy=17.6565 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 137380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492194.0 mean_steps=15.0
|
|
[Episode 137390] reward=-117069129.2 actor_loss=0.3030 critic_loss=97661444808.3478 entropy=17.6526 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 137400] reward=-120579053.4 actor_loss=0.3584 critic_loss=98372512302.5455 entropy=17.6386 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 137400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-543570.7 mean_steps=14.4
|
|
[Episode 137410] reward=-118168594.3 actor_loss=0.2864 critic_loss=100899346742.3030 entropy=17.6271 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 137420] reward=-122831742.5 actor_loss=0.4156 critic_loss=197177426805.6216 entropy=17.6127 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 137420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-461006.1 mean_steps=13.6
|
|
[Episode 137430] reward=-125338800.0 actor_loss=0.3100 critic_loss=241045736886.8571 entropy=17.6223 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 137440] reward=-125192486.2 actor_loss=0.2774 critic_loss=105726035057.7778 entropy=17.6182 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 137440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547547.2 mean_steps=14.4
|
|
[Episode 137450] reward=-116591348.5 actor_loss=0.2801 critic_loss=94395257241.6000 entropy=17.6078 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 137460] reward=-116834952.9 actor_loss=0.2760 critic_loss=93737123371.8857 entropy=17.6066 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 137460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470926.7 mean_steps=13.8
|
|
[Episode 137470] reward=-124474762.0 actor_loss=0.2123 critic_loss=98851923968.0000 entropy=17.6037 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 137480] reward=-121285191.6 actor_loss=0.2590 critic_loss=98470669347.3103 entropy=17.6284 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 137480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434143.0 mean_steps=14.3
|
|
[Episode 137490] reward=-118898236.0 actor_loss=0.2478 critic_loss=98680483840.0000 entropy=17.6279 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 137500] reward=-120104094.2 actor_loss=0.2478 critic_loss=97539024304.3556 entropy=17.6312 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 137500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-410430.6 mean_steps=14.4
|
|
[Episode 137510] reward=-117999438.5 actor_loss=0.2957 critic_loss=92621288314.4348 entropy=17.6255 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 137520] reward=-113441042.2 actor_loss=0.3344 critic_loss=86863804739.3684 entropy=17.6333 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 137520] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-343426.4 mean_steps=17.3
|
|
[Episode 137530] reward=-117841447.8 actor_loss=0.3211 critic_loss=100006893122.7826 entropy=17.6399 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 137540] reward=-119542721.7 actor_loss=0.3994 critic_loss=95841983519.0303 entropy=17.6403 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 137540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-499698.1 mean_steps=13.2
|
|
[Episode 137550] reward=-120010356.4 actor_loss=0.2264 critic_loss=94550243448.4706 entropy=17.6423 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 137560] reward=-119078855.8 actor_loss=0.3753 critic_loss=99078493735.3846 entropy=17.6519 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 137560] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-587267.9 mean_steps=11.6
|
|
[Episode 137570] reward=-122904148.5 actor_loss=0.3396 critic_loss=103474532443.0222 entropy=17.6438 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 137580] reward=-117695441.8 actor_loss=0.3106 critic_loss=93819490700.3871 entropy=17.6425 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 137580] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-670296.3 mean_steps=11.3
|
|
[Episode 137590] reward=-120638966.5 actor_loss=0.2601 critic_loss=95655986972.4444 entropy=17.6417 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 137600] reward=-120351170.4 actor_loss=0.3499 critic_loss=113838167040.0000 entropy=17.6452 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 137600] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-710812.3 mean_steps=10.6
|
|
[Episode 137610] reward=-116779456.2 actor_loss=0.3730 critic_loss=91114711941.1200 entropy=17.6532 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 137620] reward=-118714770.7 actor_loss=0.3158 critic_loss=101705366732.8000 entropy=17.6567 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 137620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613221.0 mean_steps=12.8
|
|
[Episode 137630] reward=-118330848.2 actor_loss=0.2901 critic_loss=94781319655.6190 entropy=17.6566 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 137640] reward=-117188669.2 actor_loss=0.3315 critic_loss=96217945027.7647 entropy=17.6554 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 137640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437395.7 mean_steps=15.6
|
|
[Episode 137650] reward=-120954889.3 actor_loss=0.2643 critic_loss=96262064810.6667 entropy=17.6560 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 137660] reward=-118044133.3 actor_loss=0.2874 critic_loss=94004994412.0889 entropy=17.6659 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 137660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-645459.7 mean_steps=13.1
|
|
[Episode 137670] reward=-119671860.6 actor_loss=0.2205 critic_loss=104574220970.6667 entropy=17.6649 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 137680] reward=-121800472.2 actor_loss=0.2165 critic_loss=98875161302.7097 entropy=17.6561 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 137680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-586573.1 mean_steps=14.0
|
|
[Episode 137690] reward=-122754733.8 actor_loss=0.2602 critic_loss=99163527115.4872 entropy=17.6664 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 137700] reward=-121128165.0 actor_loss=0.3382 critic_loss=91538008018.4889 entropy=17.6684 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 137700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-587586.6 mean_steps=11.5
|
|
[Episode 137710] reward=-120335231.8 actor_loss=0.3328 critic_loss=95792021777.0667 entropy=17.6769 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 137720] reward=-119119810.2 actor_loss=0.3117 critic_loss=96136136173.0370 entropy=17.6694 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 137720] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-248927.5 mean_steps=18.9
|
|
[Episode 137730] reward=-114290253.7 actor_loss=0.2875 critic_loss=93464390980.6829 entropy=17.6627 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 137740] reward=-122393610.4 actor_loss=0.2681 critic_loss=97214927257.6000 entropy=17.6621 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 137740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-596761.9 mean_steps=11.6
|
|
[Episode 137750] reward=-119035893.8 actor_loss=0.3328 critic_loss=98618689268.8696 entropy=17.6615 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 137760] reward=-119989682.6 actor_loss=0.3790 critic_loss=95912582912.0000 entropy=17.6668 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 137760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-374729.3 mean_steps=14.8
|
|
[Episode 137770] reward=-118112324.7 actor_loss=0.2821 critic_loss=97157670297.6000 entropy=17.6710 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 137780] reward=-113509743.8 actor_loss=0.3598 critic_loss=93344944947.2000 entropy=17.6747 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 137780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-401789.3 mean_steps=14.6
|
|
[Episode 137790] reward=-115814564.4 actor_loss=0.2851 critic_loss=92325334584.8889 entropy=17.6702 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 137800] reward=-121849802.2 actor_loss=0.2952 critic_loss=103729002694.1935 entropy=17.6754 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 137800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-420374.9 mean_steps=16.1
|
|
[Episode 137810] reward=-120299654.4 actor_loss=0.2389 critic_loss=99715309240.3200 entropy=17.6750 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 137820] reward=-121186779.2 actor_loss=0.2504 critic_loss=99458690720.9143 entropy=17.6743 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 137820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416656.5 mean_steps=15.1
|
|
[Episode 137830] reward=-120648671.8 actor_loss=0.3224 critic_loss=100895521792.0000 entropy=17.6640 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 137840] reward=-117975214.1 actor_loss=0.2046 critic_loss=93841694720.0000 entropy=17.6622 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 137840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517673.7 mean_steps=14.8
|
|
[Episode 137850] reward=-117824672.9 actor_loss=0.3513 critic_loss=90871815239.4419 entropy=17.6543 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 137860] reward=-129038207.8 actor_loss=0.3388 critic_loss=425913166506.6667 entropy=17.6496 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 137860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-615127.9 mean_steps=12.8
|
|
[Episode 137870] reward=-118504809.2 actor_loss=0.2769 critic_loss=96367621120.0000 entropy=17.6495 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 137880] reward=-115077891.3 actor_loss=0.3328 critic_loss=119140504064.0000 entropy=17.6534 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 137880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-643918.8 mean_steps=13.1
|
|
[Episode 137890] reward=-122868202.6 actor_loss=0.3949 critic_loss=101300885796.5714 entropy=17.6531 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 137900] reward=-118907519.5 actor_loss=0.4557 critic_loss=99290081224.6487 entropy=17.6566 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 137900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493373.0 mean_steps=14.1
|
|
[Episode 137910] reward=-123920498.6 actor_loss=0.2181 critic_loss=100385760436.7059 entropy=17.6426 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 137920] reward=-117917590.1 actor_loss=0.3429 critic_loss=99563780850.5263 entropy=17.6429 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 137920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484749.7 mean_steps=13.7
|
|
[Episode 137930] reward=-118133573.1 actor_loss=0.2573 critic_loss=93974947430.4000 entropy=17.6624 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 137940] reward=-115639725.3 actor_loss=0.2782 critic_loss=94299459492.9778 entropy=17.6581 approx_kl=0.0106 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 137940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-540173.8 mean_steps=15.1
|
|
[Episode 137950] reward=-122661119.2 actor_loss=0.2031 critic_loss=102480896000.0000 entropy=17.6559 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 137960] reward=-114214330.5 actor_loss=0.4343 critic_loss=95854365731.3103 entropy=17.6525 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 137960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-452373.8 mean_steps=13.8
|
|
[Episode 137970] reward=-125384388.3 actor_loss=0.2364 critic_loss=101901952388.4138 entropy=17.6454 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 137980] reward=-117689439.5 actor_loss=0.2648 critic_loss=96422701267.8621 entropy=17.6512 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 137980] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-218570.3 mean_steps=18.6
|
|
[Episode 137990] reward=-117745982.7 actor_loss=0.2836 critic_loss=96841754851.5556 entropy=17.6500 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 138000] reward=-124240663.5 actor_loss=0.1501 critic_loss=102681054585.2632 entropy=17.6545 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 138000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-655564.3 mean_steps=12.0
|
|
[Episode 138010] reward=-118326874.4 actor_loss=0.2770 critic_loss=99142733660.1600 entropy=17.6476 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 138020] reward=-124796759.7 actor_loss=0.2911 critic_loss=104129648587.4872 entropy=17.6408 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 138020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-498041.8 mean_steps=13.2
|
|
[Episode 138030] reward=-118613976.8 actor_loss=0.2655 critic_loss=96375367270.4000 entropy=17.6337 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 138040] reward=-120879514.0 actor_loss=0.3375 critic_loss=98991278762.6667 entropy=17.6290 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 138040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436216.6 mean_steps=15.3
|
|
[Episode 138050] reward=-115719049.1 actor_loss=0.2849 critic_loss=94389911552.0000 entropy=17.6389 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 138060] reward=-120240234.6 actor_loss=0.2513 critic_loss=98948706304.0000 entropy=17.6453 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 138060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484477.9 mean_steps=14.1
|
|
[Episode 138070] reward=-115158894.4 actor_loss=0.3140 critic_loss=91810619674.4828 entropy=17.6385 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 138080] reward=-115580583.1 actor_loss=0.3253 critic_loss=167778864660.4800 entropy=17.6365 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 138080] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-323598.7 mean_steps=17.4
|
|
[Episode 138090] reward=-123798817.3 actor_loss=0.2688 critic_loss=98940866454.9744 entropy=17.6316 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 138100] reward=-123803869.1 actor_loss=0.2328 critic_loss=101520049584.3556 entropy=17.6298 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 138100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-568751.6 mean_steps=15.6
|
|
[Episode 138110] reward=-122482165.3 actor_loss=0.2343 critic_loss=96940130304.0000 entropy=17.6295 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 138120] reward=-123307672.0 actor_loss=0.3411 critic_loss=101076532601.2632 entropy=17.6275 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 138120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470001.4 mean_steps=13.9
|
|
[Episode 138130] reward=-115935533.8 actor_loss=0.1880 critic_loss=90417493214.6087 entropy=17.6148 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 138140] reward=-118533036.3 actor_loss=0.2754 critic_loss=95887242725.0526 entropy=17.6087 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 138140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454445.5 mean_steps=15.8
|
|
[Episode 138150] reward=-123035051.7 actor_loss=0.2733 critic_loss=100389862561.6842 entropy=17.6012 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 138160] reward=-120912865.4 actor_loss=0.3543 critic_loss=98763768588.1905 entropy=17.5946 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 138160] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-373222.8 mean_steps=17.8
|
|
[Episode 138170] reward=-126148481.6 actor_loss=0.2990 critic_loss=108126977765.5172 entropy=17.5872 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 138180] reward=-122095940.3 actor_loss=0.2749 critic_loss=102956927385.6000 entropy=17.5878 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 138180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510581.3 mean_steps=13.2
|
|
[Episode 138190] reward=-122495140.4 actor_loss=0.2226 critic_loss=99664407600.7619 entropy=17.5871 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 138200] reward=-118475369.6 actor_loss=0.4459 critic_loss=93909211403.1304 entropy=17.5893 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 138200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478610.9 mean_steps=15.2
|
|
[Episode 138210] reward=-115578894.8 actor_loss=0.4036 critic_loss=95857857589.8947 entropy=17.6040 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 138220] reward=-118665190.4 actor_loss=0.3108 critic_loss=98925329612.8000 entropy=17.6037 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 138220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420693.4 mean_steps=15.5
|
|
[Episode 138230] reward=-122036429.7 actor_loss=0.3092 critic_loss=101255531897.2632 entropy=17.5905 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 138240] reward=-119056141.0 actor_loss=0.2335 critic_loss=96264125500.2353 entropy=17.5793 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 138240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442937.4 mean_steps=15.8
|
|
[Episode 138250] reward=-119473894.1 actor_loss=0.2497 critic_loss=99310063400.4211 entropy=17.5796 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 138260] reward=-125292317.5 actor_loss=0.3617 critic_loss=207778764012.3077 entropy=17.5791 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 138260] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-636922.2 mean_steps=12.1
|
|
[Episode 138270] reward=-122252262.3 actor_loss=0.2835 critic_loss=100018277976.2759 entropy=17.5700 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 138280] reward=-116701647.9 actor_loss=0.2957 critic_loss=90061843296.7111 entropy=17.5660 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 138280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517521.6 mean_steps=15.2
|
|
[Episode 138290] reward=-116485283.7 actor_loss=0.2485 critic_loss=90737109768.8276 entropy=17.5687 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 138300] reward=-118397492.7 actor_loss=0.3547 critic_loss=97224851797.3333 entropy=17.5629 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 138300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-638891.7 mean_steps=13.2
|
|
[Episode 138310] reward=-114288703.7 actor_loss=0.3480 critic_loss=89719302656.0000 entropy=17.5627 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 138320] reward=-117607985.3 actor_loss=0.2856 critic_loss=92614580711.6190 entropy=17.5577 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 138320] success_rate=0.750 qp_infeasible_rate=0.250 mean_return=-213555.8 mean_steps=19.4
|
|
[Episode 138330] reward=-117600308.1 actor_loss=0.2766 critic_loss=92813664711.1111 entropy=17.5630 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 138340] reward=-118743575.9 actor_loss=0.2937 critic_loss=93005759556.2667 entropy=17.5621 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 138340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408351.4 mean_steps=15.7
|
|
[Episode 138350] reward=-119680459.9 actor_loss=0.2809 critic_loss=95719480360.9600 entropy=17.5688 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 138360] reward=-122132170.0 actor_loss=0.3060 critic_loss=97114945243.4286 entropy=17.5721 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 138360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-589043.0 mean_steps=13.7
|
|
[Episode 138370] reward=-125245823.1 actor_loss=0.2017 critic_loss=105857266741.8947 entropy=17.5629 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 138380] reward=-118161248.9 actor_loss=0.3120 critic_loss=96490856448.0000 entropy=17.5889 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 138380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-527607.1 mean_steps=12.6
|
|
[Episode 138390] reward=-117505841.4 actor_loss=0.2916 critic_loss=95854489972.3636 entropy=17.5800 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 138400] reward=-121291984.3 actor_loss=0.4261 critic_loss=100252548904.4211 entropy=17.5841 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1517 front_blocked=0
|
|
[Eval 138400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-434001.7 mean_steps=13.7
|
|
[Episode 138410] reward=-116361423.4 actor_loss=0.2644 critic_loss=93574049897.0256 entropy=17.6036 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 138420] reward=-119863540.1 actor_loss=0.3648 critic_loss=103738552950.1538 entropy=17.5917 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 138420] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-353260.7 mean_steps=16.4
|
|
[Episode 138430] reward=-124704297.0 actor_loss=0.2619 critic_loss=102533011397.4857 entropy=17.5998 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 138440] reward=-114484371.1 actor_loss=0.2378 critic_loss=92593788245.3333 entropy=17.5973 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 138440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-612076.4 mean_steps=12.7
|
|
[Episode 138450] reward=-124706428.7 actor_loss=0.2193 critic_loss=106883641233.2973 entropy=17.5964 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 138460] reward=-123841393.4 actor_loss=0.1993 critic_loss=118980606439.6190 entropy=17.5815 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 138460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-412823.4 mean_steps=15.6
|
|
[Episode 138470] reward=-117021951.9 actor_loss=0.3085 critic_loss=91646426843.4286 entropy=17.5777 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 138480] reward=-114954758.2 actor_loss=0.3681 critic_loss=88225828700.1600 entropy=17.5719 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 138480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492534.8 mean_steps=14.8
|
|
[Episode 138490] reward=-115531238.7 actor_loss=0.2761 critic_loss=89631653456.8421 entropy=17.5683 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 138500] reward=-118084751.4 actor_loss=0.3560 critic_loss=97596032000.0000 entropy=17.5583 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 138500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449680.9 mean_steps=14.8
|
|
[Episode 138510] reward=-120657209.4 actor_loss=0.3526 critic_loss=99793361998.7692 entropy=17.5615 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 138520] reward=-122603052.4 actor_loss=0.2847 critic_loss=103247880856.2162 entropy=17.5731 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 138520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-344393.1 mean_steps=16.8
|
|
[Episode 138530] reward=-123285553.3 actor_loss=0.2860 critic_loss=108041090779.4286 entropy=17.5586 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 138540] reward=-121623821.7 actor_loss=0.3579 critic_loss=95581410157.7143 entropy=17.5496 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 138540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515204.7 mean_steps=13.2
|
|
[Episode 138550] reward=-117710502.4 actor_loss=0.2445 critic_loss=92519835805.5385 entropy=17.5399 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 138560] reward=-123679037.1 actor_loss=0.3267 critic_loss=99309944645.8182 entropy=17.5511 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 138560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494455.4 mean_steps=13.8
|
|
[Episode 138570] reward=-118844095.8 actor_loss=0.3329 critic_loss=98049480637.9355 entropy=17.5524 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 138580] reward=-121272335.6 actor_loss=0.3289 critic_loss=94017712947.2000 entropy=17.5648 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 138580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516417.4 mean_steps=14.2
|
|
[Episode 138590] reward=-119843068.1 actor_loss=0.3299 critic_loss=92577694418.8235 entropy=17.5696 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 138600] reward=-112848989.8 actor_loss=0.2808 critic_loss=91564613367.7419 entropy=17.5734 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 138600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-375659.6 mean_steps=15.1
|
|
[Episode 138610] reward=-115702441.0 actor_loss=0.2910 critic_loss=92394444305.6552 entropy=17.5736 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 138620] reward=-115124258.0 actor_loss=0.2499 critic_loss=90015629808.4848 entropy=17.5885 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 138620] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-327738.9 mean_steps=17.1
|
|
[Episode 138630] reward=-123800743.8 actor_loss=0.2850 critic_loss=102426915893.8947 entropy=17.5856 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 138640] reward=-119113667.7 actor_loss=0.3399 critic_loss=103441139939.5556 entropy=17.5835 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 138640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-347396.1 mean_steps=16.9
|
|
[Episode 138650] reward=-122054613.1 actor_loss=0.2578 critic_loss=99891897230.2222 entropy=17.5728 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 138660] reward=-122301037.4 actor_loss=0.3021 critic_loss=102570217472.0000 entropy=17.5647 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 138660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-523084.5 mean_steps=15.2
|
|
[Episode 138670] reward=-123814189.6 actor_loss=0.2273 critic_loss=107803148288.0000 entropy=17.5562 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 138680] reward=-135064557.9 actor_loss=0.2529 critic_loss=1046514845614.0800 entropy=17.5455 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 138680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-612961.0 mean_steps=13.0
|
|
[Episode 138690] reward=-119325776.7 actor_loss=0.2737 critic_loss=100041517899.2941 entropy=17.5446 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 138700] reward=-113025213.0 actor_loss=0.3848 critic_loss=91436638952.7273 entropy=17.5652 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 138700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573013.7 mean_steps=13.8
|
|
[Episode 138710] reward=-113874656.1 actor_loss=0.3157 critic_loss=99056893669.5172 entropy=17.5511 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 138720] reward=-119556617.7 actor_loss=0.3072 critic_loss=92135616102.4000 entropy=17.5574 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 138720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-511514.6 mean_steps=14.8
|
|
[Episode 138730] reward=-123106749.4 actor_loss=0.2744 critic_loss=100884073434.0741 entropy=17.5621 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 138740] reward=-118122680.1 actor_loss=0.3472 critic_loss=92324690147.5556 entropy=17.5685 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 138740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433125.3 mean_steps=14.5
|
|
[Episode 138750] reward=-122853220.3 actor_loss=0.2387 critic_loss=97838085851.4286 entropy=17.5668 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 138760] reward=-122392546.2 actor_loss=0.3098 critic_loss=101411700736.0000 entropy=17.5672 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 138760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-425248.0 mean_steps=14.2
|
|
[Episode 138770] reward=-118006475.7 actor_loss=0.3357 critic_loss=90670356007.3846 entropy=17.5919 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 138780] reward=-114438844.3 actor_loss=0.4482 critic_loss=93218327405.7143 entropy=17.5827 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 138780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-562979.8 mean_steps=11.4
|
|
[Episode 138790] reward=-116574158.4 actor_loss=0.3725 critic_loss=94306722860.5217 entropy=17.5913 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 138800] reward=-113530866.5 actor_loss=0.3031 critic_loss=92815113728.0000 entropy=17.5867 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 138800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442688.1 mean_steps=14.5
|
|
[Episode 138810] reward=-119880718.8 actor_loss=0.2827 critic_loss=100169302298.4828 entropy=17.5881 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 138820] reward=-116081053.7 actor_loss=0.3599 critic_loss=93137875478.2609 entropy=17.5922 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 138820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-519011.2 mean_steps=12.9
|
|
[Episode 138830] reward=-126072177.4 actor_loss=0.2001 critic_loss=103085402219.7895 entropy=17.5907 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 138840] reward=-117827558.6 actor_loss=0.2892 critic_loss=95800154112.0000 entropy=17.5933 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 138840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551773.6 mean_steps=12.4
|
|
[Episode 138850] reward=-118743373.5 actor_loss=0.2726 critic_loss=95277457839.1579 entropy=17.5941 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 138860] reward=-123770409.5 actor_loss=0.3283 critic_loss=105740076974.0800 entropy=17.6027 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 138860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-406022.9 mean_steps=15.1
|
|
[Episode 138870] reward=-179760278.4 actor_loss=0.3272 critic_loss=13954710596630.7559 entropy=17.6080 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 138880] reward=-123393750.7 actor_loss=0.2442 critic_loss=107595774022.6207 entropy=17.6216 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 138880] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-643099.6 mean_steps=12.3
|
|
[Episode 138890] reward=-119664242.3 actor_loss=0.2856 critic_loss=96802791739.0769 entropy=17.6179 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 138900] reward=-115747256.2 actor_loss=0.2328 critic_loss=87787281825.1852 entropy=17.6335 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 138900] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-698969.7 mean_steps=11.7
|
|
[Episode 138910] reward=-123805234.0 actor_loss=0.3471 critic_loss=103421572674.7826 entropy=17.6341 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 138920] reward=-119675977.0 actor_loss=0.3001 critic_loss=94724791748.4651 entropy=17.6514 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 138920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-619669.4 mean_steps=12.1
|
|
[Episode 138930] reward=-234983305.0 actor_loss=0.2221 critic_loss=38427152694931.9141 entropy=17.6298 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 138940] reward=-116977512.5 actor_loss=0.2490 critic_loss=95855094930.2857 entropy=17.6322 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 138940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413886.9 mean_steps=15.5
|
|
[Episode 138950] reward=-120065443.2 actor_loss=0.2698 critic_loss=136184834730.6667 entropy=17.6396 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 138960] reward=-117525104.0 actor_loss=0.3222 critic_loss=97274990592.0000 entropy=17.6314 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 138960] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-649190.4 mean_steps=12.1
|
|
[Episode 138970] reward=-118653727.6 actor_loss=0.3381 critic_loss=96109839104.0000 entropy=17.6351 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 138980] reward=-116971516.0 actor_loss=0.2465 critic_loss=95289860096.0000 entropy=17.6326 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 138980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-413300.4 mean_steps=15.9
|
|
[Episode 138990] reward=-120170995.2 actor_loss=0.3631 critic_loss=99511595223.5789 entropy=17.6214 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 139000] reward=-117600133.7 actor_loss=0.2860 critic_loss=94492967302.0952 entropy=17.6018 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 139000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472078.2 mean_steps=14.7
|
|
[Episode 139010] reward=-119108762.5 actor_loss=0.2408 critic_loss=101981960835.6572 entropy=17.6014 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 139020] reward=-121970531.7 actor_loss=0.2141 critic_loss=112710627958.1538 entropy=17.5938 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 139020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-588425.7 mean_steps=11.8
|
|
[Episode 139030] reward=-117329641.2 actor_loss=0.3724 critic_loss=91929230512.5517 entropy=17.5941 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 139040] reward=-119362998.4 actor_loss=0.1993 critic_loss=91351957894.0952 entropy=17.5982 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 139040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450125.5 mean_steps=15.3
|
|
[Episode 139050] reward=-113982267.9 actor_loss=0.3048 critic_loss=93680064170.6667 entropy=17.6085 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 139060] reward=-115632815.1 actor_loss=0.3729 critic_loss=89262666911.2889 entropy=17.6060 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 139060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551956.0 mean_steps=13.7
|
|
[Episode 139070] reward=-121338523.7 actor_loss=0.3874 critic_loss=279962814843.2593 entropy=17.5941 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 139080] reward=-121320777.8 actor_loss=0.2923 critic_loss=94309880081.0667 entropy=17.5939 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 139080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-617598.5 mean_steps=13.6
|
|
[Episode 139090] reward=-118184765.3 actor_loss=0.3407 critic_loss=94425351437.4737 entropy=17.5704 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 139100] reward=-121470737.8 actor_loss=0.2641 critic_loss=99723550392.3200 entropy=17.5768 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 139100] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-603214.5 mean_steps=12.2
|
|
[Episode 139110] reward=-120267906.9 actor_loss=0.3146 critic_loss=101611197062.7368 entropy=17.5850 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 139120] reward=-115957129.9 actor_loss=0.3198 critic_loss=92341778502.6207 entropy=17.5887 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 139120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460247.4 mean_steps=14.7
|
|
[Episode 139130] reward=-117804131.2 actor_loss=0.3417 critic_loss=101129274665.2903 entropy=17.5897 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 139140] reward=-175669193.8 actor_loss=0.3395 critic_loss=12462661304320.0000 entropy=17.5802 approx_kl=0.0013 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 139140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560874.1 mean_steps=13.4
|
|
[Episode 139150] reward=-134986195.7 actor_loss=0.2872 critic_loss=938697850880.0000 entropy=17.5696 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 139160] reward=-114254990.2 actor_loss=0.2594 critic_loss=99925489987.3684 entropy=17.5839 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 139160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531132.8 mean_steps=13.0
|
|
[Episode 139170] reward=-123102151.1 actor_loss=0.3295 critic_loss=104690986916.9778 entropy=17.5832 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 139180] reward=-117593815.3 actor_loss=0.2684 critic_loss=106624368640.0000 entropy=17.6081 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 139180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555729.3 mean_steps=13.2
|
|
[Episode 139190] reward=-123286392.9 actor_loss=0.3982 critic_loss=105298607217.7778 entropy=17.6097 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 139200] reward=-115292733.3 actor_loss=0.2929 critic_loss=91906270640.3556 entropy=17.5970 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 139200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-551089.0 mean_steps=14.3
|
|
[Episode 139210] reward=-120545336.6 actor_loss=0.3216 critic_loss=119432960323.3684 entropy=17.5862 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 139220] reward=-121975579.5 actor_loss=0.3216 critic_loss=117880598983.1111 entropy=17.6063 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 139220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461703.0 mean_steps=14.8
|
|
[Episode 139230] reward=-121426962.1 actor_loss=0.3179 critic_loss=101906059910.7368 entropy=17.5899 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 139240] reward=-121077863.4 actor_loss=0.2712 critic_loss=113825904981.3333 entropy=17.5998 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 139240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-507609.1 mean_steps=15.0
|
|
[Episode 139250] reward=-115845025.7 actor_loss=0.4179 critic_loss=90203969243.4286 entropy=17.5964 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 139260] reward=-117894557.3 actor_loss=0.3611 critic_loss=96442560827.0769 entropy=17.5996 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 139260] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-698945.9 mean_steps=11.6
|
|
[Episode 139270] reward=-118923059.5 actor_loss=0.2556 critic_loss=94869541319.1111 entropy=17.6120 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 139280] reward=-195559910.2 actor_loss=3.3171 critic_loss=16992078863473.7773 entropy=17.6170 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 139280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-371241.3 mean_steps=16.9
|
|
[Episode 139290] reward=-116957656.4 actor_loss=0.3031 critic_loss=88887295488.0000 entropy=17.6407 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 139300] reward=-122048448.6 actor_loss=0.2067 critic_loss=97851469732.9778 entropy=17.6343 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 139300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-505761.1 mean_steps=12.1
|
|
[Episode 139310] reward=-116180291.4 actor_loss=0.2360 critic_loss=91822497245.8667 entropy=17.6167 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 139320] reward=-113327274.1 actor_loss=0.2816 critic_loss=88799322476.0889 entropy=17.6145 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 139320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-499577.6 mean_steps=13.3
|
|
[Episode 139330] reward=-122182434.6 actor_loss=0.2656 critic_loss=98615544700.7179 entropy=17.6222 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 139340] reward=-121883099.5 actor_loss=0.3032 critic_loss=103607769245.5385 entropy=17.6362 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 139340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-610380.8 mean_steps=11.7
|
|
[Episode 139350] reward=-121944448.0 actor_loss=0.1439 critic_loss=99482702825.2444 entropy=17.6482 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 139360] reward=-116360724.5 actor_loss=0.2909 critic_loss=93643943177.4815 entropy=17.6440 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 139360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574192.7 mean_steps=13.2
|
|
[Episode 139370] reward=-122884390.5 actor_loss=0.3294 critic_loss=104427506710.7556 entropy=17.6464 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 139380] reward=-119810382.8 actor_loss=0.3107 critic_loss=98814666997.7600 entropy=17.6409 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 139380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448515.6 mean_steps=15.8
|
|
[Episode 139390] reward=-113077939.1 actor_loss=0.3334 critic_loss=91801102290.4889 entropy=17.6157 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 139400] reward=-121262069.1 actor_loss=0.2759 critic_loss=98412168260.2667 entropy=17.6153 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 139400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457634.5 mean_steps=14.4
|
|
[Episode 139410] reward=-116615558.2 actor_loss=0.2944 critic_loss=89556489654.8571 entropy=17.6096 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 139420] reward=-122358904.2 actor_loss=0.3410 critic_loss=101892342988.8000 entropy=17.6255 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 139420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450235.4 mean_steps=15.3
|
|
[Episode 139430] reward=-116777430.2 actor_loss=0.3324 critic_loss=92271141179.0769 entropy=17.6350 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 139440] reward=-120149070.5 actor_loss=0.3101 critic_loss=95768815458.4615 entropy=17.6378 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 139440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485737.9 mean_steps=13.7
|
|
[Episode 139450] reward=-117141514.5 actor_loss=0.2732 critic_loss=93958942238.1176 entropy=17.6382 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 139460] reward=-121190443.1 actor_loss=0.3028 critic_loss=97584199387.4286 entropy=17.6340 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 139460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-391522.1 mean_steps=14.8
|
|
[Episode 139470] reward=-117494502.7 actor_loss=0.2408 critic_loss=100158050212.9778 entropy=17.6270 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 139480] reward=-119731729.5 actor_loss=0.3254 critic_loss=99092247347.2000 entropy=17.6436 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 139480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-546102.5 mean_steps=14.2
|
|
[Episode 139490] reward=-121041464.3 actor_loss=0.3119 critic_loss=97228818793.4118 entropy=17.6548 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 139500] reward=-117712082.2 actor_loss=0.3274 critic_loss=98438116875.3778 entropy=17.6402 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 139500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565669.6 mean_steps=13.3
|
|
[Episode 139510] reward=-118415167.3 actor_loss=0.2771 critic_loss=104114354734.5455 entropy=17.6340 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 139520] reward=-120895665.3 actor_loss=0.2811 critic_loss=98902859344.8421 entropy=17.6409 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 139520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-541406.8 mean_steps=14.9
|
|
[Episode 139530] reward=-122550239.5 actor_loss=0.2919 critic_loss=108335897555.4783 entropy=17.6389 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 139540] reward=-118644728.8 actor_loss=0.2623 critic_loss=92632882122.1053 entropy=17.6289 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 139540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464472.7 mean_steps=14.8
|
|
[Episode 139550] reward=-122753126.7 actor_loss=0.3413 critic_loss=104359619379.2000 entropy=17.6251 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 139560] reward=-121161684.2 actor_loss=0.2629 critic_loss=96715362125.9130 entropy=17.6280 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 139560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-586120.8 mean_steps=13.9
|
|
[Episode 139570] reward=-119250838.0 actor_loss=0.3909 critic_loss=95329440256.0000 entropy=17.6300 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 139580] reward=-113049402.5 actor_loss=0.3530 critic_loss=94595770813.2174 entropy=17.6184 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 139580] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-263040.5 mean_steps=17.1
|
|
[Episode 139590] reward=-120574015.9 actor_loss=0.2374 critic_loss=94847597646.7692 entropy=17.6056 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 139600] reward=-123619567.3 actor_loss=0.2609 critic_loss=110622282703.2381 entropy=17.5989 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 139600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379003.8 mean_steps=16.1
|
|
[Episode 139610] reward=-122432457.4 actor_loss=0.2563 critic_loss=99211660970.6667 entropy=17.6050 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 139620] reward=-114427222.6 actor_loss=0.2717 critic_loss=91407888860.2791 entropy=17.5822 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 139620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-497407.0 mean_steps=12.8
|
|
[Episode 139630] reward=-116851899.7 actor_loss=0.2347 critic_loss=91366258187.3778 entropy=17.5770 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 139640] reward=-115825790.2 actor_loss=0.3230 critic_loss=89460414145.4222 entropy=17.5698 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 139640] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-590550.7 mean_steps=11.9
|
|
[Episode 139650] reward=-118802850.9 actor_loss=0.2897 critic_loss=98078194346.6667 entropy=17.5745 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 139660] reward=-117593087.9 actor_loss=0.3367 critic_loss=94024041130.6667 entropy=17.5818 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 139660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461144.7 mean_steps=14.7
|
|
[Episode 139670] reward=-121245935.2 actor_loss=0.2277 critic_loss=96590294016.0000 entropy=17.5811 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 139680] reward=-119063924.9 actor_loss=0.3009 critic_loss=92672782973.1555 entropy=17.5938 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 139680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477967.9 mean_steps=14.6
|
|
[Episode 139690] reward=-114696566.2 actor_loss=0.2518 critic_loss=85108573915.4286 entropy=17.6028 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 139700] reward=-122752091.3 actor_loss=0.3280 critic_loss=98647243889.7778 entropy=17.6066 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 139700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-646067.1 mean_steps=13.0
|
|
[Episode 139710] reward=-117999918.2 actor_loss=0.2798 critic_loss=93175777780.6222 entropy=17.6041 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 139720] reward=-121824876.0 actor_loss=0.3497 critic_loss=99084093116.6316 entropy=17.6094 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 139720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-582377.4 mean_steps=12.6
|
|
[Episode 139730] reward=-121908060.2 actor_loss=0.2682 critic_loss=97625286610.4889 entropy=17.5995 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 139740] reward=-120114356.3 actor_loss=0.3028 critic_loss=90985712298.6667 entropy=17.5947 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 139740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513591.5 mean_steps=14.1
|
|
[Episode 139750] reward=-118915368.8 actor_loss=0.2194 critic_loss=95852783934.5778 entropy=17.6312 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 139760] reward=-117867315.2 actor_loss=0.2191 critic_loss=95375643261.1555 entropy=17.6184 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 139760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-420457.4 mean_steps=14.5
|
|
[Episode 139770] reward=-121522827.5 actor_loss=0.3367 critic_loss=94311834146.1333 entropy=17.6228 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 139780] reward=-122567347.8 actor_loss=0.3011 critic_loss=96077109839.6444 entropy=17.6175 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 139780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-604131.5 mean_steps=13.0
|
|
[Episode 139790] reward=-124507623.9 actor_loss=0.2922 critic_loss=103924963503.5429 entropy=17.6191 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 139800] reward=-119223115.8 actor_loss=0.2809 critic_loss=92862788830.6087 entropy=17.6212 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 139800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471163.8 mean_steps=14.8
|
|
[Episode 139810] reward=-121704672.8 actor_loss=0.3836 critic_loss=95832089356.1905 entropy=17.6152 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 139820] reward=-118070285.2 actor_loss=0.1899 critic_loss=92260450676.3636 entropy=17.6249 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 139820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516784.8 mean_steps=14.4
|
|
[Episode 139830] reward=-121161530.3 actor_loss=0.3330 critic_loss=97420784799.2889 entropy=17.6252 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 139840] reward=-121086637.2 actor_loss=0.3707 critic_loss=97882880000.0000 entropy=17.6186 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 139840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482136.1 mean_steps=14.8
|
|
[Episode 139850] reward=-112940524.8 actor_loss=0.3415 critic_loss=89997083404.1905 entropy=17.6161 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 139860] reward=-126464911.4 actor_loss=0.2919 critic_loss=267431660622.7692 entropy=17.6135 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 139860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556580.0 mean_steps=13.4
|
|
[Episode 139870] reward=-117030360.5 actor_loss=0.3216 critic_loss=92545701569.4222 entropy=17.6164 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 139880] reward=-116283053.0 actor_loss=0.3119 critic_loss=90570256508.1212 entropy=17.6076 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 139880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-601358.8 mean_steps=13.6
|
|
[Episode 139890] reward=-121835084.6 actor_loss=0.2917 critic_loss=99195622042.7907 entropy=17.5918 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 139900] reward=-116169835.3 actor_loss=0.2311 critic_loss=99557102023.1111 entropy=17.5970 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 139900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398086.6 mean_steps=16.4
|
|
[Episode 139910] reward=-121523652.2 actor_loss=0.4412 critic_loss=101461879466.6667 entropy=17.5898 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 139920] reward=-119524519.1 actor_loss=0.3539 critic_loss=94791415515.4286 entropy=17.6098 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 139920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-598517.3 mean_steps=12.6
|
|
[Episode 139930] reward=-116214119.2 actor_loss=0.4214 critic_loss=96753161082.4348 entropy=17.6039 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 139940] reward=-120449017.0 actor_loss=0.3302 critic_loss=95206982906.3111 entropy=17.6013 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 139940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390916.3 mean_steps=15.0
|
|
[Episode 139950] reward=-116285758.4 actor_loss=0.2063 critic_loss=96796795335.1111 entropy=17.5955 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 139960] reward=-112933910.3 actor_loss=0.2824 critic_loss=87833195679.2889 entropy=17.5916 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 139960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534314.0 mean_steps=13.2
|
|
[Episode 139970] reward=-119392579.2 actor_loss=0.3323 critic_loss=92522921437.8667 entropy=17.6031 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 139980] reward=-120014376.5 actor_loss=0.2432 critic_loss=99661162046.4390 entropy=17.6098 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 139980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-405100.0 mean_steps=16.4
|
|
[Episode 139990] reward=-118703512.6 actor_loss=0.2192 critic_loss=97731342791.1111 entropy=17.6059 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 140000] reward=-115830669.4 actor_loss=0.2583 critic_loss=97053398873.9460 entropy=17.5948 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 140000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569679.7 mean_steps=13.8
|
|
[Episode 140010] reward=-113732407.5 actor_loss=0.3381 critic_loss=96955816813.7143 entropy=17.5990 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 140020] reward=-111250607.1 actor_loss=0.3142 critic_loss=97370155762.5263 entropy=17.5824 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 140020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599274.1 mean_steps=12.8
|
|
[Episode 140030] reward=-121655783.3 actor_loss=0.3371 critic_loss=169139979264.0000 entropy=17.5771 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 140040] reward=-113299118.7 actor_loss=0.3052 critic_loss=91690421283.3103 entropy=17.5759 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 140040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537696.8 mean_steps=14.3
|
|
[Episode 140050] reward=-120603556.8 actor_loss=0.1734 critic_loss=104433996361.1429 entropy=17.5659 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 140060] reward=-114505909.6 actor_loss=0.3704 critic_loss=91232037187.3684 entropy=17.5763 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 140060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-639919.3 mean_steps=12.2
|
|
[Episode 140070] reward=-118255554.4 actor_loss=0.3511 critic_loss=95606653558.1538 entropy=17.5828 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 140080] reward=-115819903.5 actor_loss=0.2866 critic_loss=95426811904.0000 entropy=17.5828 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 140080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-388765.3 mean_steps=15.7
|
|
[Episode 140090] reward=-111620750.4 actor_loss=0.5239 critic_loss=93166258537.4118 entropy=17.5978 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1523 front_blocked=0
|
|
[Episode 140100] reward=-120868643.9 actor_loss=0.2870 critic_loss=102056359134.6087 entropy=17.5943 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 140100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-511442.3 mean_steps=15.1
|
|
[Episode 140110] reward=-123095516.9 actor_loss=0.2143 critic_loss=101650203368.7273 entropy=17.5974 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 140120] reward=-117257295.5 actor_loss=0.2765 critic_loss=91366579053.7143 entropy=17.6239 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 140120] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-359092.4 mean_steps=16.9
|
|
[Episode 140130] reward=-117877391.0 actor_loss=0.2618 critic_loss=88593410912.7111 entropy=17.6097 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 140140] reward=-126500999.1 actor_loss=0.2771 critic_loss=103699719054.2222 entropy=17.5991 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 140140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515208.8 mean_steps=14.3
|
|
[Episode 140150] reward=-118845998.7 actor_loss=0.2240 critic_loss=93883168194.5600 entropy=17.6195 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 140160] reward=-116751948.8 actor_loss=0.3850 critic_loss=91976085985.8824 entropy=17.6188 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 140160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-394780.7 mean_steps=15.3
|
|
[Episode 140170] reward=-119454945.0 actor_loss=0.4252 critic_loss=94906339913.1429 entropy=17.6190 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 140180] reward=-117775102.3 actor_loss=0.3860 critic_loss=99762233070.9333 entropy=17.6240 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 140180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441556.4 mean_steps=14.7
|
|
[Episode 140190] reward=-122289052.3 actor_loss=0.2013 critic_loss=94281528466.2857 entropy=17.6166 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 140200] reward=-121574211.4 actor_loss=0.3491 critic_loss=108315525120.0000 entropy=17.6163 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 140200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-625446.6 mean_steps=13.9
|
|
[Episode 140210] reward=-119155408.5 actor_loss=0.2459 critic_loss=103014158861.1282 entropy=17.6230 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 140220] reward=-122569007.9 actor_loss=0.3152 critic_loss=157790823219.2000 entropy=17.6262 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 140220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538774.8 mean_steps=14.3
|
|
[Episode 140230] reward=-116698771.3 actor_loss=0.3502 critic_loss=95370870784.0000 entropy=17.6324 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 140240] reward=-122074124.1 actor_loss=0.1364 critic_loss=96739922739.2000 entropy=17.6297 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 140240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539944.9 mean_steps=14.0
|
|
[Episode 140250] reward=-118770534.5 actor_loss=0.2553 critic_loss=108136809652.7059 entropy=17.6272 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 140260] reward=-116553010.7 actor_loss=0.3938 critic_loss=94819541333.3333 entropy=17.6364 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 140260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518855.8 mean_steps=14.0
|
|
[Episode 140270] reward=-123195392.9 actor_loss=0.2690 critic_loss=100099358492.4444 entropy=17.6538 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 140280] reward=-118136148.6 actor_loss=0.3087 critic_loss=96661201676.1905 entropy=17.6526 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 140280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518872.3 mean_steps=14.2
|
|
[Episode 140290] reward=-124119544.8 actor_loss=0.2438 critic_loss=100530245299.8919 entropy=17.6627 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 140300] reward=-116240791.2 actor_loss=0.3594 critic_loss=95179519590.4000 entropy=17.6593 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 140300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566985.4 mean_steps=13.2
|
|
[Episode 140310] reward=-118715802.1 actor_loss=0.4169 critic_loss=94891384832.0000 entropy=17.6631 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 140320] reward=-116599278.9 actor_loss=0.3009 critic_loss=96836829866.6667 entropy=17.6630 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 140320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568734.9 mean_steps=13.5
|
|
[Episode 140330] reward=-113067410.5 actor_loss=0.2801 critic_loss=86981447771.0222 entropy=17.6561 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 140340] reward=-120358895.3 actor_loss=0.3092 critic_loss=94296844242.4889 entropy=17.6400 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 140340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567549.3 mean_steps=13.5
|
|
[Episode 140350] reward=-124977057.2 actor_loss=0.3312 critic_loss=338478465752.1778 entropy=17.6334 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 140360] reward=-112186022.4 actor_loss=0.3642 critic_loss=86894613215.1795 entropy=17.6405 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 140360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487353.1 mean_steps=14.0
|
|
[Episode 140370] reward=-116508238.5 actor_loss=0.2825 critic_loss=93288436895.2889 entropy=17.6484 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 140380] reward=-116775402.4 actor_loss=0.3666 critic_loss=93007078838.8571 entropy=17.6481 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 140380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-401403.8 mean_steps=14.1
|
|
[Episode 140390] reward=-124458432.8 actor_loss=0.2796 critic_loss=102797758358.9744 entropy=17.6560 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 140400] reward=-117284255.5 actor_loss=0.2754 critic_loss=94279927808.0000 entropy=17.6556 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 140400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441954.3 mean_steps=14.4
|
|
[Episode 140410] reward=-115786476.3 actor_loss=0.2353 critic_loss=95696549205.3333 entropy=17.6642 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 140420] reward=-129365353.3 actor_loss=0.3364 critic_loss=1348525195264.0000 entropy=17.6588 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 140420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531300.8 mean_steps=14.2
|
|
[Episode 140430] reward=-118402610.6 actor_loss=0.3559 critic_loss=94386660693.3333 entropy=17.6644 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 140440] reward=-122157823.5 actor_loss=0.2072 critic_loss=97172077869.1765 entropy=17.6549 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 140440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442556.2 mean_steps=14.2
|
|
[Episode 140450] reward=-120730571.8 actor_loss=0.3550 critic_loss=98714250267.6757 entropy=17.6522 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 140460] reward=-118222843.1 actor_loss=0.3038 critic_loss=91985950996.7568 entropy=17.6518 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 140460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458853.5 mean_steps=14.8
|
|
[Episode 140470] reward=-115849048.8 actor_loss=0.2850 critic_loss=92113290910.8965 entropy=17.6554 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 140480] reward=-119264570.9 actor_loss=0.2342 critic_loss=97554207695.2381 entropy=17.6605 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 140480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-401889.3 mean_steps=14.3
|
|
[Episode 140490] reward=-117458507.0 actor_loss=0.3361 critic_loss=99648589948.1212 entropy=17.6630 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 140500] reward=-117371202.1 actor_loss=0.2888 critic_loss=93587297985.4222 entropy=17.6517 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 140500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453735.4 mean_steps=15.6
|
|
[Episode 140510] reward=-118368389.1 actor_loss=0.2564 critic_loss=94746841193.0256 entropy=17.6340 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 140520] reward=-121990356.8 actor_loss=0.2904 critic_loss=95332897587.2000 entropy=17.6104 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 140520] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-697069.5 mean_steps=12.5
|
|
[Episode 140530] reward=-115720494.1 actor_loss=0.3019 critic_loss=89274009401.8065 entropy=17.6104 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 140540] reward=-119872944.7 actor_loss=0.2521 critic_loss=96243132006.4000 entropy=17.6232 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 140540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518947.6 mean_steps=14.1
|
|
[Episode 140550] reward=-117267265.6 actor_loss=0.3004 critic_loss=96428186055.1111 entropy=17.6168 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 140560] reward=-119592782.8 actor_loss=0.3735 critic_loss=100789750286.6286 entropy=17.6223 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 140560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529233.1 mean_steps=13.2
|
|
[Episode 140570] reward=-117023842.3 actor_loss=0.3013 critic_loss=94409267281.9200 entropy=17.6222 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 140580] reward=-124423288.0 actor_loss=0.1918 critic_loss=105993087946.1053 entropy=17.6267 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 140580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450845.2 mean_steps=15.4
|
|
[Episode 140590] reward=-119863490.0 actor_loss=0.2677 critic_loss=99646910945.8824 entropy=17.6210 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 140600] reward=-118604673.5 actor_loss=0.3489 critic_loss=90189627719.6800 entropy=17.6142 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 140600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471363.9 mean_steps=14.9
|
|
[Episode 140610] reward=-118567369.7 actor_loss=0.2443 critic_loss=97270194566.0952 entropy=17.6125 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 140620] reward=-121233099.3 actor_loss=0.2051 critic_loss=97003502203.5862 entropy=17.6151 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 140620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-631376.5 mean_steps=13.0
|
|
[Episode 140630] reward=-117066983.2 actor_loss=0.3485 critic_loss=92298743534.9333 entropy=17.6432 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 140640] reward=-118492018.1 actor_loss=0.2705 critic_loss=91585709670.4000 entropy=17.6333 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 140640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572286.3 mean_steps=12.7
|
|
[Episode 140650] reward=-116904932.4 actor_loss=0.2177 critic_loss=90062092970.6667 entropy=17.6432 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 140660] reward=-118994153.1 actor_loss=0.3049 critic_loss=102990155971.0476 entropy=17.6331 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 140660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454812.2 mean_steps=14.6
|
|
[Episode 140670] reward=-115693605.5 actor_loss=0.3537 critic_loss=90594637204.8372 entropy=17.6262 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 140680] reward=-115757229.6 actor_loss=0.2880 critic_loss=93137279299.3684 entropy=17.6122 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 140680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563017.9 mean_steps=13.7
|
|
[Episode 140690] reward=-118906936.4 actor_loss=0.2131 critic_loss=94282953159.1111 entropy=17.6153 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 140700] reward=-120940413.5 actor_loss=0.3296 critic_loss=101561453016.6154 entropy=17.6226 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 140700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589762.7 mean_steps=12.6
|
|
[Episode 140710] reward=-122014900.1 actor_loss=0.3635 critic_loss=105104295058.2857 entropy=17.6270 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 140720] reward=-116635641.6 actor_loss=0.3056 critic_loss=99064868217.2632 entropy=17.6168 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 140720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533322.1 mean_steps=14.4
|
|
[Episode 140730] reward=-114630055.8 actor_loss=0.3112 critic_loss=94379000180.3636 entropy=17.6133 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 140740] reward=-119695428.8 actor_loss=0.3871 critic_loss=100256915065.9048 entropy=17.6145 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 140740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-606462.6 mean_steps=13.6
|
|
[Episode 140750] reward=-122302236.3 actor_loss=0.3351 critic_loss=99350781952.0000 entropy=17.6047 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 140760] reward=-125590044.9 actor_loss=0.2763 critic_loss=104131190784.0000 entropy=17.6042 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 140760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-634662.7 mean_steps=12.9
|
|
[Episode 140770] reward=-118559788.0 actor_loss=0.2646 critic_loss=94871526487.7714 entropy=17.6052 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 140780] reward=-120120714.3 actor_loss=0.2292 critic_loss=96972064358.4000 entropy=17.6024 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 140780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508445.9 mean_steps=13.9
|
|
[Episode 140790] reward=-119811342.8 actor_loss=0.2092 critic_loss=93360463325.8667 entropy=17.6061 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 140800] reward=-121992861.7 actor_loss=0.2415 critic_loss=96994423876.2667 entropy=17.6060 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 140800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572631.0 mean_steps=12.1
|
|
[Episode 140810] reward=-119878698.8 actor_loss=0.3207 critic_loss=97485097252.5714 entropy=17.6085 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 140820] reward=-120005662.9 actor_loss=0.2433 critic_loss=97546087992.8889 entropy=17.5955 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 140820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-528517.3 mean_steps=15.3
|
|
[Episode 140830] reward=-126767993.0 actor_loss=0.2654 critic_loss=103258419317.0286 entropy=17.6040 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 140840] reward=-117165904.1 actor_loss=0.3947 critic_loss=96580444641.8824 entropy=17.6002 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 140840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-593860.5 mean_steps=11.7
|
|
[Episode 140850] reward=-118466270.7 actor_loss=0.3086 critic_loss=94490421384.5333 entropy=17.6038 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 140860] reward=-115456418.2 actor_loss=0.3168 critic_loss=95809534262.3030 entropy=17.6154 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 140860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521572.0 mean_steps=15.2
|
|
[Episode 140870] reward=-119765761.4 actor_loss=0.3243 critic_loss=96309552469.3333 entropy=17.6168 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 140880] reward=-120121495.3 actor_loss=0.3160 critic_loss=99481948532.3636 entropy=17.6153 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 140880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544637.3 mean_steps=14.2
|
|
[Episode 140890] reward=-119898484.2 actor_loss=0.3268 critic_loss=96355398577.2308 entropy=17.6025 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 140900] reward=-122065282.9 actor_loss=0.2256 critic_loss=96305444484.7407 entropy=17.5911 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 140900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-485088.9 mean_steps=15.7
|
|
[Episode 140910] reward=-125845631.1 actor_loss=0.2433 critic_loss=99040767353.2632 entropy=17.6008 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 140920] reward=-114738191.5 actor_loss=0.2653 critic_loss=91073916059.1515 entropy=17.5971 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 140920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528510.1 mean_steps=14.2
|
|
[Episode 140930] reward=-121183262.3 actor_loss=0.2377 critic_loss=96111803505.7778 entropy=17.5804 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 140940] reward=-119230765.6 actor_loss=0.2301 critic_loss=94665840640.0000 entropy=17.5849 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 140940] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-750814.3 mean_steps=10.6
|
|
[Episode 140950] reward=-115848166.9 actor_loss=0.3381 critic_loss=97310676992.0000 entropy=17.5849 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 140960] reward=-119556667.1 actor_loss=0.2819 critic_loss=95113129502.1176 entropy=17.5819 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 140960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557101.0 mean_steps=13.5
|
|
[Episode 140970] reward=-119095851.3 actor_loss=0.1952 critic_loss=92494227456.0000 entropy=17.5808 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 140980] reward=-113062661.7 actor_loss=0.4030 critic_loss=93150845898.1053 entropy=17.5744 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 140980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525898.1 mean_steps=13.9
|
|
[Episode 140990] reward=-119931995.9 actor_loss=0.3075 critic_loss=95803194026.6667 entropy=17.5764 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 141000] reward=-118612078.2 actor_loss=0.4271 critic_loss=93403717259.6364 entropy=17.5786 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Eval 141000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-557992.0 mean_steps=14.2
|
|
[Episode 141010] reward=-126049353.6 actor_loss=0.2591 critic_loss=106512804249.6000 entropy=17.5746 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 141020] reward=-118221543.5 actor_loss=0.3138 critic_loss=95501868828.4444 entropy=17.5819 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 141020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-595331.4 mean_steps=15.2
|
|
[Episode 141030] reward=-115406259.7 actor_loss=0.3506 critic_loss=94369346027.5200 entropy=17.5862 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 141040] reward=-116541247.2 actor_loss=0.3178 critic_loss=111020425216.0000 entropy=17.5879 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 141040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561153.7 mean_steps=13.7
|
|
[Episode 141050] reward=-119946212.8 actor_loss=0.2569 critic_loss=94228588619.8519 entropy=17.5923 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 141060] reward=-123302922.1 actor_loss=0.2644 critic_loss=304116695040.0000 entropy=17.5852 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 141060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-360505.5 mean_steps=16.8
|
|
[Episode 141070] reward=-114635637.3 actor_loss=0.2243 critic_loss=90324527786.6667 entropy=17.6026 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 141080] reward=-121725800.9 actor_loss=0.2063 critic_loss=98002399356.1212 entropy=17.5917 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 141080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-391355.6 mean_steps=15.2
|
|
[Episode 141090] reward=-121190905.2 actor_loss=0.2842 critic_loss=95559998902.8571 entropy=17.5864 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 141100] reward=-113495134.0 actor_loss=0.2830 critic_loss=94443251076.4138 entropy=17.5938 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 141100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-423529.8 mean_steps=14.4
|
|
[Episode 141110] reward=-114889366.9 actor_loss=0.2453 critic_loss=92390331103.1795 entropy=17.5893 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 141120] reward=-124071780.2 actor_loss=0.2061 critic_loss=98252607803.0769 entropy=17.5686 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 141120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532070.2 mean_steps=13.2
|
|
[Episode 141130] reward=-120490073.5 actor_loss=0.2999 critic_loss=100784796725.8947 entropy=17.5519 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 141140] reward=-117888706.3 actor_loss=0.2741 critic_loss=95847891688.7273 entropy=17.5580 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 141140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485552.1 mean_steps=14.0
|
|
[Episode 141150] reward=-119926875.1 actor_loss=0.2358 critic_loss=101172637988.5714 entropy=17.5654 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 141160] reward=-117844503.7 actor_loss=0.3117 critic_loss=94927469568.0000 entropy=17.5626 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 141160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459288.0 mean_steps=14.6
|
|
[Episode 141170] reward=-133071700.4 actor_loss=0.3078 critic_loss=798707608234.6666 entropy=17.5580 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 141180] reward=-120724722.4 actor_loss=0.2801 critic_loss=90932644085.7600 entropy=17.5633 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 141180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550754.4 mean_steps=13.4
|
|
[Episode 141190] reward=-122174207.1 actor_loss=0.2420 critic_loss=98159375629.4737 entropy=17.5604 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 141200] reward=-123728332.0 actor_loss=0.3189 critic_loss=101262191177.1429 entropy=17.5781 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 141200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553606.9 mean_steps=14.1
|
|
[Episode 141210] reward=-119446099.2 actor_loss=0.3487 critic_loss=95755426394.3529 entropy=17.5748 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 141220] reward=-122896120.8 actor_loss=0.2757 critic_loss=102866551552.0000 entropy=17.5810 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 141220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-355327.9 mean_steps=15.8
|
|
[Episode 141230] reward=-120970233.7 actor_loss=0.3125 critic_loss=100319514472.2963 entropy=17.5824 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 141240] reward=-118592652.2 actor_loss=0.3757 critic_loss=96572279322.9474 entropy=17.5861 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 141240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483864.0 mean_steps=15.1
|
|
[Episode 141250] reward=-125857750.2 actor_loss=0.2224 critic_loss=103317123584.0000 entropy=17.5812 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 141260] reward=-118447682.3 actor_loss=0.2525 critic_loss=94007662525.9355 entropy=17.5941 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 141260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-589652.6 mean_steps=14.4
|
|
[Episode 141270] reward=-120648683.8 actor_loss=0.3077 critic_loss=96700981930.6667 entropy=17.6044 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 141280] reward=-119169902.8 actor_loss=0.2156 critic_loss=97902546488.8889 entropy=17.5937 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 141280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544101.7 mean_steps=14.3
|
|
[Episode 141290] reward=-123675521.7 actor_loss=0.1788 critic_loss=99663810560.0000 entropy=17.5821 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 141300] reward=-119024739.0 actor_loss=0.2510 critic_loss=90929296935.3846 entropy=17.5801 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 141300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560608.7 mean_steps=13.6
|
|
[Episode 141310] reward=-113969165.0 actor_loss=0.2758 critic_loss=89117010890.1053 entropy=17.5801 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 141320] reward=-123222721.2 actor_loss=0.3504 critic_loss=104806078464.0000 entropy=17.5827 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 141320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-338886.7 mean_steps=15.8
|
|
[Episode 141330] reward=-122830750.6 actor_loss=0.1930 critic_loss=102525485836.1905 entropy=17.5957 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 141340] reward=-123362819.3 actor_loss=0.2453 critic_loss=100368771832.6857 entropy=17.6012 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 141340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-608611.9 mean_steps=13.4
|
|
[Episode 141350] reward=-120625059.7 actor_loss=0.3070 critic_loss=98221281553.0667 entropy=17.5999 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 141360] reward=-120288602.0 actor_loss=0.2853 critic_loss=100438465779.8095 entropy=17.6008 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 141360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-491036.2 mean_steps=15.8
|
|
[Episode 141370] reward=-117487332.7 actor_loss=0.3369 critic_loss=91609268224.0000 entropy=17.6025 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 141380] reward=-115414113.0 actor_loss=0.2815 critic_loss=95035351255.5789 entropy=17.6082 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 141380] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-649078.0 mean_steps=12.1
|
|
[Episode 141390] reward=-112706804.5 actor_loss=0.3111 critic_loss=95721291385.9048 entropy=17.6037 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 141400] reward=-118923583.7 actor_loss=0.2054 critic_loss=111284291584.0000 entropy=17.5985 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 141400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-506689.5 mean_steps=15.9
|
|
[Episode 141410] reward=-123786224.5 actor_loss=0.2956 critic_loss=176608506171.0769 entropy=17.6058 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 141420] reward=-115904891.3 actor_loss=0.3626 critic_loss=91402346496.0000 entropy=17.6062 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 141420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552538.3 mean_steps=13.2
|
|
[Episode 141430] reward=-116239749.6 actor_loss=0.4234 critic_loss=89069716868.4138 entropy=17.6020 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 141440] reward=-116000438.8 actor_loss=0.3631 critic_loss=94396488282.3529 entropy=17.6050 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 141440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504209.1 mean_steps=13.9
|
|
[Episode 141450] reward=-119164450.3 actor_loss=0.3159 critic_loss=96927028653.4194 entropy=17.6124 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 141460] reward=-121985640.6 actor_loss=0.2658 critic_loss=102361784801.8824 entropy=17.6168 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 141460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444331.7 mean_steps=15.7
|
|
[Episode 141470] reward=-117477046.2 actor_loss=0.3508 critic_loss=96065566630.9565 entropy=17.6316 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 141480] reward=-114328516.9 actor_loss=0.3279 critic_loss=89694332173.4737 entropy=17.6370 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 141480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454045.6 mean_steps=15.6
|
|
[Episode 141490] reward=-117740441.4 actor_loss=0.1823 critic_loss=94598134247.6190 entropy=17.6487 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 141500] reward=-118751354.3 actor_loss=0.2209 critic_loss=95835286966.8571 entropy=17.6441 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 141500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473073.2 mean_steps=14.6
|
|
[Episode 141510] reward=-118712639.3 actor_loss=0.3841 critic_loss=100112495567.2381 entropy=17.6443 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 141520] reward=-120748040.7 actor_loss=0.2755 critic_loss=101766115913.1429 entropy=17.6393 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 141520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-572393.7 mean_steps=14.3
|
|
[Episode 141530] reward=-122967746.8 actor_loss=0.2833 critic_loss=95360766976.0000 entropy=17.6494 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 141540] reward=-120205203.1 actor_loss=0.2673 critic_loss=96964928892.3428 entropy=17.6591 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 141540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392632.4 mean_steps=15.9
|
|
[Episode 141550] reward=-133626790.0 actor_loss=0.3212 critic_loss=1072222165037.5111 entropy=17.6538 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 141560] reward=-131726209.1 actor_loss=0.3007 critic_loss=706822697695.1794 entropy=17.6686 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 141560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-404289.8 mean_steps=16.1
|
|
[Episode 141570] reward=-120828978.5 actor_loss=0.2679 critic_loss=99279296853.3333 entropy=17.6928 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 141580] reward=-195890949.0 actor_loss=0.7483 critic_loss=28420658010521.6016 entropy=17.6864 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 141580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467209.3 mean_steps=14.8
|
|
[Episode 141590] reward=-114017474.7 actor_loss=0.3235 critic_loss=91676395838.5778 entropy=17.6877 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 141600] reward=-120538276.6 actor_loss=0.2130 critic_loss=94621668329.2444 entropy=17.6785 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 141600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587540.9 mean_steps=13.6
|
|
[Episode 141610] reward=-121052049.8 actor_loss=0.3496 critic_loss=97419635370.6667 entropy=17.6740 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 141620] reward=-119348429.4 actor_loss=0.2627 critic_loss=97573537382.4000 entropy=17.6712 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 141620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457084.2 mean_steps=15.3
|
|
[Episode 141630] reward=-124664706.9 actor_loss=0.2210 critic_loss=104003690496.0000 entropy=17.6702 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 141640] reward=-126722287.9 actor_loss=0.1465 critic_loss=101114134710.0444 entropy=17.6654 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 141640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447105.7 mean_steps=15.2
|
|
[Episode 141650] reward=-122334194.2 actor_loss=0.3282 critic_loss=100935374004.7059 entropy=17.6597 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 141660] reward=-122965598.8 actor_loss=0.2822 critic_loss=98868694379.3548 entropy=17.6655 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 141660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567941.7 mean_steps=12.5
|
|
[Episode 141670] reward=-119494590.3 actor_loss=0.2830 critic_loss=104988709205.3333 entropy=17.6676 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 141680] reward=-115422518.5 actor_loss=0.2823 critic_loss=91778694952.4211 entropy=17.6658 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 141680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390684.6 mean_steps=14.9
|
|
[Episode 141690] reward=-123024958.9 actor_loss=0.2029 critic_loss=101434543074.7429 entropy=17.6646 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 141700] reward=-115562724.0 actor_loss=0.3331 critic_loss=99322437275.8261 entropy=17.6602 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 141700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474224.6 mean_steps=14.6
|
|
[Episode 141710] reward=-120425807.0 actor_loss=0.2187 critic_loss=194428261469.0909 entropy=17.6618 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 141720] reward=-131183052.4 actor_loss=0.4740 critic_loss=1480898624443.7334 entropy=17.6731 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 141720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-622998.9 mean_steps=12.8
|
|
[Episode 141730] reward=-121550123.2 actor_loss=0.2633 critic_loss=98060632473.6000 entropy=17.6869 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 141740] reward=-117307403.3 actor_loss=0.3465 critic_loss=102329814616.2759 entropy=17.7192 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 141740] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-382580.7 mean_steps=17.9
|
|
[Episode 141750] reward=-118173327.9 actor_loss=0.2938 critic_loss=103290617673.9556 entropy=17.7119 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 141760] reward=-116293753.8 actor_loss=0.2682 critic_loss=88081392162.1333 entropy=17.7126 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 141760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536087.6 mean_steps=14.0
|
|
[Episode 141770] reward=-119381255.4 actor_loss=0.3299 critic_loss=97036763363.5556 entropy=17.7117 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 141780] reward=-113106417.1 actor_loss=0.3347 critic_loss=89313065551.6444 entropy=17.7106 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 141780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-606692.1 mean_steps=13.3
|
|
[Episode 141790] reward=-128608752.3 actor_loss=0.3154 critic_loss=706100838400.0000 entropy=17.7059 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 141800] reward=-118669848.2 actor_loss=0.2316 critic_loss=112357891210.3784 entropy=17.7046 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 141800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580762.7 mean_steps=12.7
|
|
[Episode 141810] reward=-119378931.4 actor_loss=0.3625 critic_loss=106137696119.4667 entropy=17.6980 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 141820] reward=-121364616.7 actor_loss=0.3338 critic_loss=119115458087.3846 entropy=17.6955 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 141820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-568476.3 mean_steps=14.3
|
|
[Episode 141830] reward=-118960733.5 actor_loss=0.2740 critic_loss=92695563667.3939 entropy=17.7143 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 141840] reward=-116605236.4 actor_loss=0.2734 critic_loss=90785270883.0968 entropy=17.7107 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 141840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-665756.6 mean_steps=12.3
|
|
[Episode 141850] reward=-113767439.3 actor_loss=0.2628 critic_loss=102375644842.6667 entropy=17.7124 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 141860] reward=-119452222.5 actor_loss=0.2914 critic_loss=95213076197.5172 entropy=17.7042 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 141860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505583.8 mean_steps=13.1
|
|
[Episode 141870] reward=-123172802.8 actor_loss=0.2786 critic_loss=112352766313.4118 entropy=17.6979 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 141880] reward=-121546846.4 actor_loss=0.1755 critic_loss=96867843315.8095 entropy=17.6967 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 141880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-631374.5 mean_steps=14.0
|
|
[Episode 141890] reward=-119503103.2 actor_loss=0.3278 critic_loss=94638870900.3636 entropy=17.6884 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 141900] reward=-117355289.7 actor_loss=0.2928 critic_loss=89911181312.0000 entropy=17.6760 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 141900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-607071.1 mean_steps=11.9
|
|
[Episode 141910] reward=-127357890.7 actor_loss=0.2605 critic_loss=114693419766.5185 entropy=17.6733 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 141920] reward=-116951189.2 actor_loss=0.3216 critic_loss=96553980723.2000 entropy=17.6765 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 141920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-387753.8 mean_steps=14.1
|
|
[Episode 141930] reward=-116128604.5 actor_loss=0.1745 critic_loss=91620210567.5294 entropy=17.6773 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 141940] reward=-122749605.7 actor_loss=0.2439 critic_loss=95549243684.5714 entropy=17.6712 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 141940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-572798.9 mean_steps=13.2
|
|
[Episode 141950] reward=-116105936.7 actor_loss=0.2499 critic_loss=93558114393.0435 entropy=17.6714 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 141960] reward=-122010368.1 actor_loss=0.2862 critic_loss=99843579904.0000 entropy=17.6654 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 141960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458897.5 mean_steps=14.6
|
|
[Episode 141970] reward=-121638223.8 actor_loss=0.3650 critic_loss=108178682228.3636 entropy=17.6564 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 141980] reward=-145194911.7 actor_loss=0.2246 critic_loss=2817951986847.2891 entropy=17.6683 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 141980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-610011.0 mean_steps=13.4
|
|
[Episode 141990] reward=-120910660.1 actor_loss=0.2036 critic_loss=96416769398.6341 entropy=17.6654 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 142000] reward=-118257429.9 actor_loss=0.2299 critic_loss=94955660668.3428 entropy=17.6632 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 142000] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-323910.6 mean_steps=17.2
|
|
[Episode 142010] reward=-120582869.4 actor_loss=0.2850 critic_loss=95042892148.3636 entropy=17.6636 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 142020] reward=-122840198.4 actor_loss=0.2110 critic_loss=101295515337.6970 entropy=17.6693 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 142020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-386870.9 mean_steps=16.6
|
|
[Episode 142030] reward=-118969269.5 actor_loss=0.3539 critic_loss=101931872876.6061 entropy=17.6719 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 142040] reward=-120400417.5 actor_loss=0.2934 critic_loss=96961223186.9630 entropy=17.6618 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 142040] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-646662.1 mean_steps=11.1
|
|
[Episode 142050] reward=-120250204.2 actor_loss=0.2774 critic_loss=94601418306.7826 entropy=17.6709 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 142060] reward=-120373172.7 actor_loss=0.2090 critic_loss=99897324465.2308 entropy=17.6710 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 142060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587587.4 mean_steps=13.4
|
|
[Episode 142070] reward=-119695702.5 actor_loss=0.2291 critic_loss=100648881629.8667 entropy=17.6713 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 142080] reward=-123050393.9 actor_loss=0.1749 critic_loss=101622641095.1111 entropy=17.6775 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 142080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463297.9 mean_steps=14.8
|
|
[Episode 142090] reward=-118657253.9 actor_loss=0.2837 critic_loss=94352166502.4000 entropy=17.6860 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 142100] reward=-121506776.8 actor_loss=0.2333 critic_loss=103551574357.3333 entropy=17.6903 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 142100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-643189.5 mean_steps=13.1
|
|
[Episode 142110] reward=-125615366.4 actor_loss=0.2653 critic_loss=118151599157.8947 entropy=17.6924 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 142120] reward=-117468057.5 actor_loss=0.3027 critic_loss=94493766451.2000 entropy=17.6919 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 142120] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-311999.0 mean_steps=16.4
|
|
[Episode 142130] reward=-117533827.9 actor_loss=0.2494 critic_loss=91777691374.9333 entropy=17.7068 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 142140] reward=-122743527.7 actor_loss=0.2465 critic_loss=97264617472.0000 entropy=17.7086 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 142140] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-642800.9 mean_steps=12.2
|
|
[Episode 142150] reward=-118285548.4 actor_loss=0.3269 critic_loss=120210678033.0667 entropy=17.7102 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 142160] reward=-122148295.5 actor_loss=0.2894 critic_loss=97752202240.0000 entropy=17.7044 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 142160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-1308534.4 mean_steps=18.2
|
|
[Episode 142170] reward=-116452467.3 actor_loss=0.3188 critic_loss=91091598579.8095 entropy=17.6986 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 142180] reward=-125227875.6 actor_loss=0.2677 critic_loss=100805023061.3333 entropy=17.6954 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 142180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371652.9 mean_steps=15.9
|
|
[Episode 142190] reward=-118533712.8 actor_loss=0.3282 critic_loss=94924582274.8445 entropy=17.6952 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 142200] reward=-123968096.6 actor_loss=0.2924 critic_loss=101201621538.1333 entropy=17.6900 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 142200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-632890.5 mean_steps=12.8
|
|
[Episode 142210] reward=-119402690.3 actor_loss=0.2372 critic_loss=95909247853.7143 entropy=17.6912 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 142220] reward=-121833020.6 actor_loss=0.2771 critic_loss=99568516300.8000 entropy=17.6758 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 142220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553305.0 mean_steps=14.6
|
|
[Episode 142230] reward=-114465286.7 actor_loss=0.4570 critic_loss=90144252294.0952 entropy=17.6750 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Episode 142240] reward=-117263432.8 actor_loss=0.3839 critic_loss=100680643379.2000 entropy=17.6734 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 142240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532557.5 mean_steps=13.7
|
|
[Episode 142250] reward=-119855782.4 actor_loss=0.3352 critic_loss=99201972286.0606 entropy=17.6664 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 142260] reward=-115557299.2 actor_loss=0.3790 critic_loss=91796605269.3333 entropy=17.6697 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 142260] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-364367.7 mean_steps=16.6
|
|
[Episode 142270] reward=-119936169.2 actor_loss=0.3135 critic_loss=97679084748.8000 entropy=17.6798 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 142280] reward=-123312865.5 actor_loss=0.2342 critic_loss=93805708720.3556 entropy=17.6622 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 142280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-556249.2 mean_steps=14.4
|
|
[Episode 142290] reward=-112638402.1 actor_loss=0.2338 critic_loss=91133619768.8889 entropy=17.6602 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 142300] reward=-120027526.2 actor_loss=0.3989 critic_loss=98650955776.0000 entropy=17.6687 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 142300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487619.1 mean_steps=14.2
|
|
[Episode 142310] reward=-118321165.8 actor_loss=0.3231 critic_loss=98684211846.7368 entropy=17.6649 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 142320] reward=-120068516.5 actor_loss=0.2667 critic_loss=94907437532.2791 entropy=17.6520 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 142320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474377.1 mean_steps=14.6
|
|
[Episode 142330] reward=-120620698.7 actor_loss=0.3189 critic_loss=94704250738.7586 entropy=17.6542 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 142340] reward=-122985423.8 actor_loss=0.2984 critic_loss=116405931497.7391 entropy=17.6444 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 142340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402174.6 mean_steps=15.2
|
|
[Episode 142350] reward=-120959311.7 actor_loss=0.2168 critic_loss=95420234137.6000 entropy=17.6523 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 142360] reward=-122209441.2 actor_loss=0.2704 critic_loss=180516163420.1600 entropy=17.6503 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 142360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507357.9 mean_steps=13.7
|
|
[Episode 142370] reward=-121285252.3 actor_loss=0.2420 critic_loss=127380738211.8400 entropy=17.6316 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 142380] reward=-117984655.4 actor_loss=0.2397 critic_loss=93993318741.3333 entropy=17.6369 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 142380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-524476.0 mean_steps=14.6
|
|
[Episode 142390] reward=-117914620.2 actor_loss=0.1732 critic_loss=92249193503.0303 entropy=17.6546 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 142400] reward=-121531143.3 actor_loss=0.2587 critic_loss=95672340338.7586 entropy=17.6392 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 142400] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-614511.8 mean_steps=12.0
|
|
[Episode 142410] reward=-115884722.7 actor_loss=0.3141 critic_loss=93619338808.8889 entropy=17.6278 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 142420] reward=-119499699.9 actor_loss=0.2190 critic_loss=93975139802.5366 entropy=17.6142 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 142420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496270.6 mean_steps=13.8
|
|
[Episode 142430] reward=-124149991.6 actor_loss=0.2365 critic_loss=107469605915.6757 entropy=17.6156 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 142440] reward=-120102400.4 actor_loss=0.2820 critic_loss=96541555916.8000 entropy=17.6210 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 142440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-416048.8 mean_steps=16.1
|
|
[Episode 142450] reward=-117332078.6 actor_loss=0.2984 critic_loss=86942948498.2857 entropy=17.6102 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 142460] reward=-118957813.2 actor_loss=0.2838 critic_loss=94016901120.0000 entropy=17.6030 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 142460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-602722.0 mean_steps=13.9
|
|
[Episode 142470] reward=-119094810.7 actor_loss=0.3209 critic_loss=92527803323.7333 entropy=17.5947 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 142480] reward=-115119176.8 actor_loss=0.3434 critic_loss=92233750820.5714 entropy=17.6029 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 142480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523891.5 mean_steps=13.2
|
|
[Episode 142490] reward=-116683589.0 actor_loss=0.2389 critic_loss=100909563141.9535 entropy=17.6002 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 142500] reward=-118444050.6 actor_loss=0.3407 critic_loss=94254814373.1613 entropy=17.5795 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 142500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-429668.6 mean_steps=16.6
|
|
[Episode 142510] reward=-123219991.7 actor_loss=0.3284 critic_loss=98137109740.3077 entropy=17.5772 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 142520] reward=-117800660.3 actor_loss=0.2384 critic_loss=89983996905.2444 entropy=17.5646 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 142520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537812.7 mean_steps=13.3
|
|
[Episode 142530] reward=-117764269.1 actor_loss=0.2791 critic_loss=97838030848.0000 entropy=17.5454 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 142540] reward=-121883891.2 actor_loss=0.2255 critic_loss=96983715202.8445 entropy=17.5304 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 142540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556595.0 mean_steps=13.2
|
|
[Episode 142550] reward=-114797029.5 actor_loss=0.2967 critic_loss=92013472426.6667 entropy=17.5244 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 142560] reward=-110339562.3 actor_loss=0.3437 critic_loss=87798859730.4889 entropy=17.5239 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 142560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378963.9 mean_steps=15.7
|
|
[Episode 142570] reward=-116470321.4 actor_loss=0.3937 critic_loss=92048331389.1555 entropy=17.5366 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 142580] reward=-120512719.5 actor_loss=0.2844 critic_loss=94010368728.1778 entropy=17.5307 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 142580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-429799.8 mean_steps=14.3
|
|
[Episode 142590] reward=-117600945.3 actor_loss=0.2564 critic_loss=93286702080.0000 entropy=17.5315 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 142600] reward=-123500122.3 actor_loss=0.2537 critic_loss=99360696177.1163 entropy=17.5223 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 142600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431417.3 mean_steps=15.4
|
|
[Episode 142610] reward=-116226982.9 actor_loss=0.2730 critic_loss=135501753184.7111 entropy=17.5124 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 142620] reward=-118587021.5 actor_loss=0.3043 critic_loss=93547320206.2222 entropy=17.5209 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 142620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563044.2 mean_steps=12.2
|
|
[Episode 142630] reward=-115730867.4 actor_loss=0.1813 critic_loss=87411997991.8222 entropy=17.5410 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 142640] reward=-121737675.1 actor_loss=0.3227 critic_loss=96189848090.9474 entropy=17.5419 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 142640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-611873.0 mean_steps=13.8
|
|
[Episode 142650] reward=-116654963.3 actor_loss=0.2504 critic_loss=96216728750.8293 entropy=17.5489 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 142660] reward=-109104964.0 actor_loss=0.3913 critic_loss=89050078139.7333 entropy=17.5455 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 142660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-392469.9 mean_steps=14.9
|
|
[Episode 142670] reward=-120558808.7 actor_loss=0.2559 critic_loss=102264077312.0000 entropy=17.5522 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 142680] reward=-118593222.0 actor_loss=0.3090 critic_loss=105577428309.3333 entropy=17.5576 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 142680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527712.9 mean_steps=14.2
|
|
[Episode 142690] reward=-117937959.0 actor_loss=0.2678 critic_loss=92130398448.9412 entropy=17.5550 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 142700] reward=-114303744.4 actor_loss=0.3077 critic_loss=93140876747.0345 entropy=17.5473 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 142700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-567296.4 mean_steps=11.8
|
|
[Episode 142710] reward=-118394595.7 actor_loss=0.2468 critic_loss=91635533960.5333 entropy=17.5493 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 142720] reward=-121487783.0 actor_loss=0.2506 critic_loss=98038698075.0222 entropy=17.5532 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 142720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540889.4 mean_steps=13.9
|
|
[Episode 142730] reward=-117150129.2 actor_loss=0.2742 critic_loss=94352889856.0000 entropy=17.5686 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 142740] reward=-115658117.5 actor_loss=0.1951 critic_loss=87019863335.8222 entropy=17.5690 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 142740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-531190.0 mean_steps=14.8
|
|
[Episode 142750] reward=-115819222.4 actor_loss=0.2921 critic_loss=90296386810.3111 entropy=17.5598 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 142760] reward=-117804675.2 actor_loss=0.3090 critic_loss=87884623598.9333 entropy=17.5510 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 142760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566641.3 mean_steps=12.7
|
|
[Episode 142770] reward=-118303214.7 actor_loss=0.2897 critic_loss=93831770316.8000 entropy=17.5637 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 142780] reward=-119509517.1 actor_loss=0.1556 critic_loss=93150882838.7556 entropy=17.5615 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 142780] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-424734.6 mean_steps=16.0
|
|
[Episode 142790] reward=-116700738.1 actor_loss=0.2066 critic_loss=94848312206.2222 entropy=17.5389 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 142800] reward=-121264882.0 actor_loss=0.2380 critic_loss=95079258339.5556 entropy=17.5302 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 142800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-514488.4 mean_steps=13.0
|
|
[Episode 142810] reward=-118207307.1 actor_loss=0.2610 critic_loss=93331799517.8667 entropy=17.5443 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 142820] reward=-115693567.3 actor_loss=0.3531 critic_loss=87388704312.8889 entropy=17.5468 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 142820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487546.5 mean_steps=13.9
|
|
[Episode 142830] reward=-113117076.9 actor_loss=0.3067 critic_loss=88699762733.5111 entropy=17.5574 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 142840] reward=-119493405.3 actor_loss=0.2486 critic_loss=97508980053.3333 entropy=17.5391 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 142840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-569857.1 mean_steps=14.8
|
|
[Episode 142850] reward=-119551749.8 actor_loss=0.3821 critic_loss=97475676478.5778 entropy=17.5289 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 142860] reward=-119607475.6 actor_loss=0.2387 critic_loss=93508336571.7333 entropy=17.5324 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 142860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-635424.4 mean_steps=13.2
|
|
[Episode 142870] reward=-119664390.3 actor_loss=0.3753 critic_loss=92872182603.2941 entropy=17.5270 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 142880] reward=-119722411.0 actor_loss=0.3138 critic_loss=94619889539.8788 entropy=17.5143 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 142880] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-300329.5 mean_steps=16.4
|
|
[Episode 142890] reward=-119154589.6 actor_loss=0.3647 critic_loss=97184993280.0000 entropy=17.5220 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 142900] reward=-121141057.6 actor_loss=0.2742 critic_loss=97357492315.0222 entropy=17.5225 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 142900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530912.4 mean_steps=13.5
|
|
[Episode 142910] reward=-132966628.2 actor_loss=0.3290 critic_loss=1192322588125.8667 entropy=17.5483 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 142920] reward=-118920969.6 actor_loss=0.2984 critic_loss=91316593550.2222 entropy=17.5530 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 142920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-579314.4 mean_steps=13.3
|
|
[Episode 142930] reward=-116844447.3 actor_loss=0.3689 critic_loss=98975318835.2000 entropy=17.5398 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 142940] reward=-121812048.0 actor_loss=0.2295 critic_loss=331032359647.1795 entropy=17.5478 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 142940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-384375.5 mean_steps=15.1
|
|
[Episode 142950] reward=-121006734.0 actor_loss=0.2223 critic_loss=93500762720.8649 entropy=17.5606 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 142960] reward=-118478915.8 actor_loss=0.3861 critic_loss=95209178726.4000 entropy=17.5560 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 142960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-472098.0 mean_steps=13.3
|
|
[Episode 142970] reward=-112879536.3 actor_loss=0.2690 critic_loss=91655788071.3846 entropy=17.5633 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 142980] reward=-119104807.2 actor_loss=0.3257 critic_loss=110743061845.3333 entropy=17.5615 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 142980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-539942.5 mean_steps=12.4
|
|
[Episode 142990] reward=-115679347.6 actor_loss=0.3469 critic_loss=94516773410.1333 entropy=17.5560 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 143000] reward=-118128849.5 actor_loss=0.2350 critic_loss=90464645757.1555 entropy=17.5498 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 143000] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-275883.6 mean_steps=17.2
|
|
[Episode 143010] reward=-123848550.0 actor_loss=0.1899 critic_loss=100814871119.6444 entropy=17.5581 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 143020] reward=-116708780.9 actor_loss=0.2948 critic_loss=93059156832.7111 entropy=17.5652 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 143020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573118.9 mean_steps=13.7
|
|
[Episode 143030] reward=-118758108.7 actor_loss=0.2026 critic_loss=90938849034.2400 entropy=17.5402 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 143040] reward=-121913194.4 actor_loss=0.2716 critic_loss=97137002741.7600 entropy=17.5452 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 143040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-475617.0 mean_steps=15.4
|
|
[Episode 143050] reward=-111158552.0 actor_loss=0.3287 critic_loss=89266914508.8000 entropy=17.5428 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 143060] reward=-121727528.6 actor_loss=0.2634 critic_loss=99733903252.2105 entropy=17.5435 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 143060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613159.5 mean_steps=12.5
|
|
[Episode 143070] reward=-121659283.6 actor_loss=0.4069 critic_loss=101307945828.8485 entropy=17.5338 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 143080] reward=-119395868.7 actor_loss=0.2373 critic_loss=95639877859.5556 entropy=17.5194 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 143080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449919.1 mean_steps=15.3
|
|
[Episode 143090] reward=-120902823.2 actor_loss=0.2310 critic_loss=99606550055.3846 entropy=17.5104 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 143100] reward=-119749351.3 actor_loss=0.3026 critic_loss=95530974503.8222 entropy=17.5015 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 143100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477252.6 mean_steps=13.8
|
|
[Episode 143110] reward=-115300062.8 actor_loss=0.3708 critic_loss=97031002298.1818 entropy=17.4931 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 143120] reward=-118426030.1 actor_loss=0.2574 critic_loss=93037911707.8261 entropy=17.5056 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 143120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521428.1 mean_steps=14.2
|
|
[Episode 143130] reward=-119061447.3 actor_loss=0.2727 critic_loss=94018127644.4444 entropy=17.5107 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 143140] reward=-116555575.9 actor_loss=0.3295 critic_loss=111102576584.6487 entropy=17.5042 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 143140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544446.1 mean_steps=12.9
|
|
[Episode 143150] reward=-118697907.2 actor_loss=0.3076 critic_loss=168639363674.3529 entropy=17.5191 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 143160] reward=-119413951.9 actor_loss=0.3772 critic_loss=232818286201.9048 entropy=17.5203 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 143160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-623057.6 mean_steps=12.8
|
|
[Episode 143170] reward=-116611113.8 actor_loss=0.2586 critic_loss=91385142914.9767 entropy=17.5057 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 143180] reward=-122156050.0 actor_loss=0.2117 critic_loss=148842441113.6000 entropy=17.5082 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 143180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-622739.1 mean_steps=13.0
|
|
[Episode 143190] reward=-122743609.8 actor_loss=0.2682 critic_loss=94379225987.1219 entropy=17.5117 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 143200] reward=-120777581.6 actor_loss=0.1985 critic_loss=98455193307.4286 entropy=17.4957 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 143200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-413217.0 mean_steps=16.4
|
|
[Episode 143210] reward=-116033589.2 actor_loss=0.2857 critic_loss=91982941184.0000 entropy=17.4969 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 143220] reward=-115960174.7 actor_loss=0.1991 critic_loss=98959339835.0769 entropy=17.5050 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 143220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523674.6 mean_steps=14.0
|
|
[Episode 143230] reward=-122916186.7 actor_loss=0.2583 critic_loss=99713603356.4444 entropy=17.5068 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 143240] reward=-117082226.6 actor_loss=0.2527 critic_loss=93550487738.1818 entropy=17.5104 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 143240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549182.6 mean_steps=14.7
|
|
[Episode 143250] reward=-116232646.7 actor_loss=0.3811 critic_loss=93708317660.6897 entropy=17.5027 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 143260] reward=-118087646.8 actor_loss=0.3001 critic_loss=92031830395.2593 entropy=17.4998 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 143260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574554.9 mean_steps=13.6
|
|
[Episode 143270] reward=-116674272.6 actor_loss=0.3291 critic_loss=92237406208.0000 entropy=17.4857 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 143280] reward=-118834184.4 actor_loss=0.3029 critic_loss=90801803537.0667 entropy=17.4674 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 143280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-593463.9 mean_steps=13.5
|
|
[Episode 143290] reward=-114147319.1 actor_loss=0.2387 critic_loss=87214968832.0000 entropy=17.4589 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 143300] reward=-119085540.0 actor_loss=0.2690 critic_loss=93036609536.0000 entropy=17.4757 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 143300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463350.8 mean_steps=13.6
|
|
[Episode 143310] reward=-113193390.3 actor_loss=0.2911 critic_loss=87055908571.4286 entropy=17.4616 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 143320] reward=-123339151.1 actor_loss=0.2485 critic_loss=100458281369.6000 entropy=17.4590 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 143320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-644628.8 mean_steps=12.2
|
|
[Episode 143330] reward=-116586380.3 actor_loss=0.3166 critic_loss=92289032432.9412 entropy=17.4570 approx_kl=0.0116 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 143340] reward=-113075579.3 actor_loss=0.3178 critic_loss=89741117978.9474 entropy=17.4585 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 143340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492929.8 mean_steps=13.9
|
|
[Episode 143350] reward=-120202819.6 actor_loss=0.3108 critic_loss=94110934630.4000 entropy=17.4580 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 143360] reward=-121443659.7 actor_loss=0.2838 critic_loss=102307643064.3200 entropy=17.4574 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 143360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-564670.7 mean_steps=14.2
|
|
[Episode 143370] reward=-123656945.7 actor_loss=0.3240 critic_loss=104027224473.6000 entropy=17.4446 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 143380] reward=-115000935.9 actor_loss=0.4155 critic_loss=83142355378.4242 entropy=17.4365 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Eval 143380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509919.8 mean_steps=13.8
|
|
[Episode 143390] reward=-118681578.5 actor_loss=0.2724 critic_loss=103127825612.8000 entropy=17.4384 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 143400] reward=-113375913.3 actor_loss=0.3409 critic_loss=83597127555.8788 entropy=17.4359 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 143400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528173.8 mean_steps=13.8
|
|
[Episode 143410] reward=-122232885.8 actor_loss=0.2366 critic_loss=95606448128.0000 entropy=17.4372 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 143420] reward=-117303190.2 actor_loss=0.2631 critic_loss=93572031374.2222 entropy=17.4473 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 143420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-392976.5 mean_steps=15.1
|
|
[Episode 143430] reward=-118096095.7 actor_loss=0.2686 critic_loss=91925634575.5152 entropy=17.4533 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 143440] reward=-113106175.8 actor_loss=0.3049 critic_loss=85211298710.0690 entropy=17.4596 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 143440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553221.0 mean_steps=14.4
|
|
[Episode 143450] reward=-123285727.3 actor_loss=0.2573 critic_loss=98901065412.9231 entropy=17.4623 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 143460] reward=-121370140.6 actor_loss=0.2803 critic_loss=99105716473.7561 entropy=17.4601 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 143460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489352.3 mean_steps=15.0
|
|
[Episode 143470] reward=-123079640.6 actor_loss=0.2397 critic_loss=97547258441.1429 entropy=17.4647 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 143480] reward=-116987834.5 actor_loss=0.3314 critic_loss=90475662963.6129 entropy=17.4705 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 143480] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-668132.9 mean_steps=11.2
|
|
[Episode 143490] reward=-122818926.4 actor_loss=0.3876 critic_loss=102266380288.0000 entropy=17.4582 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 143500] reward=-117284612.8 actor_loss=0.3351 critic_loss=94585416908.8000 entropy=17.4463 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 143500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540427.0 mean_steps=13.1
|
|
[Episode 143510] reward=-121295278.0 actor_loss=0.3304 critic_loss=99003278287.2381 entropy=17.4506 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 143520] reward=-121801084.6 actor_loss=0.2987 critic_loss=95912392567.4667 entropy=17.4453 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 143520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-314664.0 mean_steps=16.4
|
|
[Episode 143530] reward=-110134893.2 actor_loss=0.3601 critic_loss=82560489335.4667 entropy=17.4428 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 143540] reward=-116749328.1 actor_loss=0.2930 critic_loss=90691196427.3778 entropy=17.4414 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 143540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465922.8 mean_steps=15.2
|
|
[Episode 143550] reward=-117378255.5 actor_loss=0.2569 critic_loss=89369928226.1333 entropy=17.4441 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 143560] reward=-113567910.7 actor_loss=0.3457 critic_loss=91062251064.8889 entropy=17.4461 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 143560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-619558.7 mean_steps=13.8
|
|
[Episode 143570] reward=-111800058.8 actor_loss=0.3365 critic_loss=86082626628.2667 entropy=17.4596 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 143580] reward=-123660661.4 actor_loss=0.2115 critic_loss=95660928022.7556 entropy=17.4774 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 143580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555190.8 mean_steps=13.4
|
|
[Episode 143590] reward=-121522592.9 actor_loss=0.1413 critic_loss=93135577770.6667 entropy=17.5089 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 143600] reward=-119515844.8 actor_loss=0.2501 critic_loss=95948500619.6364 entropy=17.5278 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 143600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-629629.7 mean_steps=12.6
|
|
[Episode 143610] reward=-118288176.3 actor_loss=0.2468 critic_loss=88074450176.0000 entropy=17.5352 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 143620] reward=-113247382.6 actor_loss=0.3389 critic_loss=89974034523.0222 entropy=17.5251 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 143620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507888.4 mean_steps=13.0
|
|
[Episode 143630] reward=-124449132.3 actor_loss=0.0734 critic_loss=97416680399.2381 entropy=17.5275 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 143640] reward=-118860884.8 actor_loss=0.2555 critic_loss=93818082397.0909 entropy=17.5325 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 143640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571578.0 mean_steps=12.3
|
|
[Episode 143650] reward=-115569010.4 actor_loss=0.3390 critic_loss=94775906986.6667 entropy=17.5384 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 143660] reward=-115168591.9 actor_loss=0.3434 critic_loss=92342199910.4000 entropy=17.5374 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 143660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-652148.1 mean_steps=12.8
|
|
[Episode 143670] reward=-116194189.3 actor_loss=0.3449 critic_loss=90463928115.2000 entropy=17.5405 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 143680] reward=-122641408.4 actor_loss=0.3580 critic_loss=116186996121.6000 entropy=17.5575 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 143680] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-255710.6 mean_steps=18.0
|
|
[Episode 143690] reward=-118852896.6 actor_loss=0.2717 critic_loss=97248195041.8824 entropy=17.5534 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 143700] reward=-119773865.4 actor_loss=0.3314 critic_loss=95038861527.5789 entropy=17.5473 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 143700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-370141.9 mean_steps=16.6
|
|
[Episode 143710] reward=-117482137.3 actor_loss=0.2864 critic_loss=94128446857.8462 entropy=17.5533 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 143720] reward=-115215841.2 actor_loss=0.4185 critic_loss=93108415587.0968 entropy=17.5552 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 143720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505156.3 mean_steps=13.8
|
|
[Episode 143730] reward=-114225826.3 actor_loss=0.2863 critic_loss=101138055168.0000 entropy=17.5646 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 143740] reward=-118061045.7 actor_loss=0.1711 critic_loss=94710108866.2069 entropy=17.5620 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 143740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-379360.1 mean_steps=16.5
|
|
[Episode 143750] reward=-123766436.6 actor_loss=0.2796 critic_loss=98584224743.0244 entropy=17.5713 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 143760] reward=-118993355.1 actor_loss=0.2836 critic_loss=94781025043.6923 entropy=17.5702 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 143760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541213.1 mean_steps=13.1
|
|
[Episode 143770] reward=-122024674.0 actor_loss=0.1918 critic_loss=95680207023.5429 entropy=17.5770 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 143780] reward=-114386350.9 actor_loss=0.3973 critic_loss=89474452257.3913 entropy=17.5852 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 143780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-655918.1 mean_steps=12.9
|
|
[Episode 143790] reward=-119368557.5 actor_loss=0.2785 critic_loss=96818896310.8571 entropy=17.6076 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 143800] reward=-114293773.7 actor_loss=0.3412 critic_loss=91823424512.0000 entropy=17.6021 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 143800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597986.7 mean_steps=12.8
|
|
[Episode 143810] reward=-115829555.6 actor_loss=0.4331 critic_loss=92930028430.2222 entropy=17.6037 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 143820] reward=-116038800.9 actor_loss=0.3100 critic_loss=95586201600.0000 entropy=17.5891 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 143820] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-659393.7 mean_steps=11.2
|
|
[Episode 143830] reward=-122074957.1 actor_loss=0.3261 critic_loss=111242016229.0526 entropy=17.5810 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 143840] reward=-118480318.1 actor_loss=0.2402 critic_loss=87383767722.6667 entropy=17.5733 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 143840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-444374.1 mean_steps=14.3
|
|
[Episode 143850] reward=-116854969.4 actor_loss=0.2691 critic_loss=94702255265.6842 entropy=17.5823 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 143860] reward=-123684437.6 actor_loss=0.3076 critic_loss=394327109756.1212 entropy=17.5742 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 143860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-1393099.8 mean_steps=17.6
|
|
[Episode 143870] reward=-117336108.4 actor_loss=0.2702 critic_loss=90296658365.2174 entropy=17.5904 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 143880] reward=-118769019.6 actor_loss=0.3029 critic_loss=98833835091.0270 entropy=17.5841 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 143880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472649.1 mean_steps=14.8
|
|
[Episode 143890] reward=-119329074.9 actor_loss=0.3375 critic_loss=96443092867.8788 entropy=17.5883 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 143900] reward=-119359735.5 actor_loss=0.1792 critic_loss=97692050318.2222 entropy=17.6137 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 143900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-527491.9 mean_steps=11.3
|
|
[Episode 143910] reward=-122466688.6 actor_loss=0.2832 critic_loss=98029327226.4348 entropy=17.6259 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 143920] reward=-112639185.2 actor_loss=0.3518 critic_loss=92985388860.9524 entropy=17.6204 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 143920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-677438.8 mean_steps=13.1
|
|
[Episode 143930] reward=-120860768.0 actor_loss=0.3240 critic_loss=106299114540.5217 entropy=17.6155 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 143940] reward=-121130409.9 actor_loss=0.2001 critic_loss=156495200968.3478 entropy=17.6232 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 143940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-605989.0 mean_steps=12.7
|
|
[Episode 143950] reward=-114841528.4 actor_loss=0.2828 critic_loss=93994370914.4615 entropy=17.6111 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 143960] reward=-121834556.6 actor_loss=0.2867 critic_loss=99735683453.0233 entropy=17.6052 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 143960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495833.9 mean_steps=14.1
|
|
[Episode 143970] reward=-116498613.3 actor_loss=0.3381 critic_loss=91610862290.8235 entropy=17.6017 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 143980] reward=-119461300.7 actor_loss=0.2465 critic_loss=97196282096.9412 entropy=17.5834 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 143980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555754.7 mean_steps=13.2
|
|
[Episode 143990] reward=-117699445.7 actor_loss=0.2592 critic_loss=99205742160.8421 entropy=17.5909 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 144000] reward=-118811098.0 actor_loss=0.3141 critic_loss=93561290934.0444 entropy=17.5832 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 144000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546117.4 mean_steps=13.4
|
|
[Episode 144010] reward=-123115847.5 actor_loss=0.2355 critic_loss=192085549966.2222 entropy=17.5813 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 144020] reward=-117537650.2 actor_loss=0.2925 critic_loss=92215450009.6000 entropy=17.5726 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 144020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-635325.2 mean_steps=11.9
|
|
[Episode 144030] reward=-119401920.7 actor_loss=0.3253 critic_loss=93358739561.0256 entropy=17.5571 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 144040] reward=-121053542.9 actor_loss=0.2376 critic_loss=103172703678.3590 entropy=17.5789 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 144040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-711143.4 mean_steps=12.8
|
|
[Episode 144050] reward=-119356665.1 actor_loss=0.3624 critic_loss=95522210398.8148 entropy=17.5731 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 144060] reward=-117011648.8 actor_loss=0.3581 critic_loss=96001423951.6444 entropy=17.5706 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 144060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490758.4 mean_steps=13.7
|
|
[Episode 144070] reward=-120131649.9 actor_loss=0.2744 critic_loss=104947252428.8000 entropy=17.5729 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 144080] reward=-121055213.8 actor_loss=0.1825 critic_loss=95595671552.0000 entropy=17.5760 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 144080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553453.4 mean_steps=13.1
|
|
[Episode 144090] reward=-118630313.0 actor_loss=0.3298 critic_loss=94595029583.6444 entropy=17.5565 approx_kl=0.0106 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 144100] reward=-116709185.0 actor_loss=0.2715 critic_loss=91775388478.2703 entropy=17.5520 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 144100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435317.2 mean_steps=15.1
|
|
[Episode 144110] reward=-118712775.4 actor_loss=0.2601 critic_loss=96000833763.5556 entropy=17.5482 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 144120] reward=-120539827.6 actor_loss=0.2291 critic_loss=96894482841.6000 entropy=17.5385 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 144120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-499329.3 mean_steps=15.7
|
|
[Episode 144130] reward=-115586435.6 actor_loss=0.2675 critic_loss=92323242530.1333 entropy=17.5178 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 144140] reward=-122361950.4 actor_loss=0.2518 critic_loss=99543064393.9556 entropy=17.5208 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 144140] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-564085.0 mean_steps=11.9
|
|
[Episode 144150] reward=-117332398.9 actor_loss=0.3286 critic_loss=91613557613.7143 entropy=17.5145 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 144160] reward=-115573266.7 actor_loss=0.3201 critic_loss=94041784320.0000 entropy=17.4964 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 144160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-396339.7 mean_steps=16.9
|
|
[Episode 144170] reward=-112371510.1 actor_loss=0.3749 critic_loss=85528932352.0000 entropy=17.4923 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 144180] reward=-112857518.2 actor_loss=0.4017 critic_loss=89691495719.8222 entropy=17.5161 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 144180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-611622.5 mean_steps=12.6
|
|
[Episode 144190] reward=-116420392.9 actor_loss=0.3425 critic_loss=88266045537.5238 entropy=17.5142 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 144200] reward=-116656808.1 actor_loss=0.2958 critic_loss=90900359486.5778 entropy=17.5216 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 144200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-530876.9 mean_steps=12.0
|
|
[Episode 144210] reward=-120147539.0 actor_loss=0.2569 critic_loss=91595266275.5556 entropy=17.5035 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 144220] reward=-118344341.2 actor_loss=0.2139 critic_loss=89366701488.3556 entropy=17.5046 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 144220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-407766.8 mean_steps=14.9
|
|
[Episode 144230] reward=-117624928.6 actor_loss=0.2434 critic_loss=90883908858.3111 entropy=17.5234 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 144240] reward=-112315449.0 actor_loss=0.2354 critic_loss=88800504490.6667 entropy=17.5154 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 144240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-598975.5 mean_steps=11.8
|
|
[Episode 144250] reward=-115094707.0 actor_loss=0.3435 critic_loss=93962795885.7143 entropy=17.5368 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 144260] reward=-116749522.0 actor_loss=0.3164 critic_loss=93326434713.6000 entropy=17.5560 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 144260] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-689777.7 mean_steps=11.5
|
|
[Episode 144270] reward=-119012301.7 actor_loss=0.2315 critic_loss=95648520704.0000 entropy=17.5535 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 144280] reward=-117718689.6 actor_loss=0.2105 critic_loss=147102536908.8000 entropy=17.5415 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 144280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391686.0 mean_steps=15.4
|
|
[Episode 144290] reward=-118889569.1 actor_loss=0.3124 critic_loss=97096344644.2667 entropy=17.5366 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 144300] reward=-117807187.2 actor_loss=0.2135 critic_loss=90082197248.0000 entropy=17.5354 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 144300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454853.0 mean_steps=15.3
|
|
[Episode 144310] reward=-119776730.2 actor_loss=0.2946 critic_loss=96185784681.4118 entropy=17.5302 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 144320] reward=-115903568.5 actor_loss=0.2532 critic_loss=87727834286.8293 entropy=17.5206 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 144320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409869.2 mean_steps=15.7
|
|
[Episode 144330] reward=-117154962.6 actor_loss=0.3258 critic_loss=95284868437.3333 entropy=17.5056 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 144340] reward=-118219061.2 actor_loss=0.2954 critic_loss=92325588676.9231 entropy=17.5089 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 144340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547799.6 mean_steps=13.8
|
|
[Episode 144350] reward=-116673181.9 actor_loss=0.3019 critic_loss=87868774455.3513 entropy=17.5130 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 144360] reward=-117765905.9 actor_loss=0.3157 critic_loss=91826619331.7647 entropy=17.5178 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 144360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484207.6 mean_steps=13.2
|
|
[Episode 144370] reward=-111992237.6 actor_loss=0.3090 critic_loss=89447037289.4118 entropy=17.5208 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 144380] reward=-119811046.3 actor_loss=0.2862 critic_loss=94083851087.4483 entropy=17.5201 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 144380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-509422.3 mean_steps=12.7
|
|
[Episode 144390] reward=-114830809.8 actor_loss=0.2334 critic_loss=87040936618.6667 entropy=17.5386 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 144400] reward=-113395194.5 actor_loss=0.2523 critic_loss=87004684288.0000 entropy=17.5488 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 144400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496789.4 mean_steps=13.8
|
|
[Episode 144410] reward=-118842582.7 actor_loss=0.2787 critic_loss=92877470651.7333 entropy=17.5491 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 144420] reward=-118909292.6 actor_loss=0.2814 critic_loss=95966181218.4615 entropy=17.5452 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 144420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-468760.5 mean_steps=15.2
|
|
[Episode 144430] reward=-123628310.8 actor_loss=0.3064 critic_loss=103102189216.9143 entropy=17.5582 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 144440] reward=-119544372.4 actor_loss=0.2519 critic_loss=94757158229.3333 entropy=17.5574 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 144440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-530614.0 mean_steps=14.8
|
|
[Episode 144450] reward=-115068693.1 actor_loss=0.3012 critic_loss=88450682507.6364 entropy=17.5676 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 144460] reward=-116954493.0 actor_loss=0.2232 critic_loss=90963010992.3556 entropy=17.5846 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 144460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478137.2 mean_steps=13.5
|
|
[Episode 144470] reward=-116318789.6 actor_loss=0.2981 critic_loss=95424614855.1111 entropy=17.5714 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 144480] reward=-114301547.2 actor_loss=0.3102 critic_loss=91945596820.2105 entropy=17.5724 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 144480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-556655.9 mean_steps=14.2
|
|
[Episode 144490] reward=-119567194.0 actor_loss=0.2036 critic_loss=92937735190.7556 entropy=17.5791 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 144500] reward=-116040546.6 actor_loss=0.2939 critic_loss=94074040483.8400 entropy=17.5728 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 144500] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-637785.8 mean_steps=11.1
|
|
[Episode 144510] reward=-115746064.7 actor_loss=0.3228 critic_loss=98252563342.2222 entropy=17.5653 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 144520] reward=-120475583.4 actor_loss=0.3226 critic_loss=95833494708.7059 entropy=17.5601 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 144520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577387.0 mean_steps=12.4
|
|
[Episode 144530] reward=-116481541.2 actor_loss=0.3250 critic_loss=92344607092.3636 entropy=17.5667 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 144540] reward=-113809169.9 actor_loss=0.4060 critic_loss=88004443249.7778 entropy=17.5681 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 144540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-499212.1 mean_steps=15.1
|
|
[Episode 144550] reward=-115721512.7 actor_loss=0.3288 critic_loss=88479042218.6667 entropy=17.5654 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 144560] reward=-117623498.8 actor_loss=0.2185 critic_loss=91697177668.2667 entropy=17.5591 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 144560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458091.6 mean_steps=14.7
|
|
[Episode 144570] reward=-121348204.3 actor_loss=0.3560 critic_loss=95151548092.6316 entropy=17.5700 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 144580] reward=-121134214.6 actor_loss=0.2816 critic_loss=95405550040.6154 entropy=17.5507 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 144580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496826.9 mean_steps=13.8
|
|
[Episode 144590] reward=-126273213.6 actor_loss=0.2200 critic_loss=133245352527.6444 entropy=17.5511 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 144600] reward=-110535504.9 actor_loss=0.3510 critic_loss=89083632718.7692 entropy=17.5489 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 144600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-461745.6 mean_steps=12.8
|
|
[Episode 144610] reward=-124017086.0 actor_loss=0.2440 critic_loss=102126392292.3243 entropy=17.5555 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 144620] reward=-117606145.4 actor_loss=0.2805 critic_loss=92309406937.2121 entropy=17.5479 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 144620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505950.7 mean_steps=13.0
|
|
[Episode 144630] reward=-113613790.0 actor_loss=0.3806 critic_loss=85082327332.5714 entropy=17.5504 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 144640] reward=-116883375.3 actor_loss=0.2913 critic_loss=101552363074.7826 entropy=17.5611 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 144640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472636.5 mean_steps=14.6
|
|
[Episode 144650] reward=-120248732.0 actor_loss=0.3437 critic_loss=95278734774.8571 entropy=17.5560 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 144660] reward=-120145349.9 actor_loss=0.3504 critic_loss=92547344856.6154 entropy=17.5679 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 144660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474884.2 mean_steps=13.7
|
|
[Episode 144670] reward=-119800875.1 actor_loss=0.2380 critic_loss=91011145435.4286 entropy=17.5666 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 144680] reward=-121541278.6 actor_loss=0.2767 critic_loss=95357680951.6522 entropy=17.5721 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 144680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440694.8 mean_steps=14.4
|
|
[Episode 144690] reward=-113105413.9 actor_loss=0.3026 critic_loss=84248556813.4737 entropy=17.5732 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 144700] reward=-119042640.7 actor_loss=0.3784 critic_loss=93548839227.0769 entropy=17.5699 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 144700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433209.6 mean_steps=14.2
|
|
[Episode 144710] reward=-126443797.4 actor_loss=0.2317 critic_loss=101105992238.5455 entropy=17.5753 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 144720] reward=-117641147.3 actor_loss=0.2654 critic_loss=92206441758.7200 entropy=17.5835 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 144720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487596.0 mean_steps=13.8
|
|
[Episode 144730] reward=-117276785.4 actor_loss=0.2996 critic_loss=93064171246.9333 entropy=17.5812 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 144740] reward=-114058578.3 actor_loss=0.2539 critic_loss=86047259111.6190 entropy=17.5841 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 144740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540628.4 mean_steps=14.2
|
|
[Episode 144750] reward=-117307873.0 actor_loss=0.4066 critic_loss=93781745336.3200 entropy=17.5844 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 144760] reward=-123991656.1 actor_loss=0.1878 critic_loss=96787310803.8621 entropy=17.5871 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 144760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-509046.0 mean_steps=12.9
|
|
[Episode 144770] reward=-116584588.9 actor_loss=0.2479 critic_loss=91123061289.5135 entropy=17.5924 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 144780] reward=-119891142.6 actor_loss=0.3991 critic_loss=95016443221.3333 entropy=17.5896 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 144780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576567.5 mean_steps=13.6
|
|
[Episode 144790] reward=-115430669.6 actor_loss=0.4059 critic_loss=88266222738.2857 entropy=17.5820 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 144800] reward=-121022241.2 actor_loss=0.3259 critic_loss=97677557328.8421 entropy=17.5724 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 144800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504969.0 mean_steps=13.7
|
|
[Episode 144810] reward=-118653748.8 actor_loss=0.2814 critic_loss=98600825949.0909 entropy=17.5788 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 144820] reward=-122671603.8 actor_loss=0.2274 critic_loss=91988462432.7111 entropy=17.5708 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 144820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-589786.5 mean_steps=13.5
|
|
[Episode 144830] reward=-117032636.1 actor_loss=0.2685 critic_loss=89017115306.6667 entropy=17.5706 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 144840] reward=-122935061.8 actor_loss=0.2753 critic_loss=101889599733.7600 entropy=17.5779 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 144840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-598118.9 mean_steps=12.4
|
|
[Episode 144850] reward=-122943485.5 actor_loss=0.3407 critic_loss=97033023355.8710 entropy=17.5683 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 144860] reward=-119524834.3 actor_loss=0.3581 critic_loss=93574123378.7586 entropy=17.5654 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 144860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500429.0 mean_steps=13.7
|
|
[Episode 144870] reward=-119275591.2 actor_loss=0.2197 critic_loss=94544999461.9259 entropy=17.5666 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 144880] reward=-119224643.0 actor_loss=0.2487 critic_loss=95706148217.2632 entropy=17.5703 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 144880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-476717.4 mean_steps=12.8
|
|
[Episode 144890] reward=-120396439.8 actor_loss=0.3113 critic_loss=97087515648.0000 entropy=17.5868 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 144900] reward=-113706818.4 actor_loss=0.2398 critic_loss=87704353240.6154 entropy=17.6052 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 144900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509580.9 mean_steps=13.7
|
|
[Episode 144910] reward=-119910516.9 actor_loss=0.2374 critic_loss=125625503288.8889 entropy=17.6030 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 144920] reward=-121798323.8 actor_loss=0.1843 critic_loss=96335548631.5789 entropy=17.5842 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 144920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539605.7 mean_steps=13.2
|
|
[Episode 144930] reward=-126276911.7 actor_loss=0.2568 critic_loss=104094776797.8667 entropy=17.5895 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 144940] reward=-109669154.2 actor_loss=0.2703 critic_loss=81656032096.7111 entropy=17.5962 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 144940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378896.0 mean_steps=15.8
|
|
[Episode 144950] reward=-122036725.7 actor_loss=0.2711 critic_loss=97187063053.4737 entropy=17.6044 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 144960] reward=-112983151.1 actor_loss=0.2235 critic_loss=84761380864.0000 entropy=17.6073 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 144960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528524.5 mean_steps=13.2
|
|
[Episode 144970] reward=-119917656.6 actor_loss=0.2524 critic_loss=93007280020.2105 entropy=17.6050 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 144980] reward=-118599685.3 actor_loss=0.4310 critic_loss=93875926016.0000 entropy=17.5960 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 144980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-640650.3 mean_steps=12.7
|
|
[Episode 144990] reward=-120603128.1 actor_loss=0.2872 critic_loss=94371116236.8000 entropy=17.5912 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 145000] reward=-119892392.3 actor_loss=0.2705 critic_loss=96552676745.8462 entropy=17.5841 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 145000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515464.1 mean_steps=13.6
|
|
[Episode 145010] reward=-117499365.5 actor_loss=0.3130 critic_loss=90326367649.1852 entropy=17.5781 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 145020] reward=-119003847.5 actor_loss=0.3528 critic_loss=93607827228.4444 entropy=17.5858 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 145020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-472458.7 mean_steps=15.2
|
|
[Episode 145030] reward=-116398939.6 actor_loss=0.2623 critic_loss=91744260818.8235 entropy=17.5922 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 145040] reward=-115322709.1 actor_loss=0.2694 critic_loss=89546045147.4286 entropy=17.5828 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 145040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-351582.4 mean_steps=15.8
|
|
[Episode 145050] reward=-113461609.1 actor_loss=0.4272 critic_loss=87877394432.0000 entropy=17.5830 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 145060] reward=-121606900.3 actor_loss=0.2234 critic_loss=98044248420.1739 entropy=17.5758 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 145060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-492397.8 mean_steps=12.8
|
|
[Episode 145070] reward=-118454006.3 actor_loss=0.2344 critic_loss=98142226204.4444 entropy=17.5848 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 145080] reward=-116954890.9 actor_loss=0.2582 critic_loss=93692567369.9556 entropy=17.5869 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 145080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-496475.2 mean_steps=14.8
|
|
[Episode 145090] reward=-122491169.2 actor_loss=0.2779 critic_loss=97481097636.1026 entropy=17.6064 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 145100] reward=-118662054.5 actor_loss=0.2929 critic_loss=100241024341.3333 entropy=17.5986 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 145100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437156.1 mean_steps=15.3
|
|
[Episode 145110] reward=-123846729.3 actor_loss=0.1894 critic_loss=99204759552.0000 entropy=17.5925 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 145120] reward=-116764547.2 actor_loss=0.3859 critic_loss=100690772514.1333 entropy=17.5839 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 145120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500487.6 mean_steps=13.7
|
|
[Episode 145130] reward=-114745495.5 actor_loss=0.2852 critic_loss=91793838970.4348 entropy=17.5795 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 145140] reward=-122401953.9 actor_loss=0.3274 critic_loss=96404953770.6667 entropy=17.5857 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 145140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565448.7 mean_steps=13.1
|
|
[Episode 145150] reward=-122266190.0 actor_loss=0.3709 critic_loss=101201287850.6667 entropy=17.5897 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 145160] reward=-117625537.8 actor_loss=0.3027 critic_loss=89716806997.3333 entropy=17.5845 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 145160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-478667.2 mean_steps=15.5
|
|
[Episode 145170] reward=-121012036.1 actor_loss=0.3239 critic_loss=95482567720.9600 entropy=17.5841 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 145180] reward=-119990727.4 actor_loss=0.2184 critic_loss=103644609331.2000 entropy=17.5792 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 145180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-423263.7 mean_steps=13.5
|
|
[Episode 145190] reward=-123145211.3 actor_loss=0.3588 critic_loss=97094328926.8148 entropy=17.5784 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 145200] reward=-119675261.9 actor_loss=0.3281 critic_loss=92927016528.8421 entropy=17.5676 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 145200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538704.4 mean_steps=13.9
|
|
[Episode 145210] reward=-118794528.5 actor_loss=0.3097 critic_loss=90688699298.9091 entropy=17.5880 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 145220] reward=-115768483.1 actor_loss=0.3100 critic_loss=86487996229.8182 entropy=17.5764 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 145220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-373150.8 mean_steps=15.4
|
|
[Episode 145230] reward=-118213738.2 actor_loss=0.2644 critic_loss=93654521856.0000 entropy=17.5759 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 145240] reward=-121958560.1 actor_loss=0.2981 critic_loss=97087118066.5263 entropy=17.5753 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 145240] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-677752.8 mean_steps=11.2
|
|
[Episode 145250] reward=-124540279.4 actor_loss=0.2253 critic_loss=99487923896.3200 entropy=17.5852 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 145260] reward=-122446125.1 actor_loss=0.3347 critic_loss=98036886683.1515 entropy=17.5798 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 145260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534310.7 mean_steps=13.8
|
|
[Episode 145270] reward=-119546927.6 actor_loss=0.3106 critic_loss=91583754435.0476 entropy=17.5769 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 145280] reward=-121762739.3 actor_loss=0.2955 critic_loss=99192509969.6552 entropy=17.5700 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 145280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-406476.6 mean_steps=15.8
|
|
[Episode 145290] reward=-125228064.1 actor_loss=0.2439 critic_loss=100549204260.5714 entropy=17.5688 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 145300] reward=-117098665.7 actor_loss=0.2558 critic_loss=90222643129.3793 entropy=17.5763 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 145300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476630.2 mean_steps=14.4
|
|
[Episode 145310] reward=-124487223.1 actor_loss=0.1946 critic_loss=97590561626.8387 entropy=17.5963 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 145320] reward=-115753673.5 actor_loss=0.4216 critic_loss=96809880877.1765 entropy=17.5941 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 145320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589415.0 mean_steps=12.3
|
|
[Episode 145330] reward=-117320480.6 actor_loss=0.4419 critic_loss=89732685004.8000 entropy=17.5938 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1523 front_blocked=0
|
|
[Episode 145340] reward=-116906103.2 actor_loss=0.3035 critic_loss=143473343634.2857 entropy=17.6057 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 145340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-356897.4 mean_steps=16.4
|
|
[Episode 145350] reward=-120816205.7 actor_loss=0.2126 critic_loss=89120368230.4000 entropy=17.6091 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 145360] reward=-121512700.5 actor_loss=0.2900 critic_loss=93998586928.7619 entropy=17.6062 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 145360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600951.7 mean_steps=12.4
|
|
[Episode 145370] reward=-113195684.7 actor_loss=0.2829 critic_loss=88917471053.9130 entropy=17.5974 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 145380] reward=-116125085.0 actor_loss=0.3202 critic_loss=92838741654.5882 entropy=17.5932 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 145380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-409614.7 mean_steps=13.7
|
|
[Episode 145390] reward=-118620513.4 actor_loss=0.3494 critic_loss=94744629488.9412 entropy=17.6003 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 145400] reward=-117462561.8 actor_loss=0.4133 critic_loss=94486362473.4118 entropy=17.5902 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 145400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-467129.1 mean_steps=15.2
|
|
[Episode 145410] reward=-119102354.5 actor_loss=0.2995 critic_loss=89558247156.8696 entropy=17.5762 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 145420] reward=-116898256.6 actor_loss=0.3649 critic_loss=92886402951.5294 entropy=17.5781 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 145420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532904.8 mean_steps=12.8
|
|
[Episode 145430] reward=-112448631.3 actor_loss=0.2385 critic_loss=91582936998.9565 entropy=17.5678 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 145440] reward=-122750549.1 actor_loss=0.2149 critic_loss=99212832256.0000 entropy=17.5638 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 145440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528470.7 mean_steps=13.9
|
|
[Episode 145450] reward=-120670798.6 actor_loss=0.2653 critic_loss=93865412537.3793 entropy=17.5711 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 145460] reward=-123242675.0 actor_loss=0.2907 critic_loss=95215094442.6667 entropy=17.5772 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 145460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-584081.4 mean_steps=13.3
|
|
[Episode 145470] reward=-118152596.7 actor_loss=0.2774 critic_loss=91031396625.0667 entropy=17.5738 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 145480] reward=-115262477.2 actor_loss=0.4180 critic_loss=86325218123.2941 entropy=17.5585 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 145480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463051.6 mean_steps=14.1
|
|
[Episode 145490] reward=-117552839.1 actor_loss=0.2587 critic_loss=91887710631.7241 entropy=17.5611 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 145500] reward=-120305593.0 actor_loss=0.3124 critic_loss=95493228972.6512 entropy=17.5658 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 145500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-604157.9 mean_steps=11.7
|
|
[Episode 145510] reward=-120027452.9 actor_loss=0.2722 critic_loss=89132685903.6444 entropy=17.5592 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 145520] reward=-116934192.9 actor_loss=0.3203 critic_loss=87694599661.0370 entropy=17.5682 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 145520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386673.6 mean_steps=15.6
|
|
[Episode 145530] reward=-115367429.2 actor_loss=0.3969 critic_loss=85725957811.8919 entropy=17.5576 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 145540] reward=-120742547.4 actor_loss=0.3207 critic_loss=100339406165.3333 entropy=17.5508 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 145540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559190.9 mean_steps=12.8
|
|
[Episode 145550] reward=-121628743.7 actor_loss=0.2718 critic_loss=92012163830.5185 entropy=17.5568 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 145560] reward=-116458553.7 actor_loss=0.3344 critic_loss=91656370585.6000 entropy=17.5707 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 145560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481385.5 mean_steps=14.5
|
|
[Episode 145570] reward=-118418053.5 actor_loss=0.3744 critic_loss=89423906923.7895 entropy=17.5667 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 145580] reward=-124695318.9 actor_loss=0.2720 critic_loss=98715150715.2593 entropy=17.5550 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 145580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501626.9 mean_steps=13.2
|
|
[Episode 145590] reward=-121365510.0 actor_loss=0.1842 critic_loss=93455422700.3077 entropy=17.5499 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 145600] reward=-120123161.0 actor_loss=0.2791 critic_loss=101062477036.3077 entropy=17.5578 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 145600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419509.5 mean_steps=15.2
|
|
[Episode 145610] reward=-117798940.6 actor_loss=0.2196 critic_loss=91583390378.6667 entropy=17.5585 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 145620] reward=-118568065.1 actor_loss=0.2699 critic_loss=97468461479.7241 entropy=17.5617 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 145620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550081.2 mean_steps=13.3
|
|
[Episode 145630] reward=-118350643.7 actor_loss=0.2881 critic_loss=91010490368.0000 entropy=17.5722 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 145640] reward=-122553650.4 actor_loss=0.2992 critic_loss=94494651572.7059 entropy=17.5619 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 145640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-601829.0 mean_steps=12.8
|
|
[Episode 145650] reward=-122560905.0 actor_loss=0.2111 critic_loss=98331898071.5789 entropy=17.5644 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 145660] reward=-120503154.8 actor_loss=0.2747 critic_loss=91092160785.0667 entropy=17.5663 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 145660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446388.2 mean_steps=14.4
|
|
[Episode 145670] reward=-115969265.8 actor_loss=0.1965 critic_loss=90752729616.5161 entropy=17.5603 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 145680] reward=-124587595.8 actor_loss=0.1850 critic_loss=114017989330.8235 entropy=17.5779 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 145680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-639546.2 mean_steps=15.2
|
|
[Episode 145690] reward=-110938876.6 actor_loss=0.3830 critic_loss=85063602371.0476 entropy=17.5820 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 145700] reward=-118387276.9 actor_loss=0.2757 critic_loss=87148799180.8000 entropy=17.5829 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 145700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439055.0 mean_steps=14.9
|
|
[Episode 145710] reward=-114433265.2 actor_loss=0.3165 critic_loss=88479744000.0000 entropy=17.5866 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 145720] reward=-122309218.5 actor_loss=0.3058 critic_loss=96468369408.0000 entropy=17.5892 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 145720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409359.3 mean_steps=15.2
|
|
[Episode 145730] reward=-117781699.1 actor_loss=0.2258 critic_loss=92912964403.2000 entropy=17.5934 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 145740] reward=-121017961.7 actor_loss=0.3184 critic_loss=90501421738.6667 entropy=17.5941 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 145740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-454410.6 mean_steps=13.9
|
|
[Episode 145750] reward=-116189584.3 actor_loss=0.2412 critic_loss=91455773927.2258 entropy=17.5939 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 145760] reward=-125299578.7 actor_loss=0.2060 critic_loss=98477988116.7568 entropy=17.5894 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 145760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455692.5 mean_steps=14.5
|
|
[Episode 145770] reward=-112238449.9 actor_loss=0.2723 critic_loss=86661573036.6512 entropy=17.5864 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 145780] reward=-117225271.3 actor_loss=0.3412 critic_loss=93143093992.7273 entropy=17.5778 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 145780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502708.8 mean_steps=13.9
|
|
[Episode 145790] reward=-120228996.7 actor_loss=0.2188 critic_loss=92733111227.7333 entropy=17.5771 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 145800] reward=-120704021.4 actor_loss=0.2955 critic_loss=96005503658.6667 entropy=17.5781 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 145800] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-356584.0 mean_steps=16.6
|
|
[Episode 145810] reward=-119687358.0 actor_loss=0.3626 critic_loss=93843519715.5556 entropy=17.5763 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 145820] reward=-114567520.5 actor_loss=0.2737 critic_loss=93635023339.5200 entropy=17.5763 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 145820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459060.3 mean_steps=15.5
|
|
[Episode 145830] reward=-116243709.4 actor_loss=0.3800 critic_loss=93902462429.8667 entropy=17.5732 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 145840] reward=-115876804.7 actor_loss=0.2964 critic_loss=87724078631.3846 entropy=17.5673 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 145840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494092.4 mean_steps=13.7
|
|
[Episode 145850] reward=-116739882.2 actor_loss=0.3180 critic_loss=88675386982.4000 entropy=17.5743 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 145860] reward=-112764852.7 actor_loss=0.3394 critic_loss=90421839872.0000 entropy=17.5840 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 145860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428033.5 mean_steps=15.4
|
|
[Episode 145870] reward=-113185273.4 actor_loss=0.3527 critic_loss=83446433881.0435 entropy=17.5832 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 145880] reward=-117485331.4 actor_loss=0.3018 critic_loss=88676150272.0000 entropy=17.5792 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 145880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575711.5 mean_steps=12.7
|
|
[Episode 145890] reward=-120013261.6 actor_loss=0.3785 critic_loss=94972553789.4400 entropy=17.5879 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 145900] reward=-119993207.0 actor_loss=0.2741 critic_loss=94489844967.2258 entropy=17.5860 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 145900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567773.0 mean_steps=13.6
|
|
[Episode 145910] reward=-119379813.6 actor_loss=0.3379 critic_loss=95046826081.5238 entropy=17.5923 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 145920] reward=-118123148.6 actor_loss=0.1812 critic_loss=91050618997.0286 entropy=17.6011 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 145920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499102.5 mean_steps=14.2
|
|
[Episode 145930] reward=-120696906.2 actor_loss=0.2429 critic_loss=95329574912.0000 entropy=17.6086 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 145940] reward=-115169161.0 actor_loss=0.3212 critic_loss=86190259859.9111 entropy=17.6221 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 145940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-545918.5 mean_steps=13.8
|
|
[Episode 145950] reward=-121507609.8 actor_loss=0.3207 critic_loss=97951196306.2857 entropy=17.6288 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 145960] reward=-116761471.0 actor_loss=0.2891 critic_loss=97446709150.4762 entropy=17.6271 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 145960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-471702.3 mean_steps=15.5
|
|
[Episode 145970] reward=-112726470.1 actor_loss=0.3435 critic_loss=90361710416.4571 entropy=17.6293 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 145980] reward=-114986827.5 actor_loss=0.2953 critic_loss=83441356891.0222 entropy=17.6241 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 145980] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-256792.9 mean_steps=17.5
|
|
[Episode 145990] reward=-123665074.4 actor_loss=0.2679 critic_loss=102607703868.9524 entropy=17.6379 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 146000] reward=-119408137.3 actor_loss=0.2297 critic_loss=88942098659.5556 entropy=17.6457 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 146000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-609511.1 mean_steps=12.6
|
|
[Episode 146010] reward=-123120688.0 actor_loss=0.3695 critic_loss=137089667891.2000 entropy=17.6518 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 146020] reward=-108768345.3 actor_loss=0.1969 critic_loss=87178118072.5581 entropy=17.6448 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 146020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479148.0 mean_steps=14.7
|
|
[Episode 146030] reward=-115874329.2 actor_loss=0.2261 critic_loss=90428144025.6000 entropy=17.6520 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 146040] reward=-123615149.1 actor_loss=0.2335 critic_loss=99603664258.8445 entropy=17.6521 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 146040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-565601.6 mean_steps=12.7
|
|
[Episode 146050] reward=-153276816.1 actor_loss=2.5258 critic_loss=5122901380664.8887 entropy=17.6750 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 146060] reward=-121949782.0 actor_loss=0.3696 critic_loss=96601965536.9697 entropy=17.6638 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 146060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537764.5 mean_steps=13.2
|
|
[Episode 146070] reward=-120740414.0 actor_loss=0.2357 critic_loss=96075193457.7778 entropy=17.6437 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 146080] reward=-117826221.4 actor_loss=0.3269 critic_loss=91733623876.2667 entropy=17.6316 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 146080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-531950.4 mean_steps=14.9
|
|
[Episode 146090] reward=-121020143.4 actor_loss=0.3226 critic_loss=101479762944.0000 entropy=17.6270 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 146100] reward=-117061634.7 actor_loss=0.3866 critic_loss=96489131132.8781 entropy=17.6312 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 146100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-428791.5 mean_steps=13.7
|
|
[Episode 146110] reward=-118833873.9 actor_loss=0.2179 critic_loss=93750071296.0000 entropy=17.6192 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 146120] reward=-121121571.8 actor_loss=0.3504 critic_loss=94552317952.0000 entropy=17.6201 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 146120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-472541.7 mean_steps=13.6
|
|
[Episode 146130] reward=-119114640.3 actor_loss=0.3125 critic_loss=87897097011.2000 entropy=17.6265 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 146140] reward=-122099011.2 actor_loss=0.2597 critic_loss=99126290176.0000 entropy=17.6279 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 146140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-395560.2 mean_steps=14.8
|
|
[Episode 146150] reward=-116315761.5 actor_loss=0.3450 critic_loss=92727512576.0000 entropy=17.6122 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 146160] reward=-120453451.6 actor_loss=0.2971 critic_loss=107490479812.9231 entropy=17.6098 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 146160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531602.0 mean_steps=14.1
|
|
[Episode 146170] reward=-122378365.5 actor_loss=0.3443 critic_loss=93536691038.3158 entropy=17.6001 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 146180] reward=-118544952.3 actor_loss=0.2296 critic_loss=89694689093.8182 entropy=17.5962 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 146180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436758.8 mean_steps=15.6
|
|
[Episode 146190] reward=-118447221.9 actor_loss=0.3197 critic_loss=94616146193.0667 entropy=17.5928 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 146200] reward=-119899152.9 actor_loss=0.3098 critic_loss=90684030429.8667 entropy=17.5909 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 146200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610514.2 mean_steps=13.1
|
|
[Episode 146210] reward=-117496569.3 actor_loss=0.2712 critic_loss=87671807817.9556 entropy=17.5922 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 146220] reward=-118580557.8 actor_loss=0.2581 critic_loss=118575306251.3778 entropy=17.5957 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 146220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434701.5 mean_steps=14.5
|
|
[Episode 146230] reward=-119832079.6 actor_loss=0.1905 critic_loss=91604748060.4444 entropy=17.5998 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 146240] reward=-111658512.0 actor_loss=0.4411 critic_loss=89342540185.6000 entropy=17.6060 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 146240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440994.6 mean_steps=14.3
|
|
[Episode 146250] reward=-121394738.3 actor_loss=0.3180 critic_loss=98201868333.5111 entropy=17.5948 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 146260] reward=-117545425.0 actor_loss=0.2686 critic_loss=97333450069.3333 entropy=17.6211 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 146260] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-728119.8 mean_steps=10.9
|
|
[Episode 146270] reward=-133459752.6 actor_loss=0.2653 critic_loss=935322205643.0344 entropy=17.6252 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 146280] reward=-122431284.9 actor_loss=0.2270 critic_loss=104816703715.5556 entropy=17.6293 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 146280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-496333.7 mean_steps=12.9
|
|
[Episode 146290] reward=-121717363.7 actor_loss=0.2406 critic_loss=94264686204.5405 entropy=17.6285 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 146300] reward=-114296989.0 actor_loss=0.3982 critic_loss=92494672578.2069 entropy=17.6196 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 146300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527103.7 mean_steps=14.1
|
|
[Episode 146310] reward=-137153876.6 actor_loss=0.3489 critic_loss=1739517504170.6667 entropy=17.6264 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 146320] reward=-114806857.8 actor_loss=0.2850 critic_loss=90510283629.7143 entropy=17.6353 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 146320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-595972.5 mean_steps=12.1
|
|
[Episode 146330] reward=-124902722.6 actor_loss=0.2960 critic_loss=120368658990.5455 entropy=17.6377 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 146340] reward=-121960258.7 actor_loss=0.2179 critic_loss=95525677010.4889 entropy=17.6664 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 146340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563655.9 mean_steps=12.8
|
|
[Episode 146350] reward=-162512162.5 actor_loss=0.3216 critic_loss=6611638843879.6191 entropy=17.6651 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 146360] reward=-135177210.7 actor_loss=0.2544 critic_loss=975934142464.0000 entropy=17.6650 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 146360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479658.1 mean_steps=13.8
|
|
[Episode 146370] reward=-118175928.6 actor_loss=0.3674 critic_loss=93827019697.2308 entropy=17.6829 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 146380] reward=-137400524.2 actor_loss=0.2868 critic_loss=2405870080945.2310 entropy=17.7079 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 146380] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-681854.4 mean_steps=11.8
|
|
[Episode 146390] reward=-450803081.5 actor_loss=0.2474 critic_loss=126217188264474.9531 entropy=17.7291 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1146 front_blocked=0
|
|
[Episode 146400] reward=-124725597.8 actor_loss=0.3089 critic_loss=125800364623.6444 entropy=17.7395 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 146400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-529392.6 mean_steps=15.1
|
|
[Episode 146410] reward=-114096573.2 actor_loss=0.3220 critic_loss=89377706439.1111 entropy=17.7313 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 146420] reward=-115735796.2 actor_loss=0.3980 critic_loss=94879486407.1111 entropy=17.7313 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 146420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-615812.6 mean_steps=12.8
|
|
[Episode 146430] reward=-125484748.2 actor_loss=0.1742 critic_loss=104055318846.5778 entropy=17.7226 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 146440] reward=-119152128.9 actor_loss=0.2837 critic_loss=106805182032.8421 entropy=17.7204 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 146440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450460.5 mean_steps=15.4
|
|
[Episode 146450] reward=-119483195.4 actor_loss=0.2543 critic_loss=100412795562.6667 entropy=17.7156 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 146460] reward=-116981601.7 actor_loss=0.3905 critic_loss=92756211029.3333 entropy=17.7217 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 146460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-412334.0 mean_steps=15.2
|
|
[Episode 146470] reward=-121991649.6 actor_loss=0.2467 critic_loss=173139597721.6000 entropy=17.7548 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 146480] reward=-118801924.8 actor_loss=0.2084 critic_loss=95691066390.7556 entropy=17.7674 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 146480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522917.9 mean_steps=14.1
|
|
[Episode 146490] reward=-122984506.0 actor_loss=0.2301 critic_loss=96903381295.4074 entropy=17.7573 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 146500] reward=-122106574.8 actor_loss=0.2962 critic_loss=95239542296.3810 entropy=17.7468 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 146500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-518269.0 mean_steps=12.2
|
|
[Episode 146510] reward=-122961450.3 actor_loss=0.2688 critic_loss=97444998222.7692 entropy=17.7526 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 146520] reward=-118761519.7 actor_loss=0.2593 critic_loss=99022336534.2609 entropy=17.7500 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 146520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-421880.1 mean_steps=14.2
|
|
[Episode 146530] reward=-118968342.9 actor_loss=0.3058 critic_loss=93887909143.2727 entropy=17.7522 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 146540] reward=-117999086.2 actor_loss=0.3558 critic_loss=96617616179.2000 entropy=17.7455 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 146540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486666.2 mean_steps=13.7
|
|
[Episode 146550] reward=-156219905.6 actor_loss=0.1848 critic_loss=4308661896624.3555 entropy=17.7461 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 146560] reward=-162665785.4 actor_loss=0.3531 critic_loss=6966479211724.7998 entropy=17.7688 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 146560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-532781.6 mean_steps=15.2
|
|
[Episode 146570] reward=-138348619.3 actor_loss=0.2886 critic_loss=1216651975065.6001 entropy=17.7673 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 146580] reward=-123103512.1 actor_loss=0.3543 critic_loss=149097522176.0000 entropy=17.7736 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 146580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431182.5 mean_steps=14.6
|
|
[Episode 146590] reward=-160331389.2 actor_loss=0.2749 critic_loss=4183005303861.8945 entropy=17.7866 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 146600] reward=-123210271.3 actor_loss=0.2853 critic_loss=213624478105.6000 entropy=17.7991 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 146600] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-334410.4 mean_steps=17.7
|
|
[Episode 146610] reward=-117912127.1 actor_loss=0.2808 critic_loss=440817672374.0444 entropy=17.8042 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 146620] reward=-125974645.8 actor_loss=0.3280 critic_loss=639660047018.6666 entropy=17.7990 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 146620] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-623239.2 mean_steps=10.9
|
|
[Episode 146630] reward=-130609907.0 actor_loss=0.1829 critic_loss=279872830495.0303 entropy=17.7995 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 146640] reward=-120633477.0 actor_loss=0.2830 critic_loss=111685280727.0400 entropy=17.7957 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 146640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459414.0 mean_steps=15.2
|
|
[Episode 146650] reward=-117458891.4 actor_loss=0.2775 critic_loss=98960905011.2000 entropy=17.8020 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 146660] reward=-117677441.5 actor_loss=0.3014 critic_loss=91887731916.8000 entropy=17.8057 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 146660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559354.2 mean_steps=12.9
|
|
[Episode 146670] reward=-119221717.5 actor_loss=0.2589 critic_loss=96031027200.0000 entropy=17.8080 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 146680] reward=-125514210.9 actor_loss=0.3106 critic_loss=101322834466.1333 entropy=17.7976 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 146680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-594108.0 mean_steps=12.6
|
|
[Episode 146690] reward=-121538832.4 actor_loss=0.2604 critic_loss=107744601156.2667 entropy=17.8023 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 146700] reward=-118261234.1 actor_loss=0.3978 critic_loss=95499994908.4444 entropy=17.7976 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 146700] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-351655.5 mean_steps=17.9
|
|
[Episode 146710] reward=-121810362.1 actor_loss=0.3010 critic_loss=109333778705.0667 entropy=17.7903 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 146720] reward=-114523574.4 actor_loss=0.2749 critic_loss=100075037218.1333 entropy=17.7923 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 146720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-534366.7 mean_steps=11.9
|
|
[Episode 146730] reward=-123915368.1 actor_loss=0.1973 critic_loss=124355371349.3333 entropy=17.7989 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 146740] reward=-118188150.0 actor_loss=0.2888 critic_loss=92091728262.0952 entropy=17.8067 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 146740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435413.1 mean_steps=14.7
|
|
[Episode 146750] reward=-118813039.1 actor_loss=0.2640 critic_loss=96053826070.2609 entropy=17.8181 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 146760] reward=-114520603.1 actor_loss=0.3583 critic_loss=91668793882.9474 entropy=17.7976 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 146760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-626812.7 mean_steps=12.1
|
|
[Episode 146770] reward=-118644633.3 actor_loss=0.3554 critic_loss=100230581088.7111 entropy=17.8109 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 146780] reward=-119110500.9 actor_loss=0.2768 critic_loss=101555496029.0909 entropy=17.8175 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 146780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542811.3 mean_steps=14.1
|
|
[Episode 146790] reward=-118217708.3 actor_loss=0.3168 critic_loss=93826944906.9714 entropy=17.8202 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 146800] reward=-116672850.1 actor_loss=0.2643 critic_loss=99872869128.8276 entropy=17.8083 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 146800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409094.7 mean_steps=16.0
|
|
[Episode 146810] reward=-121668340.0 actor_loss=0.4038 critic_loss=98579512953.9048 entropy=17.7980 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 146820] reward=-118691190.3 actor_loss=0.3466 critic_loss=96852810683.7333 entropy=17.7779 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 146820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527638.9 mean_steps=14.0
|
|
[Episode 146830] reward=-125337508.2 actor_loss=0.2639 critic_loss=107065328666.2564 entropy=17.7661 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 146840] reward=-122115781.3 actor_loss=0.2569 critic_loss=108439123968.0000 entropy=17.7699 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 146840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528021.6 mean_steps=14.2
|
|
[Episode 146850] reward=-120308439.3 actor_loss=0.3386 critic_loss=98180923392.0000 entropy=17.7677 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 146860] reward=-121784460.6 actor_loss=0.2735 critic_loss=96206924544.0000 entropy=17.7697 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 146860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547165.9 mean_steps=13.4
|
|
[Episode 146870] reward=-120281127.4 actor_loss=0.2098 critic_loss=94029931479.0400 entropy=17.7673 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 146880] reward=-124002310.2 actor_loss=0.2181 critic_loss=96444681947.4286 entropy=17.7738 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 146880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405930.2 mean_steps=14.9
|
|
[Episode 146890] reward=-121818246.8 actor_loss=0.2430 critic_loss=95795641404.2353 entropy=17.7533 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 146900] reward=-118086532.2 actor_loss=0.3244 critic_loss=95605266176.0000 entropy=17.7584 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 146900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511881.2 mean_steps=13.8
|
|
[Episode 146910] reward=-117919967.5 actor_loss=0.3933 critic_loss=91738477661.0909 entropy=17.7544 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 146920] reward=-125343322.4 actor_loss=0.3613 critic_loss=202881250645.3333 entropy=17.7540 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 146920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589005.3 mean_steps=12.8
|
|
[Episode 146930] reward=-114461707.1 actor_loss=0.2836 critic_loss=91926074709.3333 entropy=17.7473 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 146940] reward=-117478345.7 actor_loss=0.3088 critic_loss=88339279127.2727 entropy=17.7552 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 146940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501271.0 mean_steps=12.9
|
|
[Episode 146950] reward=-120413053.7 actor_loss=0.2844 critic_loss=94507768217.6000 entropy=17.7467 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 146960] reward=-113598795.7 actor_loss=0.3911 critic_loss=89043181568.0000 entropy=17.7439 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 146960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-569823.6 mean_steps=14.1
|
|
[Episode 146970] reward=-119421139.8 actor_loss=0.3044 critic_loss=93503845327.2381 entropy=17.7333 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 146980] reward=-116965233.8 actor_loss=0.3272 critic_loss=103557434314.1053 entropy=17.7254 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 146980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-418462.6 mean_steps=14.2
|
|
[Episode 146990] reward=-120073487.3 actor_loss=0.2137 critic_loss=94714107974.6207 entropy=17.7164 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 147000] reward=-119995530.0 actor_loss=0.3139 critic_loss=96522994892.8000 entropy=17.7101 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 147000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-613831.0 mean_steps=11.7
|
|
[Episode 147010] reward=-112615097.4 actor_loss=0.4380 critic_loss=86143810816.0000 entropy=17.7063 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Episode 147020] reward=-117968964.6 actor_loss=0.3157 critic_loss=89486746965.3333 entropy=17.7066 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 147020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578923.1 mean_steps=12.4
|
|
[Episode 147030] reward=-116316373.8 actor_loss=0.2910 critic_loss=186543549410.7429 entropy=17.7016 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 147040] reward=-124278152.4 actor_loss=0.2620 critic_loss=172164206450.7586 entropy=17.6831 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 147040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501434.6 mean_steps=13.0
|
|
[Episode 147050] reward=-118698841.2 actor_loss=0.2562 critic_loss=89726690645.3333 entropy=17.6791 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 147060] reward=-116394215.1 actor_loss=0.4155 critic_loss=89838474035.2000 entropy=17.6759 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 147060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-598792.6 mean_steps=12.8
|
|
[Episode 147070] reward=-121464000.1 actor_loss=0.3101 critic_loss=96888557443.8788 entropy=17.6758 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 147080] reward=-126150926.1 actor_loss=0.3798 critic_loss=377987287176.5333 entropy=17.6912 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 147080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578326.1 mean_steps=12.4
|
|
[Episode 147090] reward=-117814144.7 actor_loss=0.3318 critic_loss=93813352448.0000 entropy=17.7093 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 147100] reward=-120696188.6 actor_loss=0.2199 critic_loss=269669755325.2174 entropy=17.7195 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 147100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505026.9 mean_steps=13.1
|
|
[Episode 147110] reward=-116752593.7 actor_loss=0.2844 critic_loss=89981235082.9714 entropy=17.7179 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 147120] reward=-126490013.8 actor_loss=0.2529 critic_loss=104595553559.2727 entropy=17.7125 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 147120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-386533.6 mean_steps=15.2
|
|
[Episode 147130] reward=-133440741.5 actor_loss=0.2688 critic_loss=1965214743432.9302 entropy=17.7036 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 147140] reward=-121013048.7 actor_loss=0.2174 critic_loss=89261128362.6667 entropy=17.6935 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 147140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426128.2 mean_steps=15.3
|
|
[Episode 147150] reward=-118942096.3 actor_loss=0.3801 critic_loss=97327449662.4390 entropy=17.6927 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 147160] reward=-117166313.1 actor_loss=0.2841 critic_loss=94294038235.4286 entropy=17.6852 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 147160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378723.8 mean_steps=15.8
|
|
[Episode 147170] reward=-116209428.9 actor_loss=0.3620 critic_loss=123530284333.1765 entropy=17.6920 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 147180] reward=-119266504.2 actor_loss=0.3011 critic_loss=95563725312.0000 entropy=17.6900 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 147180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-456718.5 mean_steps=13.8
|
|
[Episode 147190] reward=-122211171.5 actor_loss=0.3317 critic_loss=103890414498.9091 entropy=17.6857 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 147200] reward=-116761166.0 actor_loss=0.2317 critic_loss=93366543428.2667 entropy=17.6810 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 147200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-653018.8 mean_steps=12.2
|
|
[Episode 147210] reward=-122092664.2 actor_loss=0.1952 critic_loss=92823624142.4516 entropy=17.6783 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 147220] reward=-119024717.6 actor_loss=0.2195 critic_loss=89294561735.1111 entropy=17.6867 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 147220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-432783.4 mean_steps=16.1
|
|
[Episode 147230] reward=-119215712.2 actor_loss=0.2656 critic_loss=105138581959.1111 entropy=17.6891 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 147240] reward=-118858405.4 actor_loss=0.2804 critic_loss=108675774545.9200 entropy=17.6820 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 147240] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-642675.4 mean_steps=11.4
|
|
[Episode 147250] reward=-121340051.7 actor_loss=0.2685 critic_loss=98355202621.4400 entropy=17.6781 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 147260] reward=-120073284.9 actor_loss=0.3937 critic_loss=134470165008.5161 entropy=17.6858 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 147260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-365540.1 mean_steps=15.9
|
|
[Episode 147270] reward=-119768156.2 actor_loss=0.2654 critic_loss=94203712950.8571 entropy=17.6905 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 147280] reward=-115546597.8 actor_loss=0.2919 critic_loss=91749238507.2432 entropy=17.6797 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 147280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-404880.8 mean_steps=16.2
|
|
[Episode 147290] reward=-115596838.1 actor_loss=0.3549 critic_loss=88440439398.4000 entropy=17.6793 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 147300] reward=-117956483.8 actor_loss=0.2899 critic_loss=97259486928.5926 entropy=17.6754 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 147300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553478.5 mean_steps=14.1
|
|
[Episode 147310] reward=-115926118.0 actor_loss=0.3893 critic_loss=92454204757.3333 entropy=17.6776 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 147320] reward=-121503737.8 actor_loss=0.2896 critic_loss=110153740092.9524 entropy=17.6636 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 147320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436604.9 mean_steps=15.2
|
|
[Episode 147330] reward=-119727824.4 actor_loss=0.1694 critic_loss=99743766648.4706 entropy=17.6550 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 147340] reward=-117444623.7 actor_loss=0.3077 critic_loss=95084158619.8261 entropy=17.6623 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 147340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-489886.7 mean_steps=15.7
|
|
[Episode 147350] reward=-117381446.8 actor_loss=0.2996 critic_loss=92744589312.0000 entropy=17.6502 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 147360] reward=-119764177.0 actor_loss=0.1745 critic_loss=88792586397.5385 entropy=17.6401 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 147360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448628.3 mean_steps=14.4
|
|
[Episode 147370] reward=-124766659.1 actor_loss=0.2216 critic_loss=98671964475.0769 entropy=17.6398 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 147380] reward=-123613386.6 actor_loss=0.2543 critic_loss=102286251349.3333 entropy=17.6357 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 147380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445343.8 mean_steps=14.2
|
|
[Episode 147390] reward=-120315753.3 actor_loss=0.2634 critic_loss=96532889088.0000 entropy=17.6407 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 147400] reward=-114549795.2 actor_loss=0.2908 critic_loss=86415162428.2353 entropy=17.6516 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 147400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-422017.1 mean_steps=14.4
|
|
[Episode 147410] reward=-122545328.3 actor_loss=0.2522 critic_loss=97509511168.0000 entropy=17.6496 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 147420] reward=-122231925.0 actor_loss=0.2268 critic_loss=99060016014.2222 entropy=17.6488 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 147420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427074.5 mean_steps=14.2
|
|
[Episode 147430] reward=-118264583.9 actor_loss=0.3504 critic_loss=105388857806.4516 entropy=17.6493 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 147440] reward=-117297894.5 actor_loss=0.2268 critic_loss=90875013997.7143 entropy=17.6610 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 147440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538124.4 mean_steps=13.2
|
|
[Episode 147450] reward=-119069465.1 actor_loss=0.2834 critic_loss=93298530906.3529 entropy=17.6449 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 147460] reward=-118680164.0 actor_loss=0.3147 critic_loss=97249311129.6000 entropy=17.6530 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 147460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-515140.1 mean_steps=14.8
|
|
[Episode 147470] reward=-120174808.8 actor_loss=0.2813 critic_loss=99105488048.5517 entropy=17.6463 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 147480] reward=-122244310.5 actor_loss=0.3822 critic_loss=295322568294.4000 entropy=17.6444 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 147480] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-314782.7 mean_steps=17.3
|
|
[Episode 147490] reward=-119529649.8 actor_loss=0.3494 critic_loss=101817109474.7429 entropy=17.6525 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 147500] reward=-119327488.4 actor_loss=0.3603 critic_loss=106082576884.6222 entropy=17.6621 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 147500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553046.5 mean_steps=13.3
|
|
[Episode 147510] reward=-123524223.2 actor_loss=0.2709 critic_loss=100429553967.4074 entropy=17.6470 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 147520] reward=-122230067.9 actor_loss=0.2613 critic_loss=98671164695.2727 entropy=17.6453 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 147520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-310817.4 mean_steps=16.6
|
|
[Episode 147530] reward=-116991858.5 actor_loss=0.3669 critic_loss=86521923291.4286 entropy=17.6483 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 147540] reward=-119037450.1 actor_loss=0.2253 critic_loss=93676628329.4118 entropy=17.6366 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 147540] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-284969.8 mean_steps=16.2
|
|
[Episode 147550] reward=-120529243.5 actor_loss=0.3744 critic_loss=102047213158.4000 entropy=17.6408 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 147560] reward=-115766194.3 actor_loss=0.2975 critic_loss=89977169978.5143 entropy=17.6491 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 147560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577976.0 mean_steps=12.5
|
|
[Episode 147570] reward=-117874048.6 actor_loss=0.2487 critic_loss=95254337945.6000 entropy=17.6609 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 147580] reward=-122103863.1 actor_loss=0.3269 critic_loss=107076151808.0000 entropy=17.6438 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 147580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490273.9 mean_steps=13.8
|
|
[Episode 147590] reward=-116541449.9 actor_loss=0.3127 critic_loss=95185276642.2326 entropy=17.6210 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 147600] reward=-119249281.0 actor_loss=0.3192 critic_loss=98870448947.2000 entropy=17.6328 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 147600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405761.4 mean_steps=14.9
|
|
[Episode 147610] reward=-118749940.0 actor_loss=0.3639 critic_loss=97239208209.0667 entropy=17.6414 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 147620] reward=-120311338.7 actor_loss=0.2683 critic_loss=90919949068.1905 entropy=17.6293 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 147620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462320.9 mean_steps=15.7
|
|
[Episode 147630] reward=-117163138.1 actor_loss=0.3104 critic_loss=90964981639.5294 entropy=17.6140 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 147640] reward=-117766751.1 actor_loss=0.3216 critic_loss=90872135168.0000 entropy=17.5946 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 147640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-420770.8 mean_steps=14.5
|
|
[Episode 147650] reward=-113024371.6 actor_loss=0.3847 critic_loss=98380131147.2941 entropy=17.5977 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 147660] reward=-123568991.4 actor_loss=0.2359 critic_loss=96476489272.8889 entropy=17.5864 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 147660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-546146.8 mean_steps=12.3
|
|
[Episode 147670] reward=-122416948.9 actor_loss=0.2797 critic_loss=96742378813.7931 entropy=17.5862 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 147680] reward=-118694289.3 actor_loss=0.3567 critic_loss=93828706162.7586 entropy=17.5931 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 147680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-403742.3 mean_steps=16.4
|
|
[Episode 147690] reward=-116526791.0 actor_loss=0.2714 critic_loss=87131518117.1613 entropy=17.5990 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 147700] reward=-120046191.3 actor_loss=0.3156 critic_loss=107143088701.4400 entropy=17.6009 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 147700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-648756.2 mean_steps=13.5
|
|
[Episode 147710] reward=-117644221.1 actor_loss=0.3182 critic_loss=101480570880.0000 entropy=17.6028 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 147720] reward=-118311126.5 actor_loss=0.2505 critic_loss=91739038788.2667 entropy=17.6094 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 147720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-630997.4 mean_steps=12.1
|
|
[Episode 147730] reward=-120527996.1 actor_loss=0.2341 critic_loss=201519175878.1935 entropy=17.6017 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 147740] reward=-114076016.0 actor_loss=0.3452 critic_loss=88595123034.8387 entropy=17.6045 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 147740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453269.8 mean_steps=14.7
|
|
[Episode 147750] reward=-115810990.8 actor_loss=0.2498 critic_loss=87351883202.5600 entropy=17.6115 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 147760] reward=-119030302.2 actor_loss=0.2408 critic_loss=92555683742.4762 entropy=17.6023 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 147760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503109.6 mean_steps=15.2
|
|
[Episode 147770] reward=-116779565.7 actor_loss=0.3164 critic_loss=87891944789.3333 entropy=17.6030 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 147780] reward=-120869892.5 actor_loss=0.2670 critic_loss=97833850353.3714 entropy=17.5875 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 147780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529067.8 mean_steps=13.2
|
|
[Episode 147790] reward=-116185757.1 actor_loss=0.2980 critic_loss=88705770609.7778 entropy=17.5940 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 147800] reward=-116329679.5 actor_loss=0.2783 critic_loss=92819896807.6190 entropy=17.5949 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 147800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-412980.0 mean_steps=16.4
|
|
[Episode 147810] reward=-123791546.6 actor_loss=0.1977 critic_loss=92713274274.9091 entropy=17.6003 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 147820] reward=-120141183.1 actor_loss=0.2690 critic_loss=95197281280.0000 entropy=17.6101 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 147820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548810.6 mean_steps=14.2
|
|
[Episode 147830] reward=-117025367.1 actor_loss=0.2661 critic_loss=85085559386.3529 entropy=17.6111 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 147840] reward=-122140472.9 actor_loss=0.2252 critic_loss=95728609416.5333 entropy=17.6144 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 147840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603406.5 mean_steps=12.8
|
|
[Episode 147850] reward=-117491986.3 actor_loss=0.3437 critic_loss=93804792217.6000 entropy=17.6180 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 147860] reward=-113414261.4 actor_loss=0.2917 critic_loss=90281799953.0667 entropy=17.6079 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 147860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431746.2 mean_steps=14.8
|
|
[Episode 147870] reward=-120129217.0 actor_loss=0.2772 critic_loss=94235533994.6667 entropy=17.6170 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 147880] reward=-111794324.3 actor_loss=0.2936 critic_loss=85724700672.0000 entropy=17.6326 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 147880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-348537.2 mean_steps=15.5
|
|
[Episode 147890] reward=-122859187.4 actor_loss=0.2768 critic_loss=96523344827.7333 entropy=17.6326 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 147900] reward=-120074645.6 actor_loss=0.2551 critic_loss=96518343586.9091 entropy=17.6381 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 147900] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-394118.9 mean_steps=16.9
|
|
[Episode 147910] reward=-117368790.5 actor_loss=0.2876 critic_loss=98994591636.2105 entropy=17.6264 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 147920] reward=-116744428.4 actor_loss=0.2462 critic_loss=97261291474.4889 entropy=17.6211 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 147920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539303.2 mean_steps=13.1
|
|
[Episode 147930] reward=-117202629.7 actor_loss=0.3129 critic_loss=94270076990.0606 entropy=17.6143 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 147940] reward=-113985461.1 actor_loss=0.3133 critic_loss=89495203547.4286 entropy=17.6112 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 147940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484199.0 mean_steps=15.2
|
|
[Episode 147950] reward=-119108662.1 actor_loss=0.2374 critic_loss=89728551713.3913 entropy=17.6179 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 147960] reward=-112401377.6 actor_loss=0.3552 critic_loss=89316098269.4054 entropy=17.6131 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 147960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579419.3 mean_steps=12.8
|
|
[Episode 147970] reward=-114583514.7 actor_loss=0.3350 critic_loss=92074007405.7143 entropy=17.6029 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 147980] reward=-120079006.9 actor_loss=0.2528 critic_loss=96676334871.2727 entropy=17.6116 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 147980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392299.2 mean_steps=15.8
|
|
[Episode 147990] reward=-119262858.2 actor_loss=0.2061 critic_loss=98701432604.4444 entropy=17.6250 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 148000] reward=-122313780.0 actor_loss=0.2938 critic_loss=100089310646.8571 entropy=17.6163 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 148000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485461.8 mean_steps=14.8
|
|
[Episode 148010] reward=-120949330.1 actor_loss=0.2892 critic_loss=92250223809.7297 entropy=17.6041 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 148020] reward=-117175656.3 actor_loss=0.2749 critic_loss=92693589643.6364 entropy=17.5914 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 148020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-642307.9 mean_steps=12.2
|
|
[Episode 148030] reward=-122679252.7 actor_loss=0.2720 critic_loss=105483497472.0000 entropy=17.5826 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 148040] reward=-124100279.5 actor_loss=0.3188 critic_loss=99362775381.3333 entropy=17.5872 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 148040] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-610451.8 mean_steps=12.0
|
|
[Episode 148050] reward=-121265467.1 actor_loss=0.2712 critic_loss=96473815332.5714 entropy=17.5802 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 148060] reward=-117865022.3 actor_loss=0.2111 critic_loss=91872788480.0000 entropy=17.5850 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 148060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-613251.3 mean_steps=11.9
|
|
[Episode 148070] reward=-117314102.6 actor_loss=0.3231 critic_loss=90839468909.7143 entropy=17.5906 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 148080] reward=-118067525.7 actor_loss=0.3402 critic_loss=92937141775.5152 entropy=17.5957 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 148080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448659.9 mean_steps=15.6
|
|
[Episode 148090] reward=-118310399.9 actor_loss=0.3123 critic_loss=88039511654.4000 entropy=17.5973 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 148100] reward=-120873962.7 actor_loss=0.3098 critic_loss=95836732659.8095 entropy=17.5898 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 148100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-515676.1 mean_steps=15.1
|
|
[Episode 148110] reward=-117401037.3 actor_loss=0.2186 critic_loss=88026705334.8571 entropy=17.5910 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 148120] reward=-119626933.5 actor_loss=0.3407 critic_loss=92059340800.0000 entropy=17.5895 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 148120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512335.0 mean_steps=14.1
|
|
[Episode 148130] reward=-121277342.8 actor_loss=0.2901 critic_loss=88908280732.9032 entropy=17.5897 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 148140] reward=-121124359.1 actor_loss=0.3057 critic_loss=97327067331.0476 entropy=17.5833 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 148140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520296.8 mean_steps=14.3
|
|
[Episode 148150] reward=-120429113.3 actor_loss=0.2977 critic_loss=100860797996.5217 entropy=17.5823 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 148160] reward=-118474190.2 actor_loss=0.2802 critic_loss=91830163683.5556 entropy=17.5844 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 148160] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-664191.4 mean_steps=11.3
|
|
[Episode 148170] reward=-115683721.8 actor_loss=0.3067 critic_loss=91107332986.4348 entropy=17.5865 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 148180] reward=-119587155.0 actor_loss=0.2407 critic_loss=90492440424.2963 entropy=17.5833 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 148180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449804.0 mean_steps=14.5
|
|
[Episode 148190] reward=-116500786.7 actor_loss=0.3052 critic_loss=89865811409.4545 entropy=17.5816 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 148200] reward=-119899943.9 actor_loss=0.2534 critic_loss=95406430981.6889 entropy=17.5833 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 148200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447657.3 mean_steps=15.2
|
|
[Episode 148210] reward=-120398571.1 actor_loss=0.3723 critic_loss=91778084590.9333 entropy=17.5753 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 148220] reward=-113953162.1 actor_loss=0.3611 critic_loss=93562094592.0000 entropy=17.5721 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 148220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-564674.8 mean_steps=14.1
|
|
[Episode 148230] reward=-114572920.1 actor_loss=0.3218 critic_loss=89194517425.2308 entropy=17.5741 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 148240] reward=-118752646.3 actor_loss=0.3674 critic_loss=96963031267.5556 entropy=17.5863 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 148240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534241.7 mean_steps=13.2
|
|
[Episode 148250] reward=-116780807.5 actor_loss=0.3195 critic_loss=92452048337.4545 entropy=17.5859 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 148260] reward=-123518471.6 actor_loss=0.2961 critic_loss=98435937393.7778 entropy=17.5956 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 148260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443596.0 mean_steps=15.1
|
|
[Episode 148270] reward=-117305388.7 actor_loss=0.3819 critic_loss=91822429669.0526 entropy=17.5926 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 148280] reward=-116867207.5 actor_loss=0.3081 critic_loss=101859049472.0000 entropy=17.5865 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 148280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482750.7 mean_steps=14.8
|
|
[Episode 148290] reward=-119342010.2 actor_loss=0.2882 critic_loss=89746109991.3846 entropy=17.5904 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 148300] reward=-115148354.5 actor_loss=0.3413 critic_loss=89827888176.7619 entropy=17.5964 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 148300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-596416.9 mean_steps=12.8
|
|
[Episode 148310] reward=-114925719.7 actor_loss=0.4536 critic_loss=92836757504.0000 entropy=17.5908 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 148320] reward=-117401320.9 actor_loss=0.3803 critic_loss=92933387287.8139 entropy=17.6006 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 148320] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-311231.8 mean_steps=17.1
|
|
[Episode 148330] reward=-113074356.5 actor_loss=0.3521 critic_loss=90054787072.0000 entropy=17.5954 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 148340] reward=-119274177.4 actor_loss=0.2074 critic_loss=98063366454.3030 entropy=17.5990 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 148340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462900.7 mean_steps=14.4
|
|
[Episode 148350] reward=-115876996.8 actor_loss=0.3128 critic_loss=91956689426.9630 entropy=17.6056 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 148360] reward=-122292993.0 actor_loss=0.2809 critic_loss=97103210496.0000 entropy=17.6085 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 148360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498053.0 mean_steps=14.6
|
|
[Episode 148370] reward=-110345141.2 actor_loss=0.2962 critic_loss=91907232933.1613 entropy=17.6163 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 148380] reward=-121237939.3 actor_loss=0.2792 critic_loss=96114393673.1429 entropy=17.6127 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 148380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-579143.5 mean_steps=13.7
|
|
[Episode 148390] reward=-121843361.9 actor_loss=0.2898 critic_loss=98602051584.0000 entropy=17.6165 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 148400] reward=-119561180.6 actor_loss=0.2904 critic_loss=91820365944.4706 entropy=17.6148 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 148400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563292.8 mean_steps=12.6
|
|
[Episode 148410] reward=-113346265.3 actor_loss=0.3226 critic_loss=89489860900.5714 entropy=17.6116 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 148420] reward=-118751956.5 actor_loss=0.3566 critic_loss=92130385637.5172 entropy=17.5979 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 148420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489763.1 mean_steps=13.8
|
|
[Episode 148430] reward=-117974587.1 actor_loss=0.4124 critic_loss=92222889020.2353 entropy=17.5891 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 148440] reward=-120926362.5 actor_loss=0.2456 critic_loss=129662517854.8148 entropy=17.5915 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 148440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-422025.7 mean_steps=14.3
|
|
[Episode 148450] reward=-123099378.0 actor_loss=0.2729 critic_loss=96077077969.4545 entropy=17.6055 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 148460] reward=-121146110.8 actor_loss=0.2093 critic_loss=93295140864.0000 entropy=17.6058 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 148460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528556.3 mean_steps=13.3
|
|
[Episode 148470] reward=-120066268.8 actor_loss=0.2835 critic_loss=90985555365.6471 entropy=17.5828 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 148480] reward=-119751210.8 actor_loss=0.3361 critic_loss=91216486831.1579 entropy=17.5750 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 148480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-541423.6 mean_steps=12.2
|
|
[Episode 148490] reward=-119008440.7 actor_loss=0.2290 critic_loss=92491521852.9524 entropy=17.5953 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 148500] reward=-120729517.2 actor_loss=0.2811 critic_loss=94693594112.0000 entropy=17.5984 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 148500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560797.0 mean_steps=13.6
|
|
[Episode 148510] reward=-121954628.4 actor_loss=0.2214 critic_loss=96832929792.0000 entropy=17.6101 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 148520] reward=-121085362.5 actor_loss=0.3143 critic_loss=113934173070.2222 entropy=17.6237 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 148520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-606262.5 mean_steps=13.8
|
|
[Episode 148530] reward=-120407273.2 actor_loss=0.3406 critic_loss=93292576768.0000 entropy=17.6462 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 148540] reward=-117512854.0 actor_loss=0.3136 critic_loss=90957123716.1290 entropy=17.6489 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 148540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-573019.4 mean_steps=12.8
|
|
[Episode 148550] reward=-120405668.6 actor_loss=0.3628 critic_loss=91146825636.9778 entropy=17.6492 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 148560] reward=-119471354.5 actor_loss=0.2957 critic_loss=88288433493.3333 entropy=17.6454 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 148560] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-365783.6 mean_steps=16.6
|
|
[Episode 148570] reward=-119392653.9 actor_loss=0.2711 critic_loss=97781785486.2222 entropy=17.6403 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 148580] reward=-117595353.5 actor_loss=0.2787 critic_loss=96927930368.0000 entropy=17.6432 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 148580] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-663928.7 mean_steps=10.6
|
|
[Episode 148590] reward=-118069538.7 actor_loss=0.2370 critic_loss=86917157809.2308 entropy=17.6552 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 148600] reward=-116073075.8 actor_loss=0.2579 critic_loss=88555898526.8965 entropy=17.6312 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 148600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-416319.9 mean_steps=14.2
|
|
[Episode 148610] reward=-116266241.0 actor_loss=0.3487 critic_loss=89457106501.1892 entropy=17.6227 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 148620] reward=-118950653.6 actor_loss=0.2319 critic_loss=89307770060.8000 entropy=17.6246 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 148620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-504334.0 mean_steps=13.3
|
|
[Episode 148630] reward=-120136846.4 actor_loss=0.2631 critic_loss=94949489049.6000 entropy=17.6192 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 148640] reward=-115409589.2 actor_loss=0.3476 critic_loss=107273923098.9474 entropy=17.6149 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 148640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507329.2 mean_steps=13.1
|
|
[Episode 148650] reward=-119449059.3 actor_loss=0.2754 critic_loss=91751628055.2727 entropy=17.6030 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 148660] reward=-123156947.2 actor_loss=0.2163 critic_loss=92698382468.1290 entropy=17.6091 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 148660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557998.2 mean_steps=13.3
|
|
[Episode 148670] reward=-115458915.0 actor_loss=0.2861 critic_loss=93128953036.8000 entropy=17.6181 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 148680] reward=-116978419.1 actor_loss=0.3020 critic_loss=92972710297.6000 entropy=17.6226 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 148680] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-683578.3 mean_steps=10.8
|
|
[Episode 148690] reward=-123228822.0 actor_loss=0.3080 critic_loss=95322777008.3556 entropy=17.6096 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 148700] reward=-116664340.4 actor_loss=0.3632 critic_loss=87694058564.2667 entropy=17.6321 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 148700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491199.8 mean_steps=13.9
|
|
[Episode 148710] reward=-121838263.9 actor_loss=0.3142 critic_loss=95530637175.4667 entropy=17.6166 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 148720] reward=-123073756.0 actor_loss=0.3052 critic_loss=145827618087.8222 entropy=17.6380 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 148720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571146.6 mean_steps=13.2
|
|
[Episode 148730] reward=-131862526.8 actor_loss=0.2244 critic_loss=547759166441.2444 entropy=17.6381 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 148740] reward=-120227463.8 actor_loss=0.3414 critic_loss=93488977419.3778 entropy=17.6545 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 148740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-642587.7 mean_steps=13.2
|
|
[Episode 148750] reward=-124038144.6 actor_loss=0.2232 critic_loss=95408375216.3556 entropy=17.6500 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 148760] reward=-117914737.8 actor_loss=0.2645 critic_loss=88699489379.0968 entropy=17.6503 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 148760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-381433.0 mean_steps=15.1
|
|
[Episode 148770] reward=-121934747.1 actor_loss=0.3248 critic_loss=94775174394.3111 entropy=17.6635 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 148780] reward=-118990021.4 actor_loss=0.2609 critic_loss=97896880882.5263 entropy=17.6629 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 148780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-426349.3 mean_steps=14.2
|
|
[Episode 148790] reward=-121949643.4 actor_loss=0.2196 critic_loss=100299424736.9697 entropy=17.6614 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 148800] reward=-120044076.2 actor_loss=0.4982 critic_loss=94382662451.2000 entropy=17.6595 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 148800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517425.8 mean_steps=15.0
|
|
[Episode 148810] reward=-118636343.0 actor_loss=0.3514 critic_loss=91074004185.2121 entropy=17.6602 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 148820] reward=-121204507.2 actor_loss=0.2617 critic_loss=99988178534.4000 entropy=17.6693 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 148820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597644.8 mean_steps=12.5
|
|
[Episode 148830] reward=-120680596.2 actor_loss=0.2035 critic_loss=90702781982.1176 entropy=17.6630 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 148840] reward=-119413326.5 actor_loss=0.3148 critic_loss=92566308278.8571 entropy=17.6619 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 148840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-464667.3 mean_steps=13.7
|
|
[Episode 148850] reward=-120997960.9 actor_loss=0.4563 critic_loss=106699419648.0000 entropy=17.6642 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 148860] reward=-123008814.8 actor_loss=0.2763 critic_loss=97642058974.6087 entropy=17.6591 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 148860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415401.3 mean_steps=15.9
|
|
[Episode 148870] reward=-115582943.8 actor_loss=0.2544 critic_loss=89026967221.6774 entropy=17.6558 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 148880] reward=-123112750.6 actor_loss=0.1673 critic_loss=94125112173.7143 entropy=17.6666 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 148880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423727.9 mean_steps=14.9
|
|
[Episode 148890] reward=-119999873.0 actor_loss=0.2749 critic_loss=95669141876.3636 entropy=17.6785 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 148900] reward=-115474486.8 actor_loss=0.2924 critic_loss=89622608812.9730 entropy=17.6824 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 148900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-373053.9 mean_steps=15.8
|
|
[Episode 148910] reward=-121583333.2 actor_loss=0.2500 critic_loss=92148564309.3333 entropy=17.6723 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 148920] reward=-121046900.0 actor_loss=0.2511 critic_loss=90701404783.3044 entropy=17.6719 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 148920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-597960.3 mean_steps=13.5
|
|
[Episode 148930] reward=-121353778.6 actor_loss=0.2939 critic_loss=94966919714.1333 entropy=17.6770 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 148940] reward=-120689959.7 actor_loss=0.1792 critic_loss=94480686489.6000 entropy=17.6810 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 148940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-395906.7 mean_steps=13.9
|
|
[Episode 148950] reward=-119024474.1 actor_loss=0.2658 critic_loss=90823264574.5778 entropy=17.6644 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 148960] reward=-115516145.7 actor_loss=0.3792 critic_loss=89998348379.0222 entropy=17.6604 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 148960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476639.0 mean_steps=13.7
|
|
[Episode 148970] reward=-120906529.9 actor_loss=0.2137 critic_loss=92222647864.8889 entropy=17.6684 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 148980] reward=-121172656.8 actor_loss=0.2642 critic_loss=93177598771.2000 entropy=17.6667 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 148980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535134.9 mean_steps=13.5
|
|
[Episode 148990] reward=-119526234.5 actor_loss=0.3222 critic_loss=92988065694.4762 entropy=17.6690 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 149000] reward=-122904299.4 actor_loss=0.2891 critic_loss=94391972864.0000 entropy=17.6743 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 149000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-572552.6 mean_steps=13.2
|
|
[Episode 149010] reward=-116615245.6 actor_loss=0.2670 critic_loss=92634937806.4516 entropy=17.6769 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 149020] reward=-119233302.5 actor_loss=0.3013 critic_loss=99828039424.0000 entropy=17.6725 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 149020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479441.0 mean_steps=13.6
|
|
[Episode 149030] reward=-123213162.0 actor_loss=0.3566 critic_loss=107868922220.0889 entropy=17.6782 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 149040] reward=-121532400.2 actor_loss=0.3031 critic_loss=100821122744.3200 entropy=17.6726 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 149040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555164.7 mean_steps=12.5
|
|
[Episode 149050] reward=-122851571.1 actor_loss=0.3132 critic_loss=100304012524.3077 entropy=17.6692 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 149060] reward=-116129938.6 actor_loss=0.3847 critic_loss=91206717635.0476 entropy=17.6633 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 149060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513170.5 mean_steps=13.9
|
|
[Episode 149070] reward=-117453526.6 actor_loss=0.2435 critic_loss=90243558701.1765 entropy=17.6666 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 149080] reward=-122623336.2 actor_loss=0.2805 critic_loss=97124665002.6667 entropy=17.6787 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 149080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-382507.1 mean_steps=15.9
|
|
[Episode 149090] reward=-119096648.9 actor_loss=0.3775 critic_loss=94049745305.6000 entropy=17.6891 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 149100] reward=-121426896.1 actor_loss=0.2818 critic_loss=90208029718.7556 entropy=17.6884 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 149100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448146.3 mean_steps=14.4
|
|
[Episode 149110] reward=-118774924.5 actor_loss=0.2924 critic_loss=88586074747.5862 entropy=17.6882 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 149120] reward=-119300475.3 actor_loss=0.2056 critic_loss=92128958691.5556 entropy=17.6775 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 149120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514567.0 mean_steps=13.8
|
|
[Episode 149130] reward=-122166537.5 actor_loss=0.2583 critic_loss=97456043956.1481 entropy=17.6795 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 149140] reward=-118371348.2 actor_loss=0.3714 critic_loss=97250035088.6956 entropy=17.6871 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 149140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-559431.3 mean_steps=13.9
|
|
[Episode 149150] reward=-116282424.6 actor_loss=0.3157 critic_loss=90055243414.5882 entropy=17.6874 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 149160] reward=-120707483.9 actor_loss=0.4095 critic_loss=94371302400.0000 entropy=17.6945 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1536 front_blocked=0
|
|
[Eval 149160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613786.0 mean_steps=12.8
|
|
[Episode 149170] reward=-123886031.5 actor_loss=0.2290 critic_loss=95229634939.2593 entropy=17.6944 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 149180] reward=-121159269.7 actor_loss=0.2391 critic_loss=88910035717.6889 entropy=17.6976 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 149180] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-406650.5 mean_steps=16.8
|
|
[Episode 149190] reward=-119603240.0 actor_loss=0.2337 critic_loss=90392946980.5714 entropy=17.6951 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 149200] reward=-117509774.0 actor_loss=0.3036 critic_loss=90783871622.7368 entropy=17.6807 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 149200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-362508.7 mean_steps=15.4
|
|
[Episode 149210] reward=-113349760.0 actor_loss=0.3754 critic_loss=94039123700.8696 entropy=17.6801 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 149220] reward=-120701664.8 actor_loss=0.3861 critic_loss=92647885677.7143 entropy=17.6725 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 149220] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-625784.9 mean_steps=11.8
|
|
[Episode 149230] reward=-120649003.7 actor_loss=0.2267 critic_loss=98120501020.4444 entropy=17.6643 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 149240] reward=-121294779.5 actor_loss=0.2557 critic_loss=94161453918.3158 entropy=17.6521 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 149240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-670494.9 mean_steps=13.0
|
|
[Episode 149250] reward=-120491174.2 actor_loss=0.3461 critic_loss=93178581577.1429 entropy=17.6518 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 149260] reward=-119245191.6 actor_loss=0.2435 critic_loss=95185204346.8800 entropy=17.6421 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 149260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-622773.4 mean_steps=12.7
|
|
[Episode 149270] reward=-116671440.6 actor_loss=0.3918 critic_loss=87386129461.8947 entropy=17.6399 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 149280] reward=-118826551.7 actor_loss=0.2681 critic_loss=94827841536.0000 entropy=17.6420 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 149280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500985.6 mean_steps=13.7
|
|
[Episode 149290] reward=-121489956.5 actor_loss=0.2877 critic_loss=94586241816.7742 entropy=17.6369 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 149300] reward=-123532040.8 actor_loss=0.2810 critic_loss=93843709207.2727 entropy=17.6439 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 149300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497786.1 mean_steps=14.4
|
|
[Episode 149310] reward=-114191589.1 actor_loss=0.2904 critic_loss=89746130873.3793 entropy=17.6403 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 149320] reward=-118071495.1 actor_loss=0.2878 critic_loss=89667787214.4516 entropy=17.6455 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 149320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-484063.7 mean_steps=15.3
|
|
[Episode 149330] reward=-115872081.4 actor_loss=0.3326 critic_loss=88142739683.5556 entropy=17.6579 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 149340] reward=-121071875.7 actor_loss=0.3608 critic_loss=95108639877.5652 entropy=17.6654 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 149340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-504444.7 mean_steps=15.3
|
|
[Episode 149350] reward=-124726492.3 actor_loss=0.2720 critic_loss=96598888024.2759 entropy=17.6654 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 149360] reward=-122930400.8 actor_loss=0.2535 critic_loss=95077493838.7692 entropy=17.6565 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 149360] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-708154.7 mean_steps=11.4
|
|
[Episode 149370] reward=-115975678.7 actor_loss=0.3489 critic_loss=89515439217.7778 entropy=17.6688 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 149380] reward=-121266033.4 actor_loss=0.2835 critic_loss=95703595008.0000 entropy=17.6854 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 149380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521032.7 mean_steps=14.7
|
|
[Episode 149390] reward=-117055861.9 actor_loss=0.3105 critic_loss=91991409900.3077 entropy=17.6857 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 149400] reward=-122227704.9 actor_loss=0.3311 critic_loss=93738831730.7586 entropy=17.6790 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 149400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507746.0 mean_steps=13.8
|
|
[Episode 149410] reward=-118992423.4 actor_loss=0.2339 critic_loss=87986571625.4118 entropy=17.6813 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 149420] reward=-116106250.9 actor_loss=0.2702 critic_loss=95034524558.2222 entropy=17.6833 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 149420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463894.1 mean_steps=15.4
|
|
[Episode 149430] reward=-124851300.3 actor_loss=0.2926 critic_loss=97703901047.4667 entropy=17.6973 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 149440] reward=-114012278.1 actor_loss=0.3825 critic_loss=90947106133.3333 entropy=17.7036 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 149440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408068.8 mean_steps=15.1
|
|
[Episode 149450] reward=-119636107.5 actor_loss=0.3584 critic_loss=94139401648.3556 entropy=17.7035 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 149460] reward=-117922075.1 actor_loss=0.2986 critic_loss=89591332499.9111 entropy=17.7049 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 149460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-466482.1 mean_steps=13.6
|
|
[Episode 149470] reward=-116617248.2 actor_loss=0.2695 critic_loss=89294583710.4762 entropy=17.7010 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 149480] reward=-115320505.2 actor_loss=0.2661 critic_loss=92502897346.2069 entropy=17.7052 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 149480] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-314263.5 mean_steps=16.4
|
|
[Episode 149490] reward=-122338545.4 actor_loss=0.2308 critic_loss=100897333794.1333 entropy=17.6873 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 149500] reward=-118778075.5 actor_loss=0.3532 critic_loss=96502107522.8445 entropy=17.6989 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 149500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-386994.7 mean_steps=15.1
|
|
[Episode 149510] reward=-114478435.1 actor_loss=0.3347 critic_loss=90582093732.9778 entropy=17.6783 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 149520] reward=-120939143.5 actor_loss=0.2888 critic_loss=96965488640.0000 entropy=17.6667 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 149520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421806.6 mean_steps=15.3
|
|
[Episode 149530] reward=-117515666.8 actor_loss=0.3474 critic_loss=91107209801.1429 entropy=17.6739 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 149540] reward=-117050110.2 actor_loss=0.3019 critic_loss=98373167642.9474 entropy=17.6806 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 149540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-607030.1 mean_steps=13.6
|
|
[Episode 149550] reward=-121287983.7 actor_loss=0.1717 critic_loss=90660290924.0889 entropy=17.6874 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 149560] reward=-119555023.7 actor_loss=0.3582 critic_loss=94081522892.8000 entropy=17.7024 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 149560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-558743.9 mean_steps=12.3
|
|
[Episode 149570] reward=-116741320.9 actor_loss=0.3566 critic_loss=91076983921.7778 entropy=17.7317 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 149580] reward=-115528916.2 actor_loss=0.3453 critic_loss=97673110032.5161 entropy=17.7404 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 149580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521153.3 mean_steps=15.0
|
|
[Episode 149590] reward=-117164616.8 actor_loss=0.2588 critic_loss=88029863936.0000 entropy=17.7259 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 149600] reward=-111452207.9 actor_loss=0.2378 critic_loss=85701497287.1111 entropy=17.7274 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 149600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450296.3 mean_steps=15.6
|
|
[Episode 149610] reward=-121101111.0 actor_loss=0.1893 critic_loss=90626307231.2889 entropy=17.7199 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 149620] reward=-124251569.7 actor_loss=0.2023 critic_loss=93237224880.3556 entropy=17.7109 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 149620] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-632124.9 mean_steps=12.4
|
|
[Episode 149630] reward=-119233912.1 actor_loss=0.3347 critic_loss=95140756305.1707 entropy=17.7119 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 149640] reward=-118789702.7 actor_loss=0.2615 critic_loss=92950957955.8788 entropy=17.6855 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 149640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-412136.5 mean_steps=15.3
|
|
[Episode 149650] reward=-118735717.5 actor_loss=0.2600 critic_loss=89938704384.0000 entropy=17.6644 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 149660] reward=-120212597.6 actor_loss=0.2838 critic_loss=91988399340.3077 entropy=17.6552 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 149660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453982.5 mean_steps=14.3
|
|
[Episode 149670] reward=-117153488.8 actor_loss=0.3534 critic_loss=88149546689.4222 entropy=17.6663 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 149680] reward=-118185550.6 actor_loss=0.3249 critic_loss=91502965555.2000 entropy=17.6696 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 149680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536399.6 mean_steps=14.0
|
|
[Episode 149690] reward=-119187923.5 actor_loss=0.2725 critic_loss=87162262323.2000 entropy=17.6510 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 149700] reward=-110779765.7 actor_loss=0.2703 critic_loss=83185324800.0000 entropy=17.6615 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 149700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-374758.2 mean_steps=16.9
|
|
[Episode 149710] reward=-118522245.1 actor_loss=0.2699 critic_loss=92072985338.0465 entropy=17.6683 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 149720] reward=-120690816.1 actor_loss=0.2008 critic_loss=96467718963.2000 entropy=17.6827 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 149720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-363560.2 mean_steps=15.6
|
|
[Episode 149730] reward=-123663882.7 actor_loss=0.2693 critic_loss=94488261245.1555 entropy=17.7117 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 149740] reward=-125769833.7 actor_loss=0.3353 critic_loss=101183148331.7073 entropy=17.7244 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 149740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-576243.8 mean_steps=12.8
|
|
[Episode 149750] reward=-125499224.9 actor_loss=0.2317 critic_loss=96635271395.5556 entropy=17.7138 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 149760] reward=-116547103.9 actor_loss=0.3546 critic_loss=86257441723.7333 entropy=17.7149 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 149760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492526.4 mean_steps=13.6
|
|
[Episode 149770] reward=-122168567.0 actor_loss=0.2907 critic_loss=92806627072.0000 entropy=17.7027 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 149780] reward=-117244912.5 actor_loss=0.3305 critic_loss=92482818275.5556 entropy=17.6983 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 149780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508977.2 mean_steps=13.8
|
|
[Episode 149790] reward=-121707058.4 actor_loss=0.2314 critic_loss=88641913378.1333 entropy=17.6791 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 149800] reward=-119021761.6 actor_loss=0.3830 critic_loss=96586297439.2558 entropy=17.6943 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 149800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603444.6 mean_steps=12.4
|
|
[Episode 149810] reward=-118531081.7 actor_loss=0.2622 critic_loss=96662013453.8378 entropy=17.6863 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 149820] reward=-120366053.1 actor_loss=0.2420 critic_loss=93557590528.0000 entropy=17.6856 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 149820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-369999.6 mean_steps=15.7
|
|
[Episode 149830] reward=-120648259.7 actor_loss=0.2764 critic_loss=97541070665.9556 entropy=17.6844 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 149840] reward=-119374147.6 actor_loss=0.2755 critic_loss=91656284733.4400 entropy=17.7110 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 149840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495750.3 mean_steps=14.8
|
|
[Episode 149850] reward=-121028255.6 actor_loss=0.2855 critic_loss=96037340774.4000 entropy=17.7059 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 149860] reward=-116851354.2 actor_loss=0.4131 critic_loss=88923365011.9111 entropy=17.7012 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 149860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-596140.6 mean_steps=11.6
|
|
[Episode 149870] reward=-121653854.5 actor_loss=0.2546 critic_loss=92623252205.2683 entropy=17.6970 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 149880] reward=-120268864.1 actor_loss=0.1475 critic_loss=92427412912.3556 entropy=17.6991 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 149880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480247.8 mean_steps=13.7
|
|
[Episode 149890] reward=-116488673.1 actor_loss=0.2493 critic_loss=92497086600.5333 entropy=17.7068 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 149900] reward=-121161338.2 actor_loss=0.3189 critic_loss=92225966535.1111 entropy=17.7042 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 149900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585355.6 mean_steps=12.5
|
|
[Episode 149910] reward=-122987790.5 actor_loss=0.1805 critic_loss=95527855197.0909 entropy=17.7067 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 149920] reward=-122020512.6 actor_loss=0.4344 critic_loss=96374130625.9394 entropy=17.7005 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 149920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-443893.7 mean_steps=14.4
|
|
[Episode 149930] reward=-116462112.1 actor_loss=0.2384 critic_loss=89219698073.6000 entropy=17.6912 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 149940] reward=-123041601.6 actor_loss=0.2857 critic_loss=100883890176.0000 entropy=17.6895 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 149940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371220.3 mean_steps=15.8
|
|
[Episode 149950] reward=-120292663.3 actor_loss=0.3002 critic_loss=100280682543.6279 entropy=17.6785 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 149960] reward=-120261984.4 actor_loss=0.3998 critic_loss=92262288770.8445 entropy=17.6644 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1530 front_blocked=0
|
|
[Eval 149960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-519075.1 mean_steps=15.7
|
|
[Episode 149970] reward=-121877532.7 actor_loss=0.3290 critic_loss=93070863200.7111 entropy=17.6664 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 149980] reward=-122897537.6 actor_loss=0.2053 critic_loss=97313243400.2581 entropy=17.6702 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 149980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-376735.0 mean_steps=15.1
|
|
[Episode 149990] reward=-121908704.9 actor_loss=0.3251 critic_loss=97863278396.9524 entropy=17.6668 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 150000] reward=-119500093.8 actor_loss=0.3024 critic_loss=102429871986.7586 entropy=17.6628 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 150000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572007.4 mean_steps=12.4
|
|
[Episode 150010] reward=-122557288.1 actor_loss=0.2393 critic_loss=98089247488.0000 entropy=17.6708 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 150020] reward=-118907634.2 actor_loss=0.3125 critic_loss=90383257786.1818 entropy=17.6805 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 150020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555795.2 mean_steps=12.4
|
|
[Episode 150030] reward=-125308480.4 actor_loss=0.3638 critic_loss=98620865467.7333 entropy=17.6779 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 150040] reward=-124177870.4 actor_loss=0.2890 critic_loss=128419239838.4762 entropy=17.6643 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 150040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-469650.0 mean_steps=15.4
|
|
[Episode 150050] reward=-125272014.9 actor_loss=0.2767 critic_loss=173873935042.2069 entropy=17.6541 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 150060] reward=-114224600.8 actor_loss=0.3303 critic_loss=85622530958.2222 entropy=17.6618 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 150060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505701.3 mean_steps=12.9
|
|
[Episode 150070] reward=-118404465.1 actor_loss=0.3414 critic_loss=90249280275.6923 entropy=17.6675 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 150080] reward=-115363490.8 actor_loss=0.3271 critic_loss=86788505317.5172 entropy=17.6556 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 150080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-428728.6 mean_steps=14.3
|
|
[Episode 150090] reward=-123255711.4 actor_loss=0.2527 critic_loss=112604724906.6667 entropy=17.6560 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 150100] reward=-123218704.3 actor_loss=0.2925 critic_loss=96646116303.2381 entropy=17.6615 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 150100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-363059.4 mean_steps=14.8
|
|
[Episode 150110] reward=-113692936.8 actor_loss=0.3188 critic_loss=89019210725.7436 entropy=17.6528 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 150120] reward=-115168791.9 actor_loss=0.3292 critic_loss=90491710902.8571 entropy=17.6485 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 150120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484596.6 mean_steps=13.7
|
|
[Episode 150130] reward=-118200423.8 actor_loss=0.2217 critic_loss=114071998368.7442 entropy=17.6356 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 150140] reward=-119392248.6 actor_loss=0.2363 critic_loss=92074535506.5806 entropy=17.6510 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 150140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445502.3 mean_steps=15.2
|
|
[Episode 150150] reward=-126380750.5 actor_loss=0.2965 critic_loss=96846194278.4000 entropy=17.6480 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 150160] reward=-118695191.9 actor_loss=0.2497 critic_loss=97829376890.4348 entropy=17.6469 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 150160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-347434.0 mean_steps=16.8
|
|
[Episode 150170] reward=-119094293.8 actor_loss=0.2655 critic_loss=94839531110.4000 entropy=17.6337 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 150180] reward=-115651773.4 actor_loss=0.3514 critic_loss=89651151376.5161 entropy=17.6318 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 150180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-610067.4 mean_steps=13.6
|
|
[Episode 150190] reward=-120746631.5 actor_loss=0.3087 critic_loss=90549859123.2000 entropy=17.6323 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 150200] reward=-121892434.9 actor_loss=0.2118 critic_loss=96178090604.6061 entropy=17.6338 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 150200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444146.9 mean_steps=15.3
|
|
[Episode 150210] reward=-122043188.0 actor_loss=0.3936 critic_loss=101604474880.0000 entropy=17.6267 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 150220] reward=-121247458.2 actor_loss=0.1766 critic_loss=88043531291.6757 entropy=17.6410 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 150220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-402176.4 mean_steps=14.7
|
|
[Episode 150230] reward=-114342760.6 actor_loss=0.2575 critic_loss=90536395662.2222 entropy=17.6250 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 150240] reward=-124264168.2 actor_loss=0.2936 critic_loss=92909722597.7436 entropy=17.6272 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 150240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-635909.7 mean_steps=13.0
|
|
[Episode 150250] reward=-118636436.9 actor_loss=0.2821 critic_loss=93970343058.2857 entropy=17.6450 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 150260] reward=-122000269.2 actor_loss=0.4038 critic_loss=93291774162.0513 entropy=17.6392 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 150260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-396054.0 mean_steps=16.3
|
|
[Episode 150270] reward=-119234830.6 actor_loss=0.2418 critic_loss=89171874611.2000 entropy=17.6491 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 150280] reward=-122798715.4 actor_loss=0.1932 critic_loss=92516303758.2222 entropy=17.6631 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 150280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484881.4 mean_steps=14.6
|
|
[Episode 150290] reward=-116772466.0 actor_loss=0.3162 critic_loss=91854171415.2727 entropy=17.6578 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 150300] reward=-117543551.4 actor_loss=0.3357 critic_loss=91745708441.6000 entropy=17.6684 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 150300] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-394488.1 mean_steps=16.9
|
|
[Episode 150310] reward=-120004135.9 actor_loss=0.1890 critic_loss=95982926803.4783 entropy=17.6840 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 150320] reward=-117479756.3 actor_loss=0.3108 critic_loss=89941101408.7111 entropy=17.6761 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 150320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546620.9 mean_steps=12.9
|
|
[Episode 150330] reward=-116698846.4 actor_loss=0.2987 critic_loss=88096689356.8000 entropy=17.6816 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 150340] reward=-123692836.0 actor_loss=0.3142 critic_loss=96769264718.7692 entropy=17.6828 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 150340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-471327.4 mean_steps=15.4
|
|
[Episode 150350] reward=-123375551.5 actor_loss=0.2725 critic_loss=91420042922.6667 entropy=17.6677 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 150360] reward=-117842087.6 actor_loss=0.3093 critic_loss=91074362299.7333 entropy=17.6724 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 150360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-568135.0 mean_steps=13.9
|
|
[Episode 150370] reward=-128604306.4 actor_loss=0.2991 critic_loss=502915430676.7568 entropy=17.6767 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 150380] reward=-118759092.6 actor_loss=0.2842 critic_loss=92959813983.0857 entropy=17.6773 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 150380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463988.1 mean_steps=15.2
|
|
[Episode 150390] reward=-129342204.6 actor_loss=0.1642 critic_loss=102458896865.8824 entropy=17.6850 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 150400] reward=-124238787.3 actor_loss=0.2822 critic_loss=93988616533.3333 entropy=17.6913 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 150400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469463.2 mean_steps=14.7
|
|
[Episode 150410] reward=-120043038.1 actor_loss=0.3046 critic_loss=99046642399.1795 entropy=17.7027 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 150420] reward=-118767100.4 actor_loss=0.3022 critic_loss=91930709385.8462 entropy=17.7138 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 150420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-527185.0 mean_steps=13.1
|
|
[Episode 150430] reward=-123617484.0 actor_loss=0.3504 critic_loss=94178970828.8000 entropy=17.6961 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 150440] reward=-116251654.6 actor_loss=0.3718 critic_loss=85483793885.8667 entropy=17.7038 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 150440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522451.8 mean_steps=13.8
|
|
[Episode 150450] reward=-115718798.9 actor_loss=0.2689 critic_loss=87448826801.2308 entropy=17.6893 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 150460] reward=-120356722.6 actor_loss=0.2477 critic_loss=90906468724.3636 entropy=17.6738 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 150460] success_rate=0.750 qp_infeasible_rate=0.250 mean_return=-220507.0 mean_steps=19.1
|
|
[Episode 150470] reward=-120777433.6 actor_loss=0.2274 critic_loss=99285031424.0000 entropy=17.6681 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 150480] reward=-122566629.0 actor_loss=0.2991 critic_loss=92003921237.3333 entropy=17.6729 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 150480] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-622176.3 mean_steps=11.2
|
|
[Episode 150490] reward=-121121735.1 actor_loss=0.2619 critic_loss=89470873941.3333 entropy=17.6818 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 150500] reward=-121572614.2 actor_loss=0.2639 critic_loss=98244224848.4571 entropy=17.6843 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 150500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463096.8 mean_steps=14.2
|
|
[Episode 150510] reward=-123713960.1 actor_loss=0.2383 critic_loss=92335864527.5676 entropy=17.6926 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 150520] reward=-119188322.2 actor_loss=0.4003 critic_loss=95603932569.6000 entropy=17.6820 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 150520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517834.0 mean_steps=14.8
|
|
[Episode 150530] reward=-120416619.8 actor_loss=0.3254 critic_loss=99284947399.1111 entropy=17.6745 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 150540] reward=-118661098.5 actor_loss=0.3010 critic_loss=91162082157.7143 entropy=17.6705 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 150540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511536.4 mean_steps=13.8
|
|
[Episode 150550] reward=-119799710.5 actor_loss=0.3310 critic_loss=92516463047.1111 entropy=17.6820 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 150560] reward=-123284287.1 actor_loss=0.3691 critic_loss=98819422253.5111 entropy=17.6859 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 150560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516624.0 mean_steps=13.8
|
|
[Episode 150570] reward=-121394437.1 actor_loss=0.3757 critic_loss=94227235284.1143 entropy=17.6751 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 150580] reward=-117148622.1 actor_loss=0.3759 critic_loss=87385679313.4545 entropy=17.6713 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 150580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-496488.1 mean_steps=12.8
|
|
[Episode 150590] reward=-119291160.9 actor_loss=0.4539 critic_loss=96841949983.2195 entropy=17.6716 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1530 front_blocked=0
|
|
[Episode 150600] reward=-115808936.3 actor_loss=0.3080 critic_loss=94334422616.2759 entropy=17.6711 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 150600] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-263543.3 mean_steps=17.9
|
|
[Episode 150610] reward=-122234006.2 actor_loss=0.2781 critic_loss=96769402652.4444 entropy=17.6740 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 150620] reward=-117123211.7 actor_loss=0.2412 critic_loss=86884961157.1200 entropy=17.6746 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 150620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578061.4 mean_steps=12.3
|
|
[Episode 150630] reward=-117375035.6 actor_loss=0.4397 critic_loss=91354656768.0000 entropy=17.6582 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 150640] reward=-116463117.9 actor_loss=0.3642 critic_loss=86952746263.2727 entropy=17.6627 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 150640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-406326.4 mean_steps=14.2
|
|
[Episode 150650] reward=-108759655.9 actor_loss=0.3701 critic_loss=86467953095.1111 entropy=17.6579 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 150660] reward=-121445743.3 actor_loss=0.3642 critic_loss=91366097106.0513 entropy=17.6652 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 150660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-596240.2 mean_steps=13.6
|
|
[Episode 150670] reward=-122548140.8 actor_loss=0.2376 critic_loss=118161260953.6000 entropy=17.6601 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 150680] reward=-123744549.1 actor_loss=0.1289 critic_loss=100018755907.3684 entropy=17.6652 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 150680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-345476.7 mean_steps=15.6
|
|
[Episode 150690] reward=-120144829.3 actor_loss=0.2323 critic_loss=105727537524.3636 entropy=17.6648 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 150700] reward=-123429335.3 actor_loss=0.2042 critic_loss=90672063110.7368 entropy=17.6606 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 150700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-556345.1 mean_steps=14.0
|
|
[Episode 150710] reward=-122097799.7 actor_loss=0.3016 critic_loss=99976269088.8205 entropy=17.6502 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 150720] reward=-122729827.2 actor_loss=0.2632 critic_loss=93090969442.4615 entropy=17.6350 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 150720] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-734790.1 mean_steps=9.6
|
|
[Episode 150730] reward=-119288015.7 actor_loss=0.2022 critic_loss=86328605354.6667 entropy=17.6126 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 150740] reward=-120238545.8 actor_loss=0.3837 critic_loss=90434962773.3333 entropy=17.6163 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 150740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-605964.8 mean_steps=13.3
|
|
[Episode 150750] reward=-122301867.8 actor_loss=0.2678 critic_loss=95238281286.6207 entropy=17.6211 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 150760] reward=-122193795.6 actor_loss=0.2555 critic_loss=92723362669.7143 entropy=17.6209 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 150760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491925.7 mean_steps=14.5
|
|
[Episode 150770] reward=-117385180.8 actor_loss=0.3477 critic_loss=94802693066.1053 entropy=17.6331 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 150780] reward=-120922535.6 actor_loss=0.2383 critic_loss=92663093657.6000 entropy=17.6358 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 150780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450163.0 mean_steps=15.2
|
|
[Episode 150790] reward=-116241011.9 actor_loss=0.3528 critic_loss=90200173772.8000 entropy=17.6464 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 150800] reward=-122480945.3 actor_loss=0.2665 critic_loss=93672847883.9070 entropy=17.6512 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 150800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-427323.9 mean_steps=16.9
|
|
[Episode 150810] reward=-117659490.5 actor_loss=0.2623 critic_loss=91424791630.7692 entropy=17.6527 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 150820] reward=-117128896.8 actor_loss=0.3169 critic_loss=86899398337.4222 entropy=17.6500 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 150820] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-319865.2 mean_steps=17.4
|
|
[Episode 150830] reward=-117281049.5 actor_loss=0.3545 critic_loss=91206816450.2069 entropy=17.6330 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 150840] reward=-123294929.2 actor_loss=0.2383 critic_loss=98807217356.8000 entropy=17.6411 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 150840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476366.5 mean_steps=14.4
|
|
[Episode 150850] reward=-124733469.7 actor_loss=0.1730 critic_loss=93848115404.8000 entropy=17.6246 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 150860] reward=-123369028.3 actor_loss=0.3505 critic_loss=98324802036.6222 entropy=17.6314 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 150860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557128.7 mean_steps=12.9
|
|
[Episode 150870] reward=-111361555.9 actor_loss=0.2818 critic_loss=85556370743.6522 entropy=17.6247 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 150880] reward=-117403246.3 actor_loss=0.2836 critic_loss=91578155752.7273 entropy=17.6192 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 150880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462148.7 mean_steps=14.7
|
|
[Episode 150890] reward=-121524707.7 actor_loss=0.1770 critic_loss=91555092187.4286 entropy=17.6354 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 150900] reward=-119190582.8 actor_loss=0.3343 critic_loss=91388962673.1163 entropy=17.6465 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 150900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537735.7 mean_steps=14.2
|
|
[Episode 150910] reward=-123637110.7 actor_loss=0.2727 critic_loss=95970349590.2609 entropy=17.6635 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 150920] reward=-124801431.1 actor_loss=0.2510 critic_loss=99915859332.4138 entropy=17.6637 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 150920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438699.5 mean_steps=15.1
|
|
[Episode 150930] reward=-123464184.1 actor_loss=0.3487 critic_loss=99273707835.0769 entropy=17.6696 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 150940] reward=-118457907.6 actor_loss=0.3634 critic_loss=92462231336.4211 entropy=17.6719 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 150940] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-729894.2 mean_steps=11.7
|
|
[Episode 150950] reward=-123364744.2 actor_loss=0.3264 critic_loss=100110492254.8148 entropy=17.6774 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 150960] reward=-123739300.2 actor_loss=0.2006 critic_loss=96311217313.6842 entropy=17.6789 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 150960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-474099.9 mean_steps=15.3
|
|
[Episode 150970] reward=-116594825.7 actor_loss=0.2590 critic_loss=90506660770.9091 entropy=17.6705 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 150980] reward=-119968151.3 actor_loss=0.4011 critic_loss=90995549330.2857 entropy=17.6673 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 150980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-525056.0 mean_steps=12.2
|
|
[Episode 150990] reward=-124514215.2 actor_loss=0.2359 critic_loss=94093989751.4667 entropy=17.6696 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 151000] reward=-122954168.2 actor_loss=0.2542 critic_loss=93312130707.9111 entropy=17.6569 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 151000] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-366207.5 mean_steps=16.2
|
|
[Episode 151010] reward=-122890469.2 actor_loss=0.3176 critic_loss=90158253972.2105 entropy=17.6391 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 151020] reward=-118356260.0 actor_loss=0.3729 critic_loss=93280006963.2000 entropy=17.6477 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 151020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439835.9 mean_steps=15.1
|
|
[Episode 151030] reward=-122384613.5 actor_loss=0.2592 critic_loss=94828530944.0000 entropy=17.6485 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 151040] reward=-123035535.1 actor_loss=0.2615 critic_loss=90423056294.9565 entropy=17.6529 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 151040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415186.4 mean_steps=15.9
|
|
[Episode 151050] reward=-117606477.4 actor_loss=0.3365 critic_loss=88677754624.0000 entropy=17.6568 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 151060] reward=-116812929.9 actor_loss=0.2411 critic_loss=88027258470.4000 entropy=17.6455 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 151060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-360435.4 mean_steps=16.7
|
|
[Episode 151070] reward=-124421966.4 actor_loss=0.2605 critic_loss=97676969984.0000 entropy=17.6469 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 151080] reward=-114390225.1 actor_loss=0.2740 critic_loss=101087950402.7826 entropy=17.6591 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 151080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454606.4 mean_steps=14.6
|
|
[Episode 151090] reward=-121789861.1 actor_loss=0.3019 critic_loss=93147034624.0000 entropy=17.6545 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 151100] reward=-117102382.6 actor_loss=0.3180 critic_loss=85303411252.9655 entropy=17.6568 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 151100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488405.4 mean_steps=14.7
|
|
[Episode 151110] reward=-118067048.4 actor_loss=0.3730 critic_loss=85045224789.3333 entropy=17.6630 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 151120] reward=-122517628.9 actor_loss=0.2982 critic_loss=95240913640.7273 entropy=17.6542 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 151120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-548621.4 mean_steps=14.4
|
|
[Episode 151130] reward=-115620505.5 actor_loss=0.3246 critic_loss=83240955159.2727 entropy=17.6491 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 151140] reward=-117637892.8 actor_loss=0.2948 critic_loss=94256884004.5714 entropy=17.6474 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 151140] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-649088.7 mean_steps=11.8
|
|
[Episode 151150] reward=-121358774.0 actor_loss=0.3681 critic_loss=92957403243.7895 entropy=17.6632 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 151160] reward=-125853140.0 actor_loss=0.2110 critic_loss=98762465280.0000 entropy=17.6644 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 151160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444761.7 mean_steps=15.6
|
|
[Episode 151170] reward=-122732387.9 actor_loss=0.3418 critic_loss=95676712773.8182 entropy=17.6690 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 151180] reward=-121903295.5 actor_loss=0.2308 critic_loss=95487547994.3529 entropy=17.6830 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 151180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391448.5 mean_steps=15.8
|
|
[Episode 151190] reward=-115948342.1 actor_loss=0.3293 critic_loss=85873028002.9091 entropy=17.6786 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 151200] reward=-120378916.2 actor_loss=0.3294 critic_loss=89020377380.5714 entropy=17.6714 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 151200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503505.5 mean_steps=14.0
|
|
[Episode 151210] reward=-121774366.3 actor_loss=0.3111 critic_loss=94882895579.4286 entropy=17.6580 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 151220] reward=-119342781.2 actor_loss=0.3274 critic_loss=86861951698.8235 entropy=17.6589 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 151220] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-627808.5 mean_steps=11.1
|
|
[Episode 151230] reward=-122356138.9 actor_loss=0.1977 critic_loss=95191987768.8889 entropy=17.6530 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 151240] reward=-118336419.9 actor_loss=0.2022 critic_loss=94762625901.7143 entropy=17.6358 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 151240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-453879.0 mean_steps=16.1
|
|
[Episode 151250] reward=-118119117.4 actor_loss=0.3713 critic_loss=90022617800.3478 entropy=17.6417 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 151260] reward=-121033649.9 actor_loss=0.2862 critic_loss=91474383088.9412 entropy=17.6556 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 151260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-421173.0 mean_steps=16.1
|
|
[Episode 151270] reward=-111515757.6 actor_loss=0.3574 critic_loss=89313230848.0000 entropy=17.6538 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 151280] reward=-124258084.9 actor_loss=0.1737 critic_loss=93388895810.7826 entropy=17.6572 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 151280] success_rate=0.750 qp_infeasible_rate=0.250 mean_return=-159029.4 mean_steps=19.1
|
|
[Episode 151290] reward=-122571003.9 actor_loss=0.3103 critic_loss=94708514360.8889 entropy=17.6694 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 151300] reward=-115018324.4 actor_loss=0.3666 critic_loss=94641118061.7143 entropy=17.6643 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 151300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427169.4 mean_steps=15.2
|
|
[Episode 151310] reward=-114112475.8 actor_loss=0.3373 critic_loss=86195725994.6667 entropy=17.6582 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 151320] reward=-124179814.2 actor_loss=0.2760 critic_loss=94378893942.1538 entropy=17.6651 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 151320] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-393897.7 mean_steps=17.9
|
|
[Episode 151330] reward=-118207994.3 actor_loss=0.4249 critic_loss=112232938115.6572 entropy=17.6631 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 151340] reward=-122778625.4 actor_loss=0.2888 critic_loss=93054337609.1429 entropy=17.6681 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 151340] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-719075.6 mean_steps=11.7
|
|
[Episode 151350] reward=-119046746.5 actor_loss=0.3574 critic_loss=93643576320.0000 entropy=17.6642 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 151360] reward=-123784090.6 actor_loss=0.2819 critic_loss=96727694904.8889 entropy=17.6726 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 151360] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-641442.3 mean_steps=11.2
|
|
[Episode 151370] reward=-119729714.7 actor_loss=0.3682 critic_loss=89245648061.6296 entropy=17.6775 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 151380] reward=-122382581.9 actor_loss=0.3493 critic_loss=94167446089.1429 entropy=17.6774 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 151380] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-655708.3 mean_steps=11.6
|
|
[Episode 151390] reward=-122981246.2 actor_loss=0.3384 critic_loss=94789101860.5714 entropy=17.6760 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 151400] reward=-115267168.8 actor_loss=0.3095 critic_loss=85654482053.5652 entropy=17.6802 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 151400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-625824.3 mean_steps=13.1
|
|
[Episode 151410] reward=-122600637.8 actor_loss=0.2731 critic_loss=92698044416.0000 entropy=17.6778 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 151420] reward=-119825598.3 actor_loss=0.3377 critic_loss=90151911424.0000 entropy=17.6763 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 151420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-404098.9 mean_steps=15.3
|
|
[Episode 151430] reward=-119860591.5 actor_loss=0.3184 critic_loss=90496847052.8000 entropy=17.6633 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 151440] reward=-114946269.9 actor_loss=0.3612 critic_loss=86600857442.4615 entropy=17.6618 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 151440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529969.2 mean_steps=12.9
|
|
[Episode 151450] reward=-116387839.7 actor_loss=0.2733 critic_loss=89121093474.4615 entropy=17.6505 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 151460] reward=-118848294.0 actor_loss=0.2403 critic_loss=90313278025.1429 entropy=17.6550 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 151460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580364.8 mean_steps=12.8
|
|
[Episode 151470] reward=-117268822.6 actor_loss=0.2886 critic_loss=89972854935.7037 entropy=17.6550 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 151480] reward=-120116483.6 actor_loss=0.1943 critic_loss=88891135737.4359 entropy=17.6608 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 151480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-382524.1 mean_steps=16.2
|
|
[Episode 151490] reward=-119106047.2 actor_loss=0.3325 critic_loss=90565135872.0000 entropy=17.6629 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 151500] reward=-121322127.4 actor_loss=0.3036 critic_loss=93256001126.4000 entropy=17.6901 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 151500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470510.5 mean_steps=14.8
|
|
[Episode 151510] reward=-120329761.7 actor_loss=0.3564 critic_loss=90130651256.4706 entropy=17.6911 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 151520] reward=-121769805.5 actor_loss=0.3081 critic_loss=95149267535.6444 entropy=17.6908 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 151520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538082.5 mean_steps=13.0
|
|
[Episode 151530] reward=-117790612.1 actor_loss=0.2110 critic_loss=85459681661.0233 entropy=17.6728 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 151540] reward=-116735086.8 actor_loss=0.3126 critic_loss=86100530229.8947 entropy=17.6845 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 151540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462862.5 mean_steps=13.4
|
|
[Episode 151550] reward=-120031154.8 actor_loss=0.3314 critic_loss=92013625856.0000 entropy=17.6708 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 151560] reward=-116467524.6 actor_loss=0.2579 critic_loss=90244946147.5556 entropy=17.6698 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 151560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-642975.2 mean_steps=12.7
|
|
[Episode 151570] reward=-121645001.1 actor_loss=0.2834 critic_loss=88726156839.3846 entropy=17.6632 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 151580] reward=-122217975.9 actor_loss=0.3040 critic_loss=92683579533.2414 entropy=17.6656 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 151580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-527716.2 mean_steps=13.1
|
|
[Episode 151590] reward=-117243658.1 actor_loss=0.3657 critic_loss=92071235128.8889 entropy=17.6682 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 151600] reward=-126196082.7 actor_loss=0.2453 critic_loss=99179892462.9333 entropy=17.6594 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 151600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449607.3 mean_steps=15.6
|
|
[Episode 151610] reward=-124043271.7 actor_loss=0.2548 critic_loss=92426790970.5143 entropy=17.6600 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 151620] reward=-121747112.6 actor_loss=0.3322 critic_loss=94474057318.4000 entropy=17.6499 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 151620] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-688330.5 mean_steps=11.4
|
|
[Episode 151630] reward=-121070488.0 actor_loss=0.3286 critic_loss=91149316291.0476 entropy=17.6745 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 151640] reward=-121346288.3 actor_loss=0.3122 critic_loss=93443351932.3428 entropy=17.6887 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 151640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488589.3 mean_steps=14.6
|
|
[Episode 151650] reward=-118196630.3 actor_loss=0.2922 critic_loss=91493145167.6444 entropy=17.6876 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 151660] reward=-119637508.3 actor_loss=0.3493 critic_loss=85950555806.8965 entropy=17.6801 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 151660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476104.6 mean_steps=13.5
|
|
[Episode 151670] reward=-122316382.9 actor_loss=0.3887 critic_loss=99588365680.6400 entropy=17.6830 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 151680] reward=-120298135.7 actor_loss=0.2595 critic_loss=94947851673.6000 entropy=17.6796 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 151680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515738.3 mean_steps=14.1
|
|
[Episode 151690] reward=-120549184.7 actor_loss=0.2745 critic_loss=90584757341.0909 entropy=17.6840 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 151700] reward=-121996407.9 actor_loss=0.2326 critic_loss=93327950180.1739 entropy=17.6642 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 151700] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-701219.1 mean_steps=11.5
|
|
[Episode 151710] reward=-127375070.4 actor_loss=0.2085 critic_loss=98892531109.6471 entropy=17.6644 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 151720] reward=-127378956.7 actor_loss=0.1488 critic_loss=102550664260.2667 entropy=17.6622 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 151720] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-758129.9 mean_steps=12.0
|
|
[Episode 151730] reward=-116153757.8 actor_loss=0.3000 critic_loss=85981893107.5122 entropy=17.6608 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 151740] reward=-122613020.6 actor_loss=0.2302 critic_loss=90672709383.7576 entropy=17.6523 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 151740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480631.6 mean_steps=13.6
|
|
[Episode 151750] reward=-120930205.5 actor_loss=0.2826 critic_loss=92858991256.2162 entropy=17.6469 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 151760] reward=-119332197.6 actor_loss=0.3671 critic_loss=89379608696.4706 entropy=17.6350 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 151760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538427.8 mean_steps=13.1
|
|
[Episode 151770] reward=-120908186.3 actor_loss=0.2582 critic_loss=97153751883.2941 entropy=17.6455 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 151780] reward=-116868643.2 actor_loss=0.2942 critic_loss=88653361607.1111 entropy=17.6370 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 151780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531527.4 mean_steps=13.7
|
|
[Episode 151790] reward=-113003199.2 actor_loss=0.3254 critic_loss=88179846192.7619 entropy=17.6341 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 151800] reward=-124057667.4 actor_loss=0.2480 critic_loss=99416510008.8889 entropy=17.6329 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 151800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-550776.9 mean_steps=12.3
|
|
[Episode 151810] reward=-120305402.5 actor_loss=0.2795 critic_loss=95787244475.7333 entropy=17.6360 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 151820] reward=-120511440.7 actor_loss=0.3400 critic_loss=88803778842.4828 entropy=17.6366 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 151820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-429273.5 mean_steps=16.2
|
|
[Episode 151830] reward=-122408409.3 actor_loss=0.3603 critic_loss=97675159738.1818 entropy=17.6399 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 151840] reward=-122254230.6 actor_loss=0.3182 critic_loss=96482228633.6000 entropy=17.6383 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 151840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-577387.6 mean_steps=11.7
|
|
[Episode 151850] reward=-116494997.1 actor_loss=0.2859 critic_loss=89900852624.6956 entropy=17.6390 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 151860] reward=-120600990.2 actor_loss=0.2466 critic_loss=88620761634.1333 entropy=17.6346 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 151860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-596260.9 mean_steps=13.8
|
|
[Episode 151870] reward=-118605072.2 actor_loss=0.2923 critic_loss=89784676807.1111 entropy=17.6341 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 151880] reward=-112451353.4 actor_loss=0.3550 critic_loss=84090342688.8205 entropy=17.6338 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 151880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-422883.2 mean_steps=14.2
|
|
[Episode 151890] reward=-121913704.6 actor_loss=0.2005 critic_loss=92281589174.8571 entropy=17.6309 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 151900] reward=-116169675.6 actor_loss=0.3268 critic_loss=88728211364.9778 entropy=17.6226 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 151900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603947.0 mean_steps=12.8
|
|
[Episode 151910] reward=-121608106.5 actor_loss=0.2869 critic_loss=91270061787.4286 entropy=17.6277 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 151920] reward=-117969541.2 actor_loss=0.2709 critic_loss=87957583685.8182 entropy=17.6366 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 151920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499426.9 mean_steps=14.0
|
|
[Episode 151930] reward=-118222392.0 actor_loss=0.2382 critic_loss=90767395862.7556 entropy=17.6287 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 151940] reward=-117712003.3 actor_loss=0.3101 critic_loss=86899966516.9655 entropy=17.6082 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 151940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-527998.0 mean_steps=14.4
|
|
[Episode 151950] reward=-122680101.4 actor_loss=0.2715 critic_loss=94491434552.8889 entropy=17.6123 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 151960] reward=-113564374.4 actor_loss=0.2277 critic_loss=83825977636.5714 entropy=17.6037 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 151960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-585385.7 mean_steps=14.6
|
|
[Episode 151970] reward=-120208512.4 actor_loss=0.3435 critic_loss=93635872857.0435 entropy=17.6015 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 151980] reward=-116102989.8 actor_loss=0.2751 critic_loss=85630357913.6000 entropy=17.6031 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 151980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536624.5 mean_steps=13.2
|
|
[Episode 151990] reward=-122666417.8 actor_loss=0.2807 critic_loss=94171548876.8000 entropy=17.5872 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 152000] reward=-118981941.5 actor_loss=0.3104 critic_loss=90316752158.7200 entropy=17.5943 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 152000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-561888.5 mean_steps=14.4
|
|
[Episode 152010] reward=-117751575.9 actor_loss=0.2360 critic_loss=89220188842.6667 entropy=17.5982 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 152020] reward=-122035615.4 actor_loss=0.3442 critic_loss=96236596932.9231 entropy=17.6072 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 152020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-483816.1 mean_steps=15.4
|
|
[Episode 152030] reward=-118240162.7 actor_loss=0.3824 critic_loss=90414324919.7949 entropy=17.6021 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 152040] reward=-120711169.6 actor_loss=0.3118 critic_loss=91554154268.4444 entropy=17.6002 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 152040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472924.1 mean_steps=14.7
|
|
[Episode 152050] reward=-116709472.9 actor_loss=0.4291 critic_loss=92998187349.3333 entropy=17.5944 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 152060] reward=-119844064.4 actor_loss=0.2801 critic_loss=88961467904.0000 entropy=17.5981 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 152060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560825.9 mean_steps=13.2
|
|
[Episode 152070] reward=-117281958.4 actor_loss=0.3171 critic_loss=88946880512.0000 entropy=17.5956 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 152080] reward=-124255810.1 actor_loss=0.3314 critic_loss=96771386026.6667 entropy=17.5844 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 152080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470212.5 mean_steps=14.6
|
|
[Episode 152090] reward=-120007975.7 actor_loss=0.3830 critic_loss=95443754692.9231 entropy=17.5761 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 152100] reward=-120238272.5 actor_loss=0.3558 critic_loss=86755327317.3333 entropy=17.5813 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 152100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549652.1 mean_steps=13.2
|
|
[Episode 152110] reward=-118810888.7 actor_loss=0.3577 critic_loss=88678158336.0000 entropy=17.5768 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 152120] reward=-119863841.0 actor_loss=0.3005 critic_loss=90636464713.1429 entropy=17.5763 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 152120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405482.1 mean_steps=15.3
|
|
[Episode 152130] reward=-126587623.0 actor_loss=0.2695 critic_loss=96193063936.0000 entropy=17.5760 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 152140] reward=-118085910.6 actor_loss=0.2906 critic_loss=93580725067.2941 entropy=17.5895 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 152140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-575791.2 mean_steps=14.3
|
|
[Episode 152150] reward=-114137159.3 actor_loss=0.3658 critic_loss=84979054119.3846 entropy=17.5784 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 152160] reward=-116565207.2 actor_loss=0.3396 critic_loss=85636023364.2667 entropy=17.5801 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 152160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588488.3 mean_steps=13.6
|
|
[Episode 152170] reward=-118453820.1 actor_loss=0.2833 critic_loss=89954003305.4118 entropy=17.5777 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 152180] reward=-117053469.1 actor_loss=0.4026 critic_loss=88042652129.8824 entropy=17.5803 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 152180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501230.6 mean_steps=14.8
|
|
[Episode 152190] reward=-120794621.7 actor_loss=0.3453 critic_loss=94814992091.4286 entropy=17.5789 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 152200] reward=-116477070.9 actor_loss=0.3420 critic_loss=90766880643.8788 entropy=17.5850 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 152200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-638777.1 mean_steps=11.8
|
|
[Episode 152210] reward=-121261009.2 actor_loss=0.2400 critic_loss=93336611498.6667 entropy=17.5876 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 152220] reward=-122705833.7 actor_loss=0.2965 critic_loss=91962211077.6889 entropy=17.5894 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 152220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529973.5 mean_steps=14.2
|
|
[Episode 152230] reward=-120200458.0 actor_loss=0.3150 critic_loss=90224522769.6552 entropy=17.5821 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 152240] reward=-123659283.4 actor_loss=0.1945 critic_loss=93923563640.4706 entropy=17.5783 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 152240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-437397.2 mean_steps=16.6
|
|
[Episode 152250] reward=-118059135.7 actor_loss=0.3084 critic_loss=91621152085.3333 entropy=17.5820 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 152260] reward=-113856033.3 actor_loss=0.3758 critic_loss=85379946632.5333 entropy=17.5942 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 152260] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-373864.8 mean_steps=16.7
|
|
[Episode 152270] reward=-120944152.1 actor_loss=0.3165 critic_loss=93298951782.4000 entropy=17.5901 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 152280] reward=-122296098.3 actor_loss=0.2396 critic_loss=94308958208.0000 entropy=17.5925 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 152280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446452.0 mean_steps=15.1
|
|
[Episode 152290] reward=-123683954.1 actor_loss=0.2975 critic_loss=98640189253.8182 entropy=17.5978 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 152300] reward=-120197848.0 actor_loss=0.3470 critic_loss=92766486072.8889 entropy=17.5882 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 152300] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-707401.0 mean_steps=12.1
|
|
[Episode 152310] reward=-120434593.8 actor_loss=0.1883 critic_loss=93658322261.3333 entropy=17.5802 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 152320] reward=-118119200.3 actor_loss=0.3198 critic_loss=87242482145.8824 entropy=17.5973 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 152320] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-324318.3 mean_steps=16.2
|
|
[Episode 152330] reward=-118733048.6 actor_loss=0.2629 critic_loss=90308039546.4348 entropy=17.5975 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 152340] reward=-122451107.0 actor_loss=0.2404 critic_loss=96067433525.8947 entropy=17.5952 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 152340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-677473.5 mean_steps=12.1
|
|
[Episode 152350] reward=-119713815.4 actor_loss=0.2438 critic_loss=93003168550.7879 entropy=17.6025 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 152360] reward=-117367430.9 actor_loss=0.3380 critic_loss=92146898013.0909 entropy=17.5979 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 152360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-623579.1 mean_steps=12.7
|
|
[Episode 152370] reward=-117750245.4 actor_loss=0.2863 critic_loss=89136863565.3954 entropy=17.5949 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 152380] reward=-123020743.8 actor_loss=0.3765 critic_loss=95415197696.0000 entropy=17.5959 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 152380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468463.9 mean_steps=14.2
|
|
[Episode 152390] reward=-117810959.8 actor_loss=0.2277 critic_loss=89143401472.0000 entropy=17.5916 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 152400] reward=-118497024.7 actor_loss=0.3348 critic_loss=94033181478.7879 entropy=17.5934 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 152400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-621340.7 mean_steps=12.8
|
|
[Episode 152410] reward=-117166769.9 actor_loss=0.3459 critic_loss=92008476672.0000 entropy=17.5974 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 152420] reward=-119504120.1 actor_loss=0.3177 critic_loss=94607547553.6842 entropy=17.6022 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 152420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-428249.5 mean_steps=16.2
|
|
[Episode 152430] reward=-118696546.5 actor_loss=0.2936 critic_loss=87156377479.5294 entropy=17.5993 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 152440] reward=-120821713.8 actor_loss=0.2307 critic_loss=97758628249.6000 entropy=17.6008 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 152440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490749.0 mean_steps=13.9
|
|
[Episode 152450] reward=-119465801.7 actor_loss=0.2514 critic_loss=88535265940.6452 entropy=17.6128 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 152460] reward=-119414694.6 actor_loss=0.3234 critic_loss=91288371882.6667 entropy=17.6226 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 152460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-573368.3 mean_steps=14.2
|
|
[Episode 152470] reward=-114824403.0 actor_loss=0.3054 critic_loss=86920049664.0000 entropy=17.6173 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 152480] reward=-117188506.3 actor_loss=0.3893 critic_loss=86700217466.8800 entropy=17.6162 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 152480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461849.4 mean_steps=14.2
|
|
[Episode 152490] reward=-114872415.2 actor_loss=0.4495 critic_loss=89950521262.0800 entropy=17.6267 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 152500] reward=-117053929.5 actor_loss=0.2670 critic_loss=85545067724.8000 entropy=17.6262 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 152500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-546191.0 mean_steps=14.4
|
|
[Episode 152510] reward=-117060698.8 actor_loss=0.3260 critic_loss=89458675985.0667 entropy=17.6321 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 152520] reward=-114298234.9 actor_loss=0.3093 critic_loss=83036650973.8667 entropy=17.6284 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 152520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441083.7 mean_steps=15.2
|
|
[Episode 152530] reward=-117539454.4 actor_loss=0.2843 critic_loss=90506167134.3158 entropy=17.6174 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 152540] reward=-121016148.8 actor_loss=0.3351 critic_loss=92976649898.6667 entropy=17.6107 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 152540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-369094.2 mean_steps=15.9
|
|
[Episode 152550] reward=-125482350.3 actor_loss=0.3276 critic_loss=99271519940.9231 entropy=17.6138 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 152560] reward=-119849076.1 actor_loss=0.2873 critic_loss=90385472755.8095 entropy=17.6004 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 152560] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-639686.1 mean_steps=11.6
|
|
[Episode 152570] reward=-120035039.9 actor_loss=0.3069 critic_loss=89770608594.4889 entropy=17.5869 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 152580] reward=-121552668.2 actor_loss=0.1674 critic_loss=96951836307.9111 entropy=17.5817 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 152580] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-372554.6 mean_steps=16.6
|
|
[Episode 152590] reward=-119287749.9 actor_loss=0.3156 critic_loss=90776755231.0303 entropy=17.5941 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 152600] reward=-119440498.7 actor_loss=0.2638 critic_loss=89958786844.4444 entropy=17.5784 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 152600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536134.5 mean_steps=13.9
|
|
[Episode 152610] reward=-118705622.6 actor_loss=0.3151 critic_loss=89784094378.6667 entropy=17.5681 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 152620] reward=-116422709.4 actor_loss=0.4001 critic_loss=87870998300.4444 entropy=17.5704 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 152620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554713.3 mean_steps=13.3
|
|
[Episode 152630] reward=-122359161.3 actor_loss=0.2825 critic_loss=96038096896.0000 entropy=17.5710 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 152640] reward=-117105777.4 actor_loss=0.3082 critic_loss=83649268121.6000 entropy=17.5661 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 152640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-361153.4 mean_steps=16.9
|
|
[Episode 152650] reward=-120295202.5 actor_loss=0.3080 critic_loss=90852515840.0000 entropy=17.5543 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 152660] reward=-120664146.1 actor_loss=0.3642 critic_loss=92392111718.4000 entropy=17.5499 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 152660] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-347930.6 mean_steps=16.8
|
|
[Episode 152670] reward=-118987467.1 actor_loss=0.3063 critic_loss=88526247377.4545 entropy=17.5619 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 152680] reward=-119085356.2 actor_loss=0.2624 critic_loss=100448958720.0000 entropy=17.5799 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 152680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590104.3 mean_steps=13.6
|
|
[Episode 152690] reward=-110556606.2 actor_loss=0.3491 critic_loss=87167876747.6364 entropy=17.5869 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 152700] reward=-120610025.3 actor_loss=0.3967 critic_loss=130307003572.7059 entropy=17.5835 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 152700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470181.3 mean_steps=13.9
|
|
[Episode 152710] reward=-124763922.8 actor_loss=0.2252 critic_loss=93753581568.0000 entropy=17.5868 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 152720] reward=-119198264.0 actor_loss=0.2269 critic_loss=95307216896.0000 entropy=17.5952 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 152720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552868.8 mean_steps=13.7
|
|
[Episode 152730] reward=-116328047.4 actor_loss=0.3378 critic_loss=90114104481.6842 entropy=17.6010 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 152740] reward=-118435820.8 actor_loss=0.3660 critic_loss=91961530970.3529 entropy=17.5887 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 152740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468565.1 mean_steps=13.4
|
|
[Episode 152750] reward=-117772361.4 actor_loss=0.2248 critic_loss=93947266480.3556 entropy=17.5809 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 152760] reward=-115418160.9 actor_loss=0.3017 critic_loss=87844120234.6667 entropy=17.5728 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 152760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-621775.6 mean_steps=11.7
|
|
[Episode 152770] reward=-119524201.3 actor_loss=0.2745 critic_loss=88011699200.0000 entropy=17.5668 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 152780] reward=-117578563.8 actor_loss=0.2320 critic_loss=89886224143.0588 entropy=17.5618 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 152780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516146.4 mean_steps=13.7
|
|
[Episode 152790] reward=-123204616.4 actor_loss=0.1949 critic_loss=96104996864.0000 entropy=17.5591 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 152800] reward=-120823174.3 actor_loss=0.3563 critic_loss=93861190951.8222 entropy=17.5539 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 152800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-589044.3 mean_steps=11.5
|
|
[Episode 152810] reward=-119372240.2 actor_loss=0.2485 critic_loss=90135155663.2381 entropy=17.5486 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 152820] reward=-113809658.4 actor_loss=0.3395 critic_loss=89198951424.0000 entropy=17.5381 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 152820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-460298.1 mean_steps=15.2
|
|
[Episode 152830] reward=-118373909.6 actor_loss=0.4518 critic_loss=89915958110.3158 entropy=17.5319 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1517 front_blocked=0
|
|
[Episode 152840] reward=-121999390.2 actor_loss=0.3734 critic_loss=92367611979.8519 entropy=17.5366 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 152840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416552.4 mean_steps=15.3
|
|
[Episode 152850] reward=-115117365.3 actor_loss=0.3370 critic_loss=85055380663.7949 entropy=17.5312 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 152860] reward=-120360848.3 actor_loss=0.2269 critic_loss=113706550508.3077 entropy=17.5368 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 152860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-535393.9 mean_steps=12.3
|
|
[Episode 152870] reward=-123117731.0 actor_loss=0.2885 critic_loss=93158125195.6364 entropy=17.5275 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 152880] reward=-119258331.4 actor_loss=0.3206 critic_loss=100527156713.7391 entropy=17.5385 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 152880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553512.8 mean_steps=13.3
|
|
[Episode 152890] reward=-116225741.3 actor_loss=0.3770 critic_loss=89402147480.2162 entropy=17.5385 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 152900] reward=-122303558.7 actor_loss=0.3120 critic_loss=91369845646.2222 entropy=17.5501 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 152900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-580745.2 mean_steps=13.4
|
|
[Episode 152910] reward=-119497380.2 actor_loss=0.2373 critic_loss=96655703736.3200 entropy=17.5477 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 152920] reward=-116839871.8 actor_loss=0.3134 critic_loss=85377466632.2581 entropy=17.5512 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 152920] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-658593.4 mean_steps=10.8
|
|
[Episode 152930] reward=-117294673.8 actor_loss=0.3539 critic_loss=89567983883.1304 entropy=17.5515 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 152940] reward=-114796886.7 actor_loss=0.3812 critic_loss=83084821260.1905 entropy=17.5511 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 152940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424715.7 mean_steps=14.4
|
|
[Episode 152950] reward=-118159803.9 actor_loss=0.3344 critic_loss=93368689517.7143 entropy=17.5639 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 152960] reward=-116462366.1 actor_loss=0.5273 critic_loss=92383158902.1538 entropy=17.5696 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 152960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548010.3 mean_steps=14.1
|
|
[Episode 152970] reward=-117141255.0 actor_loss=0.3320 critic_loss=90988761205.0286 entropy=17.5765 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 152980] reward=-122041700.3 actor_loss=0.2939 critic_loss=89872382829.7143 entropy=17.5810 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 152980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-608327.8 mean_steps=12.1
|
|
[Episode 152990] reward=-117626896.4 actor_loss=0.3278 critic_loss=90728208308.1481 entropy=17.5841 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 153000] reward=-113905194.4 actor_loss=0.3469 critic_loss=83848899060.6222 entropy=17.5815 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 153000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439586.6 mean_steps=14.3
|
|
[Episode 153010] reward=-115287694.8 actor_loss=0.2427 critic_loss=84366175085.7143 entropy=17.5890 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 153020] reward=-114133496.1 actor_loss=0.2548 critic_loss=83380082338.3415 entropy=17.5952 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 153020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-508210.7 mean_steps=14.8
|
|
[Episode 153030] reward=-121593184.3 actor_loss=0.2465 critic_loss=93774800896.0000 entropy=17.5884 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 153040] reward=-119490720.7 actor_loss=0.2407 critic_loss=89563085937.7778 entropy=17.5959 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 153040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-622638.3 mean_steps=12.8
|
|
[Episode 153050] reward=-118636550.4 actor_loss=0.2838 critic_loss=91802833327.1579 entropy=17.5956 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 153060] reward=-119209860.5 actor_loss=0.2411 critic_loss=88839007436.8000 entropy=17.5963 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 153060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-631810.9 mean_steps=13.1
|
|
[Episode 153070] reward=-115621855.8 actor_loss=0.3185 critic_loss=84818613799.3846 entropy=17.5990 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 153080] reward=-117769138.2 actor_loss=0.2941 critic_loss=88001654077.7931 entropy=17.6044 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 153080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470539.4 mean_steps=13.9
|
|
[Episode 153090] reward=-120570574.9 actor_loss=0.2186 critic_loss=92088157846.5882 entropy=17.6158 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 153100] reward=-117467575.8 actor_loss=0.1789 critic_loss=86469580663.4667 entropy=17.6136 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 153100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500315.2 mean_steps=14.9
|
|
[Episode 153110] reward=-117315766.8 actor_loss=0.2523 critic_loss=85064390306.3415 entropy=17.6253 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 153120] reward=-120861238.7 actor_loss=0.3908 critic_loss=90717442226.0870 entropy=17.6150 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 153120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-639227.1 mean_steps=12.2
|
|
[Episode 153130] reward=-120049527.4 actor_loss=0.2742 critic_loss=93914099484.4444 entropy=17.6134 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 153140] reward=-120603033.9 actor_loss=0.1396 critic_loss=90648816298.6667 entropy=17.6163 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 153140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-460452.4 mean_steps=14.1
|
|
[Episode 153150] reward=-118085241.8 actor_loss=0.3118 critic_loss=89737224192.0000 entropy=17.6162 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 153160] reward=-121444269.5 actor_loss=0.2863 critic_loss=97282194363.7333 entropy=17.6109 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 153160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-464905.0 mean_steps=15.1
|
|
[Episode 153170] reward=-118280088.1 actor_loss=0.2230 critic_loss=88363378005.3333 entropy=17.5975 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 153180] reward=-117467537.7 actor_loss=0.3734 critic_loss=85818453619.6129 entropy=17.5923 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 153180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-525066.6 mean_steps=14.8
|
|
[Episode 153190] reward=-119064372.0 actor_loss=0.3169 critic_loss=94163062061.1765 entropy=17.5951 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 153200] reward=-120824762.6 actor_loss=0.2255 critic_loss=90458762074.8387 entropy=17.5968 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 153200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502409.7 mean_steps=14.1
|
|
[Episode 153210] reward=-118928607.6 actor_loss=0.3436 critic_loss=91104524333.5111 entropy=17.6095 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 153220] reward=-118706016.2 actor_loss=0.3417 critic_loss=93149479302.0952 entropy=17.6000 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 153220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415115.9 mean_steps=16.1
|
|
[Episode 153230] reward=-113592264.2 actor_loss=0.4299 critic_loss=84725704996.5714 entropy=17.6073 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 153240] reward=-120266294.8 actor_loss=0.3103 critic_loss=93786130059.6364 entropy=17.5853 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 153240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-607061.4 mean_steps=11.8
|
|
[Episode 153250] reward=-110904481.5 actor_loss=0.4566 critic_loss=80594422121.4118 entropy=17.5824 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 153260] reward=-117226652.5 actor_loss=0.2413 critic_loss=87330205184.0000 entropy=17.5780 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 153260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-628066.1 mean_steps=13.2
|
|
[Episode 153270] reward=-120114004.5 actor_loss=0.1428 critic_loss=84993856378.4348 entropy=17.5757 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 153280] reward=-121995810.5 actor_loss=0.3051 critic_loss=209148287534.5454 entropy=17.5718 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 153280] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-646117.1 mean_steps=10.9
|
|
[Episode 153290] reward=-113215110.2 actor_loss=0.3670 critic_loss=82329060352.0000 entropy=17.5762 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 153300] reward=-123833652.9 actor_loss=0.2676 critic_loss=120501847982.0800 entropy=17.5649 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 153300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560557.1 mean_steps=13.5
|
|
[Episode 153310] reward=-110936008.8 actor_loss=0.4139 critic_loss=83452791193.6000 entropy=17.5576 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 153320] reward=-111581063.1 actor_loss=0.2907 critic_loss=79934890480.4848 entropy=17.5766 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 153320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579330.4 mean_steps=12.9
|
|
[Episode 153330] reward=-118787980.0 actor_loss=0.4001 critic_loss=86516765309.1555 entropy=17.5757 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 153340] reward=-120012244.5 actor_loss=0.3920 critic_loss=86402731804.4444 entropy=17.5650 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 153340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454733.2 mean_steps=14.4
|
|
[Episode 153350] reward=-113137720.4 actor_loss=0.3667 critic_loss=86993638400.0000 entropy=17.5691 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 153360] reward=-111645821.0 actor_loss=0.3957 critic_loss=79864378527.2889 entropy=17.5632 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 153360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550883.8 mean_steps=13.2
|
|
[Episode 153370] reward=-113246250.8 actor_loss=0.3655 critic_loss=90559135984.9412 entropy=17.5590 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 153380] reward=-118026190.7 actor_loss=0.1637 critic_loss=84251873735.1111 entropy=17.5785 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 153380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548484.0 mean_steps=13.8
|
|
[Episode 153390] reward=-116686510.9 actor_loss=0.3185 critic_loss=86641662179.5556 entropy=17.5613 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 153400] reward=-117409933.9 actor_loss=0.2274 critic_loss=89441636260.9778 entropy=17.5468 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 153400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-589422.4 mean_steps=14.0
|
|
[Episode 153410] reward=-118359303.3 actor_loss=0.3114 critic_loss=87136407179.6364 entropy=17.5556 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 153420] reward=-121061326.3 actor_loss=0.2890 critic_loss=96148865934.2222 entropy=17.5455 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 153420] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-638732.3 mean_steps=11.2
|
|
[Episode 153430] reward=-118945667.8 actor_loss=0.3291 critic_loss=88212227953.1163 entropy=17.5542 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 153440] reward=-115962564.1 actor_loss=0.3440 critic_loss=89471510482.4889 entropy=17.5444 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 153440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-631386.2 mean_steps=12.9
|
|
[Episode 153450] reward=-122559594.8 actor_loss=0.3341 critic_loss=115336843459.0476 entropy=17.5570 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 153460] reward=-117895422.2 actor_loss=0.3311 critic_loss=86299937387.1628 entropy=17.5688 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 153460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-632530.9 mean_steps=11.9
|
|
[Episode 153470] reward=-121492554.1 actor_loss=0.2711 critic_loss=90554448449.6410 entropy=17.5750 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 153480] reward=-115639812.4 actor_loss=0.3141 critic_loss=85699931904.0000 entropy=17.5763 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 153480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463897.8 mean_steps=13.8
|
|
[Episode 153490] reward=-116818139.0 actor_loss=0.2415 critic_loss=83531812700.1600 entropy=17.5680 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 153500] reward=-116481700.1 actor_loss=0.3613 critic_loss=86283792624.9412 entropy=17.5719 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 153500] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-661968.3 mean_steps=10.5
|
|
[Episode 153510] reward=-115200099.2 actor_loss=0.3455 critic_loss=84846483456.0000 entropy=17.5766 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 153520] reward=-121912943.8 actor_loss=0.2489 critic_loss=94112026975.0857 entropy=17.5691 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 153520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534552.8 mean_steps=13.3
|
|
[Episode 153530] reward=-120115413.7 actor_loss=0.2412 critic_loss=87379264034.1333 entropy=17.5654 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 153540] reward=-122992426.9 actor_loss=0.2133 critic_loss=93321946409.2903 entropy=17.5500 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 153540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-398434.4 mean_steps=15.2
|
|
[Episode 153550] reward=-117469323.4 actor_loss=0.3331 critic_loss=87367076741.1200 entropy=17.5410 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 153560] reward=-116153658.6 actor_loss=0.3047 critic_loss=87050336197.4857 entropy=17.5489 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 153560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557108.9 mean_steps=13.1
|
|
[Episode 153570] reward=-119190586.6 actor_loss=0.2271 critic_loss=89649845212.6897 entropy=17.5526 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 153580] reward=-116777883.5 actor_loss=0.3654 critic_loss=85071970889.1429 entropy=17.5505 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 153580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416096.5 mean_steps=15.2
|
|
[Episode 153590] reward=-120817241.6 actor_loss=0.2349 critic_loss=93759773081.6000 entropy=17.5626 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 153600] reward=-116969640.6 actor_loss=0.3616 critic_loss=86328802721.1852 entropy=17.5624 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 153600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386751.3 mean_steps=16.1
|
|
[Episode 153610] reward=-119075910.3 actor_loss=0.2933 critic_loss=91441592729.6000 entropy=17.5627 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 153620] reward=-120472122.6 actor_loss=0.3035 critic_loss=92446541899.8519 entropy=17.5703 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 153620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554564.1 mean_steps=12.4
|
|
[Episode 153630] reward=-119480327.4 actor_loss=0.2861 critic_loss=93125344417.6842 entropy=17.5719 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 153640] reward=-117282147.4 actor_loss=0.2923 critic_loss=89924948650.6667 entropy=17.5818 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 153640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586478.6 mean_steps=12.6
|
|
[Episode 153650] reward=-122029761.2 actor_loss=0.2702 critic_loss=100544653994.6667 entropy=17.5842 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 153660] reward=-111884045.8 actor_loss=0.3883 critic_loss=83466723655.6800 entropy=17.5673 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 153660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-637053.0 mean_steps=12.2
|
|
[Episode 153670] reward=-118834763.3 actor_loss=0.3283 critic_loss=88212270952.2963 entropy=17.5597 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 153680] reward=-111030434.4 actor_loss=0.3893 critic_loss=87949553781.0286 entropy=17.5648 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 153680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532837.7 mean_steps=14.2
|
|
[Episode 153690] reward=-121941146.1 actor_loss=0.2733 critic_loss=127279006989.4737 entropy=17.5503 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 153700] reward=-120466318.1 actor_loss=0.3262 critic_loss=91379329458.4242 entropy=17.5586 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 153700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487684.0 mean_steps=14.1
|
|
[Episode 153710] reward=-117842307.1 actor_loss=0.2579 critic_loss=93991470876.4444 entropy=17.5722 approx_kl=0.0114 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 153720] reward=-127177560.4 actor_loss=0.3030 critic_loss=255686052942.7692 entropy=17.5761 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 153720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-598150.6 mean_steps=11.8
|
|
[Episode 153730] reward=-118545502.7 actor_loss=0.3403 critic_loss=87896306688.0000 entropy=17.5635 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 153740] reward=-113868726.4 actor_loss=0.3230 critic_loss=83569657992.5333 entropy=17.5556 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 153740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489309.5 mean_steps=14.1
|
|
[Episode 153750] reward=-124057161.2 actor_loss=0.1861 critic_loss=97804051981.8378 entropy=17.5469 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 153760] reward=-118849801.5 actor_loss=0.2335 critic_loss=84787607961.6000 entropy=17.5584 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 153760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550020.4 mean_steps=13.7
|
|
[Episode 153770] reward=-119123426.2 actor_loss=0.3365 critic_loss=87723021471.2889 entropy=17.5556 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 153780] reward=-116965477.9 actor_loss=0.2977 critic_loss=91113035093.3333 entropy=17.5537 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 153780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552757.6 mean_steps=14.3
|
|
[Episode 153790] reward=-117431974.9 actor_loss=0.3914 critic_loss=90090485896.5333 entropy=17.5482 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 153800] reward=-122887266.0 actor_loss=0.2532 critic_loss=94820856263.1111 entropy=17.5387 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 153800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-560462.3 mean_steps=14.6
|
|
[Episode 153810] reward=-115273207.1 actor_loss=0.3278 critic_loss=86760569969.7778 entropy=17.5387 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 153820] reward=-122330246.0 actor_loss=0.3161 critic_loss=93204776004.2667 entropy=17.5414 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 153820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-480366.2 mean_steps=12.9
|
|
[Episode 153830] reward=-116744617.5 actor_loss=0.1992 critic_loss=90430704298.6667 entropy=17.5474 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 153840] reward=-120600055.6 actor_loss=0.3702 critic_loss=96349376387.8788 entropy=17.5393 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 153840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-614972.9 mean_steps=14.2
|
|
[Episode 153850] reward=-120849750.6 actor_loss=0.2702 critic_loss=97320988025.2632 entropy=17.5318 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 153860] reward=-120882826.6 actor_loss=0.1625 critic_loss=90122643280.4571 entropy=17.5205 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 153860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498996.4 mean_steps=15.2
|
|
[Episode 153870] reward=-120865766.8 actor_loss=0.3321 critic_loss=88164381876.7059 entropy=17.5175 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 153880] reward=-120671287.9 actor_loss=0.2869 critic_loss=86931504128.0000 entropy=17.5278 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 153880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467571.2 mean_steps=14.2
|
|
[Episode 153890] reward=-119627802.1 actor_loss=0.2929 critic_loss=88270933196.8000 entropy=17.5185 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 153900] reward=-111544266.5 actor_loss=0.4244 critic_loss=81636260370.9630 entropy=17.5105 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 153900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-652225.3 mean_steps=11.8
|
|
[Episode 153910] reward=-120650613.5 actor_loss=0.2197 critic_loss=90877398220.8000 entropy=17.5055 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 153920] reward=-116761350.0 actor_loss=0.4236 critic_loss=97373995008.0000 entropy=17.5042 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 153920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-393133.2 mean_steps=16.4
|
|
[Episode 153930] reward=-114794083.1 actor_loss=0.2406 critic_loss=88029656101.9259 entropy=17.5055 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 153940] reward=-122107134.2 actor_loss=0.3841 critic_loss=96802451156.2927 entropy=17.4997 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 153940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573431.1 mean_steps=13.7
|
|
[Episode 153950] reward=-115513894.7 actor_loss=0.4070 critic_loss=87431301874.5263 entropy=17.5132 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 153960] reward=-119815132.9 actor_loss=0.3566 critic_loss=89026750613.8537 entropy=17.5104 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 153960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-574178.9 mean_steps=14.7
|
|
[Episode 153970] reward=-115521230.0 actor_loss=0.2897 critic_loss=86198967713.1852 entropy=17.4987 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 153980] reward=-123289631.1 actor_loss=0.2903 critic_loss=94073739587.3684 entropy=17.4932 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 153980] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-341115.8 mean_steps=16.9
|
|
[Episode 153990] reward=-113182618.9 actor_loss=0.2764 critic_loss=79224842685.2174 entropy=17.4914 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 154000] reward=-112495040.4 actor_loss=0.4074 critic_loss=84835224482.9091 entropy=17.4868 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 154000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560325.3 mean_steps=13.3
|
|
[Episode 154010] reward=-117444168.1 actor_loss=0.3156 critic_loss=87304464829.2174 entropy=17.4818 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 154020] reward=-116333931.0 actor_loss=0.2768 critic_loss=88230378496.0000 entropy=17.4936 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 154020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-403549.2 mean_steps=17.4
|
|
[Episode 154030] reward=-118529821.2 actor_loss=0.2569 critic_loss=88078027629.7143 entropy=17.4850 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 154040] reward=-121639841.3 actor_loss=0.3203 critic_loss=92949923572.8696 entropy=17.4784 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 154040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-468071.7 mean_steps=15.9
|
|
[Episode 154050] reward=-118733445.9 actor_loss=0.3200 critic_loss=100462721291.1304 entropy=17.4707 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 154060] reward=-117184126.9 actor_loss=0.3014 critic_loss=84212276883.9111 entropy=17.4670 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 154060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-283400.1 mean_steps=16.4
|
|
[Episode 154070] reward=-114557633.7 actor_loss=0.3324 critic_loss=84130323429.7436 entropy=17.4666 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 154080] reward=-120913301.9 actor_loss=0.2272 critic_loss=88640954880.0000 entropy=17.4829 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 154080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492153.8 mean_steps=14.3
|
|
[Episode 154090] reward=-117807923.7 actor_loss=0.3218 critic_loss=91584701128.3478 entropy=17.4799 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 154100] reward=-119308979.4 actor_loss=0.2854 critic_loss=89349372313.6000 entropy=17.4779 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 154100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392041.8 mean_steps=16.6
|
|
[Episode 154110] reward=-116894862.9 actor_loss=0.4272 critic_loss=91787348377.6000 entropy=17.4902 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 154120] reward=-119846595.4 actor_loss=0.2498 critic_loss=90273697069.1765 entropy=17.4986 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 154120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-497710.1 mean_steps=15.8
|
|
[Episode 154130] reward=-118280934.4 actor_loss=0.3201 critic_loss=90181428689.4545 entropy=17.4959 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 154140] reward=-120389780.5 actor_loss=0.1547 critic_loss=90229704770.0645 entropy=17.5006 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 154140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-393980.9 mean_steps=16.6
|
|
[Episode 154150] reward=-115940239.9 actor_loss=0.1900 critic_loss=87017980928.0000 entropy=17.4950 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 154160] reward=-120084913.9 actor_loss=0.3263 critic_loss=88437269443.7647 entropy=17.4945 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 154160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569848.4 mean_steps=13.6
|
|
[Episode 154170] reward=-120176754.5 actor_loss=0.3260 critic_loss=86121720880.7619 entropy=17.5038 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 154180] reward=-117502346.8 actor_loss=0.3870 critic_loss=85198664171.5200 entropy=17.4961 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 154180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578206.9 mean_steps=12.8
|
|
[Episode 154190] reward=-116311344.8 actor_loss=0.3068 critic_loss=91245045418.6667 entropy=17.5047 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 154200] reward=-109252381.1 actor_loss=0.3723 critic_loss=79660725316.2667 entropy=17.5225 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 154200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-687925.9 mean_steps=12.4
|
|
[Episode 154210] reward=-122088082.2 actor_loss=0.2506 critic_loss=131913327957.3333 entropy=17.5207 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 154220] reward=-119496136.2 actor_loss=0.2829 critic_loss=89203397427.2000 entropy=17.5180 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 154220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435099.0 mean_steps=14.3
|
|
[Episode 154230] reward=-114710493.4 actor_loss=0.3334 critic_loss=83842790238.3158 entropy=17.5223 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 154240] reward=-114093245.0 actor_loss=0.2555 critic_loss=83409287354.1818 entropy=17.5204 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 154240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440960.6 mean_steps=14.8
|
|
[Episode 154250] reward=-122367883.6 actor_loss=0.2826 critic_loss=100624356010.6667 entropy=17.5322 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 154260] reward=-120486624.8 actor_loss=0.2505 critic_loss=89721608601.6000 entropy=17.5387 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 154260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-569858.0 mean_steps=14.4
|
|
[Episode 154270] reward=-114401764.2 actor_loss=0.4360 critic_loss=84796474327.0400 entropy=17.5391 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 154280] reward=-113684860.9 actor_loss=0.3477 critic_loss=82453869519.2381 entropy=17.5549 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 154280] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-664181.2 mean_steps=11.1
|
|
[Episode 154290] reward=-119323357.5 actor_loss=0.2758 critic_loss=94731300226.8445 entropy=17.5588 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 154300] reward=-118432618.1 actor_loss=0.3721 critic_loss=88554870647.4667 entropy=17.5683 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 154300] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-310111.0 mean_steps=16.6
|
|
[Episode 154310] reward=-114805613.4 actor_loss=0.2740 critic_loss=86238624517.6889 entropy=17.5725 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 154320] reward=-116680532.9 actor_loss=0.2589 critic_loss=86688278846.5778 entropy=17.5710 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 154320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-623561.7 mean_steps=12.8
|
|
[Episode 154330] reward=-114021502.6 actor_loss=0.3157 critic_loss=79526654537.1429 entropy=17.5748 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 154340] reward=-119868111.9 actor_loss=0.4277 critic_loss=100636639713.8824 entropy=17.5807 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 154340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517685.1 mean_steps=14.2
|
|
[Episode 154350] reward=-116341352.3 actor_loss=0.3034 critic_loss=85831260569.6000 entropy=17.5818 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 154360] reward=-119278287.3 actor_loss=0.3034 critic_loss=95054628305.4545 entropy=17.5837 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 154360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448257.6 mean_steps=15.6
|
|
[Episode 154370] reward=-120187963.8 actor_loss=0.3368 critic_loss=87976020347.2593 entropy=17.5809 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 154380] reward=-115414680.1 actor_loss=0.3693 critic_loss=85391869542.4000 entropy=17.5860 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 154380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-511817.3 mean_steps=12.9
|
|
[Episode 154390] reward=-119083525.9 actor_loss=0.2374 critic_loss=87910507952.3556 entropy=17.5790 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 154400] reward=-125168499.2 actor_loss=0.1782 critic_loss=96851116744.3478 entropy=17.5637 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 154400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453502.0 mean_steps=14.5
|
|
[Episode 154410] reward=-119794337.7 actor_loss=0.2235 critic_loss=86971469491.8919 entropy=17.5808 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 154420] reward=-111160575.2 actor_loss=0.3516 critic_loss=85199335529.0256 entropy=17.5814 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 154420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517564.8 mean_steps=15.2
|
|
[Episode 154430] reward=-122284494.3 actor_loss=0.2718 critic_loss=93013596762.3529 entropy=17.5702 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 154440] reward=-113738750.1 actor_loss=0.3012 critic_loss=84848550446.5455 entropy=17.5722 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 154440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448411.4 mean_steps=15.4
|
|
[Episode 154450] reward=-117489392.4 actor_loss=0.3084 critic_loss=83849871872.0000 entropy=17.5665 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 154460] reward=-116852351.7 actor_loss=0.4752 critic_loss=89854476769.8824 entropy=17.5658 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 154460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559103.4 mean_steps=12.3
|
|
[Episode 154470] reward=-117607545.4 actor_loss=0.1785 critic_loss=87813003309.5111 entropy=17.5596 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 154480] reward=-115881208.9 actor_loss=0.2800 critic_loss=88791428482.8445 entropy=17.5648 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 154480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454270.4 mean_steps=15.4
|
|
[Episode 154490] reward=-113765752.3 actor_loss=0.3670 critic_loss=83476063783.3846 entropy=17.5435 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 154500] reward=-116282555.7 actor_loss=0.3415 critic_loss=85620059787.6364 entropy=17.5171 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 154500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509565.1 mean_steps=14.3
|
|
[Episode 154510] reward=-114072869.3 actor_loss=0.3010 critic_loss=83795695170.7826 entropy=17.5068 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 154520] reward=-115411698.2 actor_loss=0.2871 critic_loss=80708865820.4444 entropy=17.5022 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 154520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506226.5 mean_steps=14.2
|
|
[Episode 154530] reward=-119558176.9 actor_loss=0.2978 critic_loss=91238988185.6000 entropy=17.5092 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 154540] reward=-113285110.1 actor_loss=0.2677 critic_loss=83857223680.0000 entropy=17.5066 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 154540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466553.9 mean_steps=14.6
|
|
[Episode 154550] reward=-115061187.6 actor_loss=0.3480 critic_loss=86959049065.4118 entropy=17.5108 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 154560] reward=-118369896.8 actor_loss=0.2809 critic_loss=88824147968.0000 entropy=17.5191 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 154560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468400.2 mean_steps=13.9
|
|
[Episode 154570] reward=-119664508.7 actor_loss=0.3020 critic_loss=91209490960.5161 entropy=17.5113 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 154580] reward=-116273045.8 actor_loss=0.2164 critic_loss=86251016838.7368 entropy=17.5011 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 154580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495608.2 mean_steps=14.4
|
|
[Episode 154590] reward=-116312762.4 actor_loss=0.3335 critic_loss=83974280098.9091 entropy=17.4991 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 154600] reward=-118719249.4 actor_loss=0.4347 critic_loss=91778881428.2105 entropy=17.5107 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 154600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490003.7 mean_steps=14.8
|
|
[Episode 154610] reward=-120703896.6 actor_loss=0.2479 critic_loss=87422709618.7586 entropy=17.5084 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 154620] reward=-117753660.0 actor_loss=0.3409 critic_loss=83329548719.1579 entropy=17.4998 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 154620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463427.7 mean_steps=15.1
|
|
[Episode 154630] reward=-117144803.5 actor_loss=0.3691 critic_loss=91231413910.5882 entropy=17.4873 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 154640] reward=-120593813.7 actor_loss=0.3318 critic_loss=91390701940.3636 entropy=17.4850 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 154640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-336297.3 mean_steps=16.5
|
|
[Episode 154650] reward=-117420826.2 actor_loss=0.3418 critic_loss=88774430720.0000 entropy=17.4830 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 154660] reward=-117434950.7 actor_loss=0.3430 critic_loss=91943826773.3333 entropy=17.4850 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 154660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-546501.0 mean_steps=14.5
|
|
[Episode 154670] reward=-120911869.2 actor_loss=0.2279 critic_loss=89735117568.0000 entropy=17.5037 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 154680] reward=-120430525.7 actor_loss=0.2930 critic_loss=92459184401.0667 entropy=17.5012 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 154680] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-677846.1 mean_steps=11.5
|
|
[Episode 154690] reward=-120699595.8 actor_loss=0.1816 critic_loss=92695257088.0000 entropy=17.5066 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 154700] reward=-115395639.4 actor_loss=0.1950 critic_loss=85879693858.1333 entropy=17.5135 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 154700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-594651.7 mean_steps=13.1
|
|
[Episode 154710] reward=-114107652.8 actor_loss=0.2988 critic_loss=81192108032.0000 entropy=17.5018 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 154720] reward=-118588302.3 actor_loss=0.3567 critic_loss=97786786560.0000 entropy=17.5146 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 154720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-663928.0 mean_steps=15.6
|
|
[Episode 154730] reward=-123579937.4 actor_loss=0.3607 critic_loss=437715391488.0000 entropy=17.5294 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 154740] reward=-114605552.9 actor_loss=0.3461 critic_loss=92403526490.8387 entropy=17.5331 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 154740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400283.4 mean_steps=15.2
|
|
[Episode 154750] reward=-121033520.4 actor_loss=0.2781 critic_loss=107358142919.1111 entropy=17.5342 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 154760] reward=-118624640.7 actor_loss=0.2637 critic_loss=151070915058.1622 entropy=17.5391 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 154760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-650779.0 mean_steps=13.0
|
|
[Episode 154770] reward=-123031805.9 actor_loss=0.3276 critic_loss=94957275136.0000 entropy=17.5390 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 154780] reward=-120605959.3 actor_loss=0.1716 critic_loss=153789644169.8462 entropy=17.5399 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 154780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-519984.8 mean_steps=14.9
|
|
[Episode 154790] reward=-116218341.3 actor_loss=0.2149 critic_loss=150876113866.1053 entropy=17.5525 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 154800] reward=-128721871.3 actor_loss=0.2241 critic_loss=196539697834.6667 entropy=17.5532 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 154800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577572.7 mean_steps=13.7
|
|
[Episode 154810] reward=-124066675.1 actor_loss=0.2265 critic_loss=104190567112.3478 entropy=17.5617 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 154820] reward=-118939333.7 actor_loss=0.3156 critic_loss=88431223140.1739 entropy=17.5515 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 154820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-616549.9 mean_steps=11.8
|
|
[Episode 154830] reward=-117517531.5 actor_loss=0.2796 critic_loss=87491475911.1111 entropy=17.5535 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 154840] reward=-116714684.6 actor_loss=0.3033 critic_loss=97896197928.4211 entropy=17.5422 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 154840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490212.2 mean_steps=14.9
|
|
[Episode 154850] reward=-117047522.1 actor_loss=0.3185 critic_loss=82150375424.0000 entropy=17.5239 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 154860] reward=-118838306.9 actor_loss=0.2653 critic_loss=93554284468.1481 entropy=17.5271 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 154860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530041.7 mean_steps=13.3
|
|
[Episode 154870] reward=-121148647.6 actor_loss=0.2141 critic_loss=90444297557.3333 entropy=17.5279 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 154880] reward=-117415743.6 actor_loss=0.2934 critic_loss=86807180154.4348 entropy=17.5339 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 154880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-520333.3 mean_steps=13.2
|
|
[Episode 154890] reward=-118326101.0 actor_loss=0.3001 critic_loss=87655484494.7692 entropy=17.5295 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 154900] reward=-118153848.5 actor_loss=0.3477 critic_loss=102043248640.0000 entropy=17.5335 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 154900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-468450.4 mean_steps=13.2
|
|
[Episode 154910] reward=-115419839.1 actor_loss=0.3067 critic_loss=87367870702.1395 entropy=17.5290 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 154920] reward=-124042428.7 actor_loss=0.3418 critic_loss=92018752256.0000 entropy=17.5222 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 154920] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-445149.7 mean_steps=17.6
|
|
[Episode 154930] reward=-116347867.3 actor_loss=0.4022 critic_loss=83700841378.9091 entropy=17.5277 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 154940] reward=-124283582.2 actor_loss=0.1834 critic_loss=100190916403.2000 entropy=17.5188 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 154940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-589216.6 mean_steps=13.7
|
|
[Episode 154950] reward=-126222944.7 actor_loss=0.1795 critic_loss=276902749980.4445 entropy=17.5166 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 154960] reward=-118032925.9 actor_loss=0.3369 critic_loss=85782232632.8889 entropy=17.5138 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 154960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524324.8 mean_steps=13.2
|
|
[Episode 154970] reward=-118776508.2 actor_loss=0.3230 critic_loss=91493248301.1765 entropy=17.5217 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 154980] reward=-119359448.4 actor_loss=0.3199 critic_loss=93964299041.3913 entropy=17.5302 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 154980] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-392870.4 mean_steps=17.4
|
|
[Episode 154990] reward=-117856243.7 actor_loss=0.3762 critic_loss=122112983859.2000 entropy=17.5281 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 155000] reward=-114368932.0 actor_loss=0.2779 critic_loss=85597906261.3333 entropy=17.5256 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 155000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378665.5 mean_steps=16.3
|
|
[Episode 155010] reward=-119334377.8 actor_loss=0.2968 critic_loss=90189891741.5385 entropy=17.5243 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 155020] reward=-118792397.5 actor_loss=0.1827 critic_loss=92839764992.0000 entropy=17.5102 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 155020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-328363.5 mean_steps=16.8
|
|
[Episode 155030] reward=-120352400.2 actor_loss=0.1376 critic_loss=100321025820.4444 entropy=17.5088 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 155040] reward=-116249153.4 actor_loss=0.2408 critic_loss=87979827764.9655 entropy=17.4982 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 155040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542821.6 mean_steps=13.3
|
|
[Episode 155050] reward=-119455117.2 actor_loss=0.4077 critic_loss=91029373231.4074 entropy=17.4859 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 155060] reward=-118348648.7 actor_loss=0.1618 critic_loss=89446852049.4545 entropy=17.4831 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 155060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-352700.9 mean_steps=17.1
|
|
[Episode 155070] reward=-121606596.8 actor_loss=0.1864 critic_loss=91722761216.0000 entropy=17.4801 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 155080] reward=-114117711.6 actor_loss=0.2990 critic_loss=82309647228.7179 entropy=17.4809 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 155080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490236.7 mean_steps=15.3
|
|
[Episode 155090] reward=-119444261.6 actor_loss=0.3539 critic_loss=91971562336.7111 entropy=17.4870 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 155100] reward=-117686177.8 actor_loss=0.2547 critic_loss=93658864146.9630 entropy=17.4815 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 155100] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-657295.1 mean_steps=12.3
|
|
[Episode 155110] reward=-116589868.0 actor_loss=0.3036 critic_loss=89305153536.0000 entropy=17.4872 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 155120] reward=-118947220.6 actor_loss=0.3251 critic_loss=106720967065.6000 entropy=17.4867 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 155120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510472.3 mean_steps=14.1
|
|
[Episode 155130] reward=-121454599.1 actor_loss=0.2057 critic_loss=111827968455.1111 entropy=17.4791 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 155140] reward=-114025287.4 actor_loss=0.2649 critic_loss=84744504008.3478 entropy=17.4885 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 155140] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-630687.9 mean_steps=11.0
|
|
[Episode 155150] reward=-120296091.2 actor_loss=0.2570 critic_loss=111680930611.2000 entropy=17.5081 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 155160] reward=-120931093.8 actor_loss=0.2723 critic_loss=107541820620.8000 entropy=17.4976 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 155160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-378172.9 mean_steps=15.2
|
|
[Episode 155170] reward=-123525825.4 actor_loss=0.1514 critic_loss=100142957989.6471 entropy=17.4916 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 155180] reward=-122698600.9 actor_loss=0.2532 critic_loss=109083578368.0000 entropy=17.4983 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 155180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438037.2 mean_steps=14.6
|
|
[Episode 155190] reward=-117901651.6 actor_loss=0.2242 critic_loss=98777478010.4348 entropy=17.5024 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 155200] reward=-120643812.2 actor_loss=0.2987 critic_loss=95830115072.0000 entropy=17.5150 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 155200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-482040.2 mean_steps=16.3
|
|
[Episode 155210] reward=-122651121.5 actor_loss=0.3279 critic_loss=93853174988.8000 entropy=17.5116 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 155220] reward=-118703532.3 actor_loss=0.2165 critic_loss=91350450176.0000 entropy=17.5137 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 155220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478852.0 mean_steps=15.2
|
|
[Episode 155230] reward=-115485266.2 actor_loss=0.3192 critic_loss=98145575139.5556 entropy=17.5075 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 155240] reward=-120969078.9 actor_loss=0.2600 critic_loss=94779087654.7879 entropy=17.5222 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 155240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529096.4 mean_steps=13.6
|
|
[Episode 155250] reward=-118399957.4 actor_loss=0.3159 critic_loss=86408221910.3256 entropy=17.5120 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 155260] reward=-120624261.3 actor_loss=0.2185 critic_loss=92104640441.3793 entropy=17.5346 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 155260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455360.0 mean_steps=15.8
|
|
[Episode 155270] reward=-114208909.6 actor_loss=0.2747 critic_loss=81086117595.4286 entropy=17.5278 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 155280] reward=-119043147.0 actor_loss=0.3374 critic_loss=87436645899.3778 entropy=17.5229 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 155280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453960.8 mean_steps=15.7
|
|
[Episode 155290] reward=-122286640.3 actor_loss=0.3387 critic_loss=133679523784.6487 entropy=17.5283 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 155300] reward=-124956091.3 actor_loss=0.2815 critic_loss=128856385378.4615 entropy=17.5296 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 155300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424339.4 mean_steps=15.6
|
|
[Episode 155310] reward=-119990581.3 actor_loss=0.3204 critic_loss=94567549747.2000 entropy=17.5266 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 155320] reward=-118995188.8 actor_loss=0.3169 critic_loss=88237396992.0000 entropy=17.5441 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 155320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-397134.9 mean_steps=14.3
|
|
[Episode 155330] reward=-121291204.5 actor_loss=0.3240 critic_loss=100389829578.1053 entropy=17.5340 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 155340] reward=-120026102.6 actor_loss=0.2156 critic_loss=88889124181.3333 entropy=17.5287 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 155340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471757.4 mean_steps=13.7
|
|
[Episode 155350] reward=-121124259.6 actor_loss=0.2891 critic_loss=89568089492.8372 entropy=17.5326 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 155360] reward=-118059620.2 actor_loss=0.4059 critic_loss=88772008770.3704 entropy=17.5363 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 155360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497963.5 mean_steps=14.0
|
|
[Episode 155370] reward=-113420067.7 actor_loss=0.3331 critic_loss=81454418966.7556 entropy=17.5199 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 155380] reward=-119697504.0 actor_loss=0.3089 critic_loss=87303656501.8947 entropy=17.5106 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 155380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-583954.1 mean_steps=13.6
|
|
[Episode 155390] reward=-115238415.2 actor_loss=0.3595 critic_loss=82959381299.2000 entropy=17.4895 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 155400] reward=-124640485.4 actor_loss=0.2512 critic_loss=99033711138.1333 entropy=17.4694 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 155400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-579942.5 mean_steps=13.4
|
|
[Episode 155410] reward=-118098617.5 actor_loss=0.3698 critic_loss=90524310459.7333 entropy=17.4609 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 155420] reward=-110503397.7 actor_loss=0.2754 critic_loss=77661355432.5854 entropy=17.4610 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 155420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558082.0 mean_steps=13.3
|
|
[Episode 155430] reward=-121638504.5 actor_loss=0.2855 critic_loss=88508821995.5200 entropy=17.4859 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 155440] reward=-113922669.6 actor_loss=0.2700 critic_loss=84686205213.7674 entropy=17.4853 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 155440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-460892.0 mean_steps=15.8
|
|
[Episode 155450] reward=-120031100.5 actor_loss=0.2667 critic_loss=111828980297.1429 entropy=17.4895 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 155460] reward=-119653107.6 actor_loss=0.2256 critic_loss=118868775731.2000 entropy=17.5041 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 155460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449857.6 mean_steps=15.7
|
|
[Episode 155470] reward=-114411966.2 actor_loss=0.3569 critic_loss=96796271684.2667 entropy=17.5059 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 155480] reward=-116884023.4 actor_loss=0.3164 critic_loss=85184531849.8462 entropy=17.5047 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 155480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413804.2 mean_steps=15.2
|
|
[Episode 155490] reward=-120038374.5 actor_loss=0.2461 critic_loss=87061037552.4848 entropy=17.4946 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 155500] reward=-114568399.5 actor_loss=0.3671 critic_loss=92114158660.2667 entropy=17.4888 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 155500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458628.6 mean_steps=15.8
|
|
[Episode 155510] reward=-126573937.5 actor_loss=0.3205 critic_loss=209774964736.0000 entropy=17.4972 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 155520] reward=-118535065.3 actor_loss=0.2851 critic_loss=108096978579.9111 entropy=17.5099 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 155520] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-288558.7 mean_steps=18.2
|
|
[Episode 155530] reward=-116624023.4 actor_loss=0.2099 critic_loss=149936410282.6667 entropy=17.5248 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 155540] reward=-124054092.5 actor_loss=0.2711 critic_loss=103978028297.4815 entropy=17.5279 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 155540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440366.6 mean_steps=15.6
|
|
[Episode 155550] reward=-122396656.5 actor_loss=0.3579 critic_loss=133099298451.9111 entropy=17.5221 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 155560] reward=-113925157.8 actor_loss=0.3031 critic_loss=83789509427.2000 entropy=17.5256 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 155560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426008.5 mean_steps=15.2
|
|
[Episode 155570] reward=-119015395.6 actor_loss=0.3550 critic_loss=87975332683.2941 entropy=17.5443 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 155580] reward=-113530502.4 actor_loss=0.2091 critic_loss=85166256559.1579 entropy=17.5539 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 155580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-568294.9 mean_steps=12.3
|
|
[Episode 155590] reward=-119190953.7 actor_loss=0.3177 critic_loss=96004012081.9512 entropy=17.5614 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 155600] reward=-120282398.7 actor_loss=0.2889 critic_loss=114507573930.6667 entropy=17.5512 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 155600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-558580.0 mean_steps=12.2
|
|
[Episode 155610] reward=-121933194.2 actor_loss=0.2176 critic_loss=109868017891.5556 entropy=17.5501 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 155620] reward=-120829456.4 actor_loss=0.3068 critic_loss=100695515664.5161 entropy=17.5425 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 155620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-513782.0 mean_steps=15.3
|
|
[Episode 155630] reward=-116340242.5 actor_loss=0.2761 critic_loss=124111052368.8421 entropy=17.5481 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 155640] reward=-121412078.2 actor_loss=0.2843 critic_loss=102766060119.4146 entropy=17.5498 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 155640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467244.2 mean_steps=13.7
|
|
[Episode 155650] reward=-116118686.9 actor_loss=0.3364 critic_loss=89774412049.0667 entropy=17.5548 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 155660] reward=-120097159.2 actor_loss=0.3318 critic_loss=113541697792.0000 entropy=17.5680 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 155660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-469902.6 mean_steps=15.5
|
|
[Episode 155670] reward=-115640761.6 actor_loss=0.2895 critic_loss=89763718441.2903 entropy=17.5616 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 155680] reward=-117427603.9 actor_loss=0.3465 critic_loss=88432717004.8000 entropy=17.5629 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 155680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524934.2 mean_steps=14.1
|
|
[Episode 155690] reward=-120381424.9 actor_loss=0.3697 critic_loss=123025812613.5652 entropy=17.5722 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 155700] reward=-118428109.9 actor_loss=0.3331 critic_loss=91789805214.8965 entropy=17.5618 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 155700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544329.9 mean_steps=14.2
|
|
[Episode 155710] reward=-120597194.5 actor_loss=0.3534 critic_loss=95014443690.6667 entropy=17.5581 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 155720] reward=-115136164.8 actor_loss=0.3080 critic_loss=85025319713.3913 entropy=17.5692 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 155720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-469330.4 mean_steps=15.2
|
|
[Episode 155730] reward=-110709900.2 actor_loss=0.3850 critic_loss=88985679189.3333 entropy=17.5752 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 155740] reward=-115903786.5 actor_loss=0.2748 critic_loss=91135844352.0000 entropy=17.5612 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 155740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-492168.7 mean_steps=16.1
|
|
[Episode 155750] reward=-111276496.3 actor_loss=0.3011 critic_loss=81014803324.7179 entropy=17.5587 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 155760] reward=-119049475.0 actor_loss=0.2355 critic_loss=86084771840.0000 entropy=17.5491 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 155760] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-320640.7 mean_steps=16.6
|
|
[Episode 155770] reward=-118202145.0 actor_loss=0.3306 critic_loss=112112487862.8571 entropy=17.5388 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 155780] reward=-116288204.7 actor_loss=0.3709 critic_loss=85108420235.6364 entropy=17.5355 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 155780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537396.1 mean_steps=13.7
|
|
[Episode 155790] reward=-116575179.2 actor_loss=0.1753 critic_loss=83271821784.6154 entropy=17.5329 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 155800] reward=-117049355.6 actor_loss=0.3013 critic_loss=84970336460.8000 entropy=17.5262 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 155800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-647147.6 mean_steps=13.1
|
|
[Episode 155810] reward=-118979958.7 actor_loss=0.2635 critic_loss=90994763883.7895 entropy=17.5207 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 155820] reward=-122032363.7 actor_loss=0.3073 critic_loss=90021791698.4889 entropy=17.5249 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 155820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537193.2 mean_steps=14.1
|
|
[Episode 155830] reward=-118725871.0 actor_loss=0.2786 critic_loss=124446340983.4667 entropy=17.5203 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 155840] reward=-120407302.4 actor_loss=0.3617 critic_loss=114838861531.4286 entropy=17.5293 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 155840] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-655968.7 mean_steps=11.2
|
|
[Episode 155850] reward=-117358977.2 actor_loss=0.2913 critic_loss=83476040523.2941 entropy=17.5268 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 155860] reward=-121842674.2 actor_loss=0.2985 critic_loss=92325890184.5333 entropy=17.5265 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 155860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-576626.5 mean_steps=13.9
|
|
[Episode 155870] reward=-119081319.6 actor_loss=0.3231 critic_loss=89944707891.2000 entropy=17.5274 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 155880] reward=-117859696.0 actor_loss=0.3479 critic_loss=100038935620.2667 entropy=17.5241 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 155880] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-364881.5 mean_steps=16.6
|
|
[Episode 155890] reward=-117323139.1 actor_loss=0.2924 critic_loss=99666273385.9310 entropy=17.5184 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 155900] reward=-117962939.9 actor_loss=0.2268 critic_loss=86316785664.0000 entropy=17.5341 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 155900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528911.5 mean_steps=14.2
|
|
[Episode 155910] reward=-114604480.4 actor_loss=0.3331 critic_loss=85450382242.9091 entropy=17.5400 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 155920] reward=-117049355.9 actor_loss=0.4220 critic_loss=90863516785.7778 entropy=17.5298 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 155920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-366531.3 mean_steps=15.8
|
|
[Episode 155930] reward=-119465916.4 actor_loss=0.2774 critic_loss=89875673403.0769 entropy=17.5480 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 155940] reward=-128845443.4 actor_loss=0.3804 critic_loss=470521879507.4783 entropy=17.5573 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 155940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612800.2 mean_steps=11.8
|
|
[Episode 155950] reward=-121394634.4 actor_loss=0.1945 critic_loss=228450157968.6956 entropy=17.5617 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 155960] reward=-117905781.7 actor_loss=0.3730 critic_loss=95269402851.5556 entropy=17.5869 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 155960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419891.2 mean_steps=14.9
|
|
[Episode 155970] reward=-121116757.6 actor_loss=0.2290 critic_loss=92005332309.3333 entropy=17.5857 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 155980] reward=-122528762.8 actor_loss=0.2972 critic_loss=235285945457.7778 entropy=17.6001 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 155980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555168.7 mean_steps=12.2
|
|
[Episode 155990] reward=-115934725.6 actor_loss=0.3743 critic_loss=85499306194.0513 entropy=17.6085 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 156000] reward=-119582608.0 actor_loss=0.2225 critic_loss=87669082976.7111 entropy=17.6028 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 156000] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-675863.2 mean_steps=10.6
|
|
[Episode 156010] reward=-117608138.5 actor_loss=0.2669 critic_loss=82842850357.8947 entropy=17.5867 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 156020] reward=-116122735.4 actor_loss=0.3593 critic_loss=96708725274.9474 entropy=17.5889 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 156020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-624174.2 mean_steps=11.8
|
|
[Episode 156030] reward=-111338874.5 actor_loss=0.3374 critic_loss=84914617587.8095 entropy=17.5825 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 156040] reward=-120614364.4 actor_loss=0.3046 critic_loss=90604854035.6923 entropy=17.5796 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 156040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494058.2 mean_steps=13.6
|
|
[Episode 156050] reward=-120032827.0 actor_loss=0.2951 critic_loss=136202524672.0000 entropy=17.5655 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 156060] reward=-126857680.3 actor_loss=0.2511 critic_loss=96111347153.4545 entropy=17.5831 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 156060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465783.4 mean_steps=15.2
|
|
[Episode 156070] reward=-121356791.7 actor_loss=0.3210 critic_loss=97788536604.4444 entropy=17.5774 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 156080] reward=-120016774.7 actor_loss=0.3201 critic_loss=90151070378.6667 entropy=17.5603 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 156080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489955.0 mean_steps=14.6
|
|
[Episode 156090] reward=-118437255.0 actor_loss=0.2635 critic_loss=85466359034.3111 entropy=17.5653 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 156100] reward=-121332114.4 actor_loss=0.2511 critic_loss=92985903786.6667 entropy=17.5506 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 156100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-490788.9 mean_steps=15.6
|
|
[Episode 156110] reward=-119956887.3 actor_loss=0.2919 critic_loss=88308780600.8889 entropy=17.5536 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 156120] reward=-110644805.5 actor_loss=0.3194 critic_loss=82322787532.8000 entropy=17.5618 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 156120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-613591.5 mean_steps=11.9
|
|
[Episode 156130] reward=-115427011.4 actor_loss=0.2305 critic_loss=85387370496.0000 entropy=17.5738 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 156140] reward=-122535747.9 actor_loss=0.2781 critic_loss=95818101191.1111 entropy=17.5616 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 156140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576487.1 mean_steps=13.7
|
|
[Episode 156150] reward=-117637689.7 actor_loss=0.2832 critic_loss=85849308901.5172 entropy=17.5724 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 156160] reward=-122762331.0 actor_loss=0.3562 critic_loss=97268455105.4222 entropy=17.5742 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 156160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427226.4 mean_steps=14.7
|
|
[Episode 156170] reward=-118005785.2 actor_loss=0.2894 critic_loss=84998649901.5111 entropy=17.5855 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 156180] reward=-121512878.7 actor_loss=0.2542 critic_loss=91153406554.3529 entropy=17.5736 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 156180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-530225.3 mean_steps=15.4
|
|
[Episode 156190] reward=-119922995.4 actor_loss=0.3320 critic_loss=88067474391.0400 entropy=17.5823 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 156200] reward=-119089652.4 actor_loss=0.4972 critic_loss=93766129745.9200 entropy=17.5777 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1569 front_blocked=0
|
|
[Eval 156200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464014.7 mean_steps=14.6
|
|
[Episode 156210] reward=-118132543.5 actor_loss=0.3743 critic_loss=95480010279.3846 entropy=17.5728 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 156220] reward=-114000906.5 actor_loss=0.2713 critic_loss=81943102681.2121 entropy=17.5809 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 156220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540527.5 mean_steps=13.3
|
|
[Episode 156230] reward=-113773068.0 actor_loss=0.4127 critic_loss=84243294890.6667 entropy=17.5801 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 156240] reward=-127472780.9 actor_loss=0.2064 critic_loss=301102971935.0303 entropy=17.5642 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 156240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-682145.0 mean_steps=12.2
|
|
[Episode 156250] reward=-114460644.2 actor_loss=0.2638 critic_loss=127089155185.7778 entropy=17.5700 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 156260] reward=-111539539.2 actor_loss=0.3052 critic_loss=87061723477.3333 entropy=17.5741 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 156260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467718.5 mean_steps=14.4
|
|
[Episode 156270] reward=-120533654.6 actor_loss=0.2978 critic_loss=90333584179.2000 entropy=17.5778 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 156280] reward=-116284769.7 actor_loss=0.2840 critic_loss=86509006392.8889 entropy=17.5696 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 156280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516233.0 mean_steps=14.1
|
|
[Episode 156290] reward=-116811025.2 actor_loss=0.3290 critic_loss=95121764937.1429 entropy=17.5537 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 156300] reward=-114467426.6 actor_loss=0.2070 critic_loss=84360211402.1053 entropy=17.5517 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 156300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580469.9 mean_steps=12.4
|
|
[Episode 156310] reward=-118057128.7 actor_loss=0.3278 critic_loss=90774098944.0000 entropy=17.5544 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 156320] reward=-114848461.5 actor_loss=0.2434 critic_loss=85179567786.6667 entropy=17.5574 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 156320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446888.8 mean_steps=14.5
|
|
[Episode 156330] reward=-119388458.5 actor_loss=0.2042 critic_loss=86635892280.8889 entropy=17.5442 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 156340] reward=-119177270.5 actor_loss=0.3597 critic_loss=112304298939.7333 entropy=17.5516 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 156340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-390841.5 mean_steps=16.1
|
|
[Episode 156350] reward=-118303533.3 actor_loss=0.3293 critic_loss=89265257221.6889 entropy=17.5571 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 156360] reward=-122989721.1 actor_loss=0.3122 critic_loss=140243194857.2444 entropy=17.5715 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 156360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-608080.9 mean_steps=12.7
|
|
[Episode 156370] reward=-117220624.1 actor_loss=0.2734 critic_loss=80101051596.8000 entropy=17.5679 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 156380] reward=-122274268.3 actor_loss=0.2922 critic_loss=94093860083.8095 entropy=17.5655 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 156380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-419444.2 mean_steps=14.4
|
|
[Episode 156390] reward=-114909027.9 actor_loss=0.3872 critic_loss=106396836104.2581 entropy=17.5774 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 156400] reward=-124541134.3 actor_loss=0.3104 critic_loss=133617182310.4000 entropy=17.5769 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 156400] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-530992.4 mean_steps=11.3
|
|
[Episode 156410] reward=-117871994.5 actor_loss=0.3308 critic_loss=107313746292.3636 entropy=17.5724 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 156420] reward=-123496941.3 actor_loss=0.2592 critic_loss=102324343542.5185 entropy=17.5627 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 156420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-617000.5 mean_steps=11.9
|
|
[Episode 156430] reward=-126217940.5 actor_loss=0.2749 critic_loss=117599777302.2609 entropy=17.5700 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 156440] reward=-119805831.9 actor_loss=0.3123 critic_loss=88502997612.6061 entropy=17.5693 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 156440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520857.8 mean_steps=14.0
|
|
[Episode 156450] reward=-119770470.1 actor_loss=0.3587 critic_loss=92049136174.5455 entropy=17.5793 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 156460] reward=-120230475.6 actor_loss=0.3251 critic_loss=89212907975.1111 entropy=17.5799 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 156460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-688443.0 mean_steps=13.8
|
|
[Episode 156470] reward=-123002999.3 actor_loss=0.2289 critic_loss=94551407703.7714 entropy=17.5738 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 156480] reward=-120324811.3 actor_loss=0.2240 critic_loss=84339270022.0952 entropy=17.5784 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 156480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-491352.2 mean_steps=12.8
|
|
[Episode 156490] reward=-115642666.1 actor_loss=0.2815 critic_loss=85694777789.2174 entropy=17.5648 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 156500] reward=-116239729.4 actor_loss=0.3933 critic_loss=83821496121.8065 entropy=17.5538 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 156500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533447.4 mean_steps=13.3
|
|
[Episode 156510] reward=-120671471.6 actor_loss=0.2456 critic_loss=88408986237.1555 entropy=17.5636 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 156520] reward=-125818427.2 actor_loss=0.2591 critic_loss=97823783976.9600 entropy=17.5483 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 156520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444142.3 mean_steps=15.5
|
|
[Episode 156530] reward=-119041719.7 actor_loss=0.3605 critic_loss=86660344490.6667 entropy=17.5333 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 156540] reward=-118217340.9 actor_loss=0.3396 critic_loss=86115496566.1538 entropy=17.5343 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 156540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503314.7 mean_steps=14.0
|
|
[Episode 156550] reward=-113679701.1 actor_loss=0.2814 critic_loss=78481654472.3478 entropy=17.5449 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 156560] reward=-108820084.9 actor_loss=0.3253 critic_loss=78997681845.6774 entropy=17.5590 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 156560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547746.0 mean_steps=14.3
|
|
[Episode 156570] reward=-119514107.5 actor_loss=0.1805 critic_loss=84720800278.2609 entropy=17.5566 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 156580] reward=-117411636.7 actor_loss=0.3380 critic_loss=85316600937.9310 entropy=17.5616 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 156580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586663.9 mean_steps=12.8
|
|
[Episode 156590] reward=-118423647.1 actor_loss=0.3215 critic_loss=88497395029.3333 entropy=17.5617 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 156600] reward=-124719323.7 actor_loss=0.3040 critic_loss=97037475111.8222 entropy=17.5681 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 156600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425020.8 mean_steps=15.2
|
|
[Episode 156610] reward=-118705826.7 actor_loss=0.3188 critic_loss=90549331480.3810 entropy=17.5700 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 156620] reward=-119799506.3 actor_loss=0.2720 critic_loss=89186660807.1111 entropy=17.5664 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 156620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526971.9 mean_steps=14.2
|
|
[Episode 156630] reward=-120779381.3 actor_loss=0.3720 critic_loss=90642357179.7333 entropy=17.5606 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 156640] reward=-121544691.7 actor_loss=0.2554 critic_loss=93019255040.0000 entropy=17.5571 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 156640] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-601970.9 mean_steps=12.0
|
|
[Episode 156650] reward=-121488038.7 actor_loss=0.3292 critic_loss=93102303477.7600 entropy=17.5636 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 156660] reward=-122067539.9 actor_loss=0.3545 critic_loss=91112111250.2857 entropy=17.5555 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 156660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521769.6 mean_steps=14.8
|
|
[Episode 156670] reward=-121642932.3 actor_loss=0.3645 critic_loss=91688322092.5217 entropy=17.5612 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 156680] reward=-122683226.4 actor_loss=0.3183 critic_loss=96774703923.2000 entropy=17.5618 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 156680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-616323.7 mean_steps=11.8
|
|
[Episode 156690] reward=-118761341.5 actor_loss=0.2777 critic_loss=87685643931.8261 entropy=17.5585 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 156700] reward=-118261551.5 actor_loss=0.3618 critic_loss=84216582516.3636 entropy=17.5539 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 156700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588129.8 mean_steps=13.5
|
|
[Episode 156710] reward=-119429544.5 actor_loss=0.2972 critic_loss=86297873507.0968 entropy=17.5599 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 156720] reward=-119355877.8 actor_loss=0.2215 critic_loss=88997301361.7778 entropy=17.5632 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 156720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-406047.6 mean_steps=14.1
|
|
[Episode 156730] reward=-121755834.3 actor_loss=0.3133 critic_loss=100524963840.0000 entropy=17.5640 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 156740] reward=-122092531.0 actor_loss=0.2936 critic_loss=91986252595.2000 entropy=17.5756 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 156740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-432039.4 mean_steps=15.8
|
|
[Episode 156750] reward=-118232089.2 actor_loss=0.3665 critic_loss=88473499648.0000 entropy=17.5787 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 156760] reward=-118522904.4 actor_loss=0.2410 critic_loss=90924593367.5789 entropy=17.5872 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 156760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-461333.0 mean_steps=13.9
|
|
[Episode 156770] reward=-122903403.5 actor_loss=0.2680 critic_loss=124598096244.3636 entropy=17.5920 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 156780] reward=-120486551.3 actor_loss=0.3515 critic_loss=90265566208.0000 entropy=17.5895 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 156780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438466.2 mean_steps=14.6
|
|
[Episode 156790] reward=-118701350.8 actor_loss=0.2931 critic_loss=90004489830.4000 entropy=17.5882 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 156800] reward=-117891389.9 actor_loss=0.2800 critic_loss=93712054681.6000 entropy=17.5838 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 156800] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-636239.0 mean_steps=11.0
|
|
[Episode 156810] reward=-116583694.3 actor_loss=0.3068 critic_loss=88299530532.5714 entropy=17.5810 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 156820] reward=-121525586.7 actor_loss=0.3138 critic_loss=87695205315.7647 entropy=17.5859 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 156820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-527545.8 mean_steps=12.1
|
|
[Episode 156830] reward=-111179046.5 actor_loss=0.3755 critic_loss=85149391985.7778 entropy=17.5884 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 156840] reward=-119514075.8 actor_loss=0.3927 critic_loss=101095117917.0909 entropy=17.5777 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 156840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-506592.8 mean_steps=14.8
|
|
[Episode 156850] reward=-117625764.4 actor_loss=0.2695 critic_loss=82448828746.3226 entropy=17.5718 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 156860] reward=-116262679.9 actor_loss=0.2244 critic_loss=86859201697.6842 entropy=17.5741 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 156860] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-377585.7 mean_steps=16.9
|
|
[Episode 156870] reward=-121588942.7 actor_loss=0.3679 critic_loss=105462248898.5600 entropy=17.5639 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 156880] reward=-119711757.0 actor_loss=0.3055 critic_loss=91806627840.0000 entropy=17.5595 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 156880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578087.3 mean_steps=13.4
|
|
[Episode 156890] reward=-120233309.5 actor_loss=0.3141 critic_loss=90622190933.3333 entropy=17.5564 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 156900] reward=-118864870.2 actor_loss=0.2707 critic_loss=98812026017.6842 entropy=17.5444 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 156900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544998.2 mean_steps=13.3
|
|
[Episode 156910] reward=-116873010.4 actor_loss=0.3251 critic_loss=87005305319.6190 entropy=17.5497 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 156920] reward=-115675611.8 actor_loss=0.3323 critic_loss=84084670107.8261 entropy=17.5538 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 156920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-516937.3 mean_steps=15.1
|
|
[Episode 156930] reward=-120422401.5 actor_loss=0.2739 critic_loss=90766339072.0000 entropy=17.5538 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 156940] reward=-120277557.9 actor_loss=0.3241 critic_loss=90982150790.7368 entropy=17.5520 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 156940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521532.8 mean_steps=13.8
|
|
[Episode 156950] reward=-116709150.4 actor_loss=0.4143 critic_loss=85313721139.2000 entropy=17.5478 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 156960] reward=-117193187.3 actor_loss=0.3622 critic_loss=92482974378.6667 entropy=17.5453 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 156960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555301.4 mean_steps=12.4
|
|
[Episode 156970] reward=-119452266.2 actor_loss=0.2057 critic_loss=88428567210.6667 entropy=17.5486 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 156980] reward=-118544304.0 actor_loss=0.3272 critic_loss=89662987575.6522 entropy=17.5444 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 156980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561276.0 mean_steps=13.6
|
|
[Episode 156990] reward=-113876570.7 actor_loss=0.3032 critic_loss=81238212061.8667 entropy=17.5460 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 157000] reward=-116657685.2 actor_loss=0.2835 critic_loss=82391471284.7059 entropy=17.5550 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 157000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445883.5 mean_steps=14.9
|
|
[Episode 157010] reward=-117415337.9 actor_loss=0.2737 critic_loss=83488723889.2308 entropy=17.5576 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 157020] reward=-119989425.8 actor_loss=0.2406 critic_loss=89255497097.8462 entropy=17.5686 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 157020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-572984.6 mean_steps=15.0
|
|
[Episode 157030] reward=-120641921.1 actor_loss=0.2537 critic_loss=86834394597.0526 entropy=17.5633 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 157040] reward=-116003004.0 actor_loss=0.2810 critic_loss=144310931083.6364 entropy=17.5645 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 157040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-470831.8 mean_steps=16.9
|
|
[Episode 157050] reward=-122190303.5 actor_loss=0.3017 critic_loss=95554591675.7333 entropy=17.5602 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 157060] reward=-119137200.2 actor_loss=0.2857 critic_loss=91438424545.8824 entropy=17.5739 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 157060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-412660.6 mean_steps=17.5
|
|
[Episode 157070] reward=-121395984.1 actor_loss=0.3547 critic_loss=101417153194.6667 entropy=17.5766 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 157080] reward=-117372907.9 actor_loss=0.2573 critic_loss=90638660608.0000 entropy=17.5779 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 157080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513212.0 mean_steps=14.3
|
|
[Episode 157090] reward=-113455334.8 actor_loss=0.4617 critic_loss=85573026816.0000 entropy=17.5633 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 157100] reward=-119813698.7 actor_loss=0.3255 critic_loss=87383873349.8182 entropy=17.5603 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 157100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571924.9 mean_steps=13.3
|
|
[Episode 157110] reward=-123712947.6 actor_loss=0.1985 critic_loss=90640343412.3636 entropy=17.5551 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 157120] reward=-123055675.9 actor_loss=0.3363 critic_loss=85046403891.2000 entropy=17.5523 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 157120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-573402.7 mean_steps=12.7
|
|
[Episode 157130] reward=-119586652.7 actor_loss=0.3165 critic_loss=88802230272.0000 entropy=17.5468 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 157140] reward=-115315354.6 actor_loss=0.3593 critic_loss=82129325624.8889 entropy=17.5358 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 157140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476652.5 mean_steps=14.8
|
|
[Episode 157150] reward=-123439784.8 actor_loss=0.3498 critic_loss=96082328195.6572 entropy=17.5266 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 157160] reward=-119457577.8 actor_loss=0.1833 critic_loss=85737808106.0571 entropy=17.5252 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 157160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-424304.0 mean_steps=16.4
|
|
[Episode 157170] reward=-119060848.8 actor_loss=0.3277 critic_loss=91348965533.5385 entropy=17.5263 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 157180] reward=-121256991.9 actor_loss=0.2101 critic_loss=87074634547.2000 entropy=17.5291 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 157180] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-741229.8 mean_steps=11.5
|
|
[Episode 157190] reward=-120009460.2 actor_loss=0.2928 critic_loss=87686285154.4615 entropy=17.5317 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 157200] reward=-120070457.0 actor_loss=0.4405 critic_loss=156025847504.5926 entropy=17.5233 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 157200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-608260.8 mean_steps=12.5
|
|
[Episode 157210] reward=-117766732.2 actor_loss=0.2524 critic_loss=85174249881.6000 entropy=17.5139 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 157220] reward=-122115186.7 actor_loss=0.2284 critic_loss=90473886956.3077 entropy=17.5185 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 157220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493630.0 mean_steps=14.1
|
|
[Episode 157230] reward=-118684231.0 actor_loss=0.2534 critic_loss=82991351335.3846 entropy=17.5220 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 157240] reward=-117746499.8 actor_loss=0.2965 critic_loss=83533733497.9048 entropy=17.5215 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 157240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-529846.4 mean_steps=12.4
|
|
[Episode 157250] reward=-120931654.2 actor_loss=0.2505 critic_loss=86902691840.0000 entropy=17.5260 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 157260] reward=-118052048.4 actor_loss=0.3358 critic_loss=85454805401.6000 entropy=17.5261 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 157260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565174.2 mean_steps=13.2
|
|
[Episode 157270] reward=-117956585.9 actor_loss=0.2555 critic_loss=88835773890.5600 entropy=17.5093 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 157280] reward=-112943856.7 actor_loss=0.2350 critic_loss=80472941942.6341 entropy=17.5061 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 157280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579474.0 mean_steps=12.8
|
|
[Episode 157290] reward=-122412284.5 actor_loss=0.2430 critic_loss=91322024618.6667 entropy=17.5088 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 157300] reward=-121154299.1 actor_loss=0.3478 critic_loss=103505466572.8000 entropy=17.5061 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 157300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459806.8 mean_steps=15.4
|
|
[Episode 157310] reward=-117421355.3 actor_loss=0.3682 critic_loss=83957012666.1818 entropy=17.5043 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 157320] reward=-115492654.0 actor_loss=0.3506 critic_loss=83872563200.0000 entropy=17.4848 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 157320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-432845.4 mean_steps=16.4
|
|
[Episode 157330] reward=-114324396.9 actor_loss=0.3392 critic_loss=81591624527.4483 entropy=17.5004 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 157340] reward=-118475441.8 actor_loss=0.3767 critic_loss=87275449163.2941 entropy=17.5047 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 157340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408468.3 mean_steps=15.5
|
|
[Episode 157350] reward=-123217249.7 actor_loss=0.3323 critic_loss=93659465552.4571 entropy=17.4963 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 157360] reward=-121087617.5 actor_loss=0.2115 critic_loss=90588840146.0513 entropy=17.4927 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 157360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-485809.4 mean_steps=15.9
|
|
[Episode 157370] reward=-119661232.9 actor_loss=0.2872 critic_loss=84094239402.6667 entropy=17.4916 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 157380] reward=-118196060.8 actor_loss=0.2120 critic_loss=83813835142.0952 entropy=17.5039 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 157380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-384826.4 mean_steps=15.2
|
|
[Episode 157390] reward=-119045679.5 actor_loss=0.2745 critic_loss=88757821440.0000 entropy=17.4997 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 157400] reward=-118376180.1 actor_loss=0.3480 critic_loss=86036612096.0000 entropy=17.5021 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 157400] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-417914.4 mean_steps=17.0
|
|
[Episode 157410] reward=-111936581.3 actor_loss=0.3085 critic_loss=97304659870.4762 entropy=17.5136 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 157420] reward=-118229748.6 actor_loss=0.2916 critic_loss=85080680555.7895 entropy=17.5188 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 157420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424393.5 mean_steps=15.3
|
|
[Episode 157430] reward=-116687761.0 actor_loss=0.1989 critic_loss=84333998489.6000 entropy=17.5156 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 157440] reward=-113295493.5 actor_loss=0.2446 critic_loss=86912484752.6956 entropy=17.5216 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 157440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-537048.0 mean_steps=12.3
|
|
[Episode 157450] reward=-116568258.4 actor_loss=0.3623 critic_loss=87432110080.0000 entropy=17.5274 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 157460] reward=-118770685.8 actor_loss=0.3333 critic_loss=86211976192.0000 entropy=17.5307 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 157460] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-706323.6 mean_steps=10.4
|
|
[Episode 157470] reward=-116482034.9 actor_loss=0.2704 critic_loss=87624258901.3333 entropy=17.5315 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 157480] reward=-121603060.1 actor_loss=0.3035 critic_loss=90650999661.7143 entropy=17.5386 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 157480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490006.6 mean_steps=15.1
|
|
[Episode 157490] reward=-121508164.4 actor_loss=0.2773 critic_loss=88141501662.6087 entropy=17.5428 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 157500] reward=-122997813.8 actor_loss=0.3377 critic_loss=94305779894.0444 entropy=17.5439 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 157500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551976.3 mean_steps=13.4
|
|
[Episode 157510] reward=-118970347.2 actor_loss=0.3197 critic_loss=89407807146.6667 entropy=17.5386 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 157520] reward=-123644199.1 actor_loss=0.1848 critic_loss=92456800628.3636 entropy=17.5315 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 157520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-544808.8 mean_steps=15.2
|
|
[Episode 157530] reward=-119486684.9 actor_loss=0.3381 critic_loss=84455989248.0000 entropy=17.5215 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 157540] reward=-122849297.8 actor_loss=0.2532 critic_loss=90792970922.6667 entropy=17.5058 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 157540] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-351452.6 mean_steps=16.7
|
|
[Episode 157550] reward=-122794291.4 actor_loss=0.2126 critic_loss=88004059526.0952 entropy=17.5009 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 157560] reward=-116358844.3 actor_loss=0.3734 critic_loss=85898825142.8571 entropy=17.5041 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 157560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-545164.1 mean_steps=14.1
|
|
[Episode 157570] reward=-124479570.2 actor_loss=0.2478 critic_loss=92200648400.5926 entropy=17.4972 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 157580] reward=-111775251.0 actor_loss=0.3615 critic_loss=83222978560.0000 entropy=17.4994 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 157580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-706776.0 mean_steps=12.5
|
|
[Episode 157590] reward=-120327159.9 actor_loss=0.2743 critic_loss=92546021323.4872 entropy=17.5008 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 157600] reward=-120934094.6 actor_loss=0.3186 critic_loss=90133331361.1852 entropy=17.5001 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 157600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-636425.4 mean_steps=13.2
|
|
[Episode 157610] reward=-118301068.2 actor_loss=0.2311 critic_loss=85951615127.7037 entropy=17.5243 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 157620] reward=-111762432.0 actor_loss=0.2630 critic_loss=88505759170.5600 entropy=17.5262 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 157620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602532.4 mean_steps=12.7
|
|
[Episode 157630] reward=-120603931.5 actor_loss=0.2011 critic_loss=86965662028.1081 entropy=17.5281 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 157640] reward=-120222182.8 actor_loss=0.2200 critic_loss=86039396352.0000 entropy=17.5254 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 157640] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407075.9 mean_steps=16.4
|
|
[Episode 157650] reward=-114992726.7 actor_loss=0.2182 critic_loss=85968811739.4286 entropy=17.5349 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 157660] reward=-114069955.1 actor_loss=0.3289 critic_loss=83049695914.6667 entropy=17.5301 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 157660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-432706.6 mean_steps=14.2
|
|
[Episode 157670] reward=-120534036.2 actor_loss=0.3157 critic_loss=89779831564.1905 entropy=17.5643 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 157680] reward=-120900267.0 actor_loss=0.3065 critic_loss=89729010005.3333 entropy=17.5491 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 157680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-473132.1 mean_steps=15.7
|
|
[Episode 157690] reward=-114757497.1 actor_loss=0.3051 critic_loss=80261517220.9778 entropy=17.5529 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 157700] reward=-125095234.5 actor_loss=0.2827 critic_loss=98399095648.7111 entropy=17.5377 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 157700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-468204.3 mean_steps=16.0
|
|
[Episode 157710] reward=-114358717.2 actor_loss=0.3663 critic_loss=88631895337.2903 entropy=17.5426 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 157720] reward=-118483468.9 actor_loss=0.3003 critic_loss=83420630220.8000 entropy=17.5506 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 157720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415712.3 mean_steps=15.8
|
|
[Episode 157730] reward=-120250894.8 actor_loss=0.3325 critic_loss=89534983372.8000 entropy=17.5522 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 157740] reward=-115389653.9 actor_loss=0.4088 critic_loss=81485782857.9556 entropy=17.5478 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1517 front_blocked=0
|
|
[Eval 157740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379164.0 mean_steps=15.8
|
|
[Episode 157750] reward=-114551012.2 actor_loss=0.2685 critic_loss=79957738884.4138 entropy=17.5492 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 157760] reward=-119133700.0 actor_loss=0.3170 critic_loss=87829354420.1481 entropy=17.5550 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 157760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469949.2 mean_steps=14.5
|
|
[Episode 157770] reward=-109707671.2 actor_loss=0.2035 critic_loss=74898730715.4286 entropy=17.5587 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 157780] reward=-125533099.2 actor_loss=0.1690 critic_loss=136125881384.9600 entropy=17.5485 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 157780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-852769.4 mean_steps=13.6
|
|
[Episode 157790] reward=-113879133.3 actor_loss=0.3798 critic_loss=83393189010.2857 entropy=17.5527 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 157800] reward=-116784316.6 actor_loss=0.2981 critic_loss=83878139221.3333 entropy=17.5548 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 157800] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-366987.0 mean_steps=16.8
|
|
[Episode 157810] reward=-115140239.7 actor_loss=0.2576 critic_loss=79974124020.6222 entropy=17.5546 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 157820] reward=-123866765.0 actor_loss=0.2488 critic_loss=89991787315.2000 entropy=17.5553 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 157820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-384025.6 mean_steps=14.7
|
|
[Episode 157830] reward=-117721998.5 actor_loss=0.1976 critic_loss=84659711353.2632 entropy=17.5557 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 157840] reward=-114382577.8 actor_loss=0.4532 critic_loss=83220414712.2424 entropy=17.5605 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 157840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517864.8 mean_steps=14.1
|
|
[Episode 157850] reward=-120367918.4 actor_loss=0.2474 critic_loss=93208056885.8947 entropy=17.5725 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 157860] reward=-119093679.1 actor_loss=0.3391 critic_loss=88794347565.5111 entropy=17.5667 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 157860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458445.7 mean_steps=15.8
|
|
[Episode 157870] reward=-124249332.7 actor_loss=0.2663 critic_loss=94435366001.7778 entropy=17.5686 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 157880] reward=-121755847.9 actor_loss=0.2605 critic_loss=94650870760.1861 entropy=17.5541 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 157880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466920.6 mean_steps=14.2
|
|
[Episode 157890] reward=-113513675.6 actor_loss=0.3795 critic_loss=80621616059.7333 entropy=17.5645 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 157900] reward=-118164945.6 actor_loss=0.3555 critic_loss=84507559480.8889 entropy=17.5761 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 157900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507226.3 mean_steps=13.2
|
|
[Episode 157910] reward=-123490890.4 actor_loss=0.2325 critic_loss=91487839768.3810 entropy=17.5663 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 157920] reward=-121403150.2 actor_loss=0.3368 critic_loss=88908359121.4545 entropy=17.5554 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 157920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-425608.9 mean_steps=14.4
|
|
[Episode 157930] reward=-121027492.3 actor_loss=0.2764 critic_loss=85959703119.6444 entropy=17.5542 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 157940] reward=-118734422.0 actor_loss=0.2413 critic_loss=89129161523.2000 entropy=17.5604 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 157940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-572946.8 mean_steps=13.1
|
|
[Episode 157950] reward=-117345029.7 actor_loss=0.3328 critic_loss=86722058285.5111 entropy=17.5726 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 157960] reward=-115697572.6 actor_loss=0.2252 critic_loss=89221652297.9556 entropy=17.5623 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 157960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573611.3 mean_steps=13.7
|
|
[Episode 157970] reward=-118921939.5 actor_loss=0.2805 critic_loss=84853394090.6667 entropy=17.5636 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 157980] reward=-132222696.9 actor_loss=0.4249 critic_loss=1304046010368.0000 entropy=17.5621 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 157980] success_rate=0.350 qp_infeasible_rate=0.600 mean_return=-318490748.1 mean_steps=173.9
|
|
[Episode 157990] reward=-118760443.2 actor_loss=0.3092 critic_loss=154844770304.0000 entropy=17.5794 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 158000] reward=-123186263.5 actor_loss=0.3259 critic_loss=145576776869.1613 entropy=17.5758 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 158000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530787.3 mean_steps=13.8
|
|
[Episode 158010] reward=-116653597.4 actor_loss=0.3294 critic_loss=84334980459.3548 entropy=17.5718 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 158020] reward=-118787614.1 actor_loss=0.2369 critic_loss=92746137209.9048 entropy=17.5748 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 158020] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-321330.4 mean_steps=17.4
|
|
[Episode 158030] reward=-118464226.8 actor_loss=0.2968 critic_loss=90008404423.1111 entropy=17.5733 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 158040] reward=-113559847.3 actor_loss=0.2779 critic_loss=78933810884.9231 entropy=17.5562 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 158040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405876.7 mean_steps=15.2
|
|
[Episode 158050] reward=-116493405.3 actor_loss=0.3711 critic_loss=89461164819.6923 entropy=17.5503 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 158060] reward=-124163669.8 actor_loss=0.3048 critic_loss=213502848133.5652 entropy=17.5430 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 158060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549814.9 mean_steps=13.3
|
|
[Episode 158070] reward=-119584329.9 actor_loss=0.2528 critic_loss=87170938407.3846 entropy=17.5322 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 158080] reward=-112720045.9 actor_loss=0.3271 critic_loss=78166208420.9778 entropy=17.5316 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 158080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431550.8 mean_steps=15.2
|
|
[Episode 158090] reward=-118942259.9 actor_loss=0.3449 critic_loss=100940061627.7333 entropy=17.5164 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 158100] reward=-122002820.1 actor_loss=0.2546 critic_loss=94044548482.8445 entropy=17.5090 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 158100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471593.4 mean_steps=14.4
|
|
[Episode 158110] reward=-118401530.5 actor_loss=0.2465 critic_loss=98363729296.6956 entropy=17.5013 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 158120] reward=-119685943.9 actor_loss=0.3086 critic_loss=121422612070.4000 entropy=17.5127 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 158120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498924.0 mean_steps=14.2
|
|
[Episode 158130] reward=-120110172.9 actor_loss=0.2448 critic_loss=93490750532.2667 entropy=17.5060 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 158140] reward=-119834466.4 actor_loss=0.2499 critic_loss=243624199372.8000 entropy=17.5156 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 158140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452511.5 mean_steps=14.7
|
|
[Episode 158150] reward=-114110308.1 actor_loss=0.3835 critic_loss=90931186005.3333 entropy=17.5045 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 158160] reward=-120797612.3 actor_loss=0.2156 critic_loss=93450731520.0000 entropy=17.5219 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 158160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-401226.1 mean_steps=15.3
|
|
[Episode 158170] reward=-121466604.0 actor_loss=0.3166 critic_loss=95067586742.0444 entropy=17.5197 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 158180] reward=-751992585.1 actor_loss=0.2278 critic_loss=464758538409622.5625 entropy=17.5305 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 158180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457773.1 mean_steps=14.3
|
|
[Episode 158190] reward=-117904766.9 actor_loss=0.2717 critic_loss=91578888192.0000 entropy=17.5391 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 158200] reward=-332929480.4 actor_loss=0.2299 critic_loss=66354098995200.0000 entropy=17.5507 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 158200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510431.5 mean_steps=13.3
|
|
[Episode 158210] reward=-118248703.5 actor_loss=0.2790 critic_loss=90939356137.2444 entropy=17.5408 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 158220] reward=-109693263.8 actor_loss=0.2824 critic_loss=79021841749.3333 entropy=17.5499 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 158220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423074.6 mean_steps=15.7
|
|
[Episode 158230] reward=-115856312.9 actor_loss=0.2785 critic_loss=89407089504.7111 entropy=17.5635 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 158240] reward=-136799497.1 actor_loss=0.3707 critic_loss=2365150418261.3335 entropy=17.5675 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 158240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461056.3 mean_steps=14.8
|
|
[Episode 158250] reward=-117514175.6 actor_loss=0.2335 critic_loss=92232799209.2444 entropy=17.5621 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 158260] reward=-117432990.0 actor_loss=0.2948 critic_loss=88385175552.0000 entropy=17.5812 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 158260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402634.6 mean_steps=15.4
|
|
[Episode 158270] reward=-121173486.3 actor_loss=0.3128 critic_loss=131941507072.0000 entropy=17.5767 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 158280] reward=-157866117.1 actor_loss=0.2502 critic_loss=3851186529668.4136 entropy=17.5706 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 158280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-632039.8 mean_steps=13.2
|
|
[Episode 158290] reward=-117288887.6 actor_loss=0.2735 critic_loss=84883648827.0769 entropy=17.5741 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 158300] reward=-118622974.1 actor_loss=0.2434 critic_loss=91762429678.9333 entropy=17.5719 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 158300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482498.0 mean_steps=14.9
|
|
[Episode 158310] reward=-118398322.8 actor_loss=0.3401 critic_loss=88291322538.6667 entropy=17.5692 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 158320] reward=-108318413.8 actor_loss=0.3756 critic_loss=75636458837.3333 entropy=17.5683 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 158320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554805.7 mean_steps=13.3
|
|
[Episode 158330] reward=-117730235.4 actor_loss=0.3496 critic_loss=86036386527.1795 entropy=17.5835 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 158340] reward=-114748985.5 actor_loss=0.2899 critic_loss=82438942976.0000 entropy=17.5802 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 158340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458562.5 mean_steps=14.6
|
|
[Episode 158350] reward=-118296794.3 actor_loss=0.2134 critic_loss=85674631168.0000 entropy=17.5796 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 158360] reward=-120222325.1 actor_loss=0.2149 critic_loss=93812254037.3333 entropy=17.5734 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 158360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-520604.8 mean_steps=15.2
|
|
[Episode 158370] reward=-114405561.7 actor_loss=0.2352 critic_loss=84380988052.6452 entropy=17.5696 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 158380] reward=-115679613.0 actor_loss=0.3033 critic_loss=95028182667.6364 entropy=17.5623 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 158380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446679.7 mean_steps=14.7
|
|
[Episode 158390] reward=-118953229.1 actor_loss=0.3747 critic_loss=190424679492.2667 entropy=17.5728 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 158400] reward=-118473013.9 actor_loss=0.2137 critic_loss=84367435548.4444 entropy=17.5759 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 158400] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-244983.4 mean_steps=18.2
|
|
[Episode 158410] reward=-115062568.5 actor_loss=0.3413 critic_loss=90362828390.4000 entropy=17.5845 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 158420] reward=-112731667.3 actor_loss=0.3384 critic_loss=81270425088.0000 entropy=17.5797 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 158420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544895.0 mean_steps=14.4
|
|
[Episode 158430] reward=-187758787.4 actor_loss=0.1716 critic_loss=11286790633795.3691 entropy=17.5944 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1126 front_blocked=0
|
|
[Episode 158440] reward=-152754702.3 actor_loss=0.2671 critic_loss=4768273458062.2227 entropy=17.6006 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 158440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566667.2 mean_steps=12.5
|
|
[Episode 158450] reward=-113474186.2 actor_loss=0.3285 critic_loss=88904532332.0889 entropy=17.5938 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 158460] reward=-113226806.5 actor_loss=0.2763 critic_loss=92503452876.8000 entropy=17.5882 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 158460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-594855.2 mean_steps=13.3
|
|
[Episode 158470] reward=-115380893.1 actor_loss=0.2958 critic_loss=84685990316.6512 entropy=17.5830 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 158480] reward=-118318656.5 actor_loss=0.3238 critic_loss=92114574592.0000 entropy=17.5849 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 158480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-429837.2 mean_steps=16.4
|
|
[Episode 158490] reward=-118848409.8 actor_loss=0.2121 critic_loss=87387564519.6190 entropy=17.5856 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 158500] reward=-114069442.1 actor_loss=0.2405 critic_loss=82418327779.5556 entropy=17.5863 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 158500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464977.7 mean_steps=14.8
|
|
[Episode 158510] reward=-118928746.6 actor_loss=0.2838 critic_loss=82334509738.6667 entropy=17.5794 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 158520] reward=-129375703.9 actor_loss=0.3252 critic_loss=407640487643.4286 entropy=17.5869 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 158520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-630149.7 mean_steps=12.5
|
|
[Episode 158530] reward=-116718327.0 actor_loss=0.3199 critic_loss=85551170969.6000 entropy=17.5883 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 158540] reward=-114543737.1 actor_loss=0.2999 critic_loss=87707624836.4138 entropy=17.5929 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 158540] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-365934.6 mean_steps=16.9
|
|
[Episode 158550] reward=-116832723.5 actor_loss=0.2281 critic_loss=85472085060.2667 entropy=17.5870 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 158560] reward=-115448618.1 actor_loss=0.3083 critic_loss=80140553517.1765 entropy=17.5836 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 158560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-506625.7 mean_steps=14.8
|
|
[Episode 158570] reward=-117571133.8 actor_loss=0.1673 critic_loss=84598027605.3333 entropy=17.5765 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 158580] reward=-113228906.8 actor_loss=0.4858 critic_loss=82871133388.8000 entropy=17.5704 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1530 front_blocked=0
|
|
[Eval 158580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-521647.0 mean_steps=13.3
|
|
[Episode 158590] reward=-115303598.6 actor_loss=0.2383 critic_loss=86749627642.3111 entropy=17.5685 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 158600] reward=-112958358.3 actor_loss=0.3273 critic_loss=84482143436.8000 entropy=17.5664 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 158600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392897.8 mean_steps=15.9
|
|
[Episode 158610] reward=-121538424.7 actor_loss=0.3028 critic_loss=90605716548.2667 entropy=17.5627 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 158620] reward=-119994663.6 actor_loss=0.2112 critic_loss=86998479667.2000 entropy=17.5593 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 158620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476506.9 mean_steps=14.8
|
|
[Episode 158630] reward=-116938161.1 actor_loss=0.2921 critic_loss=84238001111.0400 entropy=17.5642 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 158640] reward=-120675360.5 actor_loss=0.2950 critic_loss=101782812145.3714 entropy=17.5559 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 158640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423243.2 mean_steps=15.1
|
|
[Episode 158650] reward=-115940252.4 actor_loss=0.4531 critic_loss=85149388274.8718 entropy=17.5511 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 158660] reward=-118526375.0 actor_loss=0.3905 critic_loss=89774488175.3044 entropy=17.5412 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 158660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-498700.0 mean_steps=12.9
|
|
[Episode 158670] reward=-120113762.4 actor_loss=0.2509 critic_loss=89039307093.3333 entropy=17.5578 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 158680] reward=-120574735.4 actor_loss=0.3650 critic_loss=95343895620.2667 entropy=17.5694 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 158680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-596475.4 mean_steps=13.7
|
|
[Episode 158690] reward=-120770855.3 actor_loss=0.3068 critic_loss=92622035750.7879 entropy=17.5699 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 158700] reward=-116722081.7 actor_loss=0.3034 critic_loss=88769557572.2667 entropy=17.5656 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 158700] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-313675.3 mean_steps=17.3
|
|
[Episode 158710] reward=-116762286.9 actor_loss=0.3480 critic_loss=83965397310.5778 entropy=17.5799 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 158720] reward=-113735855.4 actor_loss=0.3649 critic_loss=90785085392.3721 entropy=17.5947 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 158720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-584006.1 mean_steps=11.9
|
|
[Episode 158730] reward=-113881985.3 actor_loss=0.3009 critic_loss=78821191797.0286 entropy=17.5693 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 158740] reward=-122084970.8 actor_loss=0.1832 critic_loss=90428861595.1515 entropy=17.5903 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 158740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-321046.5 mean_steps=16.8
|
|
[Episode 158750] reward=-114942198.3 actor_loss=0.3904 critic_loss=82950634186.4186 entropy=17.6007 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 158760] reward=-119284496.5 actor_loss=0.3431 critic_loss=88580819626.6667 entropy=17.6101 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 158760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427974.2 mean_steps=14.6
|
|
[Episode 158770] reward=-115468041.9 actor_loss=0.3901 critic_loss=85800114273.5238 entropy=17.5986 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 158780] reward=-124576062.8 actor_loss=0.3587 critic_loss=166038941696.0000 entropy=17.5867 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 158780] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398354.5 mean_steps=16.1
|
|
[Episode 158790] reward=-122537960.7 actor_loss=0.2591 critic_loss=92403465511.8222 entropy=17.5891 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 158800] reward=-121814919.4 actor_loss=0.1815 critic_loss=93843823001.6000 entropy=17.5968 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 158800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468149.6 mean_steps=14.7
|
|
[Episode 158810] reward=-117855617.5 actor_loss=0.2374 critic_loss=87288062420.1143 entropy=17.5931 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 158820] reward=-121343880.2 actor_loss=0.2497 critic_loss=92224041779.2000 entropy=17.6131 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 158820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480954.4 mean_steps=14.8
|
|
[Episode 158830] reward=-120794603.2 actor_loss=0.3015 critic_loss=185541224448.0000 entropy=17.6130 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 158840] reward=-125927142.5 actor_loss=0.2969 critic_loss=409999881739.3778 entropy=17.5992 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 158840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531219.3 mean_steps=13.1
|
|
[Episode 158850] reward=-118584246.9 actor_loss=0.3030 critic_loss=87271443296.7111 entropy=17.6014 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 158860] reward=-114542179.3 actor_loss=0.3212 critic_loss=84668691979.3778 entropy=17.6046 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 158860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487947.6 mean_steps=14.7
|
|
[Episode 158870] reward=-118853621.8 actor_loss=0.3011 critic_loss=89631117038.9333 entropy=17.6139 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 158880] reward=-120680566.3 actor_loss=0.2339 critic_loss=90010220641.5238 entropy=17.6067 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 158880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541942.6 mean_steps=13.2
|
|
[Episode 158890] reward=-112995332.9 actor_loss=0.2406 critic_loss=87371393643.1628 entropy=17.5985 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 158900] reward=-116230310.8 actor_loss=0.3679 critic_loss=87797485195.6364 entropy=17.6104 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 158900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435944.6 mean_steps=14.4
|
|
[Episode 158910] reward=-120855138.3 actor_loss=0.2383 critic_loss=89311949420.6061 entropy=17.6092 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 158920] reward=-110819824.8 actor_loss=0.3070 critic_loss=89874595384.8889 entropy=17.6341 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 158920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368907.8 mean_steps=15.9
|
|
[Episode 158930] reward=-118062475.0 actor_loss=0.2705 critic_loss=90774873429.3333 entropy=17.6366 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 158940] reward=-122764973.0 actor_loss=0.2621 critic_loss=92667707181.9487 entropy=17.6346 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 158940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503835.8 mean_steps=13.9
|
|
[Episode 158950] reward=-119183577.8 actor_loss=0.2455 critic_loss=173273121480.3478 entropy=17.6262 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 158960] reward=-120979626.1 actor_loss=0.1977 critic_loss=89132483106.1333 entropy=17.6312 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 158960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490961.4 mean_steps=14.8
|
|
[Episode 158970] reward=-122555099.5 actor_loss=0.2492 critic_loss=92398975906.9091 entropy=17.6379 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 158980] reward=-116601569.4 actor_loss=0.2469 critic_loss=86771063739.7333 entropy=17.6588 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 158980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-445381.7 mean_steps=13.6
|
|
[Episode 158990] reward=-112747646.2 actor_loss=0.3268 critic_loss=82135796157.2174 entropy=17.6555 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 159000] reward=-112278073.3 actor_loss=0.3901 critic_loss=80374041804.8000 entropy=17.6550 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 159000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416003.7 mean_steps=15.0
|
|
[Episode 159010] reward=-120331642.5 actor_loss=0.2415 critic_loss=91139247854.9333 entropy=17.6476 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 159020] reward=-116735698.4 actor_loss=0.3870 critic_loss=89667114507.3778 entropy=17.6507 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 159020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417284.6 mean_steps=15.1
|
|
[Episode 159030] reward=-109033498.6 actor_loss=0.3018 critic_loss=78455997408.9697 entropy=17.6473 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 159040] reward=-119754755.3 actor_loss=0.3035 critic_loss=90583044958.3158 entropy=17.6383 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 159040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427062.5 mean_steps=15.3
|
|
[Episode 159050] reward=-114885200.8 actor_loss=0.3405 critic_loss=82071374233.6000 entropy=17.6396 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 159060] reward=-114818017.3 actor_loss=0.3457 critic_loss=85110801705.2903 entropy=17.6438 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 159060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468708.3 mean_steps=14.7
|
|
[Episode 159070] reward=-119956470.7 actor_loss=0.2838 critic_loss=87176232353.1852 entropy=17.6384 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 159080] reward=-117729445.1 actor_loss=0.3321 critic_loss=84817213667.5556 entropy=17.6627 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 159080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-628335.6 mean_steps=11.8
|
|
[Episode 159090] reward=-116492999.6 actor_loss=0.2692 critic_loss=88556835962.8800 entropy=17.6828 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 159100] reward=-118451853.4 actor_loss=0.2237 critic_loss=84330916158.5778 entropy=17.6753 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 159100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434268.5 mean_steps=14.5
|
|
[Episode 159110] reward=-113857941.2 actor_loss=0.2655 critic_loss=90467903818.3226 entropy=17.6592 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 159120] reward=-117004439.6 actor_loss=0.3252 critic_loss=87448252416.0000 entropy=17.6566 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 159120] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-284165.1 mean_steps=17.1
|
|
[Episode 159130] reward=-119603293.0 actor_loss=0.2359 critic_loss=84950325930.6667 entropy=17.6434 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 159140] reward=-110906450.0 actor_loss=0.3658 critic_loss=81889130763.1304 entropy=17.6375 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 159140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-511364.7 mean_steps=12.9
|
|
[Episode 159150] reward=-120507568.5 actor_loss=0.3000 critic_loss=91537353204.6222 entropy=17.6341 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 159160] reward=-123072019.2 actor_loss=0.2921 critic_loss=88295081734.2439 entropy=17.6483 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 159160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-490643.0 mean_steps=15.6
|
|
[Episode 159170] reward=-115440043.5 actor_loss=0.3226 critic_loss=84235984167.8222 entropy=17.6554 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 159180] reward=-114369479.8 actor_loss=0.2931 critic_loss=82125331569.7778 entropy=17.6278 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 159180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-406435.0 mean_steps=16.2
|
|
[Episode 159190] reward=-120149036.1 actor_loss=0.2890 critic_loss=89438171591.1111 entropy=17.6140 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 159200] reward=-122784301.4 actor_loss=0.2313 critic_loss=91528283536.6956 entropy=17.6034 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 159200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510339.2 mean_steps=13.1
|
|
[Episode 159210] reward=-118736894.9 actor_loss=0.3199 critic_loss=83290617173.3333 entropy=17.6027 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 159220] reward=-116275903.8 actor_loss=0.3985 critic_loss=82805062997.3333 entropy=17.6049 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 159220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-393316.2 mean_steps=15.0
|
|
[Episode 159230] reward=-114312207.9 actor_loss=0.4158 critic_loss=88850308573.8667 entropy=17.6084 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 159240] reward=-113423489.4 actor_loss=0.2517 critic_loss=80456897149.1555 entropy=17.6067 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 159240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-455141.7 mean_steps=12.8
|
|
[Episode 159250] reward=-122800539.1 actor_loss=0.2570 critic_loss=91791025449.2903 entropy=17.6084 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 159260] reward=-120789379.1 actor_loss=0.2471 critic_loss=90801515906.8445 entropy=17.6045 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 159260] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-649143.4 mean_steps=11.2
|
|
[Episode 159270] reward=-122123510.8 actor_loss=0.2633 critic_loss=95588908145.7778 entropy=17.6165 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 159280] reward=-117096331.8 actor_loss=0.2551 critic_loss=90183255084.5217 entropy=17.6216 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 159280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478604.0 mean_steps=14.9
|
|
[Episode 159290] reward=-119126795.1 actor_loss=0.2643 critic_loss=85455862009.7561 entropy=17.6203 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 159300] reward=-111287990.8 actor_loss=0.3242 critic_loss=82468140529.3714 entropy=17.6189 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 159300] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-306899.2 mean_steps=17.4
|
|
[Episode 159310] reward=-115702619.4 actor_loss=0.3127 critic_loss=84723937689.6000 entropy=17.6066 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 159320] reward=-115786792.8 actor_loss=0.2014 critic_loss=82533896477.7674 entropy=17.6220 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 159320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-458164.7 mean_steps=14.0
|
|
[Episode 159330] reward=-119816678.5 actor_loss=0.2662 critic_loss=90028436950.4865 entropy=17.6158 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 159340] reward=-115536893.4 actor_loss=0.2807 critic_loss=88362846708.6222 entropy=17.6129 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 159340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556448.4 mean_steps=13.3
|
|
[Episode 159350] reward=-114356478.6 actor_loss=0.3438 critic_loss=86768036627.6923 entropy=17.6086 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 159360] reward=-120564828.6 actor_loss=0.1813 critic_loss=88402684404.6222 entropy=17.6086 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 159360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391247.5 mean_steps=15.9
|
|
[Episode 159370] reward=-121296080.8 actor_loss=0.3247 critic_loss=93406138492.1212 entropy=17.6101 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 159380] reward=-116620637.4 actor_loss=0.3554 critic_loss=89781717464.6154 entropy=17.5919 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 159380] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-361566.2 mean_steps=16.6
|
|
[Episode 159390] reward=-115981930.7 actor_loss=0.2843 critic_loss=88112492668.1212 entropy=17.5859 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 159400] reward=-115306640.2 actor_loss=0.3439 critic_loss=86601537763.5556 entropy=17.5882 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 159400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420249.6 mean_steps=15.5
|
|
[Episode 159410] reward=-116234082.6 actor_loss=0.2100 critic_loss=88343940002.9091 entropy=17.5902 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 159420] reward=-121659680.8 actor_loss=0.2303 critic_loss=89111662450.7586 entropy=17.5979 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 159420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-444749.2 mean_steps=14.2
|
|
[Episode 159430] reward=-122464596.5 actor_loss=0.3047 critic_loss=94636056079.5152 entropy=17.5977 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 159440] reward=-116841660.7 actor_loss=0.3341 critic_loss=88001993027.3684 entropy=17.5937 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 159440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446762.6 mean_steps=15.2
|
|
[Episode 159450] reward=-116727542.8 actor_loss=0.3186 critic_loss=86250441138.4242 entropy=17.5859 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 159460] reward=-121481840.6 actor_loss=0.3908 critic_loss=126897388246.7097 entropy=17.5829 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 159460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-578124.2 mean_steps=15.2
|
|
[Episode 159470] reward=-123772588.0 actor_loss=0.2688 critic_loss=266268138259.6923 entropy=17.5809 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 159480] reward=-125282394.4 actor_loss=0.2681 critic_loss=200420779154.2857 entropy=17.5831 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 159480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503281.0 mean_steps=14.9
|
|
[Episode 159490] reward=-121592378.6 actor_loss=0.2739 critic_loss=130363155894.8571 entropy=17.5636 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 159500] reward=-122373342.2 actor_loss=0.2952 critic_loss=227004722614.8571 entropy=17.5709 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 159500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414102.8 mean_steps=15.1
|
|
[Episode 159510] reward=-119062272.0 actor_loss=0.3284 critic_loss=84858210923.1628 entropy=17.5630 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 159520] reward=-117551419.1 actor_loss=0.2975 critic_loss=88486753426.2857 entropy=17.5585 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 159520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510021.0 mean_steps=13.9
|
|
[Episode 159530] reward=-121907627.2 actor_loss=0.3203 critic_loss=179617494175.2889 entropy=17.5523 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 159540] reward=-118376048.0 actor_loss=0.3713 critic_loss=134488513103.6444 entropy=17.5578 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 159540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379428.9 mean_steps=16.1
|
|
[Episode 159550] reward=-118590962.4 actor_loss=0.3151 critic_loss=85736498269.0909 entropy=17.5608 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 159560] reward=-120484237.3 actor_loss=0.2302 critic_loss=93745450361.2632 entropy=17.5558 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 159560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531091.3 mean_steps=14.2
|
|
[Episode 159570] reward=-112810602.9 actor_loss=0.4325 critic_loss=86180923801.6000 entropy=17.5495 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 159580] reward=-120841448.3 actor_loss=0.2432 critic_loss=87862636544.0000 entropy=17.5584 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 159580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-405474.5 mean_steps=15.9
|
|
[Episode 159590] reward=-115226697.5 actor_loss=0.3226 critic_loss=86018682242.8445 entropy=17.5552 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 159600] reward=-119318394.7 actor_loss=0.2407 critic_loss=84950791899.4286 entropy=17.5629 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 159600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-572262.8 mean_steps=13.2
|
|
[Episode 159610] reward=-114418120.2 actor_loss=0.3127 critic_loss=111730304297.2903 entropy=17.5693 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 159620] reward=-120775385.5 actor_loss=0.3004 critic_loss=89647284906.6667 entropy=17.5691 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 159620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415460.3 mean_steps=16.3
|
|
[Episode 159630] reward=-118702377.9 actor_loss=0.2232 critic_loss=90120374651.2593 entropy=17.5574 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 159640] reward=-112698886.8 actor_loss=0.3496 critic_loss=90158310144.0000 entropy=17.5547 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 159640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-608756.4 mean_steps=13.2
|
|
[Episode 159650] reward=-126618956.1 actor_loss=0.2819 critic_loss=334616963544.6154 entropy=17.5522 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 159660] reward=-112959241.4 actor_loss=0.3357 critic_loss=82929967104.0000 entropy=17.5522 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 159660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450851.0 mean_steps=15.4
|
|
[Episode 159670] reward=-114937865.3 actor_loss=0.3519 critic_loss=85705544402.8235 entropy=17.5580 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 159680] reward=-114312286.4 actor_loss=0.3519 critic_loss=81889749594.3529 entropy=17.5452 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 159680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-696552.8 mean_steps=13.3
|
|
[Episode 159690] reward=-118561823.6 actor_loss=0.3101 critic_loss=87873711182.7692 entropy=17.5363 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 159700] reward=-118953946.4 actor_loss=0.2535 critic_loss=87963415581.2571 entropy=17.5322 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 159700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574950.0 mean_steps=13.4
|
|
[Episode 159710] reward=-115912908.7 actor_loss=0.3871 critic_loss=88548528653.1282 entropy=17.5441 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 159720] reward=-114343383.9 actor_loss=0.3120 critic_loss=84896005597.8667 entropy=17.5403 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 159720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-577658.3 mean_steps=11.4
|
|
[Episode 159730] reward=-116344496.2 actor_loss=0.3300 critic_loss=87344456704.0000 entropy=17.5364 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 159740] reward=-111234794.7 actor_loss=0.3576 critic_loss=81362875313.2308 entropy=17.5387 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 159740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551222.9 mean_steps=12.9
|
|
[Episode 159750] reward=-119504259.2 actor_loss=0.3131 critic_loss=90129854008.8889 entropy=17.5519 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 159760] reward=-117356757.5 actor_loss=0.4033 critic_loss=269185923630.5454 entropy=17.5389 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 159760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501471.0 mean_steps=15.1
|
|
[Episode 159770] reward=-121013329.5 actor_loss=0.2708 critic_loss=91251746061.4737 entropy=17.5459 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 159780] reward=-133617821.4 actor_loss=0.3177 critic_loss=1210052972544.0000 entropy=17.5482 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 159780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585438.7 mean_steps=12.6
|
|
[Episode 159790] reward=-119396677.8 actor_loss=0.3386 critic_loss=87203258075.4286 entropy=17.5307 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 159800] reward=-121049805.3 actor_loss=0.3052 critic_loss=87685320977.0667 entropy=17.5030 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 159800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-616532.0 mean_steps=12.0
|
|
[Episode 159810] reward=-115517639.0 actor_loss=0.2777 critic_loss=82958046367.2889 entropy=17.5172 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 159820] reward=-120766599.0 actor_loss=0.2949 critic_loss=90144087153.7778 entropy=17.5128 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 159820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-584562.4 mean_steps=13.3
|
|
[Episode 159830] reward=-117511218.9 actor_loss=0.2473 critic_loss=85713489009.7778 entropy=17.5160 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 159840] reward=-121423549.9 actor_loss=0.2526 critic_loss=93518192405.9429 entropy=17.5127 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 159840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431412.0 mean_steps=15.6
|
|
[Episode 159850] reward=-120891253.2 actor_loss=0.3662 critic_loss=92098748142.9333 entropy=17.5232 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 159860] reward=-118328847.0 actor_loss=0.2778 critic_loss=97610639546.1818 entropy=17.5257 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 159860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-489654.3 mean_steps=13.0
|
|
[Episode 159870] reward=-114434913.3 actor_loss=0.3205 critic_loss=83297338163.2000 entropy=17.5266 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 159880] reward=-118242972.1 actor_loss=0.2672 critic_loss=84251707271.5294 entropy=17.5277 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 159880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540441.3 mean_steps=13.2
|
|
[Episode 159890] reward=-115057017.8 actor_loss=0.2569 critic_loss=84224755388.6316 entropy=17.5183 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 159900] reward=-116599932.7 actor_loss=0.2652 critic_loss=81178210004.2927 entropy=17.5233 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 159900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445406.6 mean_steps=15.4
|
|
[Episode 159910] reward=-112828089.3 actor_loss=0.3007 critic_loss=90973037454.2222 entropy=17.5248 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 159920] reward=-114333286.7 actor_loss=0.3263 critic_loss=86361134109.2571 entropy=17.5223 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 159920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560181.4 mean_steps=13.4
|
|
[Episode 159930] reward=-114682478.4 actor_loss=0.3212 critic_loss=85936914614.0444 entropy=17.5193 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 159940] reward=-114279597.1 actor_loss=0.4095 critic_loss=80940125388.8000 entropy=17.5227 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 159940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527707.7 mean_steps=14.7
|
|
[Episode 159950] reward=-113337309.1 actor_loss=0.3445 critic_loss=81155061573.8182 entropy=17.5341 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 159960] reward=-118663143.8 actor_loss=0.4211 critic_loss=89843610322.8235 entropy=17.5262 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 159960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-519836.8 mean_steps=14.8
|
|
[Episode 159970] reward=-121429328.2 actor_loss=0.3087 critic_loss=92076603392.0000 entropy=17.5287 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 159980] reward=-120538069.9 actor_loss=0.2740 critic_loss=92462009002.6667 entropy=17.5263 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 159980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463600.4 mean_steps=15.1
|
|
[Episode 159990] reward=-120741822.0 actor_loss=0.3438 critic_loss=90176659456.0000 entropy=17.5293 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 160000] reward=-121812931.4 actor_loss=0.3729 critic_loss=91603604380.9032 entropy=17.5489 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 160000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-522714.1 mean_steps=12.3
|
|
[Episode 160010] reward=-112392302.0 actor_loss=0.3886 critic_loss=85076449778.1622 entropy=17.5597 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 160020] reward=-119826761.6 actor_loss=0.2765 critic_loss=87590870129.7778 entropy=17.5652 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 160020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-516422.2 mean_steps=12.9
|
|
[Episode 160030] reward=-118674543.0 actor_loss=0.2458 critic_loss=87224126512.7619 entropy=17.5697 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 160040] reward=-115845866.1 actor_loss=0.3155 critic_loss=87359336306.7586 entropy=17.5705 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 160040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554893.9 mean_steps=12.7
|
|
[Episode 160050] reward=-118345608.2 actor_loss=0.2405 critic_loss=87687238055.7241 entropy=17.5672 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 160060] reward=-160375802.5 actor_loss=0.2954 critic_loss=6815824903463.8223 entropy=17.5870 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 160060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455678.8 mean_steps=15.2
|
|
[Episode 160070] reward=-126512395.0 actor_loss=0.2447 critic_loss=272227884311.2727 entropy=17.5808 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 160080] reward=-114921322.9 actor_loss=0.2929 critic_loss=84767355615.1795 entropy=17.5845 approx_kl=0.0115 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 160080] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-373534.8 mean_steps=17.0
|
|
[Episode 160090] reward=-114642991.8 actor_loss=0.3512 critic_loss=89065107887.1579 entropy=17.5922 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 160100] reward=-114626344.8 actor_loss=0.2924 critic_loss=90821978989.7143 entropy=17.5925 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 160100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532058.8 mean_steps=14.2
|
|
[Episode 160110] reward=-165586486.9 actor_loss=1.8173 critic_loss=7794341579616.7109 entropy=17.6007 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 160120] reward=-113051308.1 actor_loss=0.3045 critic_loss=87218491259.8710 entropy=17.5913 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 160120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547568.0 mean_steps=13.5
|
|
[Episode 160130] reward=-113092299.5 actor_loss=0.2864 critic_loss=85048280860.4444 entropy=17.5869 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 160140] reward=-113512359.1 actor_loss=0.2505 critic_loss=80954640384.0000 entropy=17.5919 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 160140] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-736481.4 mean_steps=10.8
|
|
[Episode 160150] reward=-107281904.3 actor_loss=0.3507 critic_loss=78357426995.2000 entropy=17.6014 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 160160] reward=-118820430.4 actor_loss=0.3201 critic_loss=103852469816.8889 entropy=17.6173 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 160160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492404.6 mean_steps=14.7
|
|
[Episode 160170] reward=-120731464.5 actor_loss=0.2647 critic_loss=98423294088.5333 entropy=17.6215 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 160180] reward=-121773563.4 actor_loss=0.2641 critic_loss=90607798534.5641 entropy=17.6216 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 160180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-487707.3 mean_steps=15.8
|
|
[Episode 160190] reward=-117634612.4 actor_loss=0.2533 critic_loss=88495560931.5556 entropy=17.6211 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 160200] reward=-115944873.0 actor_loss=0.3089 critic_loss=88156751052.8000 entropy=17.6172 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 160200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479168.7 mean_steps=15.1
|
|
[Episode 160210] reward=-110283693.3 actor_loss=0.3454 critic_loss=83552077004.8000 entropy=17.6139 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 160220] reward=-118952684.4 actor_loss=0.2643 critic_loss=86914527505.0667 entropy=17.6065 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 160220] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-684450.3 mean_steps=11.4
|
|
[Episode 160230] reward=-116948402.4 actor_loss=0.3357 critic_loss=84124122832.5926 entropy=17.6066 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 160240] reward=-117749923.5 actor_loss=0.4411 critic_loss=87430894228.6452 entropy=17.5919 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 160240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479201.5 mean_steps=15.0
|
|
[Episode 160250] reward=-118794845.7 actor_loss=0.3802 critic_loss=171528683702.0444 entropy=17.5968 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 160260] reward=-115999639.9 actor_loss=0.2955 critic_loss=85984043918.2222 entropy=17.5849 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 160260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573128.8 mean_steps=13.4
|
|
[Episode 160270] reward=-122503454.7 actor_loss=0.1851 critic_loss=92095350422.5882 entropy=17.5800 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 160280] reward=-120740615.9 actor_loss=0.2377 critic_loss=90034593792.0000 entropy=17.5543 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 160280] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-640793.4 mean_steps=11.2
|
|
[Episode 160290] reward=-117065346.6 actor_loss=0.3198 critic_loss=83745156361.4815 entropy=17.5571 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 160300] reward=-115009966.8 actor_loss=0.3147 critic_loss=153600027696.7619 entropy=17.5527 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 160300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-642332.8 mean_steps=13.0
|
|
[Episode 160310] reward=-120047277.0 actor_loss=0.3627 critic_loss=91333436302.2222 entropy=17.5644 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 160320] reward=-115198031.5 actor_loss=0.2397 critic_loss=81352303274.6667 entropy=17.5532 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 160320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391813.2 mean_steps=15.9
|
|
[Episode 160330] reward=-117165396.9 actor_loss=0.2609 critic_loss=104574100912.3556 entropy=17.5393 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 160340] reward=-116290097.5 actor_loss=0.2661 critic_loss=83824757600.7111 entropy=17.5407 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 160340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532638.6 mean_steps=14.4
|
|
[Episode 160350] reward=-121037115.3 actor_loss=0.2324 critic_loss=88523926186.6667 entropy=17.5520 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 160360] reward=-114249881.6 actor_loss=0.3434 critic_loss=86663248099.5556 entropy=17.5493 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 160360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462340.1 mean_steps=14.8
|
|
[Episode 160370] reward=-112928355.8 actor_loss=0.2892 critic_loss=83855159751.1111 entropy=17.5512 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 160380] reward=-113433995.1 actor_loss=0.3702 critic_loss=86609860500.2105 entropy=17.5406 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 160380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510110.2 mean_steps=13.9
|
|
[Episode 160390] reward=-116859327.0 actor_loss=0.3588 critic_loss=87990197725.8667 entropy=17.5314 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 160400] reward=-112133407.2 actor_loss=0.4048 critic_loss=90259092645.1613 entropy=17.5495 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 160400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546143.6 mean_steps=13.3
|
|
[Episode 160410] reward=-112926281.4 actor_loss=0.3093 critic_loss=85707307380.3636 entropy=17.5358 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 160420] reward=-139579879.3 actor_loss=0.2294 critic_loss=1583264244376.2163 entropy=17.5385 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 160420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442519.9 mean_steps=14.4
|
|
[Episode 160430] reward=-118612873.7 actor_loss=0.2865 critic_loss=84458507832.8889 entropy=17.5283 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 160440] reward=-122897253.7 actor_loss=0.2600 critic_loss=94696642059.3778 entropy=17.5247 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 160440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-530956.9 mean_steps=14.9
|
|
[Episode 160450] reward=-118924617.0 actor_loss=0.2466 critic_loss=87778693982.3158 entropy=17.5281 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 160460] reward=-114080473.0 actor_loss=0.3387 critic_loss=85831773508.6829 entropy=17.5236 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 160460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491555.4 mean_steps=14.9
|
|
[Episode 160470] reward=-124396514.1 actor_loss=0.2556 critic_loss=93925487895.2727 entropy=17.5113 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 160480] reward=-118451915.5 actor_loss=0.2987 critic_loss=92644062759.3846 entropy=17.5082 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 160480] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-721828.8 mean_steps=10.8
|
|
[Episode 160490] reward=-122708591.4 actor_loss=0.2636 critic_loss=92650159425.8286 entropy=17.5230 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 160500] reward=-118580855.1 actor_loss=0.3064 critic_loss=86279380992.0000 entropy=17.5008 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 160500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-623034.6 mean_steps=12.8
|
|
[Episode 160510] reward=-113555527.8 actor_loss=0.2359 critic_loss=87513976459.6364 entropy=17.4962 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 160520] reward=-121501752.3 actor_loss=0.2692 critic_loss=87188553272.8889 entropy=17.4918 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 160520] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-635763.1 mean_steps=11.9
|
|
[Episode 160530] reward=-115695254.8 actor_loss=0.2407 critic_loss=85464867037.4054 entropy=17.4807 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 160540] reward=-121125008.8 actor_loss=0.2011 critic_loss=88562574586.3111 entropy=17.4565 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 160540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445688.1 mean_steps=15.3
|
|
[Episode 160550] reward=-116478229.8 actor_loss=0.3137 critic_loss=83271187617.6842 entropy=17.4586 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 160560] reward=-121722821.6 actor_loss=0.3740 critic_loss=88603394412.0889 entropy=17.4606 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 160560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567067.5 mean_steps=13.3
|
|
[Episode 160570] reward=-113703079.4 actor_loss=0.3102 critic_loss=81348575778.1333 entropy=17.4490 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 160580] reward=-113027090.6 actor_loss=0.2932 critic_loss=79693135872.0000 entropy=17.4644 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 160580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-358267.6 mean_steps=14.8
|
|
[Episode 160590] reward=-115655402.4 actor_loss=0.2235 critic_loss=87497371888.9412 entropy=17.4553 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 160600] reward=-116378236.0 actor_loss=0.3553 critic_loss=87008543047.6800 entropy=17.4477 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 160600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533812.7 mean_steps=14.3
|
|
[Episode 160610] reward=-117034959.9 actor_loss=0.2959 critic_loss=84700224261.6889 entropy=17.4430 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 160620] reward=-119994958.9 actor_loss=0.2120 critic_loss=87491500962.9091 entropy=17.4457 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 160620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506182.5 mean_steps=14.0
|
|
[Episode 160630] reward=-115826031.8 actor_loss=0.2682 critic_loss=86593643770.3111 entropy=17.4425 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 160640] reward=-120273600.1 actor_loss=0.3091 critic_loss=89802322557.1555 entropy=17.4351 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 160640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444428.1 mean_steps=15.2
|
|
[Episode 160650] reward=-119005323.3 actor_loss=0.3268 critic_loss=86243323084.8000 entropy=17.4489 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 160660] reward=-115271776.7 actor_loss=0.2707 critic_loss=83812030509.5111 entropy=17.4522 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 160660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-488252.8 mean_steps=15.7
|
|
[Episode 160670] reward=-118984803.8 actor_loss=0.3618 critic_loss=85459385093.6889 entropy=17.4728 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 160680] reward=-114913894.3 actor_loss=0.1966 critic_loss=79488216814.9333 entropy=17.4726 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 160680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-478631.0 mean_steps=15.9
|
|
[Episode 160690] reward=-114574442.1 actor_loss=0.3180 critic_loss=101574944312.8889 entropy=17.4555 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 160700] reward=-111029580.4 actor_loss=0.2322 critic_loss=78350912102.4000 entropy=17.4744 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 160700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550863.3 mean_steps=14.7
|
|
[Episode 160710] reward=-115801962.2 actor_loss=0.3687 critic_loss=84954669966.2222 entropy=17.4798 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 160720] reward=-121192563.7 actor_loss=0.3263 critic_loss=92035683802.5366 entropy=17.4994 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 160720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486649.6 mean_steps=14.1
|
|
[Episode 160730] reward=-118608452.2 actor_loss=0.2084 critic_loss=89587286198.0444 entropy=17.5094 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 160740] reward=-115139780.6 actor_loss=0.2550 critic_loss=92380372114.2857 entropy=17.5134 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 160740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-484308.3 mean_steps=12.9
|
|
[Episode 160750] reward=-117416587.0 actor_loss=0.2109 critic_loss=83145990690.1333 entropy=17.5125 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 160760] reward=-119149160.4 actor_loss=0.3447 critic_loss=85283293866.6667 entropy=17.5147 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 160760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-427506.4 mean_steps=15.9
|
|
[Episode 160770] reward=-118107540.0 actor_loss=0.3031 critic_loss=90712120979.9111 entropy=17.5212 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 160780] reward=-112582786.9 actor_loss=0.3631 critic_loss=84135283961.0811 entropy=17.5235 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 160780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-396400.4 mean_steps=15.2
|
|
[Episode 160790] reward=-116796485.0 actor_loss=0.3416 critic_loss=85737473365.3333 entropy=17.5280 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 160800] reward=-113752256.9 actor_loss=0.2498 critic_loss=116088325417.2903 entropy=17.5272 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 160800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522287.2 mean_steps=14.2
|
|
[Episode 160810] reward=-116724684.7 actor_loss=0.3361 critic_loss=82298133735.2258 entropy=17.5247 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 160820] reward=-120117879.0 actor_loss=0.2772 critic_loss=88072179370.6667 entropy=17.5211 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 160820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-622121.0 mean_steps=11.8
|
|
[Episode 160830] reward=-118457924.5 actor_loss=0.2891 critic_loss=92082735786.6667 entropy=17.5199 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 160840] reward=-121601647.5 actor_loss=0.2737 critic_loss=89547082410.6667 entropy=17.5212 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 160840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-514994.6 mean_steps=12.3
|
|
[Episode 160850] reward=-116855288.5 actor_loss=0.3046 critic_loss=87455119252.2105 entropy=17.5305 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 160860] reward=-120126880.2 actor_loss=0.2900 critic_loss=92324569998.2222 entropy=17.5263 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 160860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457502.7 mean_steps=14.4
|
|
[Episode 160870] reward=-117579180.9 actor_loss=0.3998 critic_loss=90558303436.8000 entropy=17.5220 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 160880] reward=-115753269.1 actor_loss=0.2520 critic_loss=84265012662.8571 entropy=17.5370 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 160880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545168.8 mean_steps=13.1
|
|
[Episode 160890] reward=-111890930.6 actor_loss=0.3759 critic_loss=80387733258.2400 entropy=17.5252 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 160900] reward=-115850558.7 actor_loss=0.3247 critic_loss=84448490334.3158 entropy=17.5214 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 160900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556742.4 mean_steps=13.3
|
|
[Episode 160910] reward=-110964620.2 actor_loss=0.3728 critic_loss=78661562276.9778 entropy=17.5235 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 160920] reward=-119785815.7 actor_loss=0.2072 critic_loss=92635337781.8947 entropy=17.5145 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 160920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-603431.8 mean_steps=12.0
|
|
[Episode 160930] reward=-116577620.6 actor_loss=0.3078 critic_loss=86434831431.4419 entropy=17.5089 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 160940] reward=-120244162.3 actor_loss=0.2366 critic_loss=88276432516.7407 entropy=17.5077 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 160940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559635.6 mean_steps=12.9
|
|
[Episode 160950] reward=-119200824.8 actor_loss=0.2451 critic_loss=87396589568.0000 entropy=17.4941 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 160960] reward=-120502410.0 actor_loss=0.3122 critic_loss=89007968164.9778 entropy=17.5009 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 160960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461426.1 mean_steps=14.6
|
|
[Episode 160970] reward=-122925194.0 actor_loss=0.3187 critic_loss=90053626720.7111 entropy=17.4950 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 160980] reward=-115556636.4 actor_loss=0.3389 critic_loss=86953466993.7778 entropy=17.4816 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 160980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-427012.7 mean_steps=13.4
|
|
[Episode 160990] reward=-122221559.8 actor_loss=0.2831 critic_loss=91617960300.0889 entropy=17.4766 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 161000] reward=-110100613.0 actor_loss=0.3425 critic_loss=78080258503.1111 entropy=17.4755 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 161000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-510049.9 mean_steps=15.0
|
|
[Episode 161010] reward=-117194561.8 actor_loss=0.3952 critic_loss=86661891140.2667 entropy=17.4837 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 161020] reward=-114517555.9 actor_loss=0.3478 critic_loss=79743614611.9111 entropy=17.4691 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 161020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476157.0 mean_steps=14.8
|
|
[Episode 161030] reward=-122453012.1 actor_loss=0.1788 critic_loss=86707766886.4000 entropy=17.4785 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 161040] reward=-116910861.1 actor_loss=0.2378 critic_loss=81921799361.7297 entropy=17.4807 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 161040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559111.0 mean_steps=13.3
|
|
[Episode 161050] reward=-121369731.2 actor_loss=0.2189 critic_loss=86257482286.5455 entropy=17.4587 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 161060] reward=-118917274.4 actor_loss=0.3006 critic_loss=84119156843.7895 entropy=17.4510 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 161060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-313707.5 mean_steps=16.2
|
|
[Episode 161070] reward=-117644734.3 actor_loss=0.3347 critic_loss=82721783990.0444 entropy=17.4543 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 161080] reward=-116680638.9 actor_loss=0.2574 critic_loss=84385153388.0889 entropy=17.4623 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 161080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-582846.5 mean_steps=12.3
|
|
[Episode 161090] reward=-117035782.0 actor_loss=0.3368 critic_loss=83132828876.8000 entropy=17.4512 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 161100] reward=-108209383.1 actor_loss=0.4379 critic_loss=72584575203.5556 entropy=17.4534 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 161100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430729.1 mean_steps=15.1
|
|
[Episode 161110] reward=-119161185.8 actor_loss=0.3362 critic_loss=90084012487.1111 entropy=17.4557 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 161120] reward=-121348759.0 actor_loss=0.3402 critic_loss=86096473491.3939 entropy=17.4562 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 161120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-417952.4 mean_steps=15.9
|
|
[Episode 161130] reward=-122508049.0 actor_loss=0.3024 critic_loss=91426716283.5862 entropy=17.4526 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 161140] reward=-120914376.5 actor_loss=0.2482 critic_loss=88183057846.8571 entropy=17.4594 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 161140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571553.9 mean_steps=13.1
|
|
[Episode 161150] reward=-119179207.4 actor_loss=0.2581 critic_loss=80875429888.0000 entropy=17.4544 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 161160] reward=-120244332.3 actor_loss=0.3220 critic_loss=86922519158.1538 entropy=17.4567 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 161160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-565078.5 mean_steps=12.8
|
|
[Episode 161170] reward=-117631718.8 actor_loss=0.2989 critic_loss=82603602375.1111 entropy=17.4598 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 161180] reward=-121576078.3 actor_loss=0.2860 critic_loss=85795173639.3143 entropy=17.4687 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 161180] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-344994.0 mean_steps=16.5
|
|
[Episode 161190] reward=-118613179.2 actor_loss=0.2061 critic_loss=83553756581.6471 entropy=17.4696 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 161200] reward=-111148044.7 actor_loss=0.3282 critic_loss=76586099234.1333 entropy=17.4487 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 161200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-401116.1 mean_steps=14.1
|
|
[Episode 161210] reward=-114506541.9 actor_loss=0.2323 critic_loss=90690471936.0000 entropy=17.4475 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 161220] reward=-114562693.9 actor_loss=0.2851 critic_loss=81389557186.5600 entropy=17.4427 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 161220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498483.1 mean_steps=14.8
|
|
[Episode 161230] reward=-119971305.6 actor_loss=0.3004 critic_loss=89701476472.4706 entropy=17.4485 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 161240] reward=-113465325.9 actor_loss=0.2259 critic_loss=80980114492.2353 entropy=17.4412 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 161240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465006.7 mean_steps=15.6
|
|
[Episode 161250] reward=-114536632.9 actor_loss=0.2603 critic_loss=83386874629.6889 entropy=17.4455 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 161260] reward=-121759460.1 actor_loss=0.2432 critic_loss=81523819966.3590 entropy=17.4629 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 161260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592078.1 mean_steps=13.3
|
|
[Episode 161270] reward=-122444998.6 actor_loss=0.2671 critic_loss=87127717649.8605 entropy=17.4818 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 161280] reward=-125074634.9 actor_loss=0.2222 critic_loss=92412096512.0000 entropy=17.4880 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 161280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473765.1 mean_steps=13.9
|
|
[Episode 161290] reward=-115009355.1 actor_loss=0.4043 critic_loss=79423666257.9200 entropy=17.4791 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 161300] reward=-114445739.8 actor_loss=0.3159 critic_loss=80516791734.8571 entropy=17.4904 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 161300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-443634.3 mean_steps=14.2
|
|
[Episode 161310] reward=-115234579.0 actor_loss=0.3403 critic_loss=93663062105.0435 entropy=17.5024 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 161320] reward=-117580309.9 actor_loss=0.2981 critic_loss=90201217858.3704 entropy=17.4971 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 161320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-626090.5 mean_steps=12.1
|
|
[Episode 161330] reward=-117527037.7 actor_loss=0.2786 critic_loss=87145100141.7143 entropy=17.5022 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 161340] reward=-118354152.3 actor_loss=0.3035 critic_loss=88150860231.1111 entropy=17.5021 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 161340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624536.9 mean_steps=12.6
|
|
[Episode 161350] reward=-114458770.8 actor_loss=0.3284 critic_loss=78762754958.2222 entropy=17.5057 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 161360] reward=-119958872.8 actor_loss=0.2494 critic_loss=83388997632.0000 entropy=17.5115 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 161360] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-406886.9 mean_steps=16.6
|
|
[Episode 161370] reward=-122057350.3 actor_loss=0.3398 critic_loss=94503109676.5217 entropy=17.5128 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 161380] reward=-118133586.6 actor_loss=0.3012 critic_loss=88556830720.0000 entropy=17.5239 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 161380] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-325879.5 mean_steps=16.4
|
|
[Episode 161390] reward=-153979725.8 actor_loss=0.3238 critic_loss=3505249483670.0688 entropy=17.5191 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 161400] reward=-148204069.2 actor_loss=0.2920 critic_loss=3375590983452.4443 entropy=17.5222 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 161400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505603.1 mean_steps=14.4
|
|
[Episode 161410] reward=-120041705.9 actor_loss=0.2464 critic_loss=86862529945.6000 entropy=17.5208 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 161420] reward=-118030135.1 actor_loss=0.2681 critic_loss=87965517050.3111 entropy=17.5430 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 161420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585836.5 mean_steps=12.6
|
|
[Episode 161430] reward=-116430290.8 actor_loss=0.2328 critic_loss=80315004778.1463 entropy=17.5518 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 161440] reward=-120583278.0 actor_loss=0.2790 critic_loss=86151948606.5778 entropy=17.5521 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 161440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564893.6 mean_steps=13.4
|
|
[Episode 161450] reward=-120693310.2 actor_loss=0.3540 critic_loss=90557331928.6154 entropy=17.5500 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 161460] reward=-123472137.9 actor_loss=0.2366 critic_loss=88694463601.7778 entropy=17.5445 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 161460] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-374101.6 mean_steps=16.9
|
|
[Episode 161470] reward=-117703446.7 actor_loss=0.4001 critic_loss=83252793964.6061 entropy=17.5355 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 161480] reward=-122397807.2 actor_loss=0.3259 critic_loss=87793109379.4595 entropy=17.5193 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 161480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541159.4 mean_steps=13.4
|
|
[Episode 161490] reward=-122174481.9 actor_loss=0.3142 critic_loss=88115183400.4211 entropy=17.5248 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 161500] reward=-121765605.3 actor_loss=0.3052 critic_loss=88442969181.0909 entropy=17.5381 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 161500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523087.9 mean_steps=14.3
|
|
[Episode 161510] reward=-111498770.3 actor_loss=0.2516 critic_loss=76390134607.4483 entropy=17.5436 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 161520] reward=-119250613.8 actor_loss=0.1822 critic_loss=84816897966.0800 entropy=17.5323 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 161520] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-624191.8 mean_steps=11.8
|
|
[Episode 161530] reward=-116144236.0 actor_loss=0.2723 critic_loss=83810873878.2609 entropy=17.5355 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 161540] reward=-117017848.2 actor_loss=0.3796 critic_loss=95590501034.6667 entropy=17.5363 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 161540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447406.4 mean_steps=15.4
|
|
[Episode 161550] reward=-119650373.9 actor_loss=0.3001 critic_loss=103358189112.8889 entropy=17.5404 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 161560] reward=-111307823.4 actor_loss=0.4195 critic_loss=80444180126.8965 entropy=17.5334 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 161560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552583.6 mean_steps=13.2
|
|
[Episode 161570] reward=-118173259.1 actor_loss=0.2863 critic_loss=89163550628.9778 entropy=17.5223 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 161580] reward=-119902592.1 actor_loss=0.3016 critic_loss=85894760634.1818 entropy=17.5260 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 161580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-448861.7 mean_steps=13.8
|
|
[Episode 161590] reward=-121661568.9 actor_loss=0.3078 critic_loss=177149064621.4193 entropy=17.5124 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 161600] reward=-121949344.1 actor_loss=0.1912 critic_loss=86289338563.0476 entropy=17.5095 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 161600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-627605.2 mean_steps=12.8
|
|
[Episode 161610] reward=-114206798.5 actor_loss=0.3464 critic_loss=100920551378.4889 entropy=17.5123 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 161620] reward=-116141573.9 actor_loss=0.3899 critic_loss=79944350651.7333 entropy=17.5104 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1504 front_blocked=0
|
|
[Eval 161620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-436598.3 mean_steps=16.0
|
|
[Episode 161630] reward=-117082187.8 actor_loss=0.2293 critic_loss=79046394718.3158 entropy=17.5056 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 161640] reward=-120187564.5 actor_loss=0.2601 critic_loss=86281291776.0000 entropy=17.5118 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 161640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523822.1 mean_steps=13.6
|
|
[Episode 161650] reward=-121743903.0 actor_loss=0.2012 critic_loss=83202462515.2000 entropy=17.5137 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 161660] reward=-119441964.3 actor_loss=0.3125 critic_loss=83041593168.4571 entropy=17.5088 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 161660] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-627137.5 mean_steps=11.3
|
|
[Episode 161670] reward=-117609208.2 actor_loss=0.2359 critic_loss=82375349979.4286 entropy=17.4856 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 161680] reward=-109146061.4 actor_loss=0.3060 critic_loss=75258976665.6000 entropy=17.4867 approx_kl=0.0103 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 161680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581728.9 mean_steps=12.6
|
|
[Episode 161690] reward=-121802926.3 actor_loss=0.2526 critic_loss=87154897152.0000 entropy=17.5023 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 161700] reward=-117801889.1 actor_loss=0.1858 critic_loss=80695057976.8889 entropy=17.5000 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 161700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395085.7 mean_steps=15.9
|
|
[Episode 161710] reward=-120074457.0 actor_loss=0.3122 critic_loss=86046980902.7879 entropy=17.5096 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 161720] reward=-117865629.2 actor_loss=0.3025 critic_loss=82376937139.8919 entropy=17.5020 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 161720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549442.8 mean_steps=13.1
|
|
[Episode 161730] reward=-115441402.5 actor_loss=0.2645 critic_loss=81634318628.5714 entropy=17.4955 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 161740] reward=-114023779.6 actor_loss=0.4121 critic_loss=84668070211.3684 entropy=17.4976 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 161740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-607630.4 mean_steps=13.0
|
|
[Episode 161750] reward=-122640457.1 actor_loss=0.2821 critic_loss=91115716864.0000 entropy=17.5227 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 161760] reward=-130956276.5 actor_loss=0.2685 critic_loss=1190350969709.7144 entropy=17.5352 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 161760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455887.3 mean_steps=14.3
|
|
[Episode 161770] reward=-119540205.2 actor_loss=0.4316 critic_loss=92308295019.3548 entropy=17.5593 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 161780] reward=-114950043.8 actor_loss=0.2788 critic_loss=82544169084.1212 entropy=17.5637 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 161780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421621.2 mean_steps=15.1
|
|
[Episode 161790] reward=-117525533.3 actor_loss=0.3597 critic_loss=83519915041.0323 entropy=17.5737 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 161800] reward=-129327467.8 actor_loss=0.3280 critic_loss=798138782333.1555 entropy=17.5800 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 161800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521890.0 mean_steps=15.1
|
|
[Episode 161810] reward=-153085052.1 actor_loss=0.2366 critic_loss=3837046419911.1113 entropy=17.6006 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 161820] reward=-156626682.1 actor_loss=0.3259 critic_loss=4429114647074.1338 entropy=17.5991 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 161820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460265.3 mean_steps=14.4
|
|
[Episode 161830] reward=-121361512.1 actor_loss=0.2839 critic_loss=87916734553.0435 entropy=17.5959 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 161840] reward=-124314471.1 actor_loss=0.2037 critic_loss=93148180070.4000 entropy=17.5938 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 161840] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-343137.0 mean_steps=17.4
|
|
[Episode 161850] reward=-114830655.9 actor_loss=0.3011 critic_loss=84705850818.5600 entropy=17.5979 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 161860] reward=-117882178.4 actor_loss=0.2804 critic_loss=86743222226.4889 entropy=17.5944 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 161860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586599.4 mean_steps=12.4
|
|
[Episode 161870] reward=-115878493.5 actor_loss=0.3054 critic_loss=85151347765.8947 entropy=17.6065 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 161880] reward=-115837010.4 actor_loss=0.3134 critic_loss=90332590899.2000 entropy=17.6093 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 161880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514042.9 mean_steps=13.9
|
|
[Episode 161890] reward=-119484790.3 actor_loss=0.2869 critic_loss=93578828399.3044 entropy=17.6115 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 161900] reward=-118837954.8 actor_loss=0.2007 critic_loss=89394777465.2632 entropy=17.6067 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 161900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454025.6 mean_steps=14.1
|
|
[Episode 161910] reward=-116657436.0 actor_loss=0.2507 critic_loss=83519723892.3636 entropy=17.5972 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 161920] reward=-119766015.4 actor_loss=0.2422 critic_loss=88601089820.4444 entropy=17.6084 approx_kl=0.0044 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 161920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-527827.1 mean_steps=15.1
|
|
[Episode 161930] reward=-118105508.6 actor_loss=0.2835 critic_loss=90903098332.6897 entropy=17.6057 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 161940] reward=-115545329.8 actor_loss=0.4132 critic_loss=87016854050.1333 entropy=17.6103 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 161940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431821.6 mean_steps=15.2
|
|
[Episode 161950] reward=-120252914.1 actor_loss=0.3040 critic_loss=85554069807.4074 entropy=17.6019 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 161960] reward=-115275441.1 actor_loss=0.4047 critic_loss=88268464548.1026 entropy=17.6283 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 161960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501642.4 mean_steps=13.3
|
|
[Episode 161970] reward=-122073538.8 actor_loss=0.3306 critic_loss=87885093062.1935 entropy=17.6205 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 161980] reward=-118669904.7 actor_loss=0.3025 critic_loss=87485241793.5610 entropy=17.6149 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 161980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-572081.1 mean_steps=13.8
|
|
[Episode 161990] reward=-121460263.4 actor_loss=0.2810 critic_loss=91026467061.7600 entropy=17.6101 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 162000] reward=-115480868.1 actor_loss=0.3708 critic_loss=89441676576.8205 entropy=17.6029 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 162000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451510.5 mean_steps=14.6
|
|
[Episode 162010] reward=-108155331.5 actor_loss=0.4733 critic_loss=77667449969.7778 entropy=17.5931 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 162020] reward=-127633272.5 actor_loss=0.3021 critic_loss=455087867904.0000 entropy=17.5842 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 162020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-474039.7 mean_steps=15.9
|
|
[Episode 162030] reward=-112863555.2 actor_loss=0.2731 critic_loss=82530702865.6552 entropy=17.5724 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 162040] reward=-125952092.8 actor_loss=0.2492 critic_loss=92767094374.4000 entropy=17.5585 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 162040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447437.6 mean_steps=14.7
|
|
[Episode 162050] reward=-120041148.4 actor_loss=0.2780 critic_loss=90745596768.7111 entropy=17.5677 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 162060] reward=-118501817.4 actor_loss=0.2920 critic_loss=82413522670.9333 entropy=17.5690 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 162060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-596837.8 mean_steps=12.7
|
|
[Episode 162070] reward=-121457029.2 actor_loss=0.4466 critic_loss=89731222474.1053 entropy=17.5740 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1543 front_blocked=0
|
|
[Episode 162080] reward=-142198680.4 actor_loss=0.3733 critic_loss=3211479735149.7144 entropy=17.5816 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 162080] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-677549.4 mean_steps=11.4
|
|
[Episode 162090] reward=-121600407.8 actor_loss=0.2523 critic_loss=91181573389.4737 entropy=17.5962 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 162100] reward=-119218115.2 actor_loss=0.2901 critic_loss=84120256512.0000 entropy=17.5905 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 162100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-570618.1 mean_steps=14.4
|
|
[Episode 162110] reward=-116533521.0 actor_loss=0.3026 critic_loss=84155714400.7111 entropy=17.5807 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 162120] reward=-123616635.3 actor_loss=0.2761 critic_loss=91122817792.0000 entropy=17.5899 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 162120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-532723.5 mean_steps=12.4
|
|
[Episode 162130] reward=-116992476.3 actor_loss=0.2934 critic_loss=82502452838.4000 entropy=17.5903 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 162140] reward=-120247352.1 actor_loss=0.2456 critic_loss=89624110694.4000 entropy=17.5931 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 162140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-469260.5 mean_steps=15.7
|
|
[Episode 162150] reward=-119483484.0 actor_loss=0.3618 critic_loss=86201871360.0000 entropy=17.6019 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 162160] reward=-118227752.1 actor_loss=0.3225 critic_loss=87568014497.6842 entropy=17.6123 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 162160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540493.7 mean_steps=13.4
|
|
[Episode 162170] reward=-119564558.8 actor_loss=0.3730 critic_loss=85346959360.0000 entropy=17.6069 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 162180] reward=-117897461.6 actor_loss=0.3202 critic_loss=81029142265.4359 entropy=17.6118 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 162180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-547956.8 mean_steps=12.6
|
|
[Episode 162190] reward=-120428946.6 actor_loss=0.2602 critic_loss=86868305578.6667 entropy=17.6098 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 162200] reward=-120799599.8 actor_loss=0.4257 critic_loss=104577911218.4242 entropy=17.6081 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 162200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610686.1 mean_steps=12.8
|
|
[Episode 162210] reward=-120525133.8 actor_loss=0.3053 critic_loss=84000667915.1304 entropy=17.6258 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 162220] reward=-116657892.1 actor_loss=0.2750 critic_loss=80766641038.2222 entropy=17.6433 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 162220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466825.6 mean_steps=14.6
|
|
[Episode 162230] reward=-120043134.3 actor_loss=0.3310 critic_loss=83691642624.0000 entropy=17.6334 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 162240] reward=-119083295.0 actor_loss=0.3304 critic_loss=87755955131.7333 entropy=17.6201 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 162240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-429750.2 mean_steps=14.3
|
|
[Episode 162250] reward=-112646407.5 actor_loss=0.3602 critic_loss=82016909084.4444 entropy=17.5922 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 162260] reward=-124331023.9 actor_loss=0.2125 critic_loss=91971348252.4444 entropy=17.5789 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 162260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505777.3 mean_steps=13.8
|
|
[Episode 162270] reward=-119561622.5 actor_loss=0.3568 critic_loss=90972935089.2308 entropy=17.5843 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 162280] reward=-119618770.6 actor_loss=0.2785 critic_loss=84488704819.2000 entropy=17.5765 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 162280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428353.1 mean_steps=15.3
|
|
[Episode 162290] reward=-116502453.5 actor_loss=0.3441 critic_loss=81416863179.0345 entropy=17.5826 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 162300] reward=-122387565.3 actor_loss=0.2301 critic_loss=88917158180.5714 entropy=17.5831 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 162300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481168.7 mean_steps=14.8
|
|
[Episode 162310] reward=-117063935.9 actor_loss=0.2738 critic_loss=85834544067.7647 entropy=17.5813 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 162320] reward=-120417623.3 actor_loss=0.2992 critic_loss=88945508169.9556 entropy=17.5864 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 162320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-685913.7 mean_steps=12.4
|
|
[Episode 162330] reward=-120651684.4 actor_loss=0.3706 critic_loss=90046196394.6667 entropy=17.5679 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 162340] reward=-121449148.3 actor_loss=0.2230 critic_loss=86194718096.6956 entropy=17.5784 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 162340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-564189.7 mean_steps=14.3
|
|
[Episode 162350] reward=-121491334.8 actor_loss=0.2992 critic_loss=87554715062.8571 entropy=17.5782 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 162360] reward=-122996824.4 actor_loss=0.2771 critic_loss=90774355968.0000 entropy=17.5779 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 162360] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-343548.3 mean_steps=16.9
|
|
[Episode 162370] reward=-113707345.0 actor_loss=0.3387 critic_loss=85614737635.5556 entropy=17.5893 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 162380] reward=-122593487.9 actor_loss=0.2152 critic_loss=125935475712.0000 entropy=17.5987 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 162380] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-333689.5 mean_steps=16.9
|
|
[Episode 162390] reward=-113838160.3 actor_loss=0.3705 critic_loss=83506809690.8387 entropy=17.5933 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 162400] reward=-122549969.5 actor_loss=0.1562 critic_loss=94691583676.6316 entropy=17.5947 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 162400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473059.6 mean_steps=14.6
|
|
[Episode 162410] reward=-120011086.1 actor_loss=0.2248 critic_loss=86329865180.6897 entropy=17.5854 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 162420] reward=-117988616.5 actor_loss=0.2224 critic_loss=99725234176.0000 entropy=17.5864 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 162420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526873.2 mean_steps=14.0
|
|
[Episode 162430] reward=-122264739.4 actor_loss=0.3857 critic_loss=92952273624.1778 entropy=17.5910 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 162440] reward=-123544391.8 actor_loss=0.2262 critic_loss=92327557030.9565 entropy=17.5913 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 162440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563814.5 mean_steps=12.4
|
|
[Episode 162450] reward=-117206242.7 actor_loss=0.2933 critic_loss=86464349720.3810 entropy=17.5900 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 162460] reward=-126380107.3 actor_loss=0.2444 critic_loss=107953959321.6000 entropy=17.5999 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 162460] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-657877.6 mean_steps=11.2
|
|
[Episode 162470] reward=-120485880.8 actor_loss=0.2994 critic_loss=185219743379.9111 entropy=17.5899 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 162480] reward=-125120002.3 actor_loss=0.2426 critic_loss=128734604401.7778 entropy=17.5963 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 162480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570963.2 mean_steps=13.2
|
|
[Episode 162490] reward=-123528200.9 actor_loss=0.2347 critic_loss=127121428187.4286 entropy=17.6032 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 162500] reward=-122584872.1 actor_loss=0.3160 critic_loss=95470293937.2308 entropy=17.6009 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 162500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494229.8 mean_steps=14.1
|
|
[Episode 162510] reward=-125236797.1 actor_loss=0.3627 critic_loss=162453315268.9231 entropy=17.5940 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 162520] reward=-117646900.1 actor_loss=0.2735 critic_loss=97750817326.5455 entropy=17.6021 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 162520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440469.1 mean_steps=14.4
|
|
[Episode 162530] reward=-124748499.2 actor_loss=0.2796 critic_loss=97480071168.0000 entropy=17.5929 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 162540] reward=-119898220.2 actor_loss=0.2824 critic_loss=93041063799.4667 entropy=17.5926 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 162540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-353541.1 mean_steps=15.8
|
|
[Episode 162550] reward=-115836048.9 actor_loss=0.2760 critic_loss=82347463065.6000 entropy=17.6018 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 162560] reward=-115398432.4 actor_loss=0.3247 critic_loss=84125483008.0000 entropy=17.5898 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 162560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-516239.0 mean_steps=13.1
|
|
[Episode 162570] reward=-117768330.3 actor_loss=0.2167 critic_loss=85164855459.8400 entropy=17.5844 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 162580] reward=-119852036.5 actor_loss=0.3474 critic_loss=90749441989.4857 entropy=17.5744 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 162580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513031.3 mean_steps=14.2
|
|
[Episode 162590] reward=-127503664.2 actor_loss=0.2550 critic_loss=390323271168.0000 entropy=17.5857 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 162600] reward=-119117858.6 actor_loss=0.2691 critic_loss=88491809723.7333 entropy=17.5788 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 162600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552985.7 mean_steps=13.3
|
|
[Episode 162610] reward=-115311265.3 actor_loss=0.3470 critic_loss=87297512425.2444 entropy=17.5858 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 162620] reward=-121939295.5 actor_loss=0.2586 critic_loss=91669847381.3333 entropy=17.6008 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 162620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-618853.5 mean_steps=13.1
|
|
[Episode 162630] reward=-120341945.8 actor_loss=0.2829 critic_loss=85240767326.3158 entropy=17.5951 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 162640] reward=-120756008.2 actor_loss=0.2743 critic_loss=88923520087.7714 entropy=17.5877 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 162640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479977.4 mean_steps=15.0
|
|
[Episode 162650] reward=-120938023.4 actor_loss=0.1874 critic_loss=90055705512.2286 entropy=17.5893 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 162660] reward=-116594692.1 actor_loss=0.2875 critic_loss=81722180148.9655 entropy=17.5897 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 162660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548435.6 mean_steps=14.3
|
|
[Episode 162670] reward=-117782322.5 actor_loss=0.3266 critic_loss=88010228736.0000 entropy=17.5931 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 162680] reward=-115429552.0 actor_loss=0.3514 critic_loss=84314816139.6364 entropy=17.5846 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 162680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-663803.9 mean_steps=12.1
|
|
[Episode 162690] reward=-114641591.2 actor_loss=0.3345 critic_loss=81287060759.2727 entropy=17.5798 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 162700] reward=-118653861.7 actor_loss=0.3420 critic_loss=84623881137.2308 entropy=17.5670 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 162700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-573047.5 mean_steps=12.7
|
|
[Episode 162710] reward=-118676424.6 actor_loss=0.2872 critic_loss=80194472779.2941 entropy=17.5743 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 162720] reward=-121099770.0 actor_loss=0.2424 critic_loss=89536411355.4286 entropy=17.5878 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 162720] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-225822.1 mean_steps=18.0
|
|
[Episode 162730] reward=-118554132.4 actor_loss=0.2120 critic_loss=86603828824.2759 entropy=17.5825 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 162740] reward=-119116538.7 actor_loss=0.2350 critic_loss=87320657920.0000 entropy=17.5706 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 162740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-442017.5 mean_steps=16.5
|
|
[Episode 162750] reward=-118998906.3 actor_loss=0.2323 critic_loss=89801427482.9474 entropy=17.5794 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 162760] reward=-119058654.4 actor_loss=0.3559 critic_loss=85475137194.6667 entropy=17.6046 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 162760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434093.4 mean_steps=14.2
|
|
[Episode 162770] reward=-116097022.6 actor_loss=0.3093 critic_loss=80186030004.1481 entropy=17.6154 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 162780] reward=-119245181.8 actor_loss=0.2799 critic_loss=87989812400.5517 entropy=17.5999 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 162780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-605929.0 mean_steps=11.8
|
|
[Episode 162790] reward=-124031241.1 actor_loss=0.2140 critic_loss=93417738752.0000 entropy=17.6066 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 162800] reward=-114443866.4 actor_loss=0.2599 critic_loss=83418813076.6452 entropy=17.6061 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 162800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524221.7 mean_steps=13.5
|
|
[Episode 162810] reward=-117792943.5 actor_loss=0.3622 critic_loss=86222648320.0000 entropy=17.6108 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 162820] reward=-114584489.9 actor_loss=0.3506 critic_loss=86294229382.0952 entropy=17.6076 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 162820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-573582.9 mean_steps=14.5
|
|
[Episode 162830] reward=-118285499.6 actor_loss=0.3756 critic_loss=88503710626.9091 entropy=17.5960 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 162840] reward=-123567500.0 actor_loss=0.3023 critic_loss=107061719950.2222 entropy=17.5987 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 162840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-565973.1 mean_steps=12.6
|
|
[Episode 162850] reward=-128238140.0 actor_loss=0.2892 critic_loss=465277538304.0000 entropy=17.6080 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 162860] reward=-118597425.7 actor_loss=0.3425 critic_loss=85898633898.6667 entropy=17.5926 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 162860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518279.6 mean_steps=14.0
|
|
[Episode 162870] reward=-126123228.3 actor_loss=0.3402 critic_loss=139227945837.7143 entropy=17.5932 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 162880] reward=-123517215.1 actor_loss=0.2590 critic_loss=109405526528.0000 entropy=17.5897 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 162880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577528.2 mean_steps=13.4
|
|
[Episode 162890] reward=-120016294.3 actor_loss=0.2034 critic_loss=91068337307.1515 entropy=17.5830 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 162900] reward=-123742961.9 actor_loss=0.3290 critic_loss=91456470757.5172 entropy=17.5664 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 162900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-552275.1 mean_steps=12.8
|
|
[Episode 162910] reward=-171236731.9 actor_loss=0.2709 critic_loss=8555349293283.5557 entropy=17.5764 approx_kl=0.0040 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 162920] reward=-117631877.2 actor_loss=0.3301 critic_loss=93023150080.0000 entropy=17.5954 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 162920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383827.0 mean_steps=16.0
|
|
[Episode 162930] reward=-113963252.5 actor_loss=0.3053 critic_loss=84270159626.2400 entropy=17.5964 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 162940] reward=-259237514.3 actor_loss=0.2740 critic_loss=48420641961119.2891 entropy=17.6003 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 162940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-589710.5 mean_steps=13.8
|
|
[Episode 162950] reward=-112410355.9 actor_loss=0.4138 critic_loss=82500286297.9460 entropy=17.6008 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 162960] reward=-122276949.9 actor_loss=0.2537 critic_loss=141908346277.6471 entropy=17.5976 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 162960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-361704.9 mean_steps=16.2
|
|
[Episode 162970] reward=-119314005.4 actor_loss=0.2607 critic_loss=92765824000.0000 entropy=17.6033 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 162980] reward=-115345841.6 actor_loss=0.2550 critic_loss=82805397504.0000 entropy=17.5952 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 162980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446881.6 mean_steps=14.9
|
|
[Episode 162990] reward=-115906784.2 actor_loss=0.2718 critic_loss=84093419088.8421 entropy=17.5999 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 163000] reward=-114158838.1 actor_loss=0.3471 critic_loss=78856374408.5333 entropy=17.5930 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 163000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-624991.8 mean_steps=11.9
|
|
[Episode 163010] reward=-116875080.8 actor_loss=0.1973 critic_loss=87814443622.4000 entropy=17.5894 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 163020] reward=-119841562.6 actor_loss=0.3718 critic_loss=98428484851.8095 entropy=17.5897 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 163020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-376399.7 mean_steps=16.9
|
|
[Episode 163030] reward=-155971376.6 actor_loss=0.3045 critic_loss=3671462751481.0811 entropy=17.5992 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 163040] reward=-115997517.1 actor_loss=0.3593 critic_loss=87197504768.0000 entropy=17.6058 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 163040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587243.2 mean_steps=13.8
|
|
[Episode 163050] reward=-155154029.6 actor_loss=0.2581 critic_loss=2741098350104.3809 entropy=17.6051 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 163060] reward=-120762293.7 actor_loss=0.2120 critic_loss=140015009792.0000 entropy=17.5881 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 163060] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-295144.0 mean_steps=18.4
|
|
[Episode 163070] reward=-121203766.0 actor_loss=0.1803 critic_loss=110937286469.8182 entropy=17.5934 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 163080] reward=-120991819.0 actor_loss=0.2698 critic_loss=107997167173.1892 entropy=17.5984 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 163080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567905.1 mean_steps=13.8
|
|
[Episode 163090] reward=-116492625.3 actor_loss=0.2515 critic_loss=84594973295.3044 entropy=17.5985 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 163100] reward=-117660917.8 actor_loss=0.2434 critic_loss=84430912079.6444 entropy=17.6050 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 163100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-461030.6 mean_steps=16.1
|
|
[Episode 163110] reward=-119400606.3 actor_loss=0.2809 critic_loss=85814058461.8667 entropy=17.6065 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 163120] reward=-119106696.2 actor_loss=0.2618 critic_loss=87457709648.8421 entropy=17.5961 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 163120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-611337.0 mean_steps=14.2
|
|
[Episode 163130] reward=-117847545.9 actor_loss=0.2161 critic_loss=79249113088.0000 entropy=17.5818 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 163140] reward=-118011819.3 actor_loss=0.3322 critic_loss=88628009837.7143 entropy=17.5683 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 163140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497135.2 mean_steps=14.7
|
|
[Episode 163150] reward=-117298510.0 actor_loss=0.2927 critic_loss=89822114377.1429 entropy=17.5816 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 163160] reward=-122141609.5 actor_loss=0.2987 critic_loss=92750206645.6774 entropy=17.5885 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 163160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-428261.3 mean_steps=14.7
|
|
[Episode 163170] reward=-128556970.0 actor_loss=0.2836 critic_loss=318755827288.2759 entropy=17.5880 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 163180] reward=-119052760.3 actor_loss=0.2389 critic_loss=89351997685.7600 entropy=17.5828 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 163180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-486541.2 mean_steps=13.1
|
|
[Episode 163190] reward=-125213800.8 actor_loss=0.2826 critic_loss=100256296069.5652 entropy=17.5830 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 163200] reward=-120886146.5 actor_loss=0.3255 critic_loss=93772932632.3810 entropy=17.5714 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 163200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-513347.9 mean_steps=14.9
|
|
[Episode 163210] reward=-114448224.8 actor_loss=0.3847 critic_loss=88352958236.4444 entropy=17.6037 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 163220] reward=-116815957.3 actor_loss=0.2872 critic_loss=99891996964.5714 entropy=17.5957 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 163220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520600.6 mean_steps=14.2
|
|
[Episode 163230] reward=-111955583.7 actor_loss=0.3437 critic_loss=82755670601.1429 entropy=17.6002 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 163240] reward=-125653627.4 actor_loss=0.3127 critic_loss=452689906892.8000 entropy=17.6050 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 163240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435542.6 mean_steps=14.7
|
|
[Episode 163250] reward=-119952399.9 actor_loss=0.2453 critic_loss=90383528235.7073 entropy=17.6066 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 163260] reward=-117983977.4 actor_loss=0.2606 critic_loss=86121549956.1290 entropy=17.6129 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 163260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528733.3 mean_steps=14.2
|
|
[Episode 163270] reward=-117039774.7 actor_loss=0.3052 critic_loss=83898200064.0000 entropy=17.6213 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 163280] reward=-115327869.0 actor_loss=0.2772 critic_loss=83799344818.6046 entropy=17.6384 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 163280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-470496.6 mean_steps=13.2
|
|
[Episode 163290] reward=-121495247.2 actor_loss=0.3092 critic_loss=149540882773.3333 entropy=17.6250 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 163300] reward=-116856411.6 actor_loss=0.3806 critic_loss=85504795079.1111 entropy=17.6252 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 163300] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-554445.3 mean_steps=10.8
|
|
[Episode 163310] reward=-117856381.5 actor_loss=0.2557 critic_loss=85531459956.3636 entropy=17.6333 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 163320] reward=-122374826.3 actor_loss=0.2660 critic_loss=93022289555.9111 entropy=17.6483 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 163320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602763.9 mean_steps=13.1
|
|
[Episode 163330] reward=-120156593.3 actor_loss=0.3314 critic_loss=88553674459.4286 entropy=17.6486 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 163340] reward=-120858909.6 actor_loss=0.1977 critic_loss=85833963383.4667 entropy=17.6511 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 163340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548466.7 mean_steps=14.3
|
|
[Episode 163350] reward=-125769764.5 actor_loss=0.2440 critic_loss=93931478199.7949 entropy=17.6170 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 163360] reward=-119433408.2 actor_loss=0.2801 critic_loss=88084534676.8372 entropy=17.6247 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 163360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565980.1 mean_steps=13.7
|
|
[Episode 163370] reward=-119812478.1 actor_loss=0.2899 critic_loss=107078821010.2857 entropy=17.6223 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 163380] reward=-113310645.0 actor_loss=0.4317 critic_loss=85888656952.8889 entropy=17.6204 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 163380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430126.0 mean_steps=15.2
|
|
[Episode 163390] reward=-118119529.0 actor_loss=0.3315 critic_loss=88050824113.2308 entropy=17.6246 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 163400] reward=-115687368.0 actor_loss=0.2889 critic_loss=82409640521.1429 entropy=17.6201 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 163400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-587500.9 mean_steps=12.6
|
|
[Episode 163410] reward=-118423414.3 actor_loss=0.3460 critic_loss=83986739473.0667 entropy=17.6255 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 163420] reward=-114492032.9 actor_loss=0.3236 critic_loss=82375967582.3158 entropy=17.6283 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 163420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-561577.5 mean_steps=14.4
|
|
[Episode 163430] reward=-114420248.1 actor_loss=0.3393 critic_loss=84255417765.6471 entropy=17.6384 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 163440] reward=-118324119.5 actor_loss=0.3114 critic_loss=84412319334.4000 entropy=17.6249 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 163440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-461541.7 mean_steps=14.1
|
|
[Episode 163450] reward=-118339396.5 actor_loss=0.2144 critic_loss=87308117060.2667 entropy=17.6442 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 163460] reward=-119618344.8 actor_loss=0.2834 critic_loss=180264529778.7586 entropy=17.6509 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 163460] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-586628.2 mean_steps=10.8
|
|
[Episode 163470] reward=-120558315.8 actor_loss=0.2954 critic_loss=89350217346.9767 entropy=17.6441 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 163480] reward=-256903963.8 actor_loss=0.3352 critic_loss=56555433192470.7578 entropy=17.6466 approx_kl=-0.0002 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 163480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561774.7 mean_steps=13.6
|
|
[Episode 163490] reward=-157789680.4 actor_loss=0.2430 critic_loss=5076813232355.5557 entropy=17.6647 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 163500] reward=-122839433.9 actor_loss=0.3317 critic_loss=119737204736.0000 entropy=17.6502 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 163500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-569586.7 mean_steps=15.7
|
|
[Episode 163510] reward=-118932649.6 actor_loss=0.2433 critic_loss=81153148404.0930 entropy=17.6530 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 163520] reward=-120084129.9 actor_loss=0.2292 critic_loss=121381069430.1538 entropy=17.6466 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 163520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-572029.2 mean_steps=13.6
|
|
[Episode 163530] reward=-121554674.6 actor_loss=0.2625 critic_loss=88010413972.2105 entropy=17.6397 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 163540] reward=-120742380.9 actor_loss=0.2652 critic_loss=84600658838.0690 entropy=17.6252 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 163540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401088.1 mean_steps=16.4
|
|
[Episode 163550] reward=-117637556.3 actor_loss=0.3714 critic_loss=86483740444.4444 entropy=17.6406 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 163560] reward=-120822007.5 actor_loss=0.2648 critic_loss=87494597017.6000 entropy=17.6546 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 163560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428410.1 mean_steps=15.4
|
|
[Episode 163570] reward=-118262384.5 actor_loss=0.3135 critic_loss=84331771002.8800 entropy=17.6673 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 163580] reward=-118275238.8 actor_loss=0.2656 critic_loss=82113595864.6154 entropy=17.6528 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 163580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548182.8 mean_steps=13.7
|
|
[Episode 163590] reward=-115204897.0 actor_loss=0.2377 critic_loss=83849484151.4667 entropy=17.6436 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 163600] reward=-119250341.2 actor_loss=0.3201 critic_loss=85272888934.4000 entropy=17.6501 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 163600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-411430.7 mean_steps=16.5
|
|
[Episode 163610] reward=-121664659.0 actor_loss=0.3224 critic_loss=111787001657.8065 entropy=17.6510 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 163620] reward=-122245996.8 actor_loss=0.2526 critic_loss=92296171038.1176 entropy=17.6312 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 163620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-380885.8 mean_steps=15.0
|
|
[Episode 163630] reward=-121630164.8 actor_loss=0.3370 critic_loss=85797255213.5111 entropy=17.6237 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 163640] reward=-117211750.9 actor_loss=0.3362 critic_loss=88638108285.1555 entropy=17.6161 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 163640] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-655276.8 mean_steps=11.3
|
|
[Episode 163650] reward=-118208480.5 actor_loss=0.3168 critic_loss=85048822052.5714 entropy=17.6043 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 163660] reward=-118452586.2 actor_loss=0.3337 critic_loss=87381722726.4000 entropy=17.6036 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 163660] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-324103.8 mean_steps=16.7
|
|
[Episode 163670] reward=-109775589.2 actor_loss=0.3564 critic_loss=77130865049.6000 entropy=17.5898 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 163680] reward=-115404472.0 actor_loss=0.3793 critic_loss=83826124390.4000 entropy=17.5879 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 163680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548237.3 mean_steps=13.4
|
|
[Episode 163690] reward=-119580589.7 actor_loss=0.2692 critic_loss=87294679980.9730 entropy=17.6015 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 163700] reward=-123413684.1 actor_loss=0.2631 critic_loss=92067999565.9130 entropy=17.5989 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 163700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-357496.7 mean_steps=14.2
|
|
[Episode 163710] reward=-120657471.9 actor_loss=0.2505 critic_loss=88788561273.2632 entropy=17.6028 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 163720] reward=-114475159.3 actor_loss=0.2411 critic_loss=80496764336.3556 entropy=17.6007 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 163720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559045.1 mean_steps=12.6
|
|
[Episode 163730] reward=-115667609.1 actor_loss=0.3205 critic_loss=84399827626.6667 entropy=17.6098 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 163740] reward=-120834152.7 actor_loss=0.3232 critic_loss=87543091699.5122 entropy=17.6148 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 163740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517467.3 mean_steps=14.5
|
|
[Episode 163750] reward=-119694077.9 actor_loss=0.3068 critic_loss=107129541836.8000 entropy=17.6072 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 163760] reward=-118388491.2 actor_loss=0.3292 critic_loss=83316809181.8667 entropy=17.6100 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 163760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445891.4 mean_steps=15.6
|
|
[Episode 163770] reward=-120390396.3 actor_loss=0.2941 critic_loss=97792872857.6000 entropy=17.6182 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 163780] reward=-121726424.2 actor_loss=0.3632 critic_loss=119334460129.2800 entropy=17.6232 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 163780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-580411.5 mean_steps=12.1
|
|
[Episode 163790] reward=-134183774.2 actor_loss=0.2880 critic_loss=1555589903491.2820 entropy=17.6309 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 163800] reward=-115948036.8 actor_loss=0.3231 critic_loss=88081841590.8571 entropy=17.6383 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 163800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522976.3 mean_steps=14.1
|
|
[Episode 163810] reward=-125045635.6 actor_loss=0.2841 critic_loss=303552532935.1111 entropy=17.6441 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 163820] reward=-129981461.5 actor_loss=0.2396 critic_loss=629549173115.2593 entropy=17.6671 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 163820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505412.6 mean_steps=14.1
|
|
[Episode 163830] reward=-115338827.6 actor_loss=0.3557 critic_loss=149894757307.7333 entropy=17.6654 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 163840] reward=-118707341.3 actor_loss=0.3389 critic_loss=83138500403.2000 entropy=17.6635 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 163840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526000.8 mean_steps=14.6
|
|
[Episode 163850] reward=-120511474.3 actor_loss=0.2901 critic_loss=87265404830.4762 entropy=17.6618 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 163860] reward=-118055973.9 actor_loss=0.2948 critic_loss=87106253568.0000 entropy=17.6791 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 163860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408047.5 mean_steps=15.2
|
|
[Episode 163870] reward=-119791797.9 actor_loss=0.2998 critic_loss=86712682023.3846 entropy=17.6743 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 163880] reward=-124397720.6 actor_loss=0.2674 critic_loss=167914323968.0000 entropy=17.6723 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 163880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-394088.8 mean_steps=15.5
|
|
[Episode 163890] reward=-118240167.2 actor_loss=0.3044 critic_loss=173234692608.0000 entropy=17.6689 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 163900] reward=-116729725.2 actor_loss=0.2435 critic_loss=93227090141.4054 entropy=17.6740 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 163900] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-356013.0 mean_steps=16.6
|
|
[Episode 163910] reward=-109472289.9 actor_loss=0.3306 critic_loss=88555611204.2667 entropy=17.6792 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 163920] reward=-118757607.6 actor_loss=0.2428 critic_loss=89112806589.6296 entropy=17.6985 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 163920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-613164.9 mean_steps=13.9
|
|
[Episode 163930] reward=-119852748.0 actor_loss=0.2187 critic_loss=96069751421.1555 entropy=17.7023 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 163940] reward=-113656239.9 actor_loss=0.3993 critic_loss=85942055003.0222 entropy=17.6918 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 163940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467037.3 mean_steps=14.5
|
|
[Episode 163950] reward=-114236495.8 actor_loss=0.2945 critic_loss=83275698267.0222 entropy=17.6907 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 163960] reward=-118479663.3 actor_loss=0.2981 critic_loss=86050513859.7647 entropy=17.6731 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 163960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536354.1 mean_steps=13.6
|
|
[Episode 163970] reward=-117281492.1 actor_loss=0.3092 critic_loss=91259259562.6667 entropy=17.6612 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 163980] reward=-124035732.7 actor_loss=0.2299 critic_loss=111399171072.0000 entropy=17.6692 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 163980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-519767.6 mean_steps=13.4
|
|
[Episode 163990] reward=-118245822.6 actor_loss=0.4140 critic_loss=87125216768.0000 entropy=17.6658 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 164000] reward=-118998451.9 actor_loss=0.3523 critic_loss=91557074716.4444 entropy=17.6749 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 164000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432101.2 mean_steps=15.7
|
|
[Episode 164010] reward=-120922273.9 actor_loss=0.1440 critic_loss=87922384896.0000 entropy=17.6767 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 164020] reward=-119882554.2 actor_loss=0.2534 critic_loss=89002724352.0000 entropy=17.6866 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 164020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580602.1 mean_steps=12.7
|
|
[Episode 164030] reward=-118454830.9 actor_loss=0.3827 critic_loss=86138660636.4444 entropy=17.6813 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 164040] reward=-121238147.5 actor_loss=0.3440 critic_loss=88750730035.2000 entropy=17.6761 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 164040] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-304744.4 mean_steps=17.2
|
|
[Episode 164050] reward=-117583657.7 actor_loss=0.2933 critic_loss=79909349512.5333 entropy=17.6707 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 164060] reward=-115033676.7 actor_loss=0.3477 critic_loss=84726304312.8889 entropy=17.6790 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 164060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-627861.2 mean_steps=14.2
|
|
[Episode 164070] reward=-122655920.9 actor_loss=0.3864 critic_loss=89089061494.1538 entropy=17.6736 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 164080] reward=-120689549.2 actor_loss=0.2822 critic_loss=89367866572.8000 entropy=17.6596 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 164080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-627195.6 mean_steps=12.1
|
|
[Episode 164090] reward=-121800124.2 actor_loss=0.2323 critic_loss=88901506284.3077 entropy=17.6327 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 164100] reward=-111179363.0 actor_loss=0.3308 critic_loss=81269727232.0000 entropy=17.6288 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 164100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492314.8 mean_steps=14.0
|
|
[Episode 164110] reward=-118363299.4 actor_loss=0.2535 critic_loss=80182947566.9333 entropy=17.6216 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 164120] reward=-121719200.0 actor_loss=0.2980 critic_loss=90347622400.0000 entropy=17.6390 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 164120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498915.8 mean_steps=14.0
|
|
[Episode 164130] reward=-115732552.9 actor_loss=0.3230 critic_loss=80153006275.0476 entropy=17.6440 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 164140] reward=-122213882.6 actor_loss=0.2256 critic_loss=87996205528.6154 entropy=17.6507 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 164140] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-311672.7 mean_steps=16.3
|
|
[Episode 164150] reward=-121183602.0 actor_loss=0.2789 critic_loss=90734645885.1555 entropy=17.6499 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 164160] reward=-114382313.7 actor_loss=0.3341 critic_loss=78853462659.6572 entropy=17.6500 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 164160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-456849.1 mean_steps=15.5
|
|
[Episode 164170] reward=-118618366.6 actor_loss=0.3225 critic_loss=92365259407.3600 entropy=17.6572 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 164180] reward=-115165467.8 actor_loss=0.3226 critic_loss=88591156317.0909 entropy=17.6499 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 164180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478586.6 mean_steps=13.9
|
|
[Episode 164190] reward=-119036019.2 actor_loss=0.3039 critic_loss=89545073623.0400 entropy=17.6496 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 164200] reward=-113079016.6 actor_loss=0.2530 critic_loss=79319541350.4000 entropy=17.6450 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 164200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535793.9 mean_steps=14.2
|
|
[Episode 164210] reward=-119726104.0 actor_loss=0.3355 critic_loss=86523187415.5789 entropy=17.6385 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 164220] reward=-122128155.1 actor_loss=0.3943 critic_loss=103080057059.5556 entropy=17.6453 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 164220] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-631741.6 mean_steps=11.4
|
|
[Episode 164230] reward=-116133889.3 actor_loss=0.3028 critic_loss=89155699169.8824 entropy=17.6460 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 164240] reward=-114689978.2 actor_loss=0.3896 critic_loss=81698861590.2609 entropy=17.6301 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 164240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-497595.4 mean_steps=12.3
|
|
[Episode 164250] reward=-112317555.1 actor_loss=0.4093 critic_loss=84960608256.0000 entropy=17.6208 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 164260] reward=-115077762.9 actor_loss=0.3384 critic_loss=77501627616.7805 entropy=17.6451 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 164260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402257.2 mean_steps=16.5
|
|
[Episode 164270] reward=-117698357.8 actor_loss=0.2517 critic_loss=84215479929.9048 entropy=17.6392 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 164280] reward=-121260150.4 actor_loss=0.2504 critic_loss=95037856321.6410 entropy=17.6231 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 164280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484650.1 mean_steps=13.7
|
|
[Episode 164290] reward=-113314392.2 actor_loss=0.3333 critic_loss=79935937467.7333 entropy=17.6188 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 164300] reward=-119134960.9 actor_loss=0.3591 critic_loss=87438926536.3478 entropy=17.6089 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 164300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566090.1 mean_steps=13.0
|
|
[Episode 164310] reward=-114853617.9 actor_loss=0.2243 critic_loss=80334487369.9556 entropy=17.6058 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 164320] reward=-121770924.0 actor_loss=0.2880 critic_loss=86958870660.1290 entropy=17.6136 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 164320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569680.7 mean_steps=13.6
|
|
[Episode 164330] reward=-120019769.3 actor_loss=0.2549 critic_loss=90572406374.4000 entropy=17.6226 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 164340] reward=-120298122.9 actor_loss=0.2938 critic_loss=86080697244.9032 entropy=17.6387 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 164340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531091.9 mean_steps=13.5
|
|
[Episode 164350] reward=-114830171.7 actor_loss=0.3072 critic_loss=85490224566.8571 entropy=17.6516 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 164360] reward=-115637680.2 actor_loss=0.3095 critic_loss=79467057034.9714 entropy=17.6584 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 164360] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-632326.2 mean_steps=11.1
|
|
[Episode 164370] reward=-120391300.4 actor_loss=0.4138 critic_loss=91758301821.1555 entropy=17.6675 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 164380] reward=-116149841.9 actor_loss=0.2531 critic_loss=84277494406.7368 entropy=17.6692 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 164380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553655.7 mean_steps=13.2
|
|
[Episode 164390] reward=-122895559.5 actor_loss=0.2599 critic_loss=101486602563.3684 entropy=17.6625 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 164400] reward=-114914371.2 actor_loss=0.2430 critic_loss=81943308354.0645 entropy=17.6603 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 164400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-499014.5 mean_steps=15.1
|
|
[Episode 164410] reward=-116673426.4 actor_loss=0.3440 critic_loss=89194682075.4286 entropy=17.6602 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 164420] reward=-113875365.7 actor_loss=0.3617 critic_loss=88803201165.2414 entropy=17.6548 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 164420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482665.7 mean_steps=14.7
|
|
[Episode 164430] reward=-114989926.7 actor_loss=0.3109 critic_loss=87238512298.6667 entropy=17.6496 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 164440] reward=-119924068.4 actor_loss=0.2440 critic_loss=91426954936.3200 entropy=17.6434 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 164440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458409.7 mean_steps=14.8
|
|
[Episode 164450] reward=-116827341.6 actor_loss=0.2084 critic_loss=87750565143.2727 entropy=17.6400 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 164460] reward=-118543581.7 actor_loss=0.2142 critic_loss=87214553646.5455 entropy=17.6338 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 164460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458759.3 mean_steps=15.4
|
|
[Episode 164470] reward=-118570303.9 actor_loss=0.3184 critic_loss=88447323098.0741 entropy=17.6397 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 164480] reward=-119228765.1 actor_loss=0.2102 critic_loss=83980223256.7742 entropy=17.6443 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 164480] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-332510.9 mean_steps=16.6
|
|
[Episode 164490] reward=-117800256.7 actor_loss=0.2495 critic_loss=105362194000.8421 entropy=17.6435 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 164500] reward=-120009487.4 actor_loss=0.3069 critic_loss=88667821260.8000 entropy=17.6357 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 164500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540233.0 mean_steps=13.3
|
|
[Episode 164510] reward=-114417387.7 actor_loss=0.3855 critic_loss=81008858577.4545 entropy=17.6312 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 164520] reward=-123113677.7 actor_loss=0.2986 critic_loss=103233875804.1600 entropy=17.6306 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 164520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459614.2 mean_steps=14.3
|
|
[Episode 164530] reward=-115667787.3 actor_loss=0.3108 critic_loss=85483618304.0000 entropy=17.6371 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 164540] reward=-116485018.3 actor_loss=0.2706 critic_loss=87177510502.4000 entropy=17.6364 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 164540] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-677770.2 mean_steps=11.5
|
|
[Episode 164550] reward=-121449234.4 actor_loss=0.3869 critic_loss=87045503749.6889 entropy=17.6305 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 164560] reward=-116794398.2 actor_loss=0.3237 critic_loss=92462244295.1111 entropy=17.6340 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 164560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-404538.2 mean_steps=14.5
|
|
[Episode 164570] reward=-118309280.6 actor_loss=0.2968 critic_loss=85832180675.7647 entropy=17.6338 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 164580] reward=-121875439.1 actor_loss=0.3070 critic_loss=88071458816.0000 entropy=17.6313 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 164580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-702122.4 mean_steps=12.1
|
|
[Episode 164590] reward=-125724379.7 actor_loss=0.3400 critic_loss=95713992192.0000 entropy=17.6287 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 164600] reward=-121195144.9 actor_loss=0.2138 critic_loss=85981405928.7273 entropy=17.6297 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 164600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-456473.6 mean_steps=15.6
|
|
[Episode 164610] reward=-117545303.4 actor_loss=0.2583 critic_loss=78084458682.1818 entropy=17.6377 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 164620] reward=-115121026.5 actor_loss=0.2316 critic_loss=78885282398.8148 entropy=17.6358 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 164620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-528927.4 mean_steps=15.1
|
|
[Episode 164630] reward=-124275135.1 actor_loss=0.2728 critic_loss=90518561731.7647 entropy=17.6302 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 164640] reward=-114207097.6 actor_loss=0.2750 critic_loss=76697814841.8065 entropy=17.6267 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 164640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-343877.4 mean_steps=17.2
|
|
[Episode 164650] reward=-121088351.4 actor_loss=0.2462 critic_loss=85626897246.3158 entropy=17.6277 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 164660] reward=-119618415.0 actor_loss=0.3398 critic_loss=87011615539.2000 entropy=17.6287 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 164660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491308.1 mean_steps=14.2
|
|
[Episode 164670] reward=-119104810.2 actor_loss=0.1899 critic_loss=86413995043.3103 entropy=17.6249 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 164680] reward=-116821827.8 actor_loss=0.2685 critic_loss=81383237073.4545 entropy=17.6166 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 164680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535680.4 mean_steps=14.5
|
|
[Episode 164690] reward=-118375925.1 actor_loss=0.3636 critic_loss=85161944300.3077 entropy=17.6063 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 164700] reward=-118890408.8 actor_loss=0.3035 critic_loss=91549680360.7273 entropy=17.6095 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 164700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425963.7 mean_steps=15.4
|
|
[Episode 164710] reward=-118780027.1 actor_loss=0.3132 critic_loss=83525016576.0000 entropy=17.6194 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 164720] reward=-124258934.1 actor_loss=0.2832 critic_loss=181345010270.8148 entropy=17.6141 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 164720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459442.5 mean_steps=15.8
|
|
[Episode 164730] reward=-119192497.2 actor_loss=0.2736 critic_loss=83842594816.0000 entropy=17.6158 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 164740] reward=-118211900.7 actor_loss=0.2918 critic_loss=86520330397.5385 entropy=17.6154 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 164740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523698.9 mean_steps=13.3
|
|
[Episode 164750] reward=-115330573.9 actor_loss=0.2501 critic_loss=80052817408.0000 entropy=17.6151 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 164760] reward=-118104763.0 actor_loss=0.3189 critic_loss=88637641614.2222 entropy=17.6082 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 164760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613042.6 mean_steps=13.0
|
|
[Episode 164770] reward=-121888524.7 actor_loss=0.2408 critic_loss=89190722109.4400 entropy=17.6006 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 164780] reward=-110078748.1 actor_loss=0.3982 critic_loss=76424877312.0000 entropy=17.6040 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 164780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504447.7 mean_steps=14.7
|
|
[Episode 164790] reward=-118419732.9 actor_loss=0.2622 critic_loss=84905878449.2308 entropy=17.6096 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 164800] reward=-114617438.1 actor_loss=0.2963 critic_loss=80342132112.6956 entropy=17.5928 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 164800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441536.7 mean_steps=14.8
|
|
[Episode 164810] reward=-118929888.6 actor_loss=0.3111 critic_loss=83168257024.0000 entropy=17.5975 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 164820] reward=-124246695.4 actor_loss=0.2599 critic_loss=91044651417.6000 entropy=17.6089 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 164820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-374763.2 mean_steps=16.1
|
|
[Episode 164830] reward=-112788683.4 actor_loss=0.3865 critic_loss=79591242547.2000 entropy=17.6015 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 164840] reward=-119289495.8 actor_loss=0.3304 critic_loss=89836806144.0000 entropy=17.6019 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 164840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-419392.3 mean_steps=16.1
|
|
[Episode 164850] reward=-116322667.3 actor_loss=0.3198 critic_loss=83168608256.0000 entropy=17.6137 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 164860] reward=-120910154.8 actor_loss=0.2447 critic_loss=84109240427.7895 entropy=17.6058 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 164860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540224.1 mean_steps=13.6
|
|
[Episode 164870] reward=-121666763.6 actor_loss=0.3499 critic_loss=89578669943.4667 entropy=17.6120 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 164880] reward=-117628263.3 actor_loss=0.3561 critic_loss=121440371817.9310 entropy=17.6013 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 164880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562189.1 mean_steps=13.3
|
|
[Episode 164890] reward=-113369123.0 actor_loss=0.2672 critic_loss=76897245476.5714 entropy=17.5844 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 164900] reward=-118749114.2 actor_loss=0.2382 critic_loss=97788533820.2353 entropy=17.5954 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 164900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488367.6 mean_steps=13.8
|
|
[Episode 164910] reward=-115088017.7 actor_loss=0.2437 critic_loss=80069338512.6956 entropy=17.5867 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 164920] reward=-116367737.8 actor_loss=0.4035 critic_loss=106711453696.0000 entropy=17.5964 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 164920] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-763061.7 mean_steps=10.7
|
|
[Episode 164930] reward=-120592242.2 actor_loss=0.2694 critic_loss=131357924147.2000 entropy=17.5999 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 164940] reward=-121451431.9 actor_loss=0.2695 critic_loss=93219752430.3448 entropy=17.6019 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 164940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-412126.8 mean_steps=16.2
|
|
[Episode 164950] reward=-120944339.9 actor_loss=0.2595 critic_loss=108836388507.8261 entropy=17.5964 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 164960] reward=-119254276.4 actor_loss=0.3529 critic_loss=88448685149.0909 entropy=17.5924 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 164960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467159.0 mean_steps=13.8
|
|
[Episode 164970] reward=-117293805.1 actor_loss=0.2886 critic_loss=82815232089.0435 entropy=17.5875 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 164980] reward=-113820942.5 actor_loss=0.2981 critic_loss=82169122178.8445 entropy=17.5815 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 164980] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-617540.3 mean_steps=11.2
|
|
[Episode 164990] reward=-115384892.7 actor_loss=0.2193 critic_loss=81406316999.1111 entropy=17.5766 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 165000] reward=-121551535.8 actor_loss=0.3108 critic_loss=84889428377.6000 entropy=17.5770 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 165000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-523688.5 mean_steps=12.3
|
|
[Episode 165010] reward=-118616918.5 actor_loss=0.2921 critic_loss=82899948766.6087 entropy=17.5684 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 165020] reward=-120363930.0 actor_loss=0.2994 critic_loss=85535797463.5789 entropy=17.5767 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 165020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-601241.0 mean_steps=12.7
|
|
[Episode 165030] reward=-120228835.4 actor_loss=0.2950 critic_loss=83757106289.7778 entropy=17.5608 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 165040] reward=-119197588.5 actor_loss=0.3086 critic_loss=111745027276.8000 entropy=17.5555 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 165040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486681.4 mean_steps=14.1
|
|
[Episode 165050] reward=-117094591.8 actor_loss=0.2426 critic_loss=83452723746.1333 entropy=17.5621 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 165060] reward=-118426411.7 actor_loss=0.2325 critic_loss=84685240911.6444 entropy=17.5639 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 165060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-518449.5 mean_steps=12.0
|
|
[Episode 165070] reward=-124373604.5 actor_loss=0.2932 critic_loss=133478594423.4667 entropy=17.5622 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 165080] reward=-121645516.4 actor_loss=0.3064 critic_loss=86486578517.3333 entropy=17.5517 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 165080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474845.6 mean_steps=13.8
|
|
[Episode 165090] reward=-118029763.1 actor_loss=0.3701 critic_loss=81749047296.0000 entropy=17.5681 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 165100] reward=-119866813.5 actor_loss=0.1930 critic_loss=85665353636.9778 entropy=17.5722 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 165100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547291.0 mean_steps=13.5
|
|
[Episode 165110] reward=-117189862.0 actor_loss=0.2317 critic_loss=83142676831.0857 entropy=17.5737 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 165120] reward=-122472564.6 actor_loss=0.2718 critic_loss=89306600106.6667 entropy=17.5806 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 165120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410903.3 mean_steps=15.2
|
|
[Episode 165130] reward=-121121584.7 actor_loss=0.1906 critic_loss=87065962414.0800 entropy=17.5818 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 165140] reward=-113953959.8 actor_loss=0.3161 critic_loss=83012028188.4444 entropy=17.5802 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 165140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455945.2 mean_steps=14.4
|
|
[Episode 165150] reward=-114921536.4 actor_loss=0.3105 critic_loss=76862228616.5333 entropy=17.5810 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 165160] reward=-117766612.0 actor_loss=0.2276 critic_loss=88084837990.4000 entropy=17.5814 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 165160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531031.6 mean_steps=13.3
|
|
[Episode 165170] reward=-119294304.5 actor_loss=0.2055 critic_loss=87989359957.3333 entropy=17.5756 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 165180] reward=-121625458.6 actor_loss=0.2564 critic_loss=90482871091.2000 entropy=17.5664 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 165180] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-614062.2 mean_steps=11.8
|
|
[Episode 165190] reward=-110796402.2 actor_loss=0.4133 critic_loss=87443131491.0968 entropy=17.5681 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 165200] reward=-121556061.5 actor_loss=0.1941 critic_loss=116519476130.9091 entropy=17.5613 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 165200] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-345306.6 mean_steps=16.9
|
|
[Episode 165210] reward=-113445314.2 actor_loss=0.2639 critic_loss=83239412456.7273 entropy=17.5634 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 165220] reward=-117570396.1 actor_loss=0.2838 critic_loss=86834377386.6667 entropy=17.5836 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 165220] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-606563.8 mean_steps=12.0
|
|
[Episode 165230] reward=-118190460.4 actor_loss=0.2194 critic_loss=87443937052.4444 entropy=17.5828 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 165240] reward=-119738695.8 actor_loss=0.2913 critic_loss=85813948074.6667 entropy=17.5770 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 165240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453513.7 mean_steps=15.0
|
|
[Episode 165250] reward=-117566940.7 actor_loss=0.2929 critic_loss=84356517614.9333 entropy=17.5809 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 165260] reward=-119519696.5 actor_loss=0.2464 critic_loss=80343983072.9697 entropy=17.5720 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 165260] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-631530.0 mean_steps=11.9
|
|
[Episode 165270] reward=-119020401.1 actor_loss=0.3078 critic_loss=84768784091.4286 entropy=17.5840 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 165280] reward=-115491288.9 actor_loss=0.3075 critic_loss=82033329989.8182 entropy=17.5900 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 165280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588366.0 mean_steps=13.5
|
|
[Episode 165290] reward=-120765169.8 actor_loss=0.2939 critic_loss=86337710421.3333 entropy=17.5911 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 165300] reward=-119528964.1 actor_loss=0.3114 critic_loss=78820235264.0000 entropy=17.6008 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 165300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414186.1 mean_steps=15.4
|
|
[Episode 165310] reward=-114090004.1 actor_loss=0.3598 critic_loss=83686153849.9048 entropy=17.6043 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 165320] reward=-115771903.2 actor_loss=0.3200 critic_loss=85577077321.1429 entropy=17.6098 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 165320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470959.3 mean_steps=15.2
|
|
[Episode 165330] reward=-118648879.4 actor_loss=0.4742 critic_loss=87189397807.4074 entropy=17.6108 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1523 front_blocked=0
|
|
[Episode 165340] reward=-117220634.6 actor_loss=0.3319 critic_loss=85485844268.1379 entropy=17.6189 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 165340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518486.9 mean_steps=14.1
|
|
[Episode 165350] reward=-112979112.1 actor_loss=0.2221 critic_loss=80926904692.3636 entropy=17.6182 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 165360] reward=-118264620.1 actor_loss=0.3266 critic_loss=87352126208.0000 entropy=17.6204 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 165360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509038.9 mean_steps=14.1
|
|
[Episode 165370] reward=-118534449.8 actor_loss=0.3449 critic_loss=86083496448.0000 entropy=17.6249 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 165380] reward=-119757250.2 actor_loss=0.3897 critic_loss=86739219797.3333 entropy=17.6237 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 165380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-565048.9 mean_steps=12.5
|
|
[Episode 165390] reward=-118476606.1 actor_loss=0.2718 critic_loss=80563491971.2821 entropy=17.6276 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 165400] reward=-117648250.9 actor_loss=0.2816 critic_loss=82663674502.7368 entropy=17.6335 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 165400] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-354383.6 mean_steps=16.9
|
|
[Episode 165410] reward=-123854603.5 actor_loss=0.3335 critic_loss=92011839703.5789 entropy=17.6243 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 165420] reward=-118969445.2 actor_loss=0.4559 critic_loss=85066325196.8000 entropy=17.6199 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 165420] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-331688.8 mean_steps=16.9
|
|
[Episode 165430] reward=-117232083.5 actor_loss=0.3393 critic_loss=89457124498.2857 entropy=17.6230 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 165440] reward=-118292607.7 actor_loss=0.2835 critic_loss=84564781056.0000 entropy=17.6186 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 165440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447132.5 mean_steps=14.4
|
|
[Episode 165450] reward=-119778070.4 actor_loss=0.3737 critic_loss=86975691889.7778 entropy=17.6247 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 165460] reward=-115878328.0 actor_loss=0.3722 critic_loss=82931186654.9677 entropy=17.6186 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 165460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-508717.5 mean_steps=15.7
|
|
[Episode 165470] reward=-112490267.0 actor_loss=0.1756 critic_loss=78413133931.7895 entropy=17.6109 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 165480] reward=-119490549.4 actor_loss=0.2386 critic_loss=86900523364.1739 entropy=17.6085 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 165480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578622.8 mean_steps=13.7
|
|
[Episode 165490] reward=-115730546.6 actor_loss=0.2308 critic_loss=81872806203.0769 entropy=17.6024 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 165500] reward=-121956470.9 actor_loss=0.2576 critic_loss=87365394075.8261 entropy=17.5985 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 165500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-408284.7 mean_steps=15.8
|
|
[Episode 165510] reward=-117540527.7 actor_loss=0.3246 critic_loss=83435039012.5714 entropy=17.5949 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 165520] reward=-114850079.2 actor_loss=0.3668 critic_loss=83910607112.2581 entropy=17.6000 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 165520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-604866.8 mean_steps=13.8
|
|
[Episode 165530] reward=-119649930.0 actor_loss=0.2840 critic_loss=82870685440.0000 entropy=17.5966 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 165540] reward=-118724773.8 actor_loss=0.2981 critic_loss=91054420943.2381 entropy=17.5838 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 165540] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-697845.1 mean_steps=11.4
|
|
[Episode 165550] reward=-118525197.1 actor_loss=0.2895 critic_loss=84041496364.1379 entropy=17.5888 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 165560] reward=-125024434.6 actor_loss=0.2942 critic_loss=97521756947.6923 entropy=17.5801 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 165560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491676.3 mean_steps=15.2
|
|
[Episode 165570] reward=-119374421.7 actor_loss=0.2674 critic_loss=84575974546.2857 entropy=17.5825 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 165580] reward=-116864367.3 actor_loss=0.2979 critic_loss=81534225563.1515 entropy=17.5610 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 165580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420813.5 mean_steps=15.7
|
|
[Episode 165590] reward=-119031956.3 actor_loss=0.2971 critic_loss=82016327179.3778 entropy=17.5550 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 165600] reward=-113570239.1 actor_loss=0.3490 critic_loss=79408174057.2444 entropy=17.5690 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 165600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592479.6 mean_steps=13.8
|
|
[Episode 165610] reward=-123812978.9 actor_loss=0.2166 critic_loss=89400181480.7273 entropy=17.5593 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 165620] reward=-121652584.3 actor_loss=0.2470 critic_loss=85588398740.6452 entropy=17.5558 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 165620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565849.0 mean_steps=13.8
|
|
[Episode 165630] reward=-117308363.7 actor_loss=0.2307 critic_loss=87095279972.1739 entropy=17.5541 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 165640] reward=-120236072.9 actor_loss=0.2001 critic_loss=87443915264.0000 entropy=17.5546 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 165640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-527895.7 mean_steps=15.2
|
|
[Episode 165650] reward=-117288336.3 actor_loss=0.2889 critic_loss=85207932108.8000 entropy=17.5676 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 165660] reward=-115407728.4 actor_loss=0.2866 critic_loss=89035400260.2667 entropy=17.5558 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 165660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-403211.7 mean_steps=14.3
|
|
[Episode 165670] reward=-121249219.3 actor_loss=0.3297 critic_loss=87717955128.8889 entropy=17.5572 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 165680] reward=-119735413.2 actor_loss=0.4299 critic_loss=90489034849.5238 entropy=17.5530 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 165680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495645.0 mean_steps=13.9
|
|
[Episode 165690] reward=-122274815.7 actor_loss=0.3554 critic_loss=93165082851.5556 entropy=17.5468 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 165700] reward=-118100534.5 actor_loss=0.2148 critic_loss=87740068986.8800 entropy=17.5462 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 165700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514939.0 mean_steps=13.9
|
|
[Episode 165710] reward=-118574043.2 actor_loss=0.3880 critic_loss=83497387429.6471 entropy=17.5675 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 165720] reward=-124877628.2 actor_loss=0.3427 critic_loss=444045074978.1334 entropy=17.5774 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 165720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483477.5 mean_steps=14.8
|
|
[Episode 165730] reward=-118984195.7 actor_loss=0.2824 critic_loss=81471226636.1905 entropy=17.5930 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 165740] reward=-117908000.9 actor_loss=0.3123 critic_loss=81708772693.3333 entropy=17.5978 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 165740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419498.8 mean_steps=15.4
|
|
[Episode 165750] reward=-116869403.0 actor_loss=0.2840 critic_loss=79812796962.1333 entropy=17.6076 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 165760] reward=-118045127.8 actor_loss=0.3565 critic_loss=102812447744.0000 entropy=17.5997 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 165760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-620853.3 mean_steps=12.1
|
|
[Episode 165770] reward=-122545969.7 actor_loss=0.2521 critic_loss=89545249972.7059 entropy=17.5924 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 165780] reward=-118757425.3 actor_loss=0.1926 critic_loss=84771025169.0667 entropy=17.5942 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 165780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-622866.9 mean_steps=12.0
|
|
[Episode 165790] reward=-123632160.7 actor_loss=0.2321 critic_loss=94233746227.2000 entropy=17.5854 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 165800] reward=-120403213.5 actor_loss=0.2736 critic_loss=85180521633.6842 entropy=17.5786 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 165800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449946.8 mean_steps=15.1
|
|
[Episode 165810] reward=-123407907.4 actor_loss=0.2428 critic_loss=95130529069.1765 entropy=17.5844 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 165820] reward=-123859063.4 actor_loss=0.4837 critic_loss=92046994545.7778 entropy=17.5841 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 165820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564798.6 mean_steps=13.2
|
|
[Episode 165830] reward=-118257534.0 actor_loss=0.2351 critic_loss=85707363917.5758 entropy=17.5871 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 165840] reward=-119710551.9 actor_loss=0.3199 critic_loss=85122017641.4118 entropy=17.5842 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 165840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503033.1 mean_steps=15.2
|
|
[Episode 165850] reward=-121063273.6 actor_loss=0.3451 critic_loss=89160432753.7778 entropy=17.5820 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 165860] reward=-124011668.9 actor_loss=0.2662 critic_loss=91729428662.0444 entropy=17.5872 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 165860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-436838.7 mean_steps=14.6
|
|
[Episode 165870] reward=-122467899.6 actor_loss=0.3045 critic_loss=89522493807.5897 entropy=17.5881 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 165880] reward=-120067136.5 actor_loss=0.3116 critic_loss=85090591712.9697 entropy=17.5866 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 165880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-546564.5 mean_steps=14.3
|
|
[Episode 165890] reward=-120218900.0 actor_loss=0.3487 critic_loss=89678680215.7037 entropy=17.5812 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 165900] reward=-123226298.9 actor_loss=0.2871 critic_loss=111788995220.6452 entropy=17.5768 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 165900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-556927.1 mean_steps=12.3
|
|
[Episode 165910] reward=-120489858.5 actor_loss=0.3422 critic_loss=88586833161.4815 entropy=17.5783 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 165920] reward=-114359852.8 actor_loss=0.3282 critic_loss=78953649675.3778 entropy=17.5774 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 165920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610111.2 mean_steps=13.2
|
|
[Episode 165930] reward=-121245353.5 actor_loss=0.3702 critic_loss=86106159224.4706 entropy=17.5768 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 165940] reward=-121583107.3 actor_loss=0.1766 critic_loss=87279748796.6316 entropy=17.5627 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 165940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-530032.9 mean_steps=15.3
|
|
[Episode 165950] reward=-123867307.1 actor_loss=0.2744 critic_loss=94283750968.8889 entropy=17.5719 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 165960] reward=-122481786.9 actor_loss=0.2607 critic_loss=86799893162.6667 entropy=17.5752 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 165960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-492413.7 mean_steps=15.8
|
|
[Episode 165970] reward=-118455642.2 actor_loss=0.3533 critic_loss=82755229484.1379 entropy=17.5654 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 165980] reward=-118710273.7 actor_loss=0.2135 critic_loss=86901631906.9091 entropy=17.5641 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 165980] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-689489.3 mean_steps=11.3
|
|
[Episode 165990] reward=-113211199.4 actor_loss=0.4260 critic_loss=82309044633.6000 entropy=17.5574 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 166000] reward=-122507929.7 actor_loss=0.3478 critic_loss=86693800906.1053 entropy=17.5605 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 166000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-611669.7 mean_steps=12.8
|
|
[Episode 166010] reward=-120934132.1 actor_loss=0.3470 critic_loss=95590937320.7273 entropy=17.5568 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 166020] reward=-122571101.8 actor_loss=0.3309 critic_loss=92788144947.2000 entropy=17.5538 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 166020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616581.8 mean_steps=12.6
|
|
[Episode 166030] reward=-125150183.9 actor_loss=0.2659 critic_loss=89688323413.3333 entropy=17.5573 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 166040] reward=-119702535.5 actor_loss=0.2886 critic_loss=88965638920.8276 entropy=17.5491 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 166040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539806.9 mean_steps=14.2
|
|
[Episode 166050] reward=-115920723.9 actor_loss=0.3138 critic_loss=84181745033.8462 entropy=17.5563 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 166060] reward=-113908529.4 actor_loss=0.2702 critic_loss=81862169500.9032 entropy=17.5536 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 166060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-416067.0 mean_steps=17.0
|
|
[Episode 166070] reward=-118119333.0 actor_loss=0.2957 critic_loss=86788468345.9048 entropy=17.5664 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 166080] reward=-122139302.2 actor_loss=0.2094 critic_loss=86457182110.4762 entropy=17.5708 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 166080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-412721.3 mean_steps=15.9
|
|
[Episode 166090] reward=-122530157.9 actor_loss=0.2160 critic_loss=85890052369.0667 entropy=17.5507 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 166100] reward=-114653649.4 actor_loss=0.2871 critic_loss=83575095296.0000 entropy=17.5562 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 166100] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-691447.6 mean_steps=11.7
|
|
[Episode 166110] reward=-116609660.4 actor_loss=0.2866 critic_loss=91583342650.5143 entropy=17.5600 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 166120] reward=-120168235.1 actor_loss=0.2557 critic_loss=95209600404.8372 entropy=17.5585 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 166120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-543048.3 mean_steps=14.8
|
|
[Episode 166130] reward=-113358404.4 actor_loss=0.3827 critic_loss=78911915429.6471 entropy=17.5578 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 166140] reward=-115437030.2 actor_loss=0.3788 critic_loss=86204520704.0000 entropy=17.5528 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 166140] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-620775.3 mean_steps=11.9
|
|
[Episode 166150] reward=-119901745.2 actor_loss=0.2856 critic_loss=84350838891.7895 entropy=17.5402 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 166160] reward=-122182350.2 actor_loss=0.3215 critic_loss=87858975948.8000 entropy=17.5346 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 166160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417421.6 mean_steps=15.3
|
|
[Episode 166170] reward=-122454409.1 actor_loss=0.3257 critic_loss=89366225871.2381 entropy=17.5386 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 166180] reward=-119336988.5 actor_loss=0.3097 critic_loss=82783856640.0000 entropy=17.5294 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 166180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-506934.2 mean_steps=12.9
|
|
[Episode 166190] reward=-117131319.9 actor_loss=0.3528 critic_loss=84925040558.0800 entropy=17.5299 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 166200] reward=-119047457.9 actor_loss=0.2745 critic_loss=85533828983.4667 entropy=17.5281 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 166200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434016.6 mean_steps=14.8
|
|
[Episode 166210] reward=-119014935.0 actor_loss=0.2968 critic_loss=85845707707.7333 entropy=17.5276 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 166220] reward=-117902312.3 actor_loss=0.1754 critic_loss=81743467202.2069 entropy=17.5194 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 166220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-557397.9 mean_steps=12.6
|
|
[Episode 166230] reward=-117485870.3 actor_loss=0.1937 critic_loss=86546846230.2609 entropy=17.5298 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 166240] reward=-110728032.0 actor_loss=0.1661 critic_loss=80637209440.7111 entropy=17.5306 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Eval 166240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-642593.1 mean_steps=12.7
|
|
[Episode 166250] reward=-117447263.7 actor_loss=0.2626 critic_loss=84268010496.0000 entropy=17.5216 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 166260] reward=-120780841.7 actor_loss=0.2297 critic_loss=85360401152.0000 entropy=17.5184 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 166260] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-590945.6 mean_steps=11.7
|
|
[Episode 166270] reward=-114649704.4 actor_loss=0.2713 critic_loss=79720754254.7692 entropy=17.5121 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 166280] reward=-122798314.7 actor_loss=0.1649 critic_loss=84875845150.1176 entropy=17.5021 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 166280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-551937.4 mean_steps=14.3
|
|
[Episode 166290] reward=-125019106.2 actor_loss=0.1481 critic_loss=90318475264.0000 entropy=17.4904 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 166300] reward=-120988330.0 actor_loss=0.2817 critic_loss=88095288282.0741 entropy=17.4905 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 166300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479223.3 mean_steps=14.3
|
|
[Episode 166310] reward=-120203044.8 actor_loss=0.3427 critic_loss=91571073024.0000 entropy=17.4879 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 166320] reward=-119724495.9 actor_loss=0.2572 critic_loss=90779234125.9130 entropy=17.4917 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 166320] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-688508.1 mean_steps=10.7
|
|
[Episode 166330] reward=-120929142.8 actor_loss=0.3369 critic_loss=85903666517.3333 entropy=17.4994 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 166340] reward=-122553118.5 actor_loss=0.3067 critic_loss=92062022255.3044 entropy=17.4856 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 166340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-352944.5 mean_steps=16.7
|
|
[Episode 166350] reward=-113373976.5 actor_loss=0.2622 critic_loss=76708446970.0465 entropy=17.4920 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 166360] reward=-114990756.4 actor_loss=0.4123 critic_loss=86840090624.0000 entropy=17.4938 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 166360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614673.3 mean_steps=12.5
|
|
[Episode 166370] reward=-120040331.2 actor_loss=0.2336 critic_loss=85426741794.1333 entropy=17.4889 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 166380] reward=-121646391.9 actor_loss=0.1966 critic_loss=86614842481.7778 entropy=17.4894 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 166380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442326.3 mean_steps=15.3
|
|
[Episode 166390] reward=-117412920.4 actor_loss=0.2051 critic_loss=89523340044.1905 entropy=17.5073 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 166400] reward=-118619557.4 actor_loss=0.2107 critic_loss=88604929915.8710 entropy=17.5073 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 166400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-348527.7 mean_steps=15.7
|
|
[Episode 166410] reward=-121274111.5 actor_loss=0.1966 critic_loss=88285524208.9412 entropy=17.5025 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 166420] reward=-118675255.7 actor_loss=0.3407 critic_loss=85548213248.0000 entropy=17.5024 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 166420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524093.8 mean_steps=13.8
|
|
[Episode 166430] reward=-115974124.3 actor_loss=0.2967 critic_loss=88934837686.8571 entropy=17.5009 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 166440] reward=-120766624.0 actor_loss=0.3709 critic_loss=86437867324.9524 entropy=17.4989 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 166440] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-642470.0 mean_steps=11.8
|
|
[Episode 166450] reward=-120297596.4 actor_loss=0.2999 critic_loss=85969388612.2667 entropy=17.4988 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 166460] reward=-117779051.8 actor_loss=0.2644 critic_loss=83814095932.2353 entropy=17.5001 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 166460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465204.0 mean_steps=15.5
|
|
[Episode 166470] reward=-119313709.0 actor_loss=0.3267 critic_loss=84063090501.8182 entropy=17.4979 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 166480] reward=-116736688.3 actor_loss=0.3240 critic_loss=83034600613.1613 entropy=17.5005 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 166480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482314.3 mean_steps=13.9
|
|
[Episode 166490] reward=-118090569.1 actor_loss=0.3298 critic_loss=86446869974.4865 entropy=17.4983 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 166500] reward=-115807234.1 actor_loss=0.2809 critic_loss=88771968592.8421 entropy=17.4998 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 166500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-355982.3 mean_steps=14.9
|
|
[Episode 166510] reward=-113037681.3 actor_loss=0.3027 critic_loss=81363159598.5455 entropy=17.5055 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 166520] reward=-122389109.0 actor_loss=0.2551 critic_loss=87107228182.2609 entropy=17.4925 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 166520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434463.6 mean_steps=14.5
|
|
[Episode 166530] reward=-121470147.8 actor_loss=0.2604 critic_loss=88208707128.8889 entropy=17.4879 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 166540] reward=-121840692.1 actor_loss=0.3010 critic_loss=89112758044.4444 entropy=17.4887 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 166540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-657879.1 mean_steps=12.5
|
|
[Episode 166550] reward=-118659439.2 actor_loss=0.2737 critic_loss=86133908973.0370 entropy=17.4928 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 166560] reward=-119754063.0 actor_loss=0.3691 critic_loss=212888703658.6667 entropy=17.5050 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 166560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488207.4 mean_steps=14.6
|
|
[Episode 166570] reward=-114116045.4 actor_loss=0.3733 critic_loss=89558233088.0000 entropy=17.5044 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 166580] reward=-120150072.7 actor_loss=0.2753 critic_loss=84843832661.3333 entropy=17.4991 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 166580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-607909.5 mean_steps=12.8
|
|
[Episode 166590] reward=-114486645.9 actor_loss=0.3774 critic_loss=108981068458.6667 entropy=17.4974 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 166600] reward=-119669746.1 actor_loss=0.1924 critic_loss=91105127502.7692 entropy=17.4961 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 166600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503196.9 mean_steps=13.8
|
|
[Episode 166610] reward=-118339562.5 actor_loss=0.3700 critic_loss=84422058393.6000 entropy=17.4934 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 166620] reward=-116208058.4 actor_loss=0.2678 critic_loss=80224738270.9677 entropy=17.4958 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 166620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-436874.8 mean_steps=14.1
|
|
[Episode 166630] reward=-114418576.0 actor_loss=0.3997 critic_loss=87631090868.7059 entropy=17.4881 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 166640] reward=-118380612.0 actor_loss=0.3174 critic_loss=88788592412.4444 entropy=17.4894 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 166640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447738.3 mean_steps=14.3
|
|
[Episode 166650] reward=-119862859.3 actor_loss=0.3002 critic_loss=87379451539.9111 entropy=17.5087 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 166660] reward=-117266157.2 actor_loss=0.2813 critic_loss=81412483413.3333 entropy=17.5088 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 166660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-595180.2 mean_steps=12.5
|
|
[Episode 166670] reward=-117718793.3 actor_loss=0.1771 critic_loss=93186530364.2353 entropy=17.5108 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 166680] reward=-118934995.3 actor_loss=0.2741 critic_loss=111034212111.0588 entropy=17.5224 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 166680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541107.7 mean_steps=14.1
|
|
[Episode 166690] reward=-122315323.9 actor_loss=0.3603 critic_loss=125331902248.4211 entropy=17.5281 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 166700] reward=-122850974.1 actor_loss=0.3390 critic_loss=109152643754.6667 entropy=17.5197 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 166700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478322.0 mean_steps=15.2
|
|
[Episode 166710] reward=-115342758.7 actor_loss=0.2431 critic_loss=81750845781.3333 entropy=17.5200 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 166720] reward=-122980580.4 actor_loss=0.1745 critic_loss=126997835776.0000 entropy=17.5103 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 166720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-588636.8 mean_steps=12.4
|
|
[Episode 166730] reward=-120852522.2 actor_loss=0.3473 critic_loss=84159422464.0000 entropy=17.5241 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 166740] reward=-119921803.7 actor_loss=0.2987 critic_loss=96095774856.5333 entropy=17.5293 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 166740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531340.4 mean_steps=13.9
|
|
[Episode 166750] reward=-120842980.4 actor_loss=0.2542 critic_loss=142546469156.5714 entropy=17.5319 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 166760] reward=-121524752.8 actor_loss=0.2908 critic_loss=137592879689.1429 entropy=17.5246 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 166760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526043.5 mean_steps=13.8
|
|
[Episode 166770] reward=-118099909.2 actor_loss=0.3171 critic_loss=126567118585.4359 entropy=17.5288 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 166780] reward=-115972680.4 actor_loss=0.3395 critic_loss=90610438850.2069 entropy=17.5215 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 166780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478932.8 mean_steps=14.7
|
|
[Episode 166790] reward=-120592767.8 actor_loss=0.2621 critic_loss=85942494031.4483 entropy=17.5252 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 166800] reward=-121802745.5 actor_loss=0.2884 critic_loss=90209501184.0000 entropy=17.5232 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 166800] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-630672.9 mean_steps=11.2
|
|
[Episode 166810] reward=-117440860.8 actor_loss=0.4014 critic_loss=86520805785.6000 entropy=17.5290 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 166820] reward=-115525861.9 actor_loss=0.3495 critic_loss=80506554014.8965 entropy=17.5263 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 166820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-425562.6 mean_steps=14.5
|
|
[Episode 166830] reward=-118118593.1 actor_loss=0.2753 critic_loss=95322543250.2857 entropy=17.5182 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 166840] reward=-118921988.9 actor_loss=0.3143 critic_loss=84280977066.6667 entropy=17.5071 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 166840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493020.3 mean_steps=14.9
|
|
[Episode 166850] reward=-118508145.3 actor_loss=0.3719 critic_loss=85116311324.4444 entropy=17.5196 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 166860] reward=-115898077.7 actor_loss=0.3038 critic_loss=82532354542.3448 entropy=17.5243 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 166860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-628544.3 mean_steps=12.1
|
|
[Episode 166870] reward=-123056320.4 actor_loss=0.2917 critic_loss=88361509608.7273 entropy=17.5266 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 166880] reward=-122578666.4 actor_loss=0.2660 critic_loss=88139831057.8605 entropy=17.5213 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 166880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486870.8 mean_steps=13.6
|
|
[Episode 166890] reward=-121024333.7 actor_loss=0.2206 critic_loss=105647673753.6000 entropy=17.5137 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 166900] reward=-123303194.4 actor_loss=0.2822 critic_loss=91449367483.7333 entropy=17.5119 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 166900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-645647.1 mean_steps=13.7
|
|
[Episode 166910] reward=-126151152.6 actor_loss=0.2740 critic_loss=121929688820.8696 entropy=17.5107 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 166920] reward=-123992133.4 actor_loss=0.2774 critic_loss=122872888320.0000 entropy=17.5110 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 166920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459649.5 mean_steps=15.7
|
|
[Episode 166930] reward=-119362095.0 actor_loss=0.1840 critic_loss=85241706233.4359 entropy=17.5104 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 166940] reward=-114709471.7 actor_loss=0.3427 critic_loss=82843961273.3793 entropy=17.5032 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 166940] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-363949.9 mean_steps=17.9
|
|
[Episode 166950] reward=-114479986.7 actor_loss=0.2463 critic_loss=77731791433.1429 entropy=17.5076 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 166960] reward=-115751063.1 actor_loss=0.2918 critic_loss=81488144611.5556 entropy=17.5094 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 166960] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-330209.0 mean_steps=17.1
|
|
[Episode 166970] reward=-123394565.7 actor_loss=0.2790 critic_loss=88370506020.5714 entropy=17.5055 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 166980] reward=-119220616.1 actor_loss=0.2481 critic_loss=83190730752.0000 entropy=17.5005 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 166980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-558519.5 mean_steps=12.5
|
|
[Episode 166990] reward=-120247583.7 actor_loss=0.3573 critic_loss=84877197312.0000 entropy=17.4993 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 167000] reward=-120877739.5 actor_loss=0.2504 critic_loss=81175740112.5926 entropy=17.5015 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 167000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567399.7 mean_steps=13.7
|
|
[Episode 167010] reward=-116045346.4 actor_loss=0.3423 critic_loss=81969068962.9091 entropy=17.5001 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 167020] reward=-117932288.8 actor_loss=0.2874 critic_loss=81778182485.3333 entropy=17.4991 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 167020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454472.4 mean_steps=15.7
|
|
[Episode 167030] reward=-116919413.7 actor_loss=0.3630 critic_loss=107789979062.8571 entropy=17.4892 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 167040] reward=-118539592.0 actor_loss=0.2307 critic_loss=82074080870.4000 entropy=17.4897 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 167040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545528.5 mean_steps=13.6
|
|
[Episode 167050] reward=-123467308.9 actor_loss=0.2543 critic_loss=193376742058.6667 entropy=17.4812 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 167060] reward=-121285889.4 actor_loss=0.3092 critic_loss=86643019403.6364 entropy=17.4834 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 167060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454431.1 mean_steps=14.7
|
|
[Episode 167070] reward=-113899148.9 actor_loss=0.3361 critic_loss=77975251101.5385 entropy=17.4870 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 167080] reward=-122054358.1 actor_loss=0.3717 critic_loss=163994362973.0909 entropy=17.4885 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 167080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559170.4 mean_steps=13.6
|
|
[Episode 167090] reward=-117399556.2 actor_loss=0.3656 critic_loss=82450292736.0000 entropy=17.4888 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 167100] reward=-122636011.4 actor_loss=0.2883 critic_loss=88093730133.3333 entropy=17.4819 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 167100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535006.0 mean_steps=13.2
|
|
[Episode 167110] reward=-121787654.9 actor_loss=0.2064 critic_loss=86738995561.4118 entropy=17.4833 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 167120] reward=-124856869.9 actor_loss=0.2379 critic_loss=88890031672.8889 entropy=17.4804 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 167120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526957.5 mean_steps=13.9
|
|
[Episode 167130] reward=-118920166.0 actor_loss=0.2793 critic_loss=79633436823.7037 entropy=17.4834 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 167140] reward=-114149846.8 actor_loss=0.4108 critic_loss=82001886139.7333 entropy=17.4761 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 167140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-557195.6 mean_steps=12.4
|
|
[Episode 167150] reward=-118040872.8 actor_loss=0.2607 critic_loss=83841403640.6857 entropy=17.4748 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 167160] reward=-120853281.4 actor_loss=0.3086 critic_loss=85829031981.5111 entropy=17.4781 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 167160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525992.2 mean_steps=14.3
|
|
[Episode 167170] reward=-120653265.0 actor_loss=0.2382 critic_loss=87449347218.2857 entropy=17.4548 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 167180] reward=-120957232.3 actor_loss=0.2253 critic_loss=84681452397.7143 entropy=17.4487 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 167180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519613.3 mean_steps=14.1
|
|
[Episode 167190] reward=-120190763.0 actor_loss=0.2367 critic_loss=86177542963.2000 entropy=17.4445 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 167200] reward=-118119797.9 actor_loss=0.2970 critic_loss=84439121616.5926 entropy=17.4490 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 167200] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-576431.0 mean_steps=10.8
|
|
[Episode 167210] reward=-116977609.5 actor_loss=0.2836 critic_loss=82754563584.0000 entropy=17.4578 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 167220] reward=-121693708.1 actor_loss=0.2427 critic_loss=87932688091.4286 entropy=17.4618 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 167220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-510120.3 mean_steps=15.1
|
|
[Episode 167230] reward=-117879762.3 actor_loss=0.2557 critic_loss=80808959696.5926 entropy=17.4611 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 167240] reward=-119188072.6 actor_loss=0.3858 critic_loss=86959178590.3158 entropy=17.4706 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 167240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-635104.8 mean_steps=12.6
|
|
[Episode 167250] reward=-118566182.6 actor_loss=0.3432 critic_loss=85839737651.2000 entropy=17.4663 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 167260] reward=-126058940.6 actor_loss=0.3209 critic_loss=88311176098.9091 entropy=17.4687 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 167260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-579861.5 mean_steps=14.6
|
|
[Episode 167270] reward=-122510303.8 actor_loss=0.2777 critic_loss=87126791242.9268 entropy=17.4743 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 167280] reward=-121704765.1 actor_loss=0.1747 critic_loss=83221336701.1555 entropy=17.4734 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 167280] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-217654.2 mean_steps=17.6
|
|
[Episode 167290] reward=-116991849.4 actor_loss=0.3361 critic_loss=82032110062.3448 entropy=17.4838 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 167300] reward=-120194458.2 actor_loss=0.3086 critic_loss=149232182385.7778 entropy=17.4804 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 167300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-561961.7 mean_steps=14.2
|
|
[Episode 167310] reward=-121398502.6 actor_loss=0.2332 critic_loss=88482983708.4444 entropy=17.4857 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 167320] reward=-116897221.3 actor_loss=0.2723 critic_loss=81252527636.4800 entropy=17.4978 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 167320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467619.5 mean_steps=14.2
|
|
[Episode 167330] reward=-121042616.9 actor_loss=0.2561 critic_loss=83968732774.4000 entropy=17.4901 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 167340] reward=-117650876.1 actor_loss=0.3206 critic_loss=81772225716.7059 entropy=17.4871 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 167340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-398646.1 mean_steps=16.9
|
|
[Episode 167350] reward=-116142606.6 actor_loss=0.2550 critic_loss=88613152768.0000 entropy=17.4992 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 167360] reward=-121502706.5 actor_loss=0.3301 critic_loss=105696839787.7895 entropy=17.5059 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 167360] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-350572.5 mean_steps=17.6
|
|
[Episode 167370] reward=-118898572.5 actor_loss=0.3234 critic_loss=82704766976.0000 entropy=17.5132 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 167380] reward=-120639481.2 actor_loss=0.2269 critic_loss=90095425399.4667 entropy=17.5192 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 167380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-455733.6 mean_steps=16.6
|
|
[Episode 167390] reward=-119111482.6 actor_loss=0.2110 critic_loss=90942541238.8571 entropy=17.5196 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 167400] reward=-121263683.5 actor_loss=0.2655 critic_loss=87195609661.4400 entropy=17.5151 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 167400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-358270.6 mean_steps=15.8
|
|
[Episode 167410] reward=-120296904.0 actor_loss=0.2447 critic_loss=85622464768.0000 entropy=17.5085 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 167420] reward=-124397801.6 actor_loss=0.2474 critic_loss=90129052876.8000 entropy=17.5054 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 167420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-623524.8 mean_steps=13.3
|
|
[Episode 167430] reward=-115518294.3 actor_loss=0.3171 critic_loss=83334579450.3111 entropy=17.5039 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 167440] reward=-120579078.6 actor_loss=0.3485 critic_loss=87631776730.0741 entropy=17.5026 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 167440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452564.0 mean_steps=14.4
|
|
[Episode 167450] reward=-117292210.6 actor_loss=0.3047 critic_loss=85576947029.3333 entropy=17.4924 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 167460] reward=-122678508.9 actor_loss=0.2667 critic_loss=92587507280.8421 entropy=17.4927 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 167460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532046.8 mean_steps=13.3
|
|
[Episode 167470] reward=-116511697.0 actor_loss=0.2725 critic_loss=80210709029.4634 entropy=17.4977 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 167480] reward=-117945800.9 actor_loss=0.2790 critic_loss=81116949162.6667 entropy=17.5048 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 167480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-645026.3 mean_steps=12.9
|
|
[Episode 167490] reward=-118361587.8 actor_loss=0.3262 critic_loss=81486828465.2308 entropy=17.5004 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 167500] reward=-121650793.8 actor_loss=0.3264 critic_loss=86802537813.3333 entropy=17.4930 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 167500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435408.3 mean_steps=15.4
|
|
[Episode 167510] reward=-118278369.8 actor_loss=0.2980 critic_loss=103469332550.6207 entropy=17.4898 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 167520] reward=-117856711.0 actor_loss=0.2501 critic_loss=85760689038.2222 entropy=17.4801 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 167520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-574607.1 mean_steps=14.6
|
|
[Episode 167530] reward=-112827645.2 actor_loss=0.3863 critic_loss=86739206826.6667 entropy=17.4868 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 167540] reward=-121270269.8 actor_loss=0.2136 critic_loss=88038904263.1111 entropy=17.4906 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 167540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-602292.8 mean_steps=11.9
|
|
[Episode 167550] reward=-107702064.7 actor_loss=0.3435 critic_loss=87223176681.7391 entropy=17.4914 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 167560] reward=-119346018.4 actor_loss=0.2417 critic_loss=91091746360.8889 entropy=17.4944 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 167560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527831.4 mean_steps=14.2
|
|
[Episode 167570] reward=-115205108.6 actor_loss=0.2915 critic_loss=84013226054.6207 entropy=17.4934 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 167580] reward=-117907223.5 actor_loss=0.2836 critic_loss=94592408429.7143 entropy=17.4862 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 167580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527301.0 mean_steps=13.9
|
|
[Episode 167590] reward=-119282144.7 actor_loss=0.2796 critic_loss=90559523840.0000 entropy=17.4808 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 167600] reward=-112843252.4 actor_loss=0.3984 critic_loss=78752345292.8000 entropy=17.4737 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 167600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503656.2 mean_steps=14.4
|
|
[Episode 167610] reward=-112713260.8 actor_loss=0.4074 critic_loss=112788931015.1111 entropy=17.4719 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 167620] reward=-119387907.0 actor_loss=0.3022 critic_loss=121789801078.1538 entropy=17.4539 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 167620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-465279.0 mean_steps=13.8
|
|
[Episode 167630] reward=-122087750.3 actor_loss=0.3705 critic_loss=88765738598.4000 entropy=17.4575 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 167640] reward=-113118517.3 actor_loss=0.2787 critic_loss=76381496606.7200 entropy=17.4569 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 167640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-472116.6 mean_steps=14.0
|
|
[Episode 167650] reward=-112845736.5 actor_loss=0.2813 critic_loss=79440136005.8182 entropy=17.4447 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 167660] reward=-118673945.0 actor_loss=0.3777 critic_loss=85859722854.4000 entropy=17.4370 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 167660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475405.2 mean_steps=14.5
|
|
[Episode 167670] reward=-116778434.2 actor_loss=0.3165 critic_loss=88428951340.1379 entropy=17.4305 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 167680] reward=-114346249.4 actor_loss=0.2699 critic_loss=82810168700.3428 entropy=17.4319 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 167680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443673.5 mean_steps=15.6
|
|
[Episode 167690] reward=-118546444.9 actor_loss=0.2550 critic_loss=82472598689.6842 entropy=17.4255 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 167700] reward=-117519144.3 actor_loss=0.2373 critic_loss=80087807317.3333 entropy=17.4248 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 167700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-681463.2 mean_steps=13.3
|
|
[Episode 167710] reward=-120021669.8 actor_loss=0.2689 critic_loss=83522725478.4000 entropy=17.4119 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 167720] reward=-115905780.5 actor_loss=0.3623 critic_loss=82132605907.4783 entropy=17.4142 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 167720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-380791.2 mean_steps=16.1
|
|
[Episode 167730] reward=-121356970.9 actor_loss=0.2121 critic_loss=94487998148.9231 entropy=17.4224 approx_kl=0.0114 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 167740] reward=-116985005.1 actor_loss=0.2962 critic_loss=88215212210.0870 entropy=17.4220 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 167740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612513.4 mean_steps=11.8
|
|
[Episode 167750] reward=-115340264.5 actor_loss=0.4156 critic_loss=84916709603.5556 entropy=17.4214 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 167760] reward=-118185608.4 actor_loss=0.3776 critic_loss=84231497591.4667 entropy=17.4218 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 167760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-489247.2 mean_steps=13.2
|
|
[Episode 167770] reward=-117734146.2 actor_loss=0.4697 critic_loss=84314821973.3333 entropy=17.4279 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1517 front_blocked=0
|
|
[Episode 167780] reward=-118814637.1 actor_loss=0.3185 critic_loss=81942606064.9412 entropy=17.4179 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 167780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452849.8 mean_steps=15.8
|
|
[Episode 167790] reward=-111426898.0 actor_loss=0.4520 critic_loss=77574755194.4348 entropy=17.4151 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 167800] reward=-116453193.2 actor_loss=0.2407 critic_loss=82470408192.0000 entropy=17.4150 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 167800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525896.8 mean_steps=14.4
|
|
[Episode 167810] reward=-120616520.2 actor_loss=0.3970 critic_loss=85834763667.3939 entropy=17.4227 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 167820] reward=-113604099.3 actor_loss=0.2950 critic_loss=78963248241.7778 entropy=17.4358 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 167820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383127.4 mean_steps=15.9
|
|
[Episode 167830] reward=-112695601.3 actor_loss=0.2488 critic_loss=75111990693.6471 entropy=17.4393 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 167840] reward=-122195050.5 actor_loss=0.2795 critic_loss=89498110244.5714 entropy=17.4438 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 167840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433468.7 mean_steps=14.3
|
|
[Episode 167850] reward=-116037779.2 actor_loss=0.3468 critic_loss=81217745481.1429 entropy=17.4530 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 167860] reward=-117825271.2 actor_loss=0.3407 critic_loss=82368674511.5676 entropy=17.4392 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 167860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494500.0 mean_steps=14.1
|
|
[Episode 167870] reward=-116698954.1 actor_loss=0.3222 critic_loss=110960425915.7333 entropy=17.4313 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 167880] reward=-116400775.5 actor_loss=0.2380 critic_loss=82439301481.4118 entropy=17.4302 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 167880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526050.1 mean_steps=14.3
|
|
[Episode 167890] reward=-117289122.8 actor_loss=0.2996 critic_loss=80995330234.1818 entropy=17.4349 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 167900] reward=-117887493.7 actor_loss=0.3584 critic_loss=81207849948.6897 entropy=17.4325 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 167900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-466212.2 mean_steps=12.8
|
|
[Episode 167910] reward=-120319978.8 actor_loss=0.2620 critic_loss=92473582080.0000 entropy=17.4375 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 167920] reward=-116927314.9 actor_loss=0.3729 critic_loss=89295613460.4800 entropy=17.4268 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 167920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432812.8 mean_steps=15.5
|
|
[Episode 167930] reward=-117763410.9 actor_loss=0.3292 critic_loss=87917700778.6667 entropy=17.4333 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 167940] reward=-120703027.9 actor_loss=0.2817 critic_loss=85622529392.6400 entropy=17.4200 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 167940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525239.6 mean_steps=14.0
|
|
[Episode 167950] reward=-111999253.1 actor_loss=0.2310 critic_loss=74131173376.0000 entropy=17.4143 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 167960] reward=-116838433.1 actor_loss=0.2536 critic_loss=83262260317.0909 entropy=17.4260 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 167960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-475530.8 mean_steps=13.1
|
|
[Episode 167970] reward=-118627854.0 actor_loss=0.3766 critic_loss=88719856107.5200 entropy=17.4249 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 167980] reward=-117939367.1 actor_loss=0.2953 critic_loss=88068719198.8148 entropy=17.4112 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 167980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-620293.4 mean_steps=12.1
|
|
[Episode 167990] reward=-119807140.4 actor_loss=0.2188 critic_loss=86489248167.7241 entropy=17.3952 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 168000] reward=-112062050.5 actor_loss=0.3278 critic_loss=78463165781.3333 entropy=17.3982 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 168000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504030.8 mean_steps=15.0
|
|
[Episode 168010] reward=-117033161.8 actor_loss=0.2861 critic_loss=79312879057.4545 entropy=17.3906 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 168020] reward=-113600762.1 actor_loss=0.2424 critic_loss=80547584372.3636 entropy=17.3989 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 168020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-420924.2 mean_steps=14.3
|
|
[Episode 168030] reward=-112368550.1 actor_loss=0.3151 critic_loss=83482872490.6667 entropy=17.4039 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 168040] reward=-119231203.7 actor_loss=0.3535 critic_loss=86980924029.1555 entropy=17.4168 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 168040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518640.0 mean_steps=14.2
|
|
[Episode 168050] reward=-116363674.5 actor_loss=0.2923 critic_loss=89284302486.5882 entropy=17.4366 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 168060] reward=-117703364.4 actor_loss=0.3300 critic_loss=81548292778.6667 entropy=17.4461 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 168060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537740.3 mean_steps=14.2
|
|
[Episode 168070] reward=-119974758.7 actor_loss=0.3701 critic_loss=82961431405.7143 entropy=17.4322 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 168080] reward=-115433750.0 actor_loss=0.2167 critic_loss=77337073862.1935 entropy=17.4295 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 168080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451974.7 mean_steps=14.7
|
|
[Episode 168090] reward=-110915218.4 actor_loss=0.3455 critic_loss=75166900224.0000 entropy=17.4216 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 168100] reward=-124037834.5 actor_loss=0.3059 critic_loss=94096354742.8571 entropy=17.4232 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 168100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-412203.8 mean_steps=16.2
|
|
[Episode 168110] reward=-116635329.5 actor_loss=0.3482 critic_loss=83187892527.4074 entropy=17.4188 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 168120] reward=-118938749.8 actor_loss=0.4210 critic_loss=83594076672.0000 entropy=17.4164 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 168120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-742490.8 mean_steps=13.8
|
|
[Episode 168130] reward=-111077439.6 actor_loss=0.3326 critic_loss=74538062568.7273 entropy=17.4198 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 168140] reward=-115619144.5 actor_loss=0.2093 critic_loss=78814815846.4000 entropy=17.4179 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 168140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507851.3 mean_steps=14.4
|
|
[Episode 168150] reward=-121098482.6 actor_loss=0.2606 critic_loss=84415890014.8148 entropy=17.4121 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 168160] reward=-120247773.5 actor_loss=0.2124 critic_loss=83017965568.0000 entropy=17.4092 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 168160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-558217.3 mean_steps=14.7
|
|
[Episode 168170] reward=-120105310.1 actor_loss=0.3102 critic_loss=81442055168.0000 entropy=17.4012 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 168180] reward=-112754474.9 actor_loss=0.3289 critic_loss=80849800630.8571 entropy=17.3922 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 168180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449074.7 mean_steps=15.4
|
|
[Episode 168190] reward=-114059151.9 actor_loss=0.2812 critic_loss=77488405323.2941 entropy=17.4037 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 168200] reward=-113777464.5 actor_loss=0.3371 critic_loss=84008887532.3077 entropy=17.4015 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 168200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-428523.2 mean_steps=14.1
|
|
[Episode 168210] reward=-115322271.2 actor_loss=0.2893 critic_loss=74851109156.5714 entropy=17.4040 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 168220] reward=-118567257.1 actor_loss=0.2834 critic_loss=101055376822.8571 entropy=17.4022 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 168220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483034.9 mean_steps=14.3
|
|
[Episode 168230] reward=-119989081.2 actor_loss=0.3247 critic_loss=113747386368.0000 entropy=17.3975 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 168240] reward=-123825972.8 actor_loss=0.3993 critic_loss=273000413593.6000 entropy=17.3929 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 168240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559189.5 mean_steps=12.4
|
|
[Episode 168250] reward=-118896456.4 actor_loss=0.4377 critic_loss=86751132876.8000 entropy=17.3892 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 168260] reward=-122840601.4 actor_loss=0.2433 critic_loss=92629922007.5789 entropy=17.3826 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 168260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-419628.8 mean_steps=14.2
|
|
[Episode 168270] reward=-117284183.6 actor_loss=0.2983 critic_loss=81458314936.3200 entropy=17.3827 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 168280] reward=-120189697.6 actor_loss=0.2987 critic_loss=114436473360.5161 entropy=17.3880 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 168280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-531051.2 mean_steps=15.3
|
|
[Episode 168290] reward=-120001323.9 actor_loss=0.3140 critic_loss=88556742125.0370 entropy=17.3892 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 168300] reward=-113215296.5 actor_loss=0.3806 critic_loss=105162442997.7600 entropy=17.4009 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 168300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521784.9 mean_steps=15.2
|
|
[Episode 168310] reward=-117227736.1 actor_loss=0.2752 critic_loss=80697004227.0476 entropy=17.4078 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 168320] reward=-116330423.1 actor_loss=0.2477 critic_loss=86474565101.0370 entropy=17.4144 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 168320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-556676.6 mean_steps=12.4
|
|
[Episode 168330] reward=-117046817.3 actor_loss=0.3800 critic_loss=81740063837.0909 entropy=17.4121 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 168340] reward=-115068684.4 actor_loss=0.4563 critic_loss=85756992443.7333 entropy=17.4229 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1530 front_blocked=0
|
|
[Eval 168340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-598399.4 mean_steps=12.8
|
|
[Episode 168350] reward=-111573834.1 actor_loss=0.4969 critic_loss=93495959005.8667 entropy=17.4339 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 168360] reward=-114692716.9 actor_loss=0.2891 critic_loss=79998384915.6923 entropy=17.4219 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 168360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592784.9 mean_steps=13.8
|
|
[Episode 168370] reward=-116080555.3 actor_loss=0.2983 critic_loss=79744740693.3333 entropy=17.4217 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 168380] reward=-116503594.4 actor_loss=0.3323 critic_loss=83778293760.0000 entropy=17.4257 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 168380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-327877.6 mean_steps=16.0
|
|
[Episode 168390] reward=-119422541.8 actor_loss=0.2467 critic_loss=84252655616.0000 entropy=17.4113 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 168400] reward=-118489809.0 actor_loss=0.3228 critic_loss=96414853632.0000 entropy=17.4193 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 168400] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-584209.5 mean_steps=11.9
|
|
[Episode 168410] reward=-116036043.8 actor_loss=0.2450 critic_loss=80730974137.3793 entropy=17.4474 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 168420] reward=-121324631.5 actor_loss=0.3127 critic_loss=83394181963.2941 entropy=17.4496 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 168420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513595.0 mean_steps=12.9
|
|
[Episode 168430] reward=-115366788.8 actor_loss=0.3106 critic_loss=80633703992.8889 entropy=17.4405 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 168440] reward=-123826234.1 actor_loss=0.2676 critic_loss=88012087599.4074 entropy=17.4348 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 168440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-509985.5 mean_steps=14.9
|
|
[Episode 168450] reward=-119711021.2 actor_loss=0.3361 critic_loss=90013580141.7143 entropy=17.4368 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 168460] reward=-112903881.9 actor_loss=0.3258 critic_loss=81848058129.0667 entropy=17.4357 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 168460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-395215.5 mean_steps=15.1
|
|
[Episode 168470] reward=-121480833.5 actor_loss=0.3524 critic_loss=84951626547.2000 entropy=17.4466 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 168480] reward=-117363152.7 actor_loss=0.3012 critic_loss=84869097314.4615 entropy=17.4462 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 168480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510631.5 mean_steps=14.2
|
|
[Episode 168490] reward=-119286346.6 actor_loss=0.2775 critic_loss=100777802215.6190 entropy=17.4564 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 168500] reward=-116101975.3 actor_loss=0.3189 critic_loss=84501232114.1622 entropy=17.4456 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 168500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435395.4 mean_steps=15.6
|
|
[Episode 168510] reward=-115378975.6 actor_loss=0.3145 critic_loss=77776804977.7778 entropy=17.4496 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 168520] reward=-117916567.4 actor_loss=0.2644 critic_loss=117960674508.8000 entropy=17.4656 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 168520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-362496.6 mean_steps=16.6
|
|
[Episode 168530] reward=-114344626.1 actor_loss=0.3318 critic_loss=83351707989.3333 entropy=17.4560 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 168540] reward=-113357134.8 actor_loss=0.3199 critic_loss=79351219996.4444 entropy=17.4461 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 168540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-596790.7 mean_steps=12.7
|
|
[Episode 168550] reward=-115213711.6 actor_loss=0.3677 critic_loss=86756716009.7391 entropy=17.4327 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 168560] reward=-118111864.4 actor_loss=0.3457 critic_loss=79155444849.7778 entropy=17.4665 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 168560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446792.4 mean_steps=15.2
|
|
[Episode 168570] reward=-121619388.1 actor_loss=0.3724 critic_loss=85480297813.3333 entropy=17.4671 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 168580] reward=-117701254.9 actor_loss=0.3119 critic_loss=79976640012.4878 entropy=17.4569 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 168580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479844.9 mean_steps=13.9
|
|
[Episode 168590] reward=-116554186.0 actor_loss=0.3193 critic_loss=83882491422.1176 entropy=17.4633 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 168600] reward=-118948818.7 actor_loss=0.2914 critic_loss=81589195935.2889 entropy=17.4641 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 168600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-388545.1 mean_steps=15.9
|
|
[Episode 168610] reward=-117553649.3 actor_loss=0.4085 critic_loss=80212661428.7059 entropy=17.4686 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 168620] reward=-115075659.3 actor_loss=0.3036 critic_loss=82703903948.8000 entropy=17.4538 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 168620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505030.9 mean_steps=13.2
|
|
[Episode 168630] reward=-116805813.9 actor_loss=0.3798 critic_loss=79212942313.2444 entropy=17.4693 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 168640] reward=-120371283.2 actor_loss=0.2042 critic_loss=86736655911.3846 entropy=17.4687 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 168640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-394625.9 mean_steps=14.8
|
|
[Episode 168650] reward=-115600412.8 actor_loss=0.2995 critic_loss=79046751300.2667 entropy=17.4661 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 168660] reward=-114882399.4 actor_loss=0.3769 critic_loss=79437566589.1555 entropy=17.4612 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 168660] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-697208.6 mean_steps=11.8
|
|
[Episode 168670] reward=-115619452.5 actor_loss=0.2904 critic_loss=81449313103.4483 entropy=17.4802 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 168680] reward=-118981816.8 actor_loss=0.2668 critic_loss=83287306148.9778 entropy=17.4764 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 168680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473460.4 mean_steps=14.1
|
|
[Episode 168690] reward=-118936135.3 actor_loss=0.2894 critic_loss=80175089982.5778 entropy=17.4787 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 168700] reward=-113398999.6 actor_loss=0.3214 critic_loss=79028696769.4222 entropy=17.4750 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 168700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-352316.6 mean_steps=16.8
|
|
[Episode 168710] reward=-115150565.0 actor_loss=0.3714 critic_loss=77846219616.7111 entropy=17.4599 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 168720] reward=-120145953.1 actor_loss=0.3268 critic_loss=84923052305.0667 entropy=17.4452 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 168720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-566061.8 mean_steps=14.2
|
|
[Episode 168730] reward=-117320818.6 actor_loss=0.3347 critic_loss=87286080565.8947 entropy=17.4389 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 168740] reward=-112325396.8 actor_loss=0.3594 critic_loss=80809445512.5333 entropy=17.4435 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 168740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589474.0 mean_steps=12.7
|
|
[Episode 168750] reward=-111072268.0 actor_loss=0.2857 critic_loss=80176550889.2444 entropy=17.4560 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 168760] reward=-113728985.9 actor_loss=0.4420 critic_loss=74260092928.0000 entropy=17.4476 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Eval 168760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531622.8 mean_steps=13.5
|
|
[Episode 168770] reward=-113312606.5 actor_loss=0.3782 critic_loss=78050562503.1111 entropy=17.4459 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 168780] reward=-116591233.8 actor_loss=0.2562 critic_loss=83638502216.2051 entropy=17.4419 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 168780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474620.9 mean_steps=13.9
|
|
[Episode 168790] reward=-115191685.4 actor_loss=0.3665 critic_loss=80858190233.6000 entropy=17.4338 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 168800] reward=-117239841.0 actor_loss=0.3569 critic_loss=85342172997.8182 entropy=17.4234 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 168800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435644.9 mean_steps=14.5
|
|
[Episode 168810] reward=-122328113.4 actor_loss=0.2933 critic_loss=85195341342.1176 entropy=17.4306 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 168820] reward=-115456917.2 actor_loss=0.3436 critic_loss=83528920678.4000 entropy=17.4316 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 168820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512989.5 mean_steps=14.1
|
|
[Episode 168830] reward=-117089500.2 actor_loss=0.3508 critic_loss=83406236093.2174 entropy=17.4270 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 168840] reward=-123553914.5 actor_loss=0.3058 critic_loss=93095332398.5455 entropy=17.4313 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 168840] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-289740.7 mean_steps=17.1
|
|
[Episode 168850] reward=-117406758.0 actor_loss=0.2604 critic_loss=82654940672.0000 entropy=17.4237 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 168860] reward=-117469538.2 actor_loss=0.4105 critic_loss=79893490528.7111 entropy=17.4178 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 168860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506477.6 mean_steps=14.1
|
|
[Episode 168870] reward=-116864445.1 actor_loss=0.2547 critic_loss=82847465799.6800 entropy=17.4135 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 168880] reward=-114726199.4 actor_loss=0.2508 critic_loss=79419481747.9111 entropy=17.4028 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 168880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445847.1 mean_steps=14.7
|
|
[Episode 168890] reward=-112338392.1 actor_loss=0.3708 critic_loss=73282982798.2222 entropy=17.4152 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 168900] reward=-119196336.9 actor_loss=0.4658 critic_loss=85596035299.5556 entropy=17.4128 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 168900] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-695788.9 mean_steps=11.4
|
|
[Episode 168910] reward=-120689888.7 actor_loss=0.2806 critic_loss=83178460416.0000 entropy=17.4167 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 168920] reward=-116896244.1 actor_loss=0.3186 critic_loss=83010093738.6667 entropy=17.4226 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 168920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549393.7 mean_steps=13.0
|
|
[Episode 168930] reward=-122431724.0 actor_loss=0.2662 critic_loss=85533688111.4074 entropy=17.4162 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 168940] reward=-121980705.6 actor_loss=0.2913 critic_loss=85206241926.7368 entropy=17.4156 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 168940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431695.4 mean_steps=15.2
|
|
[Episode 168950] reward=-121709563.8 actor_loss=0.3738 critic_loss=85963355113.2444 entropy=17.4122 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 168960] reward=-117618205.5 actor_loss=0.3064 critic_loss=82303388876.8000 entropy=17.4189 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 168960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548167.6 mean_steps=13.3
|
|
[Episode 168970] reward=-114314318.1 actor_loss=0.3121 critic_loss=75171928109.5111 entropy=17.4139 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 168980] reward=-117084591.6 actor_loss=0.3328 critic_loss=80749534789.1892 entropy=17.4158 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 168980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482843.6 mean_steps=13.8
|
|
[Episode 168990] reward=-114378983.4 actor_loss=0.3373 critic_loss=80454098761.9556 entropy=17.4143 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 169000] reward=-115870179.1 actor_loss=0.2705 critic_loss=82432089770.6667 entropy=17.4273 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 169000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-611276.4 mean_steps=13.9
|
|
[Episode 169010] reward=-115668545.4 actor_loss=0.3262 critic_loss=83049636386.1333 entropy=17.4486 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 169020] reward=-116545833.2 actor_loss=0.3902 critic_loss=80032738081.3913 entropy=17.4444 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 169020] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-296393.2 mean_steps=17.6
|
|
[Episode 169030] reward=-116552559.8 actor_loss=0.3170 critic_loss=80571632844.8000 entropy=17.4469 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 169040] reward=-115574169.7 actor_loss=0.2817 critic_loss=78714682895.5152 entropy=17.4506 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 169040] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623383.0 mean_steps=11.9
|
|
[Episode 169050] reward=-120977067.5 actor_loss=0.3405 critic_loss=83964656251.5862 entropy=17.4484 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 169060] reward=-120050405.9 actor_loss=0.3247 critic_loss=84785651009.8286 entropy=17.4495 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 169060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-522040.3 mean_steps=12.2
|
|
[Episode 169070] reward=-112875626.7 actor_loss=0.2867 critic_loss=79032920356.5714 entropy=17.4491 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 169080] reward=-120033150.1 actor_loss=0.1913 critic_loss=83274559488.0000 entropy=17.4595 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 169080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-574540.0 mean_steps=12.6
|
|
[Episode 169090] reward=-115768314.3 actor_loss=0.3149 critic_loss=75566227563.7895 entropy=17.4501 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 169100] reward=-116855199.6 actor_loss=0.3135 critic_loss=79547705636.5714 entropy=17.4423 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 169100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-595762.2 mean_steps=14.8
|
|
[Episode 169110] reward=-118924710.3 actor_loss=0.2868 critic_loss=86947912315.5862 entropy=17.4585 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 169120] reward=-121393418.6 actor_loss=0.2905 critic_loss=84188572148.6222 entropy=17.4620 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 169120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538852.1 mean_steps=14.2
|
|
[Episode 169130] reward=-123310452.1 actor_loss=0.2678 critic_loss=95218582449.2308 entropy=17.4565 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 169140] reward=-116539281.3 actor_loss=0.2274 critic_loss=79473802999.7419 entropy=17.4651 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 169140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476614.3 mean_steps=14.8
|
|
[Episode 169150] reward=-120357179.1 actor_loss=0.3291 critic_loss=79302495982.9333 entropy=17.4676 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 169160] reward=-120286879.8 actor_loss=0.2338 critic_loss=86437919675.7333 entropy=17.4478 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 169160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-520612.6 mean_steps=13.8
|
|
[Episode 169170] reward=-114845690.4 actor_loss=0.1883 critic_loss=79424730033.2308 entropy=17.4550 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 169180] reward=-112840602.0 actor_loss=0.3930 critic_loss=78253915795.9111 entropy=17.4606 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 169180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-534689.8 mean_steps=15.2
|
|
[Episode 169190] reward=-119442468.3 actor_loss=0.3408 critic_loss=102961195417.6000 entropy=17.4637 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 169200] reward=-122721553.9 actor_loss=0.3060 critic_loss=88196988563.9111 entropy=17.4595 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 169200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552851.3 mean_steps=14.4
|
|
[Episode 169210] reward=-117270549.2 actor_loss=0.2271 critic_loss=82878947510.0444 entropy=17.4523 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 169220] reward=-120070344.3 actor_loss=0.3083 critic_loss=80426313272.8889 entropy=17.4508 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 169220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486789.4 mean_steps=13.9
|
|
[Episode 169230] reward=-116318843.7 actor_loss=0.3337 critic_loss=77375794674.8718 entropy=17.4492 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 169240] reward=-120223238.3 actor_loss=0.3205 critic_loss=86396245515.3778 entropy=17.4433 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 169240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-466082.7 mean_steps=13.8
|
|
[Episode 169250] reward=-124866195.5 actor_loss=0.2976 critic_loss=88433764761.6000 entropy=17.4494 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 169260] reward=-121436697.9 actor_loss=0.2643 critic_loss=83641797017.6000 entropy=17.4726 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 169260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-615079.5 mean_steps=14.5
|
|
[Episode 169270] reward=-120584422.7 actor_loss=0.2885 critic_loss=83889334857.1429 entropy=17.4725 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 169280] reward=-119175991.7 actor_loss=0.3545 critic_loss=81844429528.1778 entropy=17.4733 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 169280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-505196.2 mean_steps=15.8
|
|
[Episode 169290] reward=-117859588.6 actor_loss=0.3081 critic_loss=83347774122.6667 entropy=17.4790 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 169300] reward=-117098815.9 actor_loss=0.2527 critic_loss=85703925395.9111 entropy=17.4790 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 169300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454720.2 mean_steps=15.3
|
|
[Episode 169310] reward=-120537850.6 actor_loss=0.2413 critic_loss=87165762048.0000 entropy=17.4926 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 169320] reward=-115248673.4 actor_loss=0.3200 critic_loss=78534884205.7143 entropy=17.4931 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 169320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449358.7 mean_steps=15.5
|
|
[Episode 169330] reward=-128423911.0 actor_loss=0.2332 critic_loss=679323381760.0000 entropy=17.4871 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 169340] reward=-116144083.8 actor_loss=0.2947 critic_loss=84139111219.2000 entropy=17.4890 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 169340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541900.2 mean_steps=13.4
|
|
[Episode 169350] reward=-112483300.1 actor_loss=0.3049 critic_loss=82715638027.1304 entropy=17.4872 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 169360] reward=-117421195.7 actor_loss=0.3543 critic_loss=83804228707.0968 entropy=17.4881 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 169360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437671.6 mean_steps=14.5
|
|
[Episode 169370] reward=-116541577.7 actor_loss=0.3834 critic_loss=85314000301.4194 entropy=17.5060 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 169380] reward=-114041442.8 actor_loss=0.3414 critic_loss=82285850445.9130 entropy=17.4941 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 169380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488778.0 mean_steps=15.1
|
|
[Episode 169390] reward=-115217318.1 actor_loss=0.3767 critic_loss=81590886741.3333 entropy=17.4943 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 169400] reward=-120603981.6 actor_loss=0.2704 critic_loss=85070902110.3158 entropy=17.4858 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 169400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459550.9 mean_steps=14.5
|
|
[Episode 169410] reward=-125314536.1 actor_loss=0.2226 critic_loss=93641041822.4762 entropy=17.4772 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 169420] reward=-117517323.8 actor_loss=0.2785 critic_loss=79362993356.8000 entropy=17.4901 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 169420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-675734.8 mean_steps=12.8
|
|
[Episode 169430] reward=-111672025.4 actor_loss=0.3064 critic_loss=82111923665.4545 entropy=17.5022 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 169440] reward=-117662282.7 actor_loss=0.2640 critic_loss=82619555020.8000 entropy=17.5046 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 169440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-630110.5 mean_steps=12.9
|
|
[Episode 169450] reward=-121511956.2 actor_loss=0.2972 critic_loss=87997024635.2593 entropy=17.4954 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 169460] reward=-111980918.5 actor_loss=0.2909 critic_loss=83443375542.8571 entropy=17.4881 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 169460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454968.6 mean_steps=15.2
|
|
[Episode 169470] reward=-116278875.3 actor_loss=0.3847 critic_loss=87097735905.2800 entropy=17.4840 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 169480] reward=-116661776.7 actor_loss=0.1920 critic_loss=80517606838.8571 entropy=17.4853 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 169480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532044.6 mean_steps=14.4
|
|
[Episode 169490] reward=-122346871.8 actor_loss=0.2354 critic_loss=93070058700.8000 entropy=17.4919 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 169500] reward=-116799440.0 actor_loss=0.2597 critic_loss=88910897542.0952 entropy=17.4920 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 169500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477417.5 mean_steps=14.2
|
|
[Episode 169510] reward=-119876870.2 actor_loss=0.2903 critic_loss=88246159360.0000 entropy=17.4910 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 169520] reward=-121598774.7 actor_loss=0.2908 critic_loss=93181839805.2174 entropy=17.4862 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 169520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414281.3 mean_steps=15.2
|
|
[Episode 169530] reward=-117002204.2 actor_loss=0.3059 critic_loss=86938710991.2381 entropy=17.4800 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 169540] reward=-121026559.8 actor_loss=0.2536 critic_loss=92874625024.0000 entropy=17.4847 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 169540] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-708358.5 mean_steps=10.7
|
|
[Episode 169550] reward=-120694152.4 actor_loss=0.2887 critic_loss=89016508944.5161 entropy=17.4804 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 169560] reward=-118015878.5 actor_loss=0.4453 critic_loss=178160076966.0540 entropy=17.4692 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 169560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434715.2 mean_steps=14.7
|
|
[Episode 169570] reward=-114598342.6 actor_loss=0.3299 critic_loss=83243622400.0000 entropy=17.4719 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 169580] reward=-115358518.1 actor_loss=0.3250 critic_loss=83226123317.8947 entropy=17.4777 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 169580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597731.8 mean_steps=12.8
|
|
[Episode 169590] reward=-117930911.8 actor_loss=0.3471 critic_loss=85687587446.1538 entropy=17.4770 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 169600] reward=-116362370.1 actor_loss=0.3482 critic_loss=89645167669.8947 entropy=17.4808 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 169600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-632972.0 mean_steps=13.1
|
|
[Episode 169610] reward=-116444755.0 actor_loss=0.2713 critic_loss=80415547582.5116 entropy=17.4858 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 169620] reward=-113084719.4 actor_loss=0.3609 critic_loss=78264085897.8462 entropy=17.4909 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 169620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483425.1 mean_steps=14.8
|
|
[Episode 169630] reward=-117940845.1 actor_loss=0.4280 critic_loss=101895201678.2222 entropy=17.5047 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 169640] reward=-122066463.5 actor_loss=0.2938 critic_loss=92590732902.4000 entropy=17.5104 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 169640] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-720033.5 mean_steps=11.9
|
|
[Episode 169650] reward=-111989014.7 actor_loss=0.3892 critic_loss=81759741366.8571 entropy=17.4957 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 169660] reward=-120404284.9 actor_loss=0.2451 critic_loss=96871754787.3103 entropy=17.5043 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 169660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533770.9 mean_steps=13.0
|
|
[Episode 169670] reward=-120138449.3 actor_loss=0.2851 critic_loss=87444951859.2000 entropy=17.5143 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 169680] reward=-117583326.1 actor_loss=0.3846 critic_loss=85697160806.4000 entropy=17.5214 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 169680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437731.7 mean_steps=15.5
|
|
[Episode 169690] reward=-117349432.7 actor_loss=0.2360 critic_loss=107013895168.0000 entropy=17.5360 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 169700] reward=-120438107.6 actor_loss=0.2671 critic_loss=85956349245.7931 entropy=17.5342 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 169700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546923.5 mean_steps=13.6
|
|
[Episode 169710] reward=-111975016.6 actor_loss=0.2564 critic_loss=74699554816.0000 entropy=17.5218 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 169720] reward=-113503158.4 actor_loss=0.3728 critic_loss=81521687713.6842 entropy=17.5282 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 169720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-558921.4 mean_steps=14.4
|
|
[Episode 169730] reward=-118281617.3 actor_loss=0.3682 critic_loss=88374938112.0000 entropy=17.5281 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 169740] reward=-122162040.7 actor_loss=0.2161 critic_loss=85885080917.3333 entropy=17.5262 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 169740] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-331795.6 mean_steps=18.4
|
|
[Episode 169750] reward=-117706793.4 actor_loss=0.3692 critic_loss=86072383585.5238 entropy=17.5285 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 169760] reward=-123188805.0 actor_loss=0.1420 critic_loss=88178908273.7778 entropy=17.5228 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 169760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-630546.3 mean_steps=12.2
|
|
[Episode 169770] reward=-115295225.2 actor_loss=0.4033 critic_loss=81980828330.6667 entropy=17.5193 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 169780] reward=-120731374.3 actor_loss=0.2435 critic_loss=88754893877.8947 entropy=17.5220 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 169780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-637582.5 mean_steps=12.9
|
|
[Episode 169790] reward=-117470313.0 actor_loss=0.3674 critic_loss=87868774263.4667 entropy=17.5231 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 169800] reward=-119102983.2 actor_loss=0.3131 critic_loss=90050104758.8571 entropy=17.5144 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 169800] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-372102.3 mean_steps=16.8
|
|
[Episode 169810] reward=-115003665.5 actor_loss=0.3049 critic_loss=82020078387.2000 entropy=17.5304 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 169820] reward=-114652956.6 actor_loss=0.2225 critic_loss=77433362245.8182 entropy=17.5330 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 169820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-621340.3 mean_steps=12.7
|
|
[Episode 169830] reward=-120846527.1 actor_loss=0.2956 critic_loss=91435132518.4000 entropy=17.5312 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 169840] reward=-119257818.8 actor_loss=0.2304 critic_loss=89494951594.6667 entropy=17.5353 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 169840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-645870.0 mean_steps=14.2
|
|
[Episode 169850] reward=-119152724.0 actor_loss=0.3022 critic_loss=108201608685.0370 entropy=17.5327 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 169860] reward=-125128524.6 actor_loss=0.2936 critic_loss=95283708416.0000 entropy=17.5289 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 169860] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-400056.5 mean_steps=18.1
|
|
[Episode 169870] reward=-116992043.5 actor_loss=0.2537 critic_loss=85375811584.0000 entropy=17.5347 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 169880] reward=-122319444.8 actor_loss=0.3414 critic_loss=90127049185.8824 entropy=17.5395 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 169880] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-374974.3 mean_steps=16.7
|
|
[Episode 169890] reward=-122029333.8 actor_loss=0.2520 critic_loss=87537351533.7143 entropy=17.5343 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 169900] reward=-118817928.2 actor_loss=0.2937 critic_loss=87663681926.0952 entropy=17.5328 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 169900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-588867.9 mean_steps=12.9
|
|
[Episode 169910] reward=-119405514.1 actor_loss=0.2466 critic_loss=86529026925.7143 entropy=17.5240 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 169920] reward=-116737828.9 actor_loss=0.3007 critic_loss=82802581094.4000 entropy=17.5310 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 169920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518177.0 mean_steps=14.2
|
|
[Episode 169930] reward=-121182685.4 actor_loss=0.2995 critic_loss=85062466977.1852 entropy=17.5218 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 169940] reward=-119119268.8 actor_loss=0.2928 critic_loss=86788107170.9091 entropy=17.5211 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 169940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-611566.6 mean_steps=13.6
|
|
[Episode 169950] reward=-122974657.5 actor_loss=0.2072 critic_loss=95900093644.8000 entropy=17.5216 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 169960] reward=-122629191.6 actor_loss=0.2781 critic_loss=85892966645.7600 entropy=17.5103 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 169960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578567.1 mean_steps=13.7
|
|
[Episode 169970] reward=-112657262.4 actor_loss=0.3192 critic_loss=77335248896.0000 entropy=17.5170 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 169980] reward=-119199671.8 actor_loss=0.3117 critic_loss=91181948381.8667 entropy=17.5117 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 169980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572232.1 mean_steps=12.2
|
|
[Episode 169990] reward=-119000038.3 actor_loss=0.2663 critic_loss=85720444446.1176 entropy=17.5043 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 170000] reward=-118261025.6 actor_loss=0.2632 critic_loss=89394986263.2727 entropy=17.5018 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 170000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536822.1 mean_steps=13.2
|
|
[Episode 170010] reward=-119152834.3 actor_loss=0.3277 critic_loss=84187757499.7333 entropy=17.5079 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 170020] reward=-119161015.6 actor_loss=0.3104 critic_loss=78220171421.5385 entropy=17.5097 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 170020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-574089.8 mean_steps=12.7
|
|
[Episode 170030] reward=-118093215.8 actor_loss=0.2992 critic_loss=84872764447.0303 entropy=17.5106 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 170040] reward=-117713677.2 actor_loss=0.3775 critic_loss=83276577686.0690 entropy=17.5107 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 170040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484458.8 mean_steps=13.8
|
|
[Episode 170050] reward=-118516471.9 actor_loss=0.2430 critic_loss=78631899409.0667 entropy=17.5171 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 170060] reward=-119683256.3 actor_loss=0.3031 critic_loss=84873578496.0000 entropy=17.5137 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 170060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469540.2 mean_steps=15.2
|
|
[Episode 170070] reward=-119333680.3 actor_loss=0.2564 critic_loss=81971676774.4000 entropy=17.5050 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 170080] reward=-121115957.3 actor_loss=0.3519 critic_loss=86316957413.5172 entropy=17.5242 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 170080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536273.3 mean_steps=14.2
|
|
[Episode 170090] reward=-114622877.0 actor_loss=0.2621 critic_loss=78717974027.3778 entropy=17.5073 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 170100] reward=-117247682.3 actor_loss=0.2557 critic_loss=84927937114.3529 entropy=17.5002 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 170100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517062.4 mean_steps=14.0
|
|
[Episode 170110] reward=-118799169.2 actor_loss=0.2875 critic_loss=89068097991.1111 entropy=17.5154 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 170120] reward=-114989705.9 actor_loss=0.2173 critic_loss=82661679559.1111 entropy=17.5126 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 170120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-594399.9 mean_steps=14.4
|
|
[Episode 170130] reward=-121973502.0 actor_loss=0.3157 critic_loss=85489201561.6000 entropy=17.5112 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 170140] reward=-116306265.9 actor_loss=0.2759 critic_loss=80199672289.8824 entropy=17.5255 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 170140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-419920.7 mean_steps=16.6
|
|
[Episode 170150] reward=-117183939.0 actor_loss=0.3422 critic_loss=88883902756.5714 entropy=17.5366 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 170160] reward=-119285273.4 actor_loss=0.2002 critic_loss=78903499525.6889 entropy=17.5454 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 170160] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-260148.0 mean_steps=17.4
|
|
[Episode 170170] reward=-115287732.1 actor_loss=0.3198 critic_loss=84024120770.5600 entropy=17.5749 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 170180] reward=-115136531.1 actor_loss=0.3548 critic_loss=83109322752.0000 entropy=17.5877 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 170180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579277.4 mean_steps=12.7
|
|
[Episode 170190] reward=-120349197.9 actor_loss=0.3011 critic_loss=83144127465.2444 entropy=17.5915 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 170200] reward=-116109191.3 actor_loss=0.3482 critic_loss=77828611003.7333 entropy=17.5723 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 170200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-373135.6 mean_steps=16.4
|
|
[Episode 170210] reward=-116710528.7 actor_loss=0.2907 critic_loss=111753363456.0000 entropy=17.5558 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 170220] reward=-110595504.8 actor_loss=0.3590 critic_loss=86158824789.3333 entropy=17.5569 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 170220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516184.4 mean_steps=14.4
|
|
[Episode 170230] reward=-115685037.7 actor_loss=0.2368 critic_loss=85812603562.6667 entropy=17.5675 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 170240] reward=-117801577.0 actor_loss=0.3083 critic_loss=81074159616.0000 entropy=17.5604 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 170240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-635971.7 mean_steps=13.0
|
|
[Episode 170250] reward=-116100558.3 actor_loss=0.2840 critic_loss=80294053114.3111 entropy=17.5454 approx_kl=0.0106 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 170260] reward=-119353765.7 actor_loss=0.1694 critic_loss=83068114013.0909 entropy=17.5144 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 170260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533026.9 mean_steps=14.2
|
|
[Episode 170270] reward=-114406246.0 actor_loss=0.3222 critic_loss=78412597566.5778 entropy=17.5056 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 170280] reward=-119517432.5 actor_loss=0.1512 critic_loss=82577898583.7714 entropy=17.5096 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 170280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433402.5 mean_steps=14.9
|
|
[Episode 170290] reward=-112612375.2 actor_loss=0.3663 critic_loss=77659633163.3778 entropy=17.5169 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 170300] reward=-116266104.4 actor_loss=0.3971 critic_loss=85139621120.0000 entropy=17.5147 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 170300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-516430.7 mean_steps=13.1
|
|
[Episode 170310] reward=-120364715.8 actor_loss=0.2712 critic_loss=87569028332.3077 entropy=17.5162 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 170320] reward=-121065958.1 actor_loss=0.3382 critic_loss=90531380766.1176 entropy=17.5312 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 170320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520229.8 mean_steps=14.2
|
|
[Episode 170330] reward=-119016373.0 actor_loss=0.2822 critic_loss=96596654578.1622 entropy=17.5353 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 170340] reward=-117746495.5 actor_loss=0.3290 critic_loss=83569722072.1778 entropy=17.5311 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 170340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-387895.9 mean_steps=15.4
|
|
[Episode 170350] reward=-117699299.5 actor_loss=0.3807 critic_loss=85536901620.6222 entropy=17.5311 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 170360] reward=-118952631.5 actor_loss=0.2519 critic_loss=85982922524.4444 entropy=17.5394 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 170360] success_rate=0.750 qp_infeasible_rate=0.250 mean_return=-185561.0 mean_steps=19.6
|
|
[Episode 170370] reward=-114235388.0 actor_loss=0.4495 critic_loss=82392389565.9355 entropy=17.5377 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 170380] reward=-120630087.7 actor_loss=0.1961 critic_loss=85057832823.4667 entropy=17.5364 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 170380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590782.8 mean_steps=12.8
|
|
[Episode 170390] reward=-120506699.7 actor_loss=0.3496 critic_loss=86604945999.6444 entropy=17.5490 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 170400] reward=-120945260.5 actor_loss=0.2323 critic_loss=92794944443.7333 entropy=17.5526 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 170400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-435845.0 mean_steps=16.4
|
|
[Episode 170410] reward=-118031483.2 actor_loss=0.3664 critic_loss=81285983292.2353 entropy=17.5629 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 170420] reward=-110881575.4 actor_loss=0.3687 critic_loss=76468411687.8222 entropy=17.5728 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 170420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473244.0 mean_steps=14.7
|
|
[Episode 170430] reward=-119335348.9 actor_loss=0.1972 critic_loss=82208854308.5714 entropy=17.5790 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 170440] reward=-119450627.3 actor_loss=0.2835 critic_loss=409502367744.0000 entropy=17.5836 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 170440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588693.2 mean_steps=13.8
|
|
[Episode 170450] reward=-114461170.5 actor_loss=0.3795 critic_loss=75411602432.0000 entropy=17.5912 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 170460] reward=-120000798.8 actor_loss=0.2864 critic_loss=87847482461.0909 entropy=17.5627 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 170460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-451388.4 mean_steps=16.4
|
|
[Episode 170470] reward=-119066808.3 actor_loss=0.2525 critic_loss=82239156315.0222 entropy=17.5743 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 170480] reward=-118089508.8 actor_loss=0.3041 critic_loss=80750327905.5238 entropy=17.5766 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 170480] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-646956.8 mean_steps=12.1
|
|
[Episode 170490] reward=-116178312.6 actor_loss=0.2512 critic_loss=80041497222.7368 entropy=17.5832 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 170500] reward=-120709484.5 actor_loss=0.2601 critic_loss=88675669674.6667 entropy=17.5901 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 170500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553158.4 mean_steps=13.2
|
|
[Episode 170510] reward=-167100011.4 actor_loss=0.2990 critic_loss=10876549688342.7559 entropy=17.5906 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 170520] reward=-119931951.3 actor_loss=0.3112 critic_loss=86105573218.4615 entropy=17.5969 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 170520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540478.1 mean_steps=13.8
|
|
[Episode 170530] reward=-117761822.2 actor_loss=0.2858 critic_loss=86999526793.8462 entropy=17.6087 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 170540] reward=-115856538.4 actor_loss=0.3624 critic_loss=85287845531.8261 entropy=17.6079 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 170540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-396449.2 mean_steps=15.3
|
|
[Episode 170550] reward=-118359442.4 actor_loss=0.3016 critic_loss=82410553999.3600 entropy=17.6084 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 170560] reward=-121349948.7 actor_loss=0.2558 critic_loss=84959023104.0000 entropy=17.6009 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 170560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-510049.8 mean_steps=15.1
|
|
[Episode 170570] reward=-118615771.9 actor_loss=0.3156 critic_loss=82150758520.4706 entropy=17.6044 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 170580] reward=-119949283.4 actor_loss=0.1728 critic_loss=81605452520.7273 entropy=17.6046 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 170580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521553.7 mean_steps=14.1
|
|
[Episode 170590] reward=-120831506.2 actor_loss=0.2942 critic_loss=87478145609.1429 entropy=17.6138 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 170600] reward=-120923603.2 actor_loss=0.3957 critic_loss=88787344777.8462 entropy=17.6199 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 170600] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-333501.4 mean_steps=16.6
|
|
[Episode 170610] reward=-117376477.2 actor_loss=0.3889 critic_loss=84732146284.6061 entropy=17.6160 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 170620] reward=-118249034.6 actor_loss=0.4400 critic_loss=88490962520.2759 entropy=17.6275 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 170620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469289.3 mean_steps=14.8
|
|
[Episode 170630] reward=-116520129.0 actor_loss=0.3203 critic_loss=82870364062.4762 entropy=17.6204 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 170640] reward=-115408723.1 actor_loss=0.3072 critic_loss=81990428262.4000 entropy=17.6075 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 170640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422325.3 mean_steps=15.6
|
|
[Episode 170650] reward=-116703577.8 actor_loss=0.3007 critic_loss=80933292198.0540 entropy=17.6157 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 170660] reward=-116182405.0 actor_loss=0.2802 critic_loss=86088207609.0811 entropy=17.6036 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 170660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-496598.6 mean_steps=13.2
|
|
[Episode 170670] reward=-119689361.8 actor_loss=0.2318 critic_loss=86265704879.1579 entropy=17.6143 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 170680] reward=-114361137.9 actor_loss=0.3497 critic_loss=84885091039.1795 entropy=17.6194 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 170680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536408.8 mean_steps=13.2
|
|
[Episode 170690] reward=-118292804.0 actor_loss=0.3134 critic_loss=88831377408.0000 entropy=17.6142 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 170700] reward=-117234447.4 actor_loss=0.2227 critic_loss=84723093230.9333 entropy=17.6096 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 170700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-588805.3 mean_steps=13.1
|
|
[Episode 170710] reward=-118763121.0 actor_loss=0.2772 critic_loss=79911118438.4000 entropy=17.6057 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 170720] reward=-120031768.5 actor_loss=0.3513 critic_loss=83203222627.0968 entropy=17.6088 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 170720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565242.0 mean_steps=13.7
|
|
[Episode 170730] reward=-119938786.4 actor_loss=0.2855 critic_loss=83697593856.0000 entropy=17.6070 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 170740] reward=-115951181.8 actor_loss=0.3199 critic_loss=76811128591.0588 entropy=17.6026 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 170740] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-577051.4 mean_steps=10.8
|
|
[Episode 170750] reward=-119261273.9 actor_loss=0.2908 critic_loss=84879764386.9091 entropy=17.6109 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 170760] reward=-116176784.9 actor_loss=0.3393 critic_loss=87062270851.1219 entropy=17.6105 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 170760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562693.0 mean_steps=12.5
|
|
[Episode 170770] reward=-121229343.8 actor_loss=0.2793 critic_loss=87849184008.8276 entropy=17.6074 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 170780] reward=-115391209.2 actor_loss=0.2866 critic_loss=79849392128.0000 entropy=17.6049 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 170780] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-403624.8 mean_steps=17.1
|
|
[Episode 170790] reward=-121578440.3 actor_loss=0.3425 critic_loss=85136542130.4242 entropy=17.6063 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 170800] reward=-117373434.3 actor_loss=0.2341 critic_loss=78371443334.7368 entropy=17.6131 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 170800] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-325517.5 mean_steps=16.4
|
|
[Episode 170810] reward=-119620859.0 actor_loss=0.2234 critic_loss=79447237778.2857 entropy=17.6076 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 170820] reward=-122976741.5 actor_loss=0.2608 critic_loss=88976228352.0000 entropy=17.6036 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 170820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-620643.3 mean_steps=12.7
|
|
[Episode 170830] reward=-122146711.6 actor_loss=0.3638 critic_loss=88218492563.9111 entropy=17.6108 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 170840] reward=-117998190.8 actor_loss=0.3238 critic_loss=83433632059.0769 entropy=17.6092 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 170840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500130.3 mean_steps=14.0
|
|
[Episode 170850] reward=-114150866.1 actor_loss=0.3022 critic_loss=86673349017.6000 entropy=17.6023 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 170860] reward=-118935693.2 actor_loss=0.2812 critic_loss=83702994582.5882 entropy=17.6066 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 170860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537444.5 mean_steps=13.3
|
|
[Episode 170870] reward=-118678154.0 actor_loss=0.2766 critic_loss=82155446587.0769 entropy=17.6014 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 170880] reward=-117056329.5 actor_loss=0.2913 critic_loss=81135729907.8095 entropy=17.6052 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 170880] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-602781.1 mean_steps=11.9
|
|
[Episode 170890] reward=-114960612.3 actor_loss=0.2783 critic_loss=82320966296.2162 entropy=17.5927 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 170900] reward=-114518912.7 actor_loss=0.2690 critic_loss=79258469444.2667 entropy=17.5951 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 170900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-569639.7 mean_steps=11.8
|
|
[Episode 170910] reward=-121794518.6 actor_loss=0.3670 critic_loss=83166003200.0000 entropy=17.5899 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 170920] reward=-117171219.6 actor_loss=0.3264 critic_loss=80617536275.6923 entropy=17.5931 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 170920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458985.1 mean_steps=14.6
|
|
[Episode 170930] reward=-120146543.4 actor_loss=0.3223 critic_loss=88870471412.8696 entropy=17.6020 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 170940] reward=-112472950.6 actor_loss=0.3514 critic_loss=77703982648.8889 entropy=17.6011 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 170940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485368.8 mean_steps=14.8
|
|
[Episode 170950] reward=-123129571.2 actor_loss=0.3019 critic_loss=89180118337.8286 entropy=17.6083 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 170960] reward=-112683915.9 actor_loss=0.3609 critic_loss=75818482892.8000 entropy=17.6066 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 170960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-487024.2 mean_steps=16.6
|
|
[Episode 170970] reward=-117936475.7 actor_loss=0.2724 critic_loss=84504764664.2424 entropy=17.5865 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 170980] reward=-123664629.2 actor_loss=0.3613 critic_loss=336422546870.8571 entropy=17.5913 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 170980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-385795.1 mean_steps=15.0
|
|
[Episode 170990] reward=-113668283.0 actor_loss=0.4297 critic_loss=82043945332.3636 entropy=17.5914 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 171000] reward=-109689925.5 actor_loss=0.3347 critic_loss=79737407078.4000 entropy=17.5899 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 171000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-615185.9 mean_steps=12.8
|
|
[Episode 171010] reward=-122453045.1 actor_loss=0.2947 critic_loss=83226137311.1795 entropy=17.5956 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 171020] reward=-118535143.0 actor_loss=0.3159 critic_loss=85180102566.9565 entropy=17.5851 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 171020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641744.9 mean_steps=12.2
|
|
[Episode 171030] reward=-154806694.1 actor_loss=2.7034 critic_loss=6113344248410.3525 entropy=17.5810 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 171040] reward=-114895237.2 actor_loss=0.3883 critic_loss=80178960384.0000 entropy=17.5820 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 171040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-403464.2 mean_steps=16.2
|
|
[Episode 171050] reward=-120354768.0 actor_loss=0.2857 critic_loss=84647991758.4516 entropy=17.5862 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 171060] reward=-119408303.8 actor_loss=0.3028 critic_loss=81946814208.0000 entropy=17.5730 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 171060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479024.3 mean_steps=14.8
|
|
[Episode 171070] reward=-119507080.4 actor_loss=0.3646 critic_loss=88955232559.4074 entropy=17.5748 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 171080] reward=-122054195.3 actor_loss=0.1950 critic_loss=139812393459.5122 entropy=17.5796 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 171080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-594680.7 mean_steps=13.8
|
|
[Episode 171090] reward=-117631211.4 actor_loss=0.3029 critic_loss=81934017805.4737 entropy=17.5792 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 171100] reward=-114117898.7 actor_loss=0.3122 critic_loss=77789976439.4667 entropy=17.5826 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 171100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-663135.6 mean_steps=14.9
|
|
[Episode 171110] reward=-112757242.4 actor_loss=0.2599 critic_loss=73359075238.9565 entropy=17.5990 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 171120] reward=-122144107.2 actor_loss=0.3444 critic_loss=113079719556.7407 entropy=17.6047 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 171120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591093.4 mean_steps=13.7
|
|
[Episode 171130] reward=-122045407.4 actor_loss=0.2839 critic_loss=100375677838.2222 entropy=17.6006 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 171140] reward=-124901927.9 actor_loss=0.2686 critic_loss=91795807436.8000 entropy=17.5921 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 171140] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-616855.7 mean_steps=10.7
|
|
[Episode 171150] reward=-122438981.9 actor_loss=0.3368 critic_loss=151787851971.0476 entropy=17.5990 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 171160] reward=-111149125.7 actor_loss=0.3451 critic_loss=87431980568.3810 entropy=17.5898 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 171160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-621482.4 mean_steps=11.8
|
|
[Episode 171170] reward=-119302174.2 actor_loss=0.2914 critic_loss=90687664128.0000 entropy=17.5991 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 171180] reward=-121747725.0 actor_loss=0.3361 critic_loss=121359424365.7143 entropy=17.5980 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 171180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526122.1 mean_steps=13.0
|
|
[Episode 171190] reward=-115828005.8 actor_loss=0.4186 critic_loss=89574007076.5714 entropy=17.5928 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 171200] reward=-117278471.4 actor_loss=0.3277 critic_loss=84632087040.0000 entropy=17.5918 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 171200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543311.1 mean_steps=13.2
|
|
[Episode 171210] reward=-120360591.4 actor_loss=0.3219 critic_loss=85550027980.8000 entropy=17.6065 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 171220] reward=-117298213.9 actor_loss=0.2989 critic_loss=83270221031.2258 entropy=17.5985 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 171220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-588466.7 mean_steps=12.3
|
|
[Episode 171230] reward=-123366821.9 actor_loss=0.3536 critic_loss=100685270129.7778 entropy=17.6057 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 171240] reward=-117503648.1 actor_loss=0.3773 critic_loss=87891029800.4211 entropy=17.5983 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 171240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444571.8 mean_steps=15.5
|
|
[Episode 171250] reward=-120641830.3 actor_loss=0.4194 critic_loss=85741387776.0000 entropy=17.6001 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 171260] reward=-132992831.0 actor_loss=0.2951 critic_loss=717943753728.0000 entropy=17.5982 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 171260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549643.9 mean_steps=13.6
|
|
[Episode 171270] reward=-115993703.0 actor_loss=0.3151 critic_loss=85189901042.5263 entropy=17.6057 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 171280] reward=-121054445.1 actor_loss=0.2420 critic_loss=99301312619.7895 entropy=17.6166 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 171280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-416481.1 mean_steps=16.2
|
|
[Episode 171290] reward=-117604334.4 actor_loss=0.2417 critic_loss=85378609392.9412 entropy=17.6130 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 171300] reward=-231094256.3 actor_loss=25.1626 critic_loss=46225336457066.1484 entropy=17.6276 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 171300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434155.5 mean_steps=15.4
|
|
[Episode 171310] reward=-123164121.2 actor_loss=0.3724 critic_loss=170863525257.8462 entropy=17.6386 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 171320] reward=-121313229.6 actor_loss=0.1720 critic_loss=85023842910.8148 entropy=17.6465 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 171320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497325.1 mean_steps=14.9
|
|
[Episode 171330] reward=-118900101.3 actor_loss=0.3240 critic_loss=109383498725.7436 entropy=17.6475 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 171340] reward=-117620806.4 actor_loss=0.2938 critic_loss=83257421157.2093 entropy=17.6632 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 171340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-520940.0 mean_steps=15.1
|
|
[Episode 171350] reward=-122352335.0 actor_loss=0.3679 critic_loss=89406486685.5385 entropy=17.6748 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 171360] reward=-123573096.6 actor_loss=0.2285 critic_loss=97660542976.0000 entropy=17.6652 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 171360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-587055.9 mean_steps=12.6
|
|
[Episode 171370] reward=-119783587.9 actor_loss=0.3020 critic_loss=86625269077.3333 entropy=17.6555 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 171380] reward=-124055310.0 actor_loss=0.2393 critic_loss=122499508809.1429 entropy=17.6429 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 171380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-631796.3 mean_steps=12.9
|
|
[Episode 171390] reward=-121141257.0 actor_loss=0.2935 critic_loss=90653616701.4400 entropy=17.6342 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 171400] reward=-123294296.3 actor_loss=0.2741 critic_loss=97827230813.0909 entropy=17.6342 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 171400] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-689614.5 mean_steps=11.1
|
|
[Episode 171410] reward=-114877504.7 actor_loss=0.2661 critic_loss=77889650041.2632 entropy=17.6293 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 171420] reward=-118016629.9 actor_loss=0.3288 critic_loss=85659527413.7600 entropy=17.6207 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 171420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434205.1 mean_steps=14.4
|
|
[Episode 171430] reward=-118276706.0 actor_loss=0.3329 critic_loss=85763612975.4074 entropy=17.6241 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 171440] reward=-120260335.8 actor_loss=0.2338 critic_loss=96226421964.8000 entropy=17.6373 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 171440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-546422.5 mean_steps=14.1
|
|
[Episode 171450] reward=-119317393.2 actor_loss=0.3182 critic_loss=80369019172.5714 entropy=17.6366 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 171460] reward=-120866622.1 actor_loss=0.2811 critic_loss=85690834033.7778 entropy=17.6361 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 171460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433946.6 mean_steps=15.5
|
|
[Episode 171470] reward=-116008993.4 actor_loss=0.2946 critic_loss=249990274003.4783 entropy=17.6632 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 171480] reward=-113027422.9 actor_loss=0.2154 critic_loss=79176182205.2174 entropy=17.6650 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 171480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479961.5 mean_steps=13.8
|
|
[Episode 171490] reward=-119622159.9 actor_loss=0.2828 critic_loss=114656845433.9048 entropy=17.6517 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 171500] reward=-116424412.2 actor_loss=0.3471 critic_loss=80425871155.2000 entropy=17.6578 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 171500] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-330835.3 mean_steps=16.4
|
|
[Episode 171510] reward=-119853558.9 actor_loss=0.2857 critic_loss=89159654912.0000 entropy=17.6602 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 171520] reward=-117530801.4 actor_loss=0.3370 critic_loss=82766762188.8000 entropy=17.6700 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 171520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514645.1 mean_steps=14.1
|
|
[Episode 171530] reward=-119576964.4 actor_loss=0.2343 critic_loss=84562326674.2857 entropy=17.6782 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 171540] reward=-117303148.4 actor_loss=0.3217 critic_loss=85819822221.2414 entropy=17.6822 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 171540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-629075.3 mean_steps=13.9
|
|
[Episode 171550] reward=-119339930.3 actor_loss=0.2625 critic_loss=86575442890.1053 entropy=17.6921 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 171560] reward=-115591122.9 actor_loss=0.2238 critic_loss=83592745984.0000 entropy=17.6838 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 171560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492405.8 mean_steps=14.4
|
|
[Episode 171570] reward=-121758877.7 actor_loss=0.2779 critic_loss=133719233877.3333 entropy=17.6751 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 171580] reward=-115281852.7 actor_loss=0.3292 critic_loss=79306925712.4103 entropy=17.6883 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 171580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494565.6 mean_steps=13.8
|
|
[Episode 171590] reward=-115049381.9 actor_loss=0.3616 critic_loss=78958326705.2308 entropy=17.6951 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 171600] reward=-123039032.3 actor_loss=0.3319 critic_loss=101546205366.0444 entropy=17.7060 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 171600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577669.9 mean_steps=12.7
|
|
[Episode 171610] reward=-117724019.7 actor_loss=0.2236 critic_loss=78151629824.0000 entropy=17.7027 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 171620] reward=-119348094.9 actor_loss=0.2373 critic_loss=83708155796.2105 entropy=17.6938 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 171620] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-366652.5 mean_steps=16.9
|
|
[Episode 171630] reward=-115530060.1 actor_loss=0.2931 critic_loss=87660691200.0000 entropy=17.6936 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 171640] reward=-129248067.1 actor_loss=0.3074 critic_loss=341527747584.0000 entropy=17.6932 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 171640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438673.5 mean_steps=14.7
|
|
[Episode 171650] reward=-119672426.0 actor_loss=0.2638 critic_loss=96290376635.7333 entropy=17.6885 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 171660] reward=-129335740.1 actor_loss=0.3195 critic_loss=409074883347.6923 entropy=17.6980 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 171660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-502928.0 mean_steps=15.8
|
|
[Episode 171670] reward=-116562848.2 actor_loss=0.2995 critic_loss=95943494811.1515 entropy=17.7192 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 171680] reward=-120424505.8 actor_loss=0.3549 critic_loss=99093346614.3030 entropy=17.7260 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 171680] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-737983.7 mean_steps=11.8
|
|
[Episode 171690] reward=-110961564.4 actor_loss=0.3159 critic_loss=80690032171.8857 entropy=17.7368 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 171700] reward=-120589207.9 actor_loss=0.2815 critic_loss=102660781834.2400 entropy=17.7308 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 171700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-619446.6 mean_steps=11.8
|
|
[Episode 171710] reward=-118946086.5 actor_loss=0.3542 critic_loss=91713284162.0645 entropy=17.7373 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 171720] reward=-114536460.3 actor_loss=0.2453 critic_loss=82944605739.8857 entropy=17.7382 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 171720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522502.4 mean_steps=14.1
|
|
[Episode 171730] reward=-118162180.8 actor_loss=0.2280 critic_loss=84645676974.0800 entropy=17.7344 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 171740] reward=-118692458.0 actor_loss=0.2738 critic_loss=96439811803.4286 entropy=17.7415 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 171740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436546.4 mean_steps=15.8
|
|
[Episode 171750] reward=-119829074.5 actor_loss=0.2654 critic_loss=92372191981.2683 entropy=17.7200 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 171760] reward=-123026167.4 actor_loss=0.1463 critic_loss=96563898034.6046 entropy=17.7245 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 171760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-584573.5 mean_steps=13.8
|
|
[Episode 171770] reward=-114791257.9 actor_loss=0.2415 critic_loss=80752437248.0000 entropy=17.7183 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 171780] reward=-115692436.7 actor_loss=0.3253 critic_loss=77665215903.1351 entropy=17.7148 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 171780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-461864.7 mean_steps=15.7
|
|
[Episode 171790] reward=-123865726.0 actor_loss=0.2375 critic_loss=91639159739.7333 entropy=17.7042 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 171800] reward=-116402768.9 actor_loss=0.4013 critic_loss=84040903884.8000 entropy=17.6994 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 171800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483399.2 mean_steps=14.2
|
|
[Episode 171810] reward=-124267824.7 actor_loss=0.2884 critic_loss=382750756700.1600 entropy=17.6809 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 171820] reward=-122749546.3 actor_loss=0.2823 critic_loss=84657885476.5714 entropy=17.6695 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 171820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575203.3 mean_steps=12.6
|
|
[Episode 171830] reward=-121898135.8 actor_loss=0.3101 critic_loss=91696069231.3044 entropy=17.6728 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 171840] reward=-117920834.4 actor_loss=0.2481 critic_loss=81639735808.0000 entropy=17.6611 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 171840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488410.3 mean_steps=13.7
|
|
[Episode 171850] reward=-120015624.9 actor_loss=0.3491 critic_loss=90437599480.2424 entropy=17.6370 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 171860] reward=-116876817.2 actor_loss=0.3210 critic_loss=82054746637.1282 entropy=17.6358 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 171860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616879.6 mean_steps=12.9
|
|
[Episode 171870] reward=-125327812.1 actor_loss=0.2818 critic_loss=105345148660.8696 entropy=17.6321 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 171880] reward=-117840154.0 actor_loss=0.2236 critic_loss=83295779451.5862 entropy=17.6285 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 171880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540021.9 mean_steps=14.6
|
|
[Episode 171890] reward=-119400122.6 actor_loss=0.2964 critic_loss=83378908546.8445 entropy=17.6313 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 171900] reward=-121095189.4 actor_loss=0.2411 critic_loss=90581251868.4444 entropy=17.6264 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 171900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-637399.5 mean_steps=12.2
|
|
[Episode 171910] reward=-119439162.4 actor_loss=0.2781 critic_loss=85837411305.2444 entropy=17.6285 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 171920] reward=-114845554.6 actor_loss=0.2718 critic_loss=84033134592.0000 entropy=17.6221 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 171920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-388605.4 mean_steps=15.1
|
|
[Episode 171930] reward=-118749794.5 actor_loss=0.3316 critic_loss=84384498270.8148 entropy=17.6124 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 171940] reward=-112617952.5 actor_loss=0.2965 critic_loss=77717267456.0000 entropy=17.6091 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 171940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392609.3 mean_steps=16.1
|
|
[Episode 171950] reward=-119237668.7 actor_loss=0.2683 critic_loss=87256244224.0000 entropy=17.6076 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 171960] reward=-117011557.5 actor_loss=0.3039 critic_loss=89122397739.8857 entropy=17.6263 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 171960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555699.3 mean_steps=13.6
|
|
[Episode 171970] reward=-112572906.7 actor_loss=0.3180 critic_loss=79318568504.8889 entropy=17.6099 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 171980] reward=-118245792.1 actor_loss=0.2403 critic_loss=79763519042.7826 entropy=17.6251 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 171980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-670920.0 mean_steps=12.3
|
|
[Episode 171990] reward=-121259425.0 actor_loss=0.2488 critic_loss=100359711061.3333 entropy=17.6174 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 172000] reward=-122463737.6 actor_loss=0.2429 critic_loss=84952588005.5172 entropy=17.6089 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 172000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546465.3 mean_steps=13.2
|
|
[Episode 172010] reward=-117035871.3 actor_loss=0.2682 critic_loss=85805708447.2889 entropy=17.6142 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 172020] reward=-117438138.6 actor_loss=0.3091 critic_loss=84179802831.5676 entropy=17.6263 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 172020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546963.5 mean_steps=13.2
|
|
[Episode 172030] reward=-119886939.8 actor_loss=0.1910 critic_loss=86983047395.5556 entropy=17.6132 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 172040] reward=-115307677.7 actor_loss=0.3497 critic_loss=80578175249.0667 entropy=17.6398 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 172040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616021.4 mean_steps=12.7
|
|
[Episode 172050] reward=-117679953.1 actor_loss=0.3540 critic_loss=89502444020.6222 entropy=17.6482 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 172060] reward=-123934867.2 actor_loss=0.2173 critic_loss=93176738869.8947 entropy=17.6561 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 172060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477720.4 mean_steps=13.8
|
|
[Episode 172070] reward=-114796410.2 actor_loss=0.3440 critic_loss=83219106153.4118 entropy=17.6493 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 172080] reward=-117814896.9 actor_loss=0.2653 critic_loss=79722622876.0976 entropy=17.6466 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 172080] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-359545.7 mean_steps=16.8
|
|
[Episode 172090] reward=-113724004.1 actor_loss=0.2757 critic_loss=77175645306.8800 entropy=17.6445 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 172100] reward=-117007085.7 actor_loss=0.2567 critic_loss=82737526283.3778 entropy=17.6580 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 172100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544065.8 mean_steps=13.2
|
|
[Episode 172110] reward=-122297484.8 actor_loss=0.2764 critic_loss=83001251521.4222 entropy=17.6311 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 172120] reward=-121644789.0 actor_loss=0.2514 critic_loss=81540461909.3333 entropy=17.6198 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 172120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-413475.6 mean_steps=13.3
|
|
[Episode 172130] reward=-117385909.9 actor_loss=0.3168 critic_loss=80622333207.2727 entropy=17.6175 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 172140] reward=-120216827.9 actor_loss=0.2807 critic_loss=108814527566.7692 entropy=17.6264 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 172140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-411952.1 mean_steps=15.0
|
|
[Episode 172150] reward=-117568326.5 actor_loss=0.2852 critic_loss=85492605650.8235 entropy=17.6206 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 172160] reward=-117828867.5 actor_loss=0.2865 critic_loss=87084658961.0667 entropy=17.6171 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 172160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484159.7 mean_steps=14.7
|
|
[Episode 172170] reward=-123353947.0 actor_loss=0.2038 critic_loss=87040396492.8000 entropy=17.6321 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 172180] reward=-116212250.7 actor_loss=0.3043 critic_loss=78542925732.9778 entropy=17.6247 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 172180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430593.8 mean_steps=15.1
|
|
[Episode 172190] reward=-117620219.1 actor_loss=0.3521 critic_loss=82029338441.9556 entropy=17.6311 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 172200] reward=-121869915.6 actor_loss=0.2805 critic_loss=85727104204.8000 entropy=17.6255 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 172200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-598759.6 mean_steps=12.8
|
|
[Episode 172210] reward=-122332888.6 actor_loss=0.3529 critic_loss=87428969130.6667 entropy=17.6313 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 172220] reward=-119833857.0 actor_loss=0.2269 critic_loss=84192570208.7111 entropy=17.6495 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 172220] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-685646.7 mean_steps=10.6
|
|
[Episode 172230] reward=-116577982.0 actor_loss=0.3212 critic_loss=84710685991.8222 entropy=17.6268 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 172240] reward=-118052379.8 actor_loss=0.2281 critic_loss=82646516053.3333 entropy=17.6290 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 172240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592670.0 mean_steps=13.6
|
|
[Episode 172250] reward=-120115387.8 actor_loss=0.2602 critic_loss=103797974841.8065 entropy=17.6325 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 172260] reward=-112670943.4 actor_loss=0.3270 critic_loss=86655638095.6444 entropy=17.6358 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 172260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433360.0 mean_steps=15.5
|
|
[Episode 172270] reward=-124402919.2 actor_loss=0.2424 critic_loss=111255148953.6000 entropy=17.6507 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 172280] reward=-117307758.3 actor_loss=0.2149 critic_loss=104039217743.6444 entropy=17.6576 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 172280] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-639727.2 mean_steps=11.2
|
|
[Episode 172290] reward=-125555207.6 actor_loss=0.2527 critic_loss=113512013277.8667 entropy=17.6419 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 172300] reward=-118414271.0 actor_loss=0.2644 critic_loss=83440811791.0588 entropy=17.6340 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 172300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540386.9 mean_steps=14.2
|
|
[Episode 172310] reward=-111970867.0 actor_loss=0.3108 critic_loss=87846234794.6667 entropy=17.6237 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 172320] reward=-119950716.1 actor_loss=0.2494 critic_loss=99109048156.1600 entropy=17.6202 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 172320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-583702.9 mean_steps=12.6
|
|
[Episode 172330] reward=-121769352.9 actor_loss=0.3801 critic_loss=85668037263.3600 entropy=17.6200 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 172340] reward=-124151773.8 actor_loss=0.3158 critic_loss=96000481159.5294 entropy=17.6308 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 172340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-436043.3 mean_steps=16.6
|
|
[Episode 172350] reward=-117306204.0 actor_loss=0.2406 critic_loss=83434849280.0000 entropy=17.6337 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 172360] reward=-121719651.9 actor_loss=0.2217 critic_loss=87280353280.0000 entropy=17.6326 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 172360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378342.1 mean_steps=16.1
|
|
[Episode 172370] reward=-122241221.6 actor_loss=0.2800 critic_loss=82934856908.8000 entropy=17.6315 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 172380] reward=-120744674.2 actor_loss=0.2963 critic_loss=105924579012.9231 entropy=17.6338 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 172380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-395814.3 mean_steps=14.9
|
|
[Episode 172390] reward=-119668300.8 actor_loss=0.3185 critic_loss=90980089424.8421 entropy=17.6298 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 172400] reward=-118741045.6 actor_loss=0.3641 critic_loss=84505681920.0000 entropy=17.6252 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 172400] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-681310.4 mean_steps=11.0
|
|
[Episode 172410] reward=-120754401.8 actor_loss=0.2696 critic_loss=91203095990.8571 entropy=17.6344 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 172420] reward=-118380595.8 actor_loss=0.2485 critic_loss=84962612565.3333 entropy=17.6478 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 172420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569543.8 mean_steps=13.4
|
|
[Episode 172430] reward=-120967508.9 actor_loss=0.3510 critic_loss=82693201578.6667 entropy=17.6464 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 172440] reward=-118857183.4 actor_loss=0.2667 critic_loss=84526444357.8182 entropy=17.6461 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 172440] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-641030.4 mean_steps=10.3
|
|
[Episode 172450] reward=-115308057.7 actor_loss=0.3844 critic_loss=81285717606.4000 entropy=17.6375 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 172460] reward=-121460091.3 actor_loss=0.2890 critic_loss=83463699219.6923 entropy=17.6184 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 172460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533129.4 mean_steps=14.2
|
|
[Episode 172470] reward=-123356653.5 actor_loss=0.2050 critic_loss=87132832736.9697 entropy=17.6158 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 172480] reward=-121406890.0 actor_loss=0.1827 critic_loss=88923583488.0000 entropy=17.6023 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 172480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619577.9 mean_steps=13.2
|
|
[Episode 172490] reward=-117719085.8 actor_loss=0.3194 critic_loss=80050013330.2857 entropy=17.6064 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 172500] reward=-122223594.9 actor_loss=0.3063 critic_loss=85494204661.7600 entropy=17.6153 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 172500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577394.9 mean_steps=13.3
|
|
[Episode 172510] reward=-116659005.1 actor_loss=0.3311 critic_loss=81713156551.1111 entropy=17.6101 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 172520] reward=-116100531.1 actor_loss=0.3076 critic_loss=88420867132.2353 entropy=17.5955 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 172520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458390.1 mean_steps=14.8
|
|
[Episode 172530] reward=-116298865.9 actor_loss=0.3192 critic_loss=80725256055.4667 entropy=17.5982 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 172540] reward=-116977944.0 actor_loss=0.2635 critic_loss=81180964352.0000 entropy=17.5987 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 172540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521482.3 mean_steps=14.8
|
|
[Episode 172550] reward=-115776663.7 actor_loss=0.3710 critic_loss=79906236552.5333 entropy=17.6123 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 172560] reward=-149586589.1 actor_loss=0.4501 critic_loss=3748525414809.6001 entropy=17.6176 approx_kl=0.0040 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 172560] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-270381.5 mean_steps=18.1
|
|
[Episode 172570] reward=-119072769.8 actor_loss=0.2777 critic_loss=87279538688.0000 entropy=17.6204 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 172580] reward=-125539031.9 actor_loss=0.1983 critic_loss=91047166915.7647 entropy=17.6098 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 172580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438877.5 mean_steps=15.6
|
|
[Episode 172590] reward=-117344955.6 actor_loss=0.3294 critic_loss=80181604019.8919 entropy=17.6034 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 172600] reward=-121681687.0 actor_loss=0.3380 critic_loss=83991805952.0000 entropy=17.6066 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 172600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-351762.3 mean_steps=15.7
|
|
[Episode 172610] reward=-119389544.7 actor_loss=0.3404 critic_loss=84868795641.0811 entropy=17.6068 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 172620] reward=-135377462.6 actor_loss=0.3845 critic_loss=1635021629685.7600 entropy=17.6077 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 172620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570317.1 mean_steps=13.6
|
|
[Episode 172630] reward=-117676202.4 actor_loss=0.3879 critic_loss=87537032734.1176 entropy=17.6193 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 172640] reward=-189938207.2 actor_loss=0.4213 critic_loss=12804107266366.5781 entropy=17.6188 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 172640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481410.3 mean_steps=14.8
|
|
[Episode 172650] reward=-115065347.1 actor_loss=0.2870 critic_loss=76725801915.7333 entropy=17.6322 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 172660] reward=-120553715.3 actor_loss=0.2438 critic_loss=81470008758.8571 entropy=17.6293 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 172660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550914.0 mean_steps=14.1
|
|
[Episode 172670] reward=-147413971.2 actor_loss=0.3665 critic_loss=3061303552949.0732 entropy=17.6330 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 172680] reward=-115799911.0 actor_loss=0.5274 critic_loss=91996759943.5294 entropy=17.6308 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1536 front_blocked=0
|
|
[Eval 172680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508902.1 mean_steps=14.1
|
|
[Episode 172690] reward=-121103508.6 actor_loss=0.2570 critic_loss=119040943970.4615 entropy=17.6253 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 172700] reward=-120683338.5 actor_loss=0.1814 critic_loss=88063672866.1333 entropy=17.6221 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 172700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551709.0 mean_steps=13.1
|
|
[Episode 172710] reward=-118940164.5 actor_loss=0.2059 critic_loss=81509838424.2759 entropy=17.6195 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 172720] reward=-120178408.8 actor_loss=0.3062 critic_loss=87391527160.2424 entropy=17.6172 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 172720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-496147.8 mean_steps=15.1
|
|
[Episode 172730] reward=-124235765.9 actor_loss=0.2961 critic_loss=187455776085.3333 entropy=17.6311 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 172740] reward=-122401251.5 actor_loss=0.4029 critic_loss=88330675851.6364 entropy=17.6456 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 172740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443039.1 mean_steps=15.2
|
|
[Episode 172750] reward=-120930689.3 actor_loss=0.2907 critic_loss=86355848471.2727 entropy=17.6504 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 172760] reward=-114585198.4 actor_loss=0.2685 critic_loss=76091210605.7143 entropy=17.6570 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 172760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555295.8 mean_steps=12.2
|
|
[Episode 172770] reward=-112789600.1 actor_loss=0.2550 critic_loss=78335429836.8000 entropy=17.6619 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 172780] reward=-118847777.7 actor_loss=0.3139 critic_loss=80292642620.9524 entropy=17.6683 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 172780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581576.5 mean_steps=12.5
|
|
[Episode 172790] reward=-120090203.2 actor_loss=0.3652 critic_loss=85159534080.0000 entropy=17.6607 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 172800] reward=-124049697.7 actor_loss=0.3841 critic_loss=479501967360.0000 entropy=17.6783 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 172800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578012.0 mean_steps=12.7
|
|
[Episode 172810] reward=-120768797.5 actor_loss=0.3246 critic_loss=176387238570.6667 entropy=17.7000 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 172820] reward=-127753565.8 actor_loss=0.2911 critic_loss=146508484235.6364 entropy=17.7026 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 172820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-609310.4 mean_steps=12.6
|
|
[Episode 172830] reward=-114915636.7 actor_loss=0.2568 critic_loss=79891498234.3111 entropy=17.6989 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 172840] reward=-114099253.7 actor_loss=0.4055 critic_loss=78040642173.1555 entropy=17.6850 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 172840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-611856.9 mean_steps=12.3
|
|
[Episode 172850] reward=-122350183.3 actor_loss=0.3079 critic_loss=88047836752.8421 entropy=17.6780 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 172860] reward=-121013688.3 actor_loss=0.3398 critic_loss=103456423936.0000 entropy=17.6710 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 172860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-619059.4 mean_steps=13.6
|
|
[Episode 172870] reward=-115532641.0 actor_loss=0.3596 critic_loss=83158066858.6667 entropy=17.6495 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 172880] reward=-121427764.9 actor_loss=0.2680 critic_loss=93857200751.3044 entropy=17.6537 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 172880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530129.5 mean_steps=14.1
|
|
[Episode 172890] reward=-122867819.2 actor_loss=0.2902 critic_loss=86358842368.0000 entropy=17.6550 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 172900] reward=-119624488.6 actor_loss=0.3883 critic_loss=84678852608.0000 entropy=17.6511 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 172900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527236.7 mean_steps=14.2
|
|
[Episode 172910] reward=-115930560.6 actor_loss=0.2960 critic_loss=94206234787.8400 entropy=17.6545 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 172920] reward=-119833864.5 actor_loss=0.3648 critic_loss=88858161607.1111 entropy=17.6518 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 172920] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-633873.9 mean_steps=11.2
|
|
[Episode 172930] reward=-115121966.7 actor_loss=0.2683 critic_loss=76607187720.8276 entropy=17.6741 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 172940] reward=-118560377.1 actor_loss=0.2934 critic_loss=86955597095.8222 entropy=17.6681 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 172940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-375949.7 mean_steps=15.3
|
|
[Episode 172950] reward=-179951970.2 actor_loss=0.5051 critic_loss=9964451311316.2930 entropy=17.6676 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 172960] reward=-122168746.9 actor_loss=0.2383 critic_loss=84383619859.6923 entropy=17.6596 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 172960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510032.3 mean_steps=13.2
|
|
[Episode 172970] reward=-120779020.1 actor_loss=0.3027 critic_loss=88410215219.2000 entropy=17.6642 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 172980] reward=-115399069.8 actor_loss=0.3929 critic_loss=80935791130.9474 entropy=17.6696 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 172980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-403348.6 mean_steps=15.3
|
|
[Episode 172990] reward=-114713674.4 actor_loss=0.4002 critic_loss=82723224654.7692 entropy=17.6725 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 173000] reward=-116739557.9 actor_loss=0.3474 critic_loss=123349623913.9310 entropy=17.6679 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 173000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-413431.1 mean_steps=16.1
|
|
[Episode 173010] reward=-122308823.5 actor_loss=0.2345 critic_loss=89007950080.0000 entropy=17.6618 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 173020] reward=-123474552.1 actor_loss=0.2155 critic_loss=87435705737.8462 entropy=17.6618 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 173020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540131.7 mean_steps=13.3
|
|
[Episode 173030] reward=-123772401.3 actor_loss=0.2636 critic_loss=91513348096.0000 entropy=17.6757 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 173040] reward=-116029664.5 actor_loss=0.2412 critic_loss=78891202787.5556 entropy=17.6773 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 173040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530419.3 mean_steps=13.3
|
|
[Episode 173050] reward=-122416229.8 actor_loss=0.2407 critic_loss=88957573592.6154 entropy=17.6792 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 173060] reward=-119717335.5 actor_loss=0.3111 critic_loss=87381671708.4444 entropy=17.6710 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 173060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-660743.2 mean_steps=12.2
|
|
[Episode 173070] reward=-119940745.5 actor_loss=0.1920 critic_loss=92936220330.6667 entropy=17.6598 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 173080] reward=-122579298.8 actor_loss=0.3755 critic_loss=93524615767.4146 entropy=17.6582 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 173080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-476224.0 mean_steps=15.8
|
|
[Episode 173090] reward=-118302773.7 actor_loss=0.3349 critic_loss=88158890484.6222 entropy=17.6561 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 173100] reward=-118331261.1 actor_loss=0.3285 critic_loss=84636482839.2727 entropy=17.6596 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 173100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-472390.8 mean_steps=16.9
|
|
[Episode 173110] reward=-120491901.6 actor_loss=0.2974 critic_loss=83397810930.5263 entropy=17.6606 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 173120] reward=-115667967.2 actor_loss=0.3504 critic_loss=79925020808.5333 entropy=17.6511 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 173120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-491401.3 mean_steps=13.2
|
|
[Episode 173130] reward=-117765452.0 actor_loss=0.3642 critic_loss=85293458701.4737 entropy=17.6392 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 173140] reward=-120658926.2 actor_loss=0.2908 critic_loss=84469203763.2000 entropy=17.6348 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 173140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-572172.4 mean_steps=13.3
|
|
[Episode 173150] reward=-114077599.1 actor_loss=0.3262 critic_loss=81836537446.4000 entropy=17.6195 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 173160] reward=-116469507.9 actor_loss=0.2623 critic_loss=80183922829.2414 entropy=17.6153 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 173160] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-710848.2 mean_steps=10.4
|
|
[Episode 173170] reward=-118838740.0 actor_loss=0.3382 critic_loss=88272522017.3913 entropy=17.6232 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 173180] reward=-120701149.7 actor_loss=0.2998 critic_loss=87035775777.3913 entropy=17.6166 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 173180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-509152.5 mean_steps=14.6
|
|
[Episode 173190] reward=-119876035.6 actor_loss=0.2952 critic_loss=83156671394.9091 entropy=17.6317 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 173200] reward=-120328579.9 actor_loss=0.2622 critic_loss=89486129379.5556 entropy=17.6473 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 173200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-514980.0 mean_steps=13.2
|
|
[Episode 173210] reward=-118649872.0 actor_loss=0.2845 critic_loss=90904533037.5111 entropy=17.6578 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 173220] reward=-115902649.9 actor_loss=0.2821 critic_loss=102517007798.8571 entropy=17.6783 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 173220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437811.6 mean_steps=15.3
|
|
[Episode 173230] reward=-126664994.7 actor_loss=0.2120 critic_loss=314410037443.0476 entropy=17.6634 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 173240] reward=-123020881.9 actor_loss=0.1814 critic_loss=85962749059.2821 entropy=17.6827 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 173240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-453447.2 mean_steps=13.8
|
|
[Episode 173250] reward=-121788471.2 actor_loss=0.2430 critic_loss=85296048713.1429 entropy=17.6806 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 173260] reward=-118012679.7 actor_loss=0.3110 critic_loss=78043411941.0526 entropy=17.6610 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 173260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576397.5 mean_steps=13.4
|
|
[Episode 173270] reward=-118582364.9 actor_loss=0.2391 critic_loss=81401660952.3810 entropy=17.6588 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 173280] reward=-120734663.1 actor_loss=0.2969 critic_loss=84774993920.0000 entropy=17.6685 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 173280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467716.3 mean_steps=14.9
|
|
[Episode 173290] reward=-115253776.5 actor_loss=0.3283 critic_loss=76537679780.9778 entropy=17.6629 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 173300] reward=-121982436.8 actor_loss=0.2893 critic_loss=82434626901.3333 entropy=17.6678 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 173300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-466665.4 mean_steps=12.9
|
|
[Episode 173310] reward=-123037580.8 actor_loss=0.2114 critic_loss=84841139586.8445 entropy=17.6565 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 173320] reward=-120265059.7 actor_loss=0.3867 critic_loss=88671059421.8667 entropy=17.6436 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 173320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565666.5 mean_steps=13.7
|
|
[Episode 173330] reward=-122634691.0 actor_loss=0.1900 critic_loss=84622734950.4000 entropy=17.6530 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 173340] reward=-113984397.3 actor_loss=0.3600 critic_loss=78579754780.4444 entropy=17.6438 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 173340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-380697.2 mean_steps=17.1
|
|
[Episode 173350] reward=-116654871.8 actor_loss=0.2824 critic_loss=82868654717.1555 entropy=17.6564 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 173360] reward=-123451857.0 actor_loss=0.2654 critic_loss=90533057234.8235 entropy=17.6616 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 173360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-456511.9 mean_steps=16.8
|
|
[Episode 173370] reward=-120132562.5 actor_loss=0.2850 critic_loss=84628094976.0000 entropy=17.6724 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 173380] reward=-1420673310.6 actor_loss=0.9343 critic_loss=3239349623067047.5000 entropy=17.7055 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 173380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455807.4 mean_steps=14.8
|
|
[Episode 173390] reward=-125632697.3 actor_loss=0.1367 critic_loss=194649976376.8889 entropy=17.7319 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 173400] reward=-732444947.6 actor_loss=0.3483 critic_loss=949016634373643.3750 entropy=17.7302 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 173400] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-325794.0 mean_steps=16.6
|
|
[Episode 173410] reward=-2465426313.0 actor_loss=1.2614 critic_loss=4346445029179392.0000 entropy=17.7547 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0990 front_blocked=0
|
|
[Episode 173420] reward=-192371686.6 actor_loss=0.2713 critic_loss=16918501759385.5996 entropy=17.7728 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 173420] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-370601.1 mean_steps=16.9
|
|
[Episode 173430] reward=-258106321.6 actor_loss=0.2997 critic_loss=81131379505379.5625 entropy=17.7864 approx_kl=0.0019 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 173440] reward=-116348327.9 actor_loss=0.3762 critic_loss=84669859430.4000 entropy=17.7965 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 173440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427500.6 mean_steps=15.4
|
|
[Episode 173450] reward=-117375413.0 actor_loss=0.2749 critic_loss=90138061643.2941 entropy=17.8092 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 173460] reward=-1146224173.9 actor_loss=0.8496 critic_loss=2849340129879186.5000 entropy=17.7994 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 173460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444415.5 mean_steps=15.8
|
|
[Episode 173470] reward=-116444190.1 actor_loss=0.3005 critic_loss=89055089095.1111 entropy=17.8037 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 173480] reward=-340506518.9 actor_loss=0.2889 critic_loss=138866190677879.4688 entropy=17.8055 approx_kl=0.0014 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 173480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-623692.7 mean_steps=13.1
|
|
[Episode 173490] reward=-119820730.2 actor_loss=0.4399 critic_loss=91763684875.3778 entropy=17.8310 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 173500] reward=-124927994.8 actor_loss=0.1828 critic_loss=91546297048.1778 entropy=17.8363 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 173500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526123.1 mean_steps=13.6
|
|
[Episode 173510] reward=-121899940.4 actor_loss=0.3326 critic_loss=85801402368.0000 entropy=17.8505 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 173520] reward=-2039563456.8 actor_loss=0.9569 critic_loss=4272955722825728.0000 entropy=17.8551 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1185 front_blocked=0
|
|
[Eval 173520] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-661198.6 mean_steps=11.4
|
|
[Episode 173530] reward=-123325335.0 actor_loss=0.3268 critic_loss=112294968200.9302 entropy=17.8848 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 173540] reward=-124796008.7 actor_loss=0.3089 critic_loss=97480740329.7391 entropy=17.8866 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 173540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-394194.2 mean_steps=15.1
|
|
[Episode 173550] reward=-119368684.2 actor_loss=0.3176 critic_loss=86524884127.2889 entropy=17.8927 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 173560] reward=-1450845530.5 actor_loss=0.2449 critic_loss=2161437954988077.5000 entropy=17.8883 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 173560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459414.1 mean_steps=15.3
|
|
[Episode 173570] reward=-122634678.0 actor_loss=0.3237 critic_loss=88164516196.1739 entropy=17.8929 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 173580] reward=-1766831942.8 actor_loss=11.2097 critic_loss=5941799240667409.0000 entropy=17.8984 approx_kl=0.0034 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 173580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-632940.1 mean_steps=11.8
|
|
[Episode 173590] reward=-127343108.4 actor_loss=0.3293 critic_loss=98389370834.4889 entropy=17.8985 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 173600] reward=-120585207.9 actor_loss=0.3077 critic_loss=88166446080.0000 entropy=17.8995 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 173600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-452509.0 mean_steps=14.1
|
|
[Episode 173610] reward=-2007070219.5 actor_loss=0.8003 critic_loss=2994967598561903.5000 entropy=17.8983 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1068 front_blocked=0
|
|
[Episode 173620] reward=-3725808106.8 actor_loss=1.6291 critic_loss=14996468156159318.0000 entropy=17.9148 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1139 front_blocked=0
|
|
[Eval 173620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460647.9 mean_steps=15.1
|
|
[Episode 173630] reward=-3575757325.7 actor_loss=1.1628 critic_loss=24530627197006724.0000 entropy=17.9447 approx_kl=0.0044 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 173640] reward=-780327572.3 actor_loss=0.6227 critic_loss=739550903373368.8750 entropy=17.9402 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 173640] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-250870.0 mean_steps=18.1
|
|
[Episode 173650] reward=-121342611.9 actor_loss=0.3599 critic_loss=89314833021.1555 entropy=17.9536 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 173660] reward=-3819298402.6 actor_loss=8.5407 critic_loss=13053689593017140.0000 entropy=17.9514 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1100 front_blocked=0
|
|
[Eval 173660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-426119.3 mean_steps=16.4
|
|
[Episode 173670] reward=-118695188.1 actor_loss=0.2996 critic_loss=93704074763.3778 entropy=17.9857 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 173680] reward=-123591450.9 actor_loss=0.3443 critic_loss=92729240052.6222 entropy=18.0104 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 173680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511631.3 mean_steps=13.7
|
|
[Episode 173690] reward=-121322472.2 actor_loss=0.3178 critic_loss=106613007064.1778 entropy=17.9947 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 173700] reward=-121167588.4 actor_loss=0.2718 critic_loss=89828724644.9778 entropy=17.9973 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 173700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-353998.1 mean_steps=17.1
|
|
[Episode 173710] reward=-1428324602.5 actor_loss=0.3526 critic_loss=2146962693073578.7500 entropy=17.9988 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 173720] reward=-717683040.7 actor_loss=0.6772 critic_loss=1007402609191594.6250 entropy=17.9939 approx_kl=0.0037 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 173720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-622163.5 mean_steps=11.7
|
|
[Episode 173730] reward=-125129138.2 actor_loss=0.1484 critic_loss=91769381626.0465 entropy=17.9835 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 173740] reward=-118788136.7 actor_loss=0.3130 critic_loss=87759677940.6222 entropy=17.9647 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 173740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473819.2 mean_steps=14.6
|
|
[Episode 173750] reward=-125863097.9 actor_loss=0.1967 critic_loss=89263227790.2222 entropy=17.9674 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 173760] reward=-115877744.1 actor_loss=0.3467 critic_loss=84978105821.8667 entropy=17.9567 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 173760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-360717.1 mean_steps=16.2
|
|
[Episode 173770] reward=-115846568.9 actor_loss=0.2574 critic_loss=83698628835.5556 entropy=17.9232 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 173780] reward=-122769637.8 actor_loss=0.2920 critic_loss=88522502781.1555 entropy=17.9275 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 173780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457766.8 mean_steps=15.6
|
|
[Episode 173790] reward=-118785110.3 actor_loss=0.3246 critic_loss=81345615735.4667 entropy=17.9272 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 173800] reward=-120812219.5 actor_loss=0.2373 critic_loss=87252618444.8000 entropy=17.9065 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 173800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483682.3 mean_steps=13.7
|
|
[Episode 173810] reward=-123676576.8 actor_loss=0.2363 critic_loss=88643016385.4222 entropy=17.9003 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 173820] reward=-124518528.7 actor_loss=0.2674 critic_loss=87156955909.6889 entropy=17.8863 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 173820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597567.5 mean_steps=12.6
|
|
[Episode 173830] reward=-118653562.4 actor_loss=0.2002 critic_loss=86058610153.7391 entropy=17.8770 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 173840] reward=-119176530.8 actor_loss=0.3661 critic_loss=86090632900.9231 entropy=17.8756 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 173840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532677.0 mean_steps=13.2
|
|
[Episode 173850] reward=-127239823.8 actor_loss=0.3045 critic_loss=96546320699.0769 entropy=17.8828 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 173860] reward=-120879659.1 actor_loss=0.2568 critic_loss=87717378821.6889 entropy=17.8947 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 173860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490176.8 mean_steps=15.2
|
|
[Episode 173870] reward=-1164916796.8 actor_loss=0.3977 critic_loss=2931778087516023.5000 entropy=17.9054 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 173880] reward=-119348632.5 actor_loss=0.3561 critic_loss=88283761112.6154 entropy=17.8998 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 173880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589776.9 mean_steps=12.6
|
|
[Episode 173890] reward=-525848462.0 actor_loss=0.5211 critic_loss=350681920392011.3125 entropy=17.9164 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 173900] reward=-118692579.8 actor_loss=0.2554 critic_loss=86271729664.0000 entropy=17.9079 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 173900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-650996.2 mean_steps=12.2
|
|
[Episode 173910] reward=-123886944.2 actor_loss=0.2766 critic_loss=91612470613.3333 entropy=17.9058 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 173920] reward=-464693698.4 actor_loss=0.3011 critic_loss=299943320086575.6250 entropy=17.9392 approx_kl=0.0043 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 173920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-3953002.4 mean_steps=16.2
|
|
[Episode 173930] reward=-124396579.8 actor_loss=0.3130 critic_loss=94959876869.6889 entropy=17.9455 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 173940] reward=-124961487.4 actor_loss=0.2523 critic_loss=92195638980.9231 entropy=17.9445 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 173940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-420044.9 mean_steps=14.2
|
|
[Episode 173950] reward=-123162195.1 actor_loss=0.3353 critic_loss=103794039361.6410 entropy=17.9458 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 173960] reward=-147231988.6 actor_loss=0.3460 critic_loss=2645200941875.2002 entropy=17.9550 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 173960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-404213.3 mean_steps=14.2
|
|
[Episode 173970] reward=-121350246.6 actor_loss=0.3575 critic_loss=88249881577.2444 entropy=17.9697 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 173980] reward=-474671066.7 actor_loss=0.3668 critic_loss=355286384360379.7500 entropy=17.9708 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 173980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546600.3 mean_steps=13.5
|
|
[Episode 173990] reward=-124011744.8 actor_loss=0.2628 critic_loss=103101571072.0000 entropy=17.9608 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 174000] reward=-120230290.0 actor_loss=0.3050 critic_loss=85533573848.1778 entropy=17.9689 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 174000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-454748.5 mean_steps=13.8
|
|
[Episode 174010] reward=-122451455.4 actor_loss=0.2027 critic_loss=96614747844.9231 entropy=17.9623 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 174020] reward=-119096505.1 actor_loss=0.3412 critic_loss=84283448797.8667 entropy=17.9662 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 174020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-568541.6 mean_steps=12.5
|
|
[Episode 174030] reward=-123721120.1 actor_loss=0.1735 critic_loss=93582170794.6667 entropy=17.9532 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 174040] reward=-120732814.7 actor_loss=0.3696 critic_loss=90751593278.2703 entropy=17.9451 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 174040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527112.2 mean_steps=13.8
|
|
[Episode 174050] reward=-116864353.4 actor_loss=0.3862 critic_loss=87046157653.3333 entropy=17.9796 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 174060] reward=-122624228.4 actor_loss=0.2644 critic_loss=90738006243.5556 entropy=17.9663 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 174060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-587532.4 mean_steps=11.8
|
|
[Episode 174070] reward=-121368168.0 actor_loss=0.3559 critic_loss=89072679321.6000 entropy=17.9732 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 174080] reward=-122760204.8 actor_loss=0.3540 critic_loss=93108588635.0222 entropy=17.9643 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 174080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502018.9 mean_steps=14.8
|
|
[Episode 174090] reward=-122717919.4 actor_loss=0.2692 critic_loss=86045128749.5111 entropy=17.9514 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 174100] reward=-123681921.5 actor_loss=0.3210 critic_loss=89370564653.5111 entropy=17.9328 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 174100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493027.2 mean_steps=15.4
|
|
[Episode 174110] reward=-123623068.1 actor_loss=0.1941 critic_loss=86525620132.9778 entropy=17.9370 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 174120] reward=-125789065.8 actor_loss=0.3171 critic_loss=92911180299.9070 entropy=17.9207 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 174120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419149.3 mean_steps=15.4
|
|
[Episode 174130] reward=-120420441.5 actor_loss=0.2565 critic_loss=87077789149.8667 entropy=17.9339 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 174140] reward=-122871235.6 actor_loss=0.1453 critic_loss=86797706854.4000 entropy=17.9007 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 174140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-534715.1 mean_steps=15.2
|
|
[Episode 174150] reward=-122036783.5 actor_loss=0.3047 critic_loss=87677292635.0222 entropy=17.8873 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 174160] reward=-121844209.8 actor_loss=0.2766 critic_loss=87234524891.4286 entropy=17.8965 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 174160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-417959.0 mean_steps=16.2
|
|
[Episode 174170] reward=-122563215.5 actor_loss=0.2477 critic_loss=88332678667.3778 entropy=17.8949 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 174180] reward=-124417504.3 actor_loss=0.3377 critic_loss=91523913955.5556 entropy=17.8758 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 174180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447702.8 mean_steps=14.7
|
|
[Episode 174190] reward=-116715538.9 actor_loss=0.5375 critic_loss=85217770245.6889 entropy=17.8691 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 174200] reward=-129293783.2 actor_loss=0.2499 critic_loss=131161003895.4667 entropy=17.8568 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 174200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-625926.3 mean_steps=13.0
|
|
[Episode 174210] reward=-123950268.5 actor_loss=0.3695 critic_loss=94275572531.2000 entropy=17.8469 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 174220] reward=-122566874.7 actor_loss=0.3011 critic_loss=90694219093.3333 entropy=17.8281 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 174220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-349153.3 mean_steps=15.7
|
|
[Episode 174230] reward=-118921424.3 actor_loss=0.2195 critic_loss=97209724108.8000 entropy=17.8112 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 174240] reward=-125997341.5 actor_loss=0.2602 critic_loss=93733965004.8000 entropy=17.8218 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 174240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-599954.8 mean_steps=11.9
|
|
[Episode 174250] reward=-124725754.7 actor_loss=0.3318 critic_loss=88585382054.0540 entropy=17.8098 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 174260] reward=-121382196.5 actor_loss=0.3317 critic_loss=85001327775.2889 entropy=17.7825 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 174260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-573018.1 mean_steps=12.4
|
|
[Episode 174270] reward=-118907447.3 actor_loss=0.1813 critic_loss=80564283115.2432 entropy=17.7689 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 174280] reward=-121378499.8 actor_loss=0.2345 critic_loss=86218684825.6000 entropy=17.7679 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 174280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420952.1 mean_steps=15.3
|
|
[Episode 174290] reward=-116665188.0 actor_loss=0.4370 critic_loss=82306274099.2000 entropy=17.7656 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Episode 174300] reward=-121339267.0 actor_loss=0.3430 critic_loss=86831097800.6487 entropy=17.7525 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 174300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526288.6 mean_steps=14.2
|
|
[Episode 174310] reward=-118410127.6 actor_loss=0.3597 critic_loss=84744352563.2000 entropy=17.7541 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 174320] reward=-123468388.9 actor_loss=0.3192 critic_loss=218093658112.0000 entropy=17.7251 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 174320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-647826.8 mean_steps=12.2
|
|
[Episode 174330] reward=-119103896.6 actor_loss=0.2487 critic_loss=85211654553.6000 entropy=17.7183 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 174340] reward=-124001761.7 actor_loss=0.2882 critic_loss=86984772333.2683 entropy=17.7197 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 174340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-321544.2 mean_steps=16.4
|
|
[Episode 174350] reward=-119200503.1 actor_loss=0.3150 critic_loss=85566411727.2381 entropy=17.7192 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 174360] reward=-121945889.4 actor_loss=0.3505 critic_loss=94875540210.5263 entropy=17.7206 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 174360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535559.4 mean_steps=14.3
|
|
[Episode 174370] reward=-121827805.1 actor_loss=0.3887 critic_loss=84480727917.7143 entropy=17.7237 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 174380] reward=-118581022.3 actor_loss=0.3183 critic_loss=82187285684.7059 entropy=17.7227 approx_kl=0.0042 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 174380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420921.4 mean_steps=15.2
|
|
[Episode 174390] reward=-118236926.4 actor_loss=0.3237 critic_loss=81103367964.4444 entropy=17.7066 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 174400] reward=-142028956.6 actor_loss=0.4082 critic_loss=2367141204628.6450 entropy=17.7075 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 174400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-520741.0 mean_steps=13.0
|
|
[Episode 174410] reward=-119573893.9 actor_loss=0.3611 critic_loss=90868437955.7647 entropy=17.7195 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 174420] reward=-118414108.7 actor_loss=0.4262 critic_loss=86955735917.7143 entropy=17.7201 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 174420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-556754.5 mean_steps=14.3
|
|
[Episode 174430] reward=-121992842.1 actor_loss=0.2516 critic_loss=87405145331.8095 entropy=17.7250 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 174440] reward=-118793803.9 actor_loss=0.3949 critic_loss=96706137656.8889 entropy=17.7216 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 174440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-626533.6 mean_steps=13.8
|
|
[Episode 174450] reward=-125542769.7 actor_loss=0.2795 critic_loss=93126428444.4444 entropy=17.7242 approx_kl=0.0106 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 174460] reward=-120065961.0 actor_loss=0.3539 critic_loss=86240650661.6471 entropy=17.7298 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 174460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461068.9 mean_steps=14.4
|
|
[Episode 174470] reward=-119872186.6 actor_loss=0.2769 critic_loss=84995467758.3448 entropy=17.7300 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 174480] reward=-141826632.1 actor_loss=0.1982 critic_loss=1437217490206.7200 entropy=17.7289 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 174480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495743.1 mean_steps=14.0
|
|
[Episode 174490] reward=-121914309.0 actor_loss=0.2284 critic_loss=104237462750.6087 entropy=17.7247 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 174500] reward=-549385174.3 actor_loss=0.2887 critic_loss=563540680687980.1250 entropy=17.7344 approx_kl=-0.0018 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 174500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-659279.1 mean_steps=12.4
|
|
[Episode 174510] reward=-122667243.8 actor_loss=0.2544 critic_loss=91126806461.9355 entropy=17.7544 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 174520] reward=-117337186.4 actor_loss=0.2794 critic_loss=92587209908.7059 entropy=17.7450 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 174520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472840.0 mean_steps=15.2
|
|
[Episode 174530] reward=-121651280.6 actor_loss=0.2727 critic_loss=85306585088.0000 entropy=17.7399 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 174540] reward=-120357036.3 actor_loss=0.3633 critic_loss=81725084623.2381 entropy=17.7364 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 174540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-594851.4 mean_steps=11.8
|
|
[Episode 174550] reward=-120654585.9 actor_loss=0.2822 critic_loss=86702096110.9333 entropy=17.7454 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 174560] reward=-120796079.7 actor_loss=0.2000 critic_loss=83347510067.2000 entropy=17.7503 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 174560] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-282373.2 mean_steps=17.3
|
|
[Episode 174570] reward=-116860664.5 actor_loss=0.2705 critic_loss=74963276777.2444 entropy=17.7355 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 174580] reward=-122085315.9 actor_loss=0.2434 critic_loss=124825022919.1111 entropy=17.7316 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 174580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-482315.6 mean_steps=15.7
|
|
[Episode 174590] reward=-117723393.7 actor_loss=0.4235 critic_loss=86426353664.0000 entropy=17.7204 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 174600] reward=-126139091.1 actor_loss=0.3126 critic_loss=97358948731.2593 entropy=17.7171 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 174600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443391.5 mean_steps=15.2
|
|
[Episode 174610] reward=-119099327.3 actor_loss=0.3008 critic_loss=78010622255.4074 entropy=17.6999 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 174620] reward=-117873939.3 actor_loss=0.3432 critic_loss=84132809289.1429 entropy=17.6834 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 174620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501987.7 mean_steps=15.2
|
|
[Episode 174630] reward=-122047557.5 actor_loss=0.2907 critic_loss=86791155712.0000 entropy=17.6839 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 174640] reward=-117712371.0 actor_loss=0.3331 critic_loss=82413785734.7368 entropy=17.7006 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 174640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-372594.3 mean_steps=15.1
|
|
[Episode 174650] reward=-118371261.1 actor_loss=0.3197 critic_loss=88360476672.0000 entropy=17.7097 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 174660] reward=-121969866.8 actor_loss=0.2541 critic_loss=83770066620.6316 entropy=17.7016 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 174660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-555986.9 mean_steps=14.1
|
|
[Episode 174670] reward=-121330293.1 actor_loss=0.2393 critic_loss=93332507525.1200 entropy=17.6828 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 174680] reward=-120221733.3 actor_loss=0.2479 critic_loss=104164847081.7391 entropy=17.6696 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 174680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-635583.9 mean_steps=12.8
|
|
[Episode 174690] reward=-120628725.2 actor_loss=0.2231 critic_loss=80796977152.0000 entropy=17.6657 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 174700] reward=-124719341.5 actor_loss=0.3023 critic_loss=95437032106.6667 entropy=17.6538 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 174700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-607218.6 mean_steps=12.0
|
|
[Episode 174710] reward=-117023551.5 actor_loss=0.3680 critic_loss=78806198717.2174 entropy=17.6512 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 174720] reward=-118202695.2 actor_loss=0.2886 critic_loss=78282382252.9730 entropy=17.6560 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 174720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527743.0 mean_steps=14.1
|
|
[Episode 174730] reward=-120931331.8 actor_loss=0.2361 critic_loss=81755884930.8445 entropy=17.6444 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 174740] reward=-121879921.0 actor_loss=0.3189 critic_loss=118778656267.3778 entropy=17.6501 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 174740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532400.7 mean_steps=14.3
|
|
[Episode 174750] reward=-118406636.1 actor_loss=0.2887 critic_loss=79254354944.0000 entropy=17.6311 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 174760] reward=-117660176.7 actor_loss=0.3043 critic_loss=79386413010.4889 entropy=17.6252 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 174760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544065.0 mean_steps=14.2
|
|
[Episode 174770] reward=-116308072.9 actor_loss=0.3002 critic_loss=77269919152.3556 entropy=17.6226 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 174780] reward=-122234463.1 actor_loss=0.3186 critic_loss=85952670281.1429 entropy=17.6300 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 174780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-494781.3 mean_steps=15.1
|
|
[Episode 174790] reward=-119712235.7 actor_loss=0.2921 critic_loss=80845185934.2222 entropy=17.6369 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 174800] reward=-116372275.7 actor_loss=0.3624 critic_loss=82092025297.4545 entropy=17.6336 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 174800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379428.8 mean_steps=15.8
|
|
[Episode 174810] reward=-118368655.7 actor_loss=0.3292 critic_loss=86308565691.3171 entropy=17.6414 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 174820] reward=-115709275.3 actor_loss=0.3347 critic_loss=82847426497.9394 entropy=17.6226 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 174820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-416447.5 mean_steps=16.2
|
|
[Episode 174830] reward=-121071625.0 actor_loss=0.2870 critic_loss=94871230691.5556 entropy=17.6355 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 174840] reward=-119148484.1 actor_loss=0.2304 critic_loss=86715404288.0000 entropy=17.6229 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 174840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-719744.5 mean_steps=12.7
|
|
[Episode 174850] reward=-123430344.3 actor_loss=0.3059 critic_loss=108794167623.6800 entropy=17.6100 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 174860] reward=-117685834.8 actor_loss=0.3838 critic_loss=82533341593.6000 entropy=17.6098 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 174860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480872.9 mean_steps=15.1
|
|
[Episode 174870] reward=-123417731.8 actor_loss=0.3803 critic_loss=160021942604.1081 entropy=17.6165 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 174880] reward=-122106159.9 actor_loss=0.2491 critic_loss=100138971782.7368 entropy=17.6101 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 174880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599868.3 mean_steps=12.9
|
|
[Episode 174890] reward=-120601688.5 actor_loss=0.2343 critic_loss=85431468404.3636 entropy=17.6121 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 174900] reward=-112712012.5 actor_loss=0.3233 critic_loss=80675803627.5200 entropy=17.6097 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 174900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477199.4 mean_steps=14.9
|
|
[Episode 174910] reward=-116711006.1 actor_loss=0.2748 critic_loss=82432022625.5238 entropy=17.5971 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 174920] reward=-120413099.2 actor_loss=0.2386 critic_loss=87953450052.2667 entropy=17.5951 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 174920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-605683.8 mean_steps=12.7
|
|
[Episode 174930] reward=-114566831.6 actor_loss=0.3846 critic_loss=78989114895.5152 entropy=17.6017 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 174940] reward=-118169715.1 actor_loss=0.3760 critic_loss=84462841615.0588 entropy=17.5971 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 174940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-623211.8 mean_steps=14.2
|
|
[Episode 174950] reward=-119299799.9 actor_loss=0.3596 critic_loss=86564856100.5714 entropy=17.6009 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 174960] reward=-121634539.6 actor_loss=0.3050 critic_loss=83663594200.1778 entropy=17.5851 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 174960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491001.8 mean_steps=14.0
|
|
[Episode 174970] reward=-119389463.7 actor_loss=0.2691 critic_loss=86272545490.8235 entropy=17.5669 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 174980] reward=-119780480.6 actor_loss=0.3755 critic_loss=85423617280.0000 entropy=17.5638 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 174980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-401604.3 mean_steps=15.6
|
|
[Episode 174990] reward=-119801288.3 actor_loss=0.3100 critic_loss=87830098550.1538 entropy=17.5708 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 175000] reward=-115284092.8 actor_loss=0.2798 critic_loss=77051297063.8222 entropy=17.5508 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 175000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-516649.5 mean_steps=13.6
|
|
[Episode 175010] reward=-119082050.5 actor_loss=0.2949 critic_loss=83748368384.0000 entropy=17.5451 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 175020] reward=-118637378.9 actor_loss=0.3191 critic_loss=85308623140.5714 entropy=17.5438 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 175020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569358.1 mean_steps=13.2
|
|
[Episode 175030] reward=-115512355.6 actor_loss=0.3020 critic_loss=79129232015.3600 entropy=17.5390 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 175040] reward=-124438567.7 actor_loss=0.1866 critic_loss=88479975424.0000 entropy=17.5342 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 175040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-533503.2 mean_steps=15.2
|
|
[Episode 175050] reward=-116398367.8 actor_loss=0.3297 critic_loss=80462434596.5714 entropy=17.5364 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 175060] reward=-123842284.1 actor_loss=0.2667 critic_loss=90093566109.5385 entropy=17.5256 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 175060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534274.2 mean_steps=13.3
|
|
[Episode 175070] reward=-118565687.8 actor_loss=0.2293 critic_loss=84379857387.5200 entropy=17.5305 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 175080] reward=-119778648.4 actor_loss=0.2953 critic_loss=84295161514.6667 entropy=17.5327 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 175080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-483543.0 mean_steps=15.9
|
|
[Episode 175090] reward=-117833252.9 actor_loss=0.4230 critic_loss=78198447347.8095 entropy=17.5319 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1517 front_blocked=0
|
|
[Episode 175100] reward=-117708865.4 actor_loss=0.2791 critic_loss=78740272250.8800 entropy=17.5273 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 175100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540939.8 mean_steps=14.2
|
|
[Episode 175110] reward=-122071626.8 actor_loss=0.1764 critic_loss=82152553221.6889 entropy=17.5127 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 175120] reward=-120243906.7 actor_loss=0.3005 critic_loss=85385691136.0000 entropy=17.5191 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 175120] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-273174.0 mean_steps=18.2
|
|
[Episode 175130] reward=-123936131.4 actor_loss=0.2502 critic_loss=85662108160.0000 entropy=17.5165 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 175140] reward=-117463555.5 actor_loss=0.4241 critic_loss=83508508409.4359 entropy=17.5180 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 175140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500145.6 mean_steps=15.2
|
|
[Episode 175150] reward=-117829871.6 actor_loss=0.2260 critic_loss=80024052208.4848 entropy=17.5180 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 175160] reward=-121336703.2 actor_loss=0.3133 critic_loss=82533579889.7778 entropy=17.5154 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 175160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427398.1 mean_steps=15.3
|
|
[Episode 175170] reward=-117437355.7 actor_loss=0.2718 critic_loss=79981917306.8800 entropy=17.5261 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 175180] reward=-115490579.4 actor_loss=0.4268 critic_loss=81268649560.2759 entropy=17.5157 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 175180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614993.4 mean_steps=12.5
|
|
[Episode 175190] reward=-121683691.4 actor_loss=0.2554 critic_loss=88534969230.2222 entropy=17.5189 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 175200] reward=-124630097.9 actor_loss=0.2491 critic_loss=87279882649.6000 entropy=17.5177 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 175200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431611.7 mean_steps=14.1
|
|
[Episode 175210] reward=-116176963.9 actor_loss=0.3028 critic_loss=76339505288.5333 entropy=17.5222 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 175220] reward=-121416756.8 actor_loss=0.2389 critic_loss=85778186682.8108 entropy=17.5237 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 175220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458022.3 mean_steps=15.6
|
|
[Episode 175230] reward=-115915011.8 actor_loss=0.3278 critic_loss=80123402012.4444 entropy=17.5425 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 175240] reward=-119777492.5 actor_loss=0.2867 critic_loss=80408950626.4615 entropy=17.5509 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 175240] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-724040.2 mean_steps=11.6
|
|
[Episode 175250] reward=-116832318.6 actor_loss=0.3099 critic_loss=79740725061.8182 entropy=17.5421 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 175260] reward=-112981787.2 actor_loss=0.2873 critic_loss=75523817115.8261 entropy=17.5487 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 175260] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-674655.8 mean_steps=11.3
|
|
[Episode 175270] reward=-122940610.2 actor_loss=0.3217 critic_loss=85151016667.4286 entropy=17.5529 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 175280] reward=-117734433.6 actor_loss=0.2573 critic_loss=80519074816.0000 entropy=17.5489 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 175280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499664.5 mean_steps=14.1
|
|
[Episode 175290] reward=-114750618.9 actor_loss=0.3729 critic_loss=76644760429.7143 entropy=17.5489 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 175300] reward=-121472756.2 actor_loss=0.2940 critic_loss=85472818869.6774 entropy=17.5397 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 175300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-511273.2 mean_steps=12.8
|
|
[Episode 175310] reward=-122924050.2 actor_loss=0.2317 critic_loss=87518109199.5152 entropy=17.5289 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 175320] reward=-118162791.9 actor_loss=0.3602 critic_loss=82250405205.3333 entropy=17.5381 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 175320] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-605935.9 mean_steps=10.8
|
|
[Episode 175330] reward=-117000133.6 actor_loss=0.3225 critic_loss=77830545092.9231 entropy=17.5243 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 175340] reward=-118695527.5 actor_loss=0.3820 critic_loss=82553879405.7143 entropy=17.5295 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 175340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-365665.7 mean_steps=14.9
|
|
[Episode 175350] reward=-120152191.8 actor_loss=0.3242 critic_loss=83630647614.5778 entropy=17.5245 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 175360] reward=-120989246.4 actor_loss=0.3080 critic_loss=87938506361.9048 entropy=17.5190 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 175360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526271.1 mean_steps=13.1
|
|
[Episode 175370] reward=-117683590.2 actor_loss=0.3082 critic_loss=79133067605.3333 entropy=17.5273 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 175380] reward=-122995539.2 actor_loss=0.2173 critic_loss=82642369266.5263 entropy=17.5196 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 175380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500883.2 mean_steps=13.7
|
|
[Episode 175390] reward=-120850794.5 actor_loss=0.2762 critic_loss=84512239988.3636 entropy=17.5220 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 175400] reward=-119211379.8 actor_loss=0.3710 critic_loss=82850600150.3256 entropy=17.5233 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 175400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-372418.5 mean_steps=16.0
|
|
[Episode 175410] reward=-118497675.8 actor_loss=0.2924 critic_loss=80778536172.3077 entropy=17.5255 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 175420] reward=-120656480.2 actor_loss=0.3952 critic_loss=83793585573.6471 entropy=17.5235 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 175420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549455.4 mean_steps=14.2
|
|
[Episode 175430] reward=-121448935.3 actor_loss=0.3051 critic_loss=85849859623.3846 entropy=17.5250 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 175440] reward=-121954279.6 actor_loss=0.3124 critic_loss=89119711596.0889 entropy=17.5168 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 175440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-585156.1 mean_steps=13.7
|
|
[Episode 175450] reward=-121853615.3 actor_loss=0.2716 critic_loss=83845401941.3333 entropy=17.5198 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 175460] reward=-121389595.2 actor_loss=0.2312 critic_loss=80216269281.8824 entropy=17.5156 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 175460] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-359268.1 mean_steps=17.2
|
|
[Episode 175470] reward=-116254536.4 actor_loss=0.3008 critic_loss=82957980558.2222 entropy=17.5022 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 175480] reward=-119239474.7 actor_loss=0.3592 critic_loss=85523177472.0000 entropy=17.5134 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 175480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-631952.5 mean_steps=13.0
|
|
[Episode 175490] reward=-117322823.6 actor_loss=0.3798 critic_loss=82481931170.9091 entropy=17.5162 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 175500] reward=-116914693.5 actor_loss=0.2899 critic_loss=77942130278.4000 entropy=17.5159 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 175500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-345208.6 mean_steps=15.6
|
|
[Episode 175510] reward=-121692346.5 actor_loss=0.2799 critic_loss=83269478172.4444 entropy=17.5030 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 175520] reward=-115171125.4 actor_loss=0.2744 critic_loss=78176614627.5556 entropy=17.4965 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 175520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578113.0 mean_steps=12.7
|
|
[Episode 175530] reward=-117010026.3 actor_loss=0.3846 critic_loss=83341407378.2857 entropy=17.4987 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 175540] reward=-119601515.4 actor_loss=0.3101 critic_loss=83077115552.9143 entropy=17.5011 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 175540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587594.8 mean_steps=13.6
|
|
[Episode 175550] reward=-119022725.4 actor_loss=0.2702 critic_loss=80464852397.4194 entropy=17.5110 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 175560] reward=-117067288.3 actor_loss=0.2346 critic_loss=80183892125.5385 entropy=17.5051 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 175560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-448151.3 mean_steps=16.4
|
|
[Episode 175570] reward=-115780836.2 actor_loss=0.2494 critic_loss=77807527865.3793 entropy=17.5112 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 175580] reward=-116312019.1 actor_loss=0.3488 critic_loss=80143738946.0645 entropy=17.5144 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 175580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484395.2 mean_steps=14.6
|
|
[Episode 175590] reward=-114322811.4 actor_loss=0.3206 critic_loss=77793241770.6667 entropy=17.5246 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 175600] reward=-120505093.0 actor_loss=0.3349 critic_loss=86067988252.4444 entropy=17.5523 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 175600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591876.1 mean_steps=13.4
|
|
[Episode 175610] reward=-118470333.9 actor_loss=0.2853 critic_loss=82645954468.9778 entropy=17.5246 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 175620] reward=-118554604.1 actor_loss=0.2881 critic_loss=80872552675.5556 entropy=17.5328 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 175620] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-343137.0 mean_steps=17.7
|
|
[Episode 175630] reward=-116392638.4 actor_loss=0.3606 critic_loss=81800640739.5556 entropy=17.5254 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 175640] reward=-119143046.4 actor_loss=0.2559 critic_loss=82491367610.1818 entropy=17.5109 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 175640] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612657.6 mean_steps=11.9
|
|
[Episode 175650] reward=-116556692.3 actor_loss=0.2139 critic_loss=80039928951.0698 entropy=17.5176 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 175660] reward=-111777441.6 actor_loss=0.3893 critic_loss=79931423857.7778 entropy=17.5045 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 175660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473678.4 mean_steps=13.8
|
|
[Episode 175670] reward=-106111904.0 actor_loss=0.3684 critic_loss=71827997309.1555 entropy=17.5270 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 175680] reward=-116219452.9 actor_loss=0.2996 critic_loss=84367677303.4667 entropy=17.5275 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 175680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528738.1 mean_steps=14.4
|
|
[Episode 175690] reward=-113440422.8 actor_loss=0.3144 critic_loss=77360699255.4667 entropy=17.5212 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 175700] reward=-121616808.8 actor_loss=0.2176 critic_loss=82703688044.0889 entropy=17.5159 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 175700] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-630119.6 mean_steps=10.8
|
|
[Episode 175710] reward=-119084822.5 actor_loss=0.3124 critic_loss=86028878370.1333 entropy=17.5094 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 175720] reward=-119878516.0 actor_loss=0.1586 critic_loss=80212033154.9767 entropy=17.5039 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 175720] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-655953.5 mean_steps=11.2
|
|
[Episode 175730] reward=-112290398.9 actor_loss=0.3490 critic_loss=74848599848.4211 entropy=17.5160 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 175740] reward=-121428246.6 actor_loss=0.2711 critic_loss=90074144403.9111 entropy=17.5191 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 175740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537785.6 mean_steps=13.3
|
|
[Episode 175750] reward=-113296397.3 actor_loss=0.3682 critic_loss=76119190186.6667 entropy=17.5135 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 175760] reward=-108303561.3 actor_loss=0.3008 critic_loss=73987587150.7692 entropy=17.5263 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 175760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451512.8 mean_steps=14.3
|
|
[Episode 175770] reward=-117298606.3 actor_loss=0.3351 critic_loss=76154347884.0889 entropy=17.5283 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 175780] reward=-122590791.6 actor_loss=0.3229 critic_loss=89232218794.6667 entropy=17.5242 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 175780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442275.4 mean_steps=15.5
|
|
[Episode 175790] reward=-114387491.9 actor_loss=0.3149 critic_loss=76458461742.5455 entropy=17.5223 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 175800] reward=-122598462.2 actor_loss=0.3403 critic_loss=85176736067.3684 entropy=17.5325 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 175800] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-612887.4 mean_steps=10.8
|
|
[Episode 175810] reward=-116313224.2 actor_loss=0.2924 critic_loss=81658122539.7073 entropy=17.5446 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 175820] reward=-121001180.5 actor_loss=0.2706 critic_loss=82447518881.6842 entropy=17.5342 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 175820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-457162.1 mean_steps=13.8
|
|
[Episode 175830] reward=-116551651.5 actor_loss=0.2128 critic_loss=83086329406.4390 entropy=17.5523 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 175840] reward=-115733325.0 actor_loss=0.2568 critic_loss=77748254583.4667 entropy=17.5379 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 175840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-410955.4 mean_steps=15.8
|
|
[Episode 175850] reward=-118947923.3 actor_loss=0.2552 critic_loss=83189632078.7692 entropy=17.5589 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 175860] reward=-119743927.9 actor_loss=0.2856 critic_loss=81505436922.3111 entropy=17.5464 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 175860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445508.5 mean_steps=15.1
|
|
[Episode 175870] reward=-119061650.3 actor_loss=0.3363 critic_loss=134347711049.1429 entropy=17.5522 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 175880] reward=-117254716.9 actor_loss=0.2986 critic_loss=80121832789.3333 entropy=17.5737 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 175880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478726.1 mean_steps=14.7
|
|
[Episode 175890] reward=-115569070.8 actor_loss=0.2341 critic_loss=75305012428.8000 entropy=17.5680 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 175900] reward=-123232889.2 actor_loss=0.2100 critic_loss=84012501947.7333 entropy=17.5644 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 175900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566518.7 mean_steps=12.6
|
|
[Episode 175910] reward=-118189755.4 actor_loss=0.3520 critic_loss=81783171208.5333 entropy=17.5752 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 175920] reward=-120876608.1 actor_loss=0.2890 critic_loss=86970888533.3333 entropy=17.5632 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 175920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505405.0 mean_steps=13.9
|
|
[Episode 175930] reward=-118436342.8 actor_loss=0.3217 critic_loss=80499562723.5556 entropy=17.5657 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 175940] reward=-115918571.7 actor_loss=0.3184 critic_loss=80922555733.3333 entropy=17.5690 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 175940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548246.1 mean_steps=13.2
|
|
[Episode 175950] reward=-117889322.9 actor_loss=0.2156 critic_loss=81133519667.2000 entropy=17.5602 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 175960] reward=-118446077.2 actor_loss=0.3512 critic_loss=81032050874.1818 entropy=17.5573 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 175960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492951.2 mean_steps=14.0
|
|
[Episode 175970] reward=-116015241.2 actor_loss=0.2593 critic_loss=74316121884.4444 entropy=17.5691 approx_kl=0.0044 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 175980] reward=-115028857.3 actor_loss=0.2464 critic_loss=82911029306.5143 entropy=17.5662 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 175980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514091.2 mean_steps=13.8
|
|
[Episode 175990] reward=-118017091.3 actor_loss=0.3400 critic_loss=80243702101.3333 entropy=17.5718 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 176000] reward=-117889628.4 actor_loss=0.3173 critic_loss=84874199222.0444 entropy=17.5553 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 176000] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-716335.9 mean_steps=11.7
|
|
[Episode 176010] reward=-114490146.0 actor_loss=0.3549 critic_loss=77301635686.4000 entropy=17.5451 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 176020] reward=-107159039.3 actor_loss=0.3661 critic_loss=73664974301.8667 entropy=17.5352 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 176020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-441562.4 mean_steps=16.4
|
|
[Episode 176030] reward=-115178693.5 actor_loss=0.3441 critic_loss=78976039685.6889 entropy=17.5236 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 176040] reward=-116070607.9 actor_loss=0.2969 critic_loss=81170695418.3111 entropy=17.5140 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 176040] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-672961.3 mean_steps=10.9
|
|
[Episode 176050] reward=-118267072.6 actor_loss=0.2772 critic_loss=84492296704.0000 entropy=17.5262 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 176060] reward=-118574350.3 actor_loss=0.3319 critic_loss=77221307096.1778 entropy=17.5191 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 176060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446966.0 mean_steps=14.4
|
|
[Episode 176070] reward=-116316644.6 actor_loss=0.3875 critic_loss=81275163115.5200 entropy=17.5061 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 176080] reward=-114514155.6 actor_loss=0.3692 critic_loss=82605751016.7273 entropy=17.5123 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 176080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527061.9 mean_steps=14.1
|
|
[Episode 176090] reward=-120627703.2 actor_loss=0.2684 critic_loss=84813102193.7778 entropy=17.5075 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 176100] reward=-109780314.2 actor_loss=0.3613 critic_loss=76792320455.1111 entropy=17.5146 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 176100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-630269.8 mean_steps=12.7
|
|
[Episode 176110] reward=-113908717.6 actor_loss=0.3462 critic_loss=79774727909.5172 entropy=17.5180 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 176120] reward=-119650083.6 actor_loss=0.1257 critic_loss=82016880025.6000 entropy=17.5257 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 176120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-600382.1 mean_steps=11.7
|
|
[Episode 176130] reward=-118510880.2 actor_loss=0.3032 critic_loss=80962512304.3556 entropy=17.5215 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 176140] reward=-118685308.5 actor_loss=0.2560 critic_loss=88042321975.3513 entropy=17.5180 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 176140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497644.0 mean_steps=14.5
|
|
[Episode 176150] reward=-116419104.0 actor_loss=0.2745 critic_loss=76768297301.3333 entropy=17.5091 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 176160] reward=-114935955.7 actor_loss=0.3342 critic_loss=78159156471.1724 entropy=17.5221 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 176160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561166.8 mean_steps=13.0
|
|
[Episode 176170] reward=-113955466.6 actor_loss=0.3578 critic_loss=80202953334.1538 entropy=17.5121 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 176180] reward=-114374195.8 actor_loss=0.3382 critic_loss=76557203683.5556 entropy=17.5087 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 176180] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-682432.4 mean_steps=11.5
|
|
[Episode 176190] reward=-120036188.6 actor_loss=0.3299 critic_loss=88301773892.2667 entropy=17.5075 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 176200] reward=-119976640.0 actor_loss=0.2675 critic_loss=84013013594.3529 entropy=17.5071 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 176200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540768.7 mean_steps=13.2
|
|
[Episode 176210] reward=-118835527.5 actor_loss=0.3140 critic_loss=79048770901.3333 entropy=17.5308 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 176220] reward=-116705262.4 actor_loss=0.3877 critic_loss=91081706882.8445 entropy=17.5456 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 176220] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-617523.6 mean_steps=11.8
|
|
[Episode 176230] reward=-112838568.3 actor_loss=0.2135 critic_loss=79930713344.0000 entropy=17.5350 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 176240] reward=-111481383.8 actor_loss=0.3521 critic_loss=75691500319.2195 entropy=17.5253 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 176240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-375759.1 mean_steps=15.8
|
|
[Episode 176250] reward=-113518721.7 actor_loss=0.4503 critic_loss=74152243324.1212 entropy=17.5116 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1549 front_blocked=0
|
|
[Episode 176260] reward=-116563632.4 actor_loss=0.3457 critic_loss=75455145392.3556 entropy=17.4986 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 176260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431122.1 mean_steps=15.1
|
|
[Episode 176270] reward=-118497331.6 actor_loss=0.3561 critic_loss=77498461622.8571 entropy=17.5066 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 176280] reward=-119097350.9 actor_loss=0.3865 critic_loss=84658957912.2759 entropy=17.5034 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 176280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-615280.5 mean_steps=11.9
|
|
[Episode 176290] reward=-117306294.0 actor_loss=0.3579 critic_loss=80965867677.5385 entropy=17.5159 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 176300] reward=-117411865.7 actor_loss=0.2943 critic_loss=79249203200.0000 entropy=17.5284 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 176300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503095.5 mean_steps=14.6
|
|
[Episode 176310] reward=-120380073.2 actor_loss=0.3024 critic_loss=86967969941.8537 entropy=17.5114 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 176320] reward=-114182947.9 actor_loss=0.3169 critic_loss=76182957189.5652 entropy=17.5060 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 176320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-573388.3 mean_steps=12.5
|
|
[Episode 176330] reward=-117255804.5 actor_loss=0.2580 critic_loss=82469838217.8462 entropy=17.5037 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 176340] reward=-113127129.7 actor_loss=0.3367 critic_loss=76547869445.6889 entropy=17.4990 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 176340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-373214.5 mean_steps=16.5
|
|
[Episode 176350] reward=-118629542.3 actor_loss=0.3549 critic_loss=85246703616.0000 entropy=17.4982 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 176360] reward=-115194984.2 actor_loss=0.3408 critic_loss=78024579559.6190 entropy=17.4802 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 176360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602891.7 mean_steps=12.6
|
|
[Episode 176370] reward=-114691729.0 actor_loss=0.3483 critic_loss=77165825923.8788 entropy=17.4691 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 176380] reward=-121105880.5 actor_loss=0.2606 critic_loss=88256255317.3333 entropy=17.4753 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 176380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458246.7 mean_steps=15.5
|
|
[Episode 176390] reward=-212660836.7 actor_loss=1.2316 critic_loss=37355452704722.4922 entropy=17.4889 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 176400] reward=-120770196.4 actor_loss=0.2950 critic_loss=80955059200.0000 entropy=17.4818 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 176400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-618979.2 mean_steps=13.0
|
|
[Episode 176410] reward=-121813689.8 actor_loss=0.2877 critic_loss=92600723188.8696 entropy=17.4794 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 176420] reward=-118879581.5 actor_loss=0.3061 critic_loss=161909535812.2667 entropy=17.4786 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 176420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-394225.2 mean_steps=15.3
|
|
[Episode 176430] reward=-132421686.4 actor_loss=0.3207 critic_loss=797898733688.4706 entropy=17.4862 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 176440] reward=-114689496.1 actor_loss=0.3092 critic_loss=79191742464.0000 entropy=17.5003 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 176440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475451.4 mean_steps=13.8
|
|
[Episode 176450] reward=-117623147.2 actor_loss=0.3073 critic_loss=86257799912.7273 entropy=17.4960 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 176460] reward=-117954056.0 actor_loss=0.2974 critic_loss=81846422186.6667 entropy=17.5074 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 176460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564290.0 mean_steps=13.4
|
|
[Episode 176470] reward=-115190513.0 actor_loss=0.3997 critic_loss=82932283205.8182 entropy=17.5073 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 176480] reward=-118835904.1 actor_loss=0.2249 critic_loss=87828383607.4667 entropy=17.5179 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 176480] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-577289.7 mean_steps=11.6
|
|
[Episode 176490] reward=-112980906.4 actor_loss=0.3829 critic_loss=75417061888.0000 entropy=17.5321 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 176500] reward=-116319546.9 actor_loss=0.2840 critic_loss=228954739545.3023 entropy=17.5345 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 176500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510843.1 mean_steps=13.8
|
|
[Episode 176510] reward=-111963130.4 actor_loss=0.2799 critic_loss=74436266978.7429 entropy=17.6107 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 176520] reward=-115120033.9 actor_loss=0.3533 critic_loss=82323970925.7143 entropy=17.6076 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 176520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434442.7 mean_steps=15.7
|
|
[Episode 176530] reward=-173384607.3 actor_loss=0.2556 critic_loss=7592750629956.2666 entropy=17.6136 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 176540] reward=-119397459.2 actor_loss=0.2815 critic_loss=86055392597.3333 entropy=17.6364 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 176540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371321.3 mean_steps=15.6
|
|
[Episode 176550] reward=-118258455.2 actor_loss=0.3723 critic_loss=82371238386.1622 entropy=17.6282 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 176560] reward=-120602128.7 actor_loss=0.3583 critic_loss=82375657629.5385 entropy=17.6155 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 176560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543406.3 mean_steps=13.2
|
|
[Episode 176570] reward=-113771651.1 actor_loss=0.3524 critic_loss=75916878088.2581 entropy=17.6230 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 176580] reward=-119891786.1 actor_loss=0.2837 critic_loss=82253079984.3556 entropy=17.6060 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 176580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-607206.3 mean_steps=13.8
|
|
[Episode 176590] reward=-116610961.7 actor_loss=0.3325 critic_loss=78344885504.0000 entropy=17.6070 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 176600] reward=-118070139.7 actor_loss=0.3338 critic_loss=86460781454.2222 entropy=17.6093 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 176600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444136.4 mean_steps=15.8
|
|
[Episode 176610] reward=-113904625.8 actor_loss=0.3798 critic_loss=88367935943.1111 entropy=17.5984 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 176620] reward=-122124704.3 actor_loss=0.2478 critic_loss=235236832918.5882 entropy=17.6060 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 176620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515372.3 mean_steps=14.3
|
|
[Episode 176630] reward=-115921754.3 actor_loss=0.3361 critic_loss=84347195977.1429 entropy=17.6085 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 176640] reward=-115695494.4 actor_loss=0.4299 critic_loss=83939668964.3243 entropy=17.5922 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 176640] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-625014.4 mean_steps=11.3
|
|
[Episode 176650] reward=-116132134.9 actor_loss=0.2385 critic_loss=91945691515.2593 entropy=17.5787 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 176660] reward=-120324127.5 actor_loss=0.2549 critic_loss=88357439563.8519 entropy=17.5806 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 176660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-335941.3 mean_steps=15.6
|
|
[Episode 176670] reward=-117067622.9 actor_loss=0.3067 critic_loss=123943910286.2222 entropy=17.5685 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 176680] reward=-120327941.1 actor_loss=0.2450 critic_loss=100138478250.6667 entropy=17.5609 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 176680] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-679976.7 mean_steps=10.6
|
|
[Episode 176690] reward=-119869222.5 actor_loss=0.2545 critic_loss=83684747036.4444 entropy=17.5701 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 176700] reward=-120080490.1 actor_loss=0.2265 critic_loss=84464166866.4889 entropy=17.5730 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 176700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-461990.3 mean_steps=13.7
|
|
[Episode 176710] reward=-121057285.8 actor_loss=0.3434 critic_loss=90061905737.9556 entropy=17.5687 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 176720] reward=-118606515.0 actor_loss=0.3491 critic_loss=86322472732.4444 entropy=17.5851 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 176720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614143.0 mean_steps=13.1
|
|
[Episode 176730] reward=-121114352.2 actor_loss=0.2606 critic_loss=96334689777.3714 entropy=17.5775 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 176740] reward=-116626639.7 actor_loss=0.3320 critic_loss=77977724700.4444 entropy=17.5865 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 176740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-390742.0 mean_steps=15.9
|
|
[Episode 176750] reward=-120414640.6 actor_loss=0.2548 critic_loss=84442893880.8889 entropy=17.5963 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 176760] reward=-117325604.9 actor_loss=0.2321 critic_loss=79629416857.6000 entropy=17.6037 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 176760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-432147.0 mean_steps=14.6
|
|
[Episode 176770] reward=-122603959.5 actor_loss=0.2596 critic_loss=85838374736.4571 entropy=17.5872 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 176780] reward=-118290609.8 actor_loss=0.3473 critic_loss=77834323606.5882 entropy=17.5907 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 176780] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-592255.2 mean_steps=10.9
|
|
[Episode 176790] reward=-115916137.8 actor_loss=0.4046 critic_loss=73667301282.9091 entropy=17.5888 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 176800] reward=-119965388.5 actor_loss=0.2113 critic_loss=86525247670.0444 entropy=17.5906 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 176800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-525585.2 mean_steps=15.1
|
|
[Episode 176810] reward=-118222626.1 actor_loss=0.3114 critic_loss=84484627751.8222 entropy=17.5942 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 176820] reward=-115771282.7 actor_loss=0.3645 critic_loss=86592415061.3333 entropy=17.5764 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 176820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624878.7 mean_steps=13.1
|
|
[Episode 176830] reward=-118888049.5 actor_loss=0.2755 critic_loss=80697352100.9778 entropy=17.5815 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 176840] reward=-122795693.9 actor_loss=0.2115 critic_loss=106152082545.7778 entropy=17.5848 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 176840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-596363.6 mean_steps=12.5
|
|
[Episode 176850] reward=-118345623.5 actor_loss=0.2936 critic_loss=80486600508.9524 entropy=17.5934 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 176860] reward=-120479292.5 actor_loss=0.2749 critic_loss=85516942729.8462 entropy=17.5975 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 176860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503205.2 mean_steps=13.9
|
|
[Episode 176870] reward=-120569830.9 actor_loss=0.1828 critic_loss=85177911768.6154 entropy=17.5807 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 176880] reward=-122706115.3 actor_loss=0.2362 critic_loss=88868666910.1176 entropy=17.5881 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 176880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433281.8 mean_steps=15.3
|
|
[Episode 176890] reward=-118379872.9 actor_loss=0.2972 critic_loss=83124653624.8889 entropy=17.5790 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 176900] reward=-120310190.3 actor_loss=0.2788 critic_loss=85045751282.8718 entropy=17.5829 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 176900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453635.6 mean_steps=15.4
|
|
[Episode 176910] reward=-121360862.2 actor_loss=0.3439 critic_loss=83173024399.3600 entropy=17.5911 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 176920] reward=-122384328.1 actor_loss=0.2552 critic_loss=85406345534.5778 entropy=17.5773 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 176920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391153.2 mean_steps=15.9
|
|
[Episode 176930] reward=-122362779.0 actor_loss=0.1967 critic_loss=84789902449.7778 entropy=17.5721 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 176940] reward=-118256399.7 actor_loss=0.3362 critic_loss=87515189248.0000 entropy=17.5750 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 176940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578563.8 mean_steps=12.8
|
|
[Episode 176950] reward=-118729107.0 actor_loss=0.3683 critic_loss=82449449566.8148 entropy=17.5663 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 176960] reward=-117032813.2 actor_loss=0.4124 critic_loss=80540259597.4737 entropy=17.5574 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 176960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-680853.4 mean_steps=13.6
|
|
[Episode 176970] reward=-118736551.3 actor_loss=0.3669 critic_loss=83702381824.0000 entropy=17.5523 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 176980] reward=-122262986.6 actor_loss=0.2212 critic_loss=85602753355.2941 entropy=17.5545 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 176980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-554582.6 mean_steps=14.4
|
|
[Episode 176990] reward=-113622694.1 actor_loss=0.2927 critic_loss=75111423283.2000 entropy=17.5468 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 177000] reward=-116536170.8 actor_loss=0.3228 critic_loss=76882816614.4000 entropy=17.5495 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 177000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-403462.3 mean_steps=15.1
|
|
[Episode 177010] reward=-116162661.2 actor_loss=0.2791 critic_loss=77269451480.1778 entropy=17.5564 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 177020] reward=-120887919.3 actor_loss=0.2744 critic_loss=87418969755.8261 entropy=17.5713 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 177020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-352674.5 mean_steps=15.7
|
|
[Episode 177030] reward=-121379146.7 actor_loss=0.2832 critic_loss=85912586922.6667 entropy=17.5721 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 177040] reward=-121878541.6 actor_loss=0.3958 critic_loss=89105890210.9091 entropy=17.5839 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 177040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-336798.4 mean_steps=15.7
|
|
[Episode 177050] reward=-121526672.3 actor_loss=0.2563 critic_loss=88624209100.8000 entropy=17.5860 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 177060] reward=-117295789.4 actor_loss=0.4463 critic_loss=79713706415.1579 entropy=17.5795 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 177060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538949.1 mean_steps=13.3
|
|
[Episode 177070] reward=-123354544.5 actor_loss=0.3212 critic_loss=87670420041.1429 entropy=17.5858 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 177080] reward=-118276829.6 actor_loss=0.2008 critic_loss=81400333812.0930 entropy=17.5843 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 177080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572611.1 mean_steps=12.6
|
|
[Episode 177090] reward=-119780919.2 actor_loss=0.3485 critic_loss=84480092160.0000 entropy=17.5870 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 177100] reward=-112060679.9 actor_loss=0.3233 critic_loss=75914870510.9333 entropy=17.5840 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 177100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457073.9 mean_steps=15.4
|
|
[Episode 177110] reward=-118543141.3 actor_loss=0.3396 critic_loss=160422702284.8000 entropy=17.5938 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 177120] reward=-119466934.8 actor_loss=0.1409 critic_loss=78529306988.0889 entropy=17.5984 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 177120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-523808.7 mean_steps=15.0
|
|
[Episode 177130] reward=-116003788.2 actor_loss=0.3351 critic_loss=76942354750.5778 entropy=17.6014 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 177140] reward=-123822311.9 actor_loss=0.2052 critic_loss=87114826820.2667 entropy=17.6062 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 177140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574568.5 mean_steps=13.5
|
|
[Episode 177150] reward=-120109621.5 actor_loss=0.2626 critic_loss=82413844480.0000 entropy=17.5990 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 177160] reward=-119500252.3 actor_loss=0.3148 critic_loss=85625867264.0000 entropy=17.5962 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 177160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-494014.0 mean_steps=14.6
|
|
[Episode 177170] reward=-143412436.0 actor_loss=0.2743 critic_loss=2337465416635.7334 entropy=17.6031 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 177180] reward=-117312552.4 actor_loss=0.2811 critic_loss=79303283020.1081 entropy=17.5962 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 177180] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-627614.7 mean_steps=12.2
|
|
[Episode 177190] reward=-119996380.2 actor_loss=0.3450 critic_loss=82425004214.0444 entropy=17.6095 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 177200] reward=-120653731.6 actor_loss=0.3527 critic_loss=82126511581.8667 entropy=17.5961 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 177200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489233.3 mean_steps=13.6
|
|
[Episode 177210] reward=-124187813.3 actor_loss=0.1962 critic_loss=88650776858.4828 entropy=17.5776 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 177220] reward=-119717217.8 actor_loss=0.2571 critic_loss=82054369826.1333 entropy=17.5845 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 177220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415015.9 mean_steps=15.9
|
|
[Episode 177230] reward=-126190037.3 actor_loss=0.3117 critic_loss=249821716176.5926 entropy=17.5797 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 177240] reward=-120483339.8 actor_loss=0.3287 critic_loss=104212461275.4286 entropy=17.5815 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 177240] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-310939.0 mean_steps=16.4
|
|
[Episode 177250] reward=-116034410.3 actor_loss=0.3924 critic_loss=78090894987.6364 entropy=17.5697 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 177260] reward=-118980612.2 actor_loss=0.2791 critic_loss=81590983429.6889 entropy=17.5604 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 177260] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-674455.6 mean_steps=11.9
|
|
[Episode 177270] reward=-115136306.4 actor_loss=0.3577 critic_loss=86565199872.0000 entropy=17.5563 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 177280] reward=-117496052.6 actor_loss=0.2460 critic_loss=75443709542.4000 entropy=17.5638 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 177280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474760.6 mean_steps=13.9
|
|
[Episode 177290] reward=-122189324.7 actor_loss=0.3787 critic_loss=239307751424.0000 entropy=17.5576 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 177300] reward=-182671255.9 actor_loss=0.2341 critic_loss=8310684599761.4541 entropy=17.5700 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 177300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535609.1 mean_steps=13.1
|
|
[Episode 177310] reward=-182037148.4 actor_loss=0.3199 critic_loss=13448156055688.5332 entropy=17.5665 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 177320] reward=-113895548.7 actor_loss=0.3156 critic_loss=81581426129.4545 entropy=17.5718 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 177320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379538.7 mean_steps=16.0
|
|
[Episode 177330] reward=-116546581.6 actor_loss=0.2943 critic_loss=77590290249.9556 entropy=17.5887 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 177340] reward=-113547359.3 actor_loss=0.2615 critic_loss=76883010901.3333 entropy=17.5891 approx_kl=0.0041 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 177340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-383729.1 mean_steps=14.2
|
|
[Episode 177350] reward=-116011743.4 actor_loss=0.2915 critic_loss=86511620096.0000 entropy=17.5972 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 177360] reward=-122915619.5 actor_loss=0.2596 critic_loss=115618282390.0690 entropy=17.6150 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 177360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531384.9 mean_steps=14.5
|
|
[Episode 177370] reward=-124911090.4 actor_loss=0.3735 critic_loss=226006849217.4222 entropy=17.6024 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 177380] reward=-200486808.9 actor_loss=0.8687 critic_loss=24156881588809.1445 entropy=17.5943 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 177380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-546763.8 mean_steps=12.4
|
|
[Episode 177390] reward=-266050946.1 actor_loss=0.3462 critic_loss=47780853967166.5781 entropy=17.5932 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 177400] reward=-628386315.3 actor_loss=0.1640 critic_loss=356102677987328.0000 entropy=17.5843 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1120 front_blocked=0
|
|
[Eval 177400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-415024.0 mean_steps=15.2
|
|
[Episode 177410] reward=-173734809.7 actor_loss=0.8694 critic_loss=12962526999347.1992 entropy=17.5837 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 177420] reward=-120464608.7 actor_loss=0.2635 critic_loss=85236922105.4359 entropy=17.5885 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 177420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552489.4 mean_steps=14.3
|
|
[Episode 177430] reward=-300969938.3 actor_loss=0.3109 critic_loss=110563243525552.3594 entropy=17.5997 approx_kl=-0.0030 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 177440] reward=-411608743.8 actor_loss=0.3268 critic_loss=190015347115076.2812 entropy=17.6077 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 177440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502347.4 mean_steps=14.1
|
|
[Episode 177450] reward=-199667174.2 actor_loss=0.3782 critic_loss=20034010661228.0898 entropy=17.6201 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 177460] reward=-372149648.8 actor_loss=0.2498 critic_loss=103942785621825.4844 entropy=17.6558 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1198 front_blocked=0
|
|
[Eval 177460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-67614677.5 mean_steps=78.0
|
|
[Episode 177470] reward=-1088487565.2 actor_loss=0.3646 critic_loss=1270377890185216.0000 entropy=17.6684 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 177480] reward=-1314449618.1 actor_loss=0.1465 critic_loss=1061675865814605.6250 entropy=17.6813 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Eval 177480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-557645.6 mean_steps=14.6
|
|
[Episode 177490] reward=-723764717.7 actor_loss=0.1601 critic_loss=251694851698315.6250 entropy=17.6983 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 177500] reward=-843841110.4 actor_loss=0.3228 critic_loss=988374484345014.0000 entropy=17.7021 approx_kl=-0.0002 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 177500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-39412109.0 mean_steps=41.2
|
|
[Episode 177510] reward=-1398077362.8 actor_loss=0.2623 critic_loss=1225477700698608.5000 entropy=17.7131 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1081 front_blocked=0
|
|
[Episode 177520] reward=-123481176.6 actor_loss=0.3022 critic_loss=96566206464.0000 entropy=17.7166 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 177520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498843.5 mean_steps=15.2
|
|
[Episode 177530] reward=-2617048657.5 actor_loss=0.4588 critic_loss=4107046233472296.0000 entropy=17.7200 approx_kl=0.0039 kl_stop=0 intervention_rate=0.0931 front_blocked=0
|
|
[Episode 177540] reward=-1137837807.3 actor_loss=0.1817 critic_loss=1170588459947349.2500 entropy=17.7320 approx_kl=0.0016 kl_stop=0 intervention_rate=0.1042 front_blocked=0
|
|
[Eval 177540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423841.8 mean_steps=15.5
|
|
[Episode 177550] reward=-641281202.8 actor_loss=0.2212 critic_loss=251854389496346.9375 entropy=17.7571 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1048 front_blocked=0
|
|
[Episode 177560] reward=-114642516.3 actor_loss=0.3527 critic_loss=93855961543.1111 entropy=17.7675 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 177560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-502566.7 mean_steps=13.2
|
|
[Episode 177570] reward=-639882239.8 actor_loss=0.2264 critic_loss=262935625853246.5625 entropy=17.7605 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1081 front_blocked=0
|
|
[Episode 177580] reward=-706922329.4 actor_loss=0.2323 critic_loss=453851459501680.3750 entropy=17.7670 approx_kl=-0.0008 kl_stop=1 intervention_rate=0.1107 front_blocked=0
|
|
[Eval 177580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-530770.4 mean_steps=12.1
|
|
[Episode 177590] reward=-4243274778.3 actor_loss=0.1293 critic_loss=14888594158125056.0000 entropy=17.7751 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 177600] reward=-121652300.6 actor_loss=0.3652 critic_loss=91993917394.4889 entropy=17.7841 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 177600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-557669.9 mean_steps=14.4
|
|
[Episode 177610] reward=-118712218.8 actor_loss=0.3927 critic_loss=94353101073.0667 entropy=17.7870 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 177620] reward=-120908371.5 actor_loss=0.2934 critic_loss=148416676932.2667 entropy=17.7995 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 177620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-320536.7 mean_steps=16.0
|
|
[Episode 177630] reward=-122555038.0 actor_loss=0.3088 critic_loss=95198768150.7556 entropy=17.8073 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 177640] reward=-614908866.8 actor_loss=0.2536 critic_loss=464013859446352.8125 entropy=17.8048 approx_kl=0.0030 kl_stop=1 intervention_rate=0.1185 front_blocked=0
|
|
[Eval 177640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-338540.9 mean_steps=16.8
|
|
[Episode 177650] reward=-120862442.5 actor_loss=0.3508 critic_loss=86514116198.4000 entropy=17.8196 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 177660] reward=-330318307.2 actor_loss=0.1983 critic_loss=108315613413011.9062 entropy=17.8322 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 177660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-569584.6 mean_steps=14.3
|
|
[Episode 177670] reward=-2964709383.8 actor_loss=0.3667 critic_loss=5678523394497650.0000 entropy=17.8475 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 177680] reward=-116312624.8 actor_loss=0.3712 critic_loss=98318315428.9778 entropy=17.8545 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 177680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-85794268.4 mean_steps=23.4
|
|
[Episode 177690] reward=-122859872.7 actor_loss=0.3074 critic_loss=115280938130.2857 entropy=17.8411 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 177700] reward=-1052158156.9 actor_loss=0.0982 critic_loss=796548364051602.2500 entropy=17.8439 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 177700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501253.1 mean_steps=14.9
|
|
[Episode 177710] reward=-122384763.6 actor_loss=0.2738 critic_loss=92105286542.2222 entropy=17.8449 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 177720] reward=-606275779.6 actor_loss=0.2305 critic_loss=254132348454320.3438 entropy=17.8693 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 177720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-572398.1 mean_steps=13.3
|
|
[Episode 177730] reward=-2903249555.0 actor_loss=0.0386 critic_loss=3741936480070314.5000 entropy=17.8743 approx_kl=0.0085 kl_stop=0 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 177740] reward=-5253010934.2 actor_loss=7.0014 critic_loss=15440808793813812.0000 entropy=17.8847 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 177740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463221.4 mean_steps=14.4
|
|
[Episode 177750] reward=-1874605287.1 actor_loss=0.0335 critic_loss=1686782746191462.5000 entropy=17.8930 approx_kl=0.0069 kl_stop=0 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 177760] reward=-2015904627.2 actor_loss=0.8825 critic_loss=2444761151569920.0000 entropy=17.9120 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 177760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517055.7 mean_steps=14.2
|
|
[Episode 177770] reward=-3693771924.3 actor_loss=0.1957 critic_loss=14665836682222706.0000 entropy=17.9178 approx_kl=0.0035 kl_stop=1 intervention_rate=0.1016 front_blocked=0
|
|
[Episode 177780] reward=-8069433457.9 actor_loss=-0.0036 critic_loss=49195052765596880.0000 entropy=17.9241 approx_kl=0.0002 kl_stop=0 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 177780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556072.4 mean_steps=13.3
|
|
[Episode 177790] reward=-1630599631.1 actor_loss=0.2061 critic_loss=3083600573287629.0000 entropy=17.9491 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 177800] reward=-433354758.7 actor_loss=0.2448 critic_loss=292590301741056.0000 entropy=17.9838 approx_kl=-0.0004 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 177800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589674.8 mean_steps=12.7
|
|
[Episode 177810] reward=-3114291697.4 actor_loss=5.3730 critic_loss=11453162460282880.0000 entropy=17.9997 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Episode 177820] reward=-4470784871.8 actor_loss=0.2469 critic_loss=8531253969003770.0000 entropy=17.9993 approx_kl=0.0043 kl_stop=0 intervention_rate=0.0534 front_blocked=0
|
|
[Eval 177820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-387220.6 mean_steps=15.1
|
|
[Episode 177830] reward=-588956011.4 actor_loss=0.3012 critic_loss=325911852379827.8750 entropy=18.0130 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 177840] reward=-416655931.7 actor_loss=0.1676 critic_loss=96791048256736.7812 entropy=18.0182 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1022 front_blocked=0
|
|
[Eval 177840] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-374886.8 mean_steps=17.1
|
|
[Episode 177850] reward=-120427973.1 actor_loss=0.2644 critic_loss=91604447323.0222 entropy=18.0272 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 177860] reward=-125926255.2 actor_loss=0.3465 critic_loss=146450307192.4706 entropy=18.0312 approx_kl=0.0043 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 177860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-519084.9 mean_steps=13.2
|
|
[Episode 177870] reward=-119630905.1 actor_loss=0.3232 critic_loss=87026021717.3333 entropy=18.0296 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 177880] reward=-121450840.9 actor_loss=0.2324 critic_loss=122083829760.0000 entropy=18.0273 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 177880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552731.7 mean_steps=14.6
|
|
[Episode 177890] reward=-121850270.5 actor_loss=0.3388 critic_loss=94696439625.9556 entropy=18.0328 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 177900] reward=-119996973.2 actor_loss=0.2308 critic_loss=89925114060.8000 entropy=18.0289 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 177900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-465907.3 mean_steps=14.2
|
|
[Episode 177910] reward=-1741945050.7 actor_loss=0.8593 critic_loss=3125798443803106.0000 entropy=18.0689 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 177920] reward=-124820350.2 actor_loss=0.2549 critic_loss=93163157367.4667 entropy=18.0797 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 177920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573596.9 mean_steps=13.8
|
|
[Episode 177930] reward=-119258332.1 actor_loss=0.2745 critic_loss=82168557294.9333 entropy=18.0563 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 177940] reward=-121757685.8 actor_loss=0.2165 critic_loss=87304977521.7778 entropy=18.0607 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 177940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454228.2 mean_steps=14.6
|
|
[Episode 177950] reward=-493139063.6 actor_loss=0.3227 critic_loss=226229313834552.8750 entropy=18.0656 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 177960] reward=-574191534.1 actor_loss=0.6914 critic_loss=488644686496399.3750 entropy=18.0707 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1152 front_blocked=0
|
|
[Eval 177960] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-686202.5 mean_steps=10.7
|
|
[Episode 177970] reward=-119801620.8 actor_loss=0.2759 critic_loss=92041410423.4667 entropy=18.0731 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 177980] reward=-120832565.3 actor_loss=0.2055 critic_loss=91047874651.0222 entropy=18.0812 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 177980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612390.7 mean_steps=12.1
|
|
[Episode 177990] reward=-122672677.2 actor_loss=0.2560 critic_loss=84899012061.8667 entropy=18.0478 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 178000] reward=-120479640.0 actor_loss=0.2824 critic_loss=88653156443.0222 entropy=18.0402 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 178000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-394332.0 mean_steps=15.1
|
|
[Episode 178010] reward=-587842849.9 actor_loss=0.1945 critic_loss=578380568548693.3750 entropy=18.0335 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 178020] reward=-1005851889.2 actor_loss=0.3362 critic_loss=757866833163787.3750 entropy=18.0362 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 178020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-357261.7 mean_steps=15.7
|
|
[Episode 178030] reward=-3404585143.8 actor_loss=0.4187 critic_loss=6963450196128298.0000 entropy=18.0482 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 178040] reward=-579235194.2 actor_loss=0.2773 critic_loss=274075732695813.6875 entropy=18.0620 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Eval 178040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535839.0 mean_steps=13.2
|
|
[Episode 178050] reward=-6660596967.1 actor_loss=5.1413 critic_loss=18415136209650756.0000 entropy=18.0472 approx_kl=0.0017 kl_stop=0 intervention_rate=0.0247 front_blocked=0
|
|
[Episode 178060] reward=-2763426597.2 actor_loss=0.2390 critic_loss=5801863109696262.0000 entropy=18.0635 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1068 front_blocked=0
|
|
[Eval 178060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-516023.4 mean_steps=12.6
|
|
[Episode 178070] reward=-5426818924.5 actor_loss=6.3271 critic_loss=14368243790852916.0000 entropy=18.0864 approx_kl=0.0043 kl_stop=0 intervention_rate=0.0618 front_blocked=0
|
|
[Episode 178080] reward=-3486028112.6 actor_loss=0.4681 critic_loss=8179554141450331.0000 entropy=18.0922 approx_kl=0.0051 kl_stop=0 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 178080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-172620783.3 mean_steps=27.1
|
|
[Episode 178090] reward=-1470856161.2 actor_loss=0.2172 critic_loss=2065211849353898.7500 entropy=18.0994 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1139 front_blocked=0
|
|
[Episode 178100] reward=-2597126036.1 actor_loss=0.2720 critic_loss=7244464897445069.0000 entropy=18.1232 approx_kl=0.0060 kl_stop=0 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 178100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-399049.1 mean_steps=16.2
|
|
[Episode 178110] reward=-131898826.7 actor_loss=0.2024 critic_loss=280943335833.6000 entropy=18.1597 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 178120] reward=-1103276922.6 actor_loss=0.2941 critic_loss=2133412335961884.5000 entropy=18.1565 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 178120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-464514.2 mean_steps=14.1
|
|
[Episode 178130] reward=-124322921.2 actor_loss=0.4109 critic_loss=102626833749.3333 entropy=18.1743 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 178140] reward=-1044420943.9 actor_loss=0.2962 critic_loss=2329102016029946.5000 entropy=18.1712 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 178140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530857.1 mean_steps=13.6
|
|
[Episode 178150] reward=-126531744.3 actor_loss=0.3073 critic_loss=216024186880.0000 entropy=18.1740 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 178160] reward=-123205722.4 actor_loss=0.2321 critic_loss=96286459312.3556 entropy=18.1809 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 178160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-465183.0 mean_steps=14.0
|
|
[Episode 178170] reward=-118941835.2 actor_loss=0.3479 critic_loss=92677078675.9111 entropy=18.1482 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 178180] reward=-119950195.1 actor_loss=0.3353 critic_loss=99574220176.6956 entropy=18.1417 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 178180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522801.4 mean_steps=14.5
|
|
[Episode 178190] reward=-122521563.1 actor_loss=0.3554 critic_loss=100684839867.7333 entropy=18.1432 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 178200] reward=-123888393.4 actor_loss=0.1905 critic_loss=96979603251.2000 entropy=18.1258 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 178200] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-676706.5 mean_steps=11.4
|
|
[Episode 178210] reward=-361060282.3 actor_loss=0.4388 critic_loss=80342043276083.2031 entropy=18.1306 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1146 front_blocked=0
|
|
[Episode 178220] reward=-124881111.5 actor_loss=0.2785 critic_loss=122416152029.8667 entropy=18.1294 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 178220] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-635465.5 mean_steps=11.6
|
|
[Episode 178230] reward=-171030163.9 actor_loss=0.3322 critic_loss=9502710725927.8223 entropy=18.1545 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 178240] reward=-121213331.4 actor_loss=0.2278 critic_loss=92187212860.2353 entropy=18.1812 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 178240] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-782198.2 mean_steps=11.3
|
|
[Episode 178250] reward=-124605168.9 actor_loss=0.3359 critic_loss=95852582092.8000 entropy=18.1736 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 178260] reward=-132807340.8 actor_loss=0.2711 critic_loss=323053009745.1707 entropy=18.2084 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 178260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578001.7 mean_steps=12.7
|
|
[Episode 178270] reward=-1211792227.4 actor_loss=0.2419 critic_loss=1981365861550881.5000 entropy=18.2211 approx_kl=0.0011 kl_stop=1 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 178280] reward=-126358195.4 actor_loss=0.3580 critic_loss=248284139892.3636 entropy=18.2539 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 178280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488900.6 mean_steps=14.1
|
|
[Episode 178290] reward=-126662142.0 actor_loss=0.2096 critic_loss=97542725085.8667 entropy=18.2400 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 178300] reward=-131065531.0 actor_loss=0.3238 critic_loss=488094020508.9032 entropy=18.2422 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 178300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487329.0 mean_steps=14.2
|
|
[Episode 178310] reward=-124213227.7 actor_loss=0.2768 critic_loss=91502228457.2444 entropy=18.2494 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 178320] reward=-121437474.3 actor_loss=0.1796 critic_loss=95229151914.6667 entropy=18.2627 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 178320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513185.0 mean_steps=13.1
|
|
[Episode 178330] reward=-125914057.2 actor_loss=0.1387 critic_loss=94517301353.0256 entropy=18.2506 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 178340] reward=-179019006.0 actor_loss=0.2326 critic_loss=5975389462710.0449 entropy=18.2686 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 178340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532959.1 mean_steps=13.1
|
|
[Episode 178350] reward=-129278697.2 actor_loss=0.2607 critic_loss=109396313338.3111 entropy=18.2726 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 178360] reward=-149516363.1 actor_loss=0.2678 critic_loss=2804823486040.2759 entropy=18.2786 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 178360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-508363.8 mean_steps=13.2
|
|
[Episode 178370] reward=-122310453.7 actor_loss=0.2761 critic_loss=96704683303.8222 entropy=18.2887 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 178380] reward=-119305995.9 actor_loss=0.3829 critic_loss=87844819945.2444 entropy=18.2626 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 178380] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-687002.3 mean_steps=10.5
|
|
[Episode 178390] reward=-127352554.9 actor_loss=0.3259 critic_loss=99266511576.1778 entropy=18.2713 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 178400] reward=-204605070.0 actor_loss=0.2219 critic_loss=11011818695338.6660 entropy=18.2591 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 178400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527636.2 mean_steps=14.0
|
|
[Episode 178410] reward=-123165328.9 actor_loss=0.3042 critic_loss=94426081052.4444 entropy=18.2494 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 178420] reward=-123855865.0 actor_loss=0.2436 critic_loss=93357322695.1111 entropy=18.2456 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 178420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429279.0 mean_steps=15.6
|
|
[Episode 178430] reward=-497950763.7 actor_loss=0.2379 critic_loss=397739509514968.1875 entropy=18.2516 approx_kl=-0.0021 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 178440] reward=-124474697.3 actor_loss=0.2252 critic_loss=97396535296.0000 entropy=18.2539 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 178440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371703.0 mean_steps=15.9
|
|
[Episode 178450] reward=-125454625.5 actor_loss=0.3142 critic_loss=131408959897.6000 entropy=18.2445 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 178460] reward=-117059203.8 actor_loss=0.2585 critic_loss=89624010752.0000 entropy=18.2220 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 178460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-361279.0 mean_steps=16.0
|
|
[Episode 178470] reward=-1335479975.5 actor_loss=0.1555 critic_loss=2146887521452396.0000 entropy=18.2121 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1087 front_blocked=0
|
|
[Episode 178480] reward=-125635139.1 actor_loss=0.2313 critic_loss=108377596814.2222 entropy=18.2325 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 178480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-565492.1 mean_steps=12.5
|
|
[Episode 178490] reward=-121716325.9 actor_loss=0.3455 critic_loss=118590149812.7059 entropy=18.2231 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 178500] reward=-1295259697.4 actor_loss=0.2732 critic_loss=1751174560192921.5000 entropy=18.2448 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Eval 178500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541432.0 mean_steps=13.4
|
|
[Episode 178510] reward=-124247855.8 actor_loss=0.2502 critic_loss=107448875690.6667 entropy=18.2487 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 178520] reward=-1544788144.4 actor_loss=0.3075 critic_loss=4903650144702373.0000 entropy=18.2480 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 178520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-494401.2 mean_steps=14.7
|
|
[Episode 178530] reward=-728463657.9 actor_loss=0.2458 critic_loss=839433577136856.1250 entropy=18.2483 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 178540] reward=-1224705085.1 actor_loss=0.4131 critic_loss=1077855412440997.0000 entropy=18.2824 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1029 front_blocked=0
|
|
[Eval 178540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-31165924.4 mean_steps=20.9
|
|
[Episode 178550] reward=-4758969302.6 actor_loss=0.3312 critic_loss=18292479650757200.0000 entropy=18.3115 approx_kl=0.0003 kl_stop=0 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 178560] reward=-434371098.8 actor_loss=0.1897 critic_loss=129469910462555.0156 entropy=18.3209 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1081 front_blocked=0
|
|
[Eval 178560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459792.1 mean_steps=14.7
|
|
[Episode 178570] reward=-122771826.7 actor_loss=0.2981 critic_loss=136459923364.9778 entropy=18.3162 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 178580] reward=-135080121.8 actor_loss=0.1922 critic_loss=334512061735.8222 entropy=18.3220 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 178580] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-667732.2 mean_steps=11.3
|
|
[Episode 178590] reward=-273077072.8 actor_loss=0.6813 critic_loss=48383699976192.0000 entropy=18.3006 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 178600] reward=-2678412312.3 actor_loss=2.0382 critic_loss=12679889677189120.0000 entropy=18.3075 approx_kl=0.0029 kl_stop=1 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 178600] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-670198.4 mean_steps=11.2
|
|
[Episode 178610] reward=-130479407.1 actor_loss=0.2993 critic_loss=97760464987.0222 entropy=18.2852 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 178620] reward=-835843004.0 actor_loss=0.2873 critic_loss=1429854608162816.0000 entropy=18.2880 approx_kl=0.0009 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 178620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481636.2 mean_steps=13.6
|
|
[Episode 178630] reward=-2541774347.6 actor_loss=4.3764 critic_loss=12925998210988988.0000 entropy=18.2678 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 178640] reward=-127078924.1 actor_loss=0.2521 critic_loss=123533766018.8445 entropy=18.2759 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 178640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-551592.4 mean_steps=14.3
|
|
[Episode 178650] reward=-2393429680.9 actor_loss=0.9762 critic_loss=5112995918404887.0000 entropy=18.2978 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Episode 178660] reward=-1292366302.3 actor_loss=0.1658 critic_loss=2135202469383008.7500 entropy=18.3237 approx_kl=-0.0005 kl_stop=0 intervention_rate=0.1081 front_blocked=0
|
|
[Eval 178660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487474.9 mean_steps=13.9
|
|
[Episode 178670] reward=-3495103905.9 actor_loss=1.2280 critic_loss=15474670220531758.0000 entropy=18.3409 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Episode 178680] reward=-124584181.2 actor_loss=0.1817 critic_loss=99517825024.0000 entropy=18.3510 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 178680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478717.6 mean_steps=13.7
|
|
[Episode 178690] reward=-127526435.9 actor_loss=0.3002 critic_loss=133034535861.0732 entropy=18.3590 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 178700] reward=-3113485981.7 actor_loss=12.8769 critic_loss=7979836444199413.0000 entropy=18.3696 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 178700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-114038300.7 mean_steps=19.9
|
|
[Episode 178710] reward=-1948601939.0 actor_loss=0.7933 critic_loss=8293037568845414.0000 entropy=18.3910 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 178720] reward=-143591137.2 actor_loss=0.3174 critic_loss=1700417666343.8223 entropy=18.4020 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 178720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506839.3 mean_steps=14.2
|
|
[Episode 178730] reward=-130215583.8 actor_loss=0.3748 critic_loss=175164704768.0000 entropy=18.4075 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 178740] reward=-125544102.8 actor_loss=0.3207 critic_loss=99614159485.1555 entropy=18.4174 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 178740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-189765320.3 mean_steps=28.7
|
|
[Episode 178750] reward=-367338801.1 actor_loss=0.2083 critic_loss=191932512396174.2188 entropy=18.4263 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 178760] reward=-127658092.3 actor_loss=0.2920 critic_loss=185796662249.2444 entropy=18.4350 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 178760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502444.1 mean_steps=13.9
|
|
[Episode 178770] reward=-752501073.0 actor_loss=0.1682 critic_loss=1088784917128078.2500 entropy=18.4304 approx_kl=-0.0022 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Episode 178780] reward=-171478796.9 actor_loss=0.2957 critic_loss=5078909008659.6924 entropy=18.4482 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 178780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-496945.5 mean_steps=14.8
|
|
[Episode 178790] reward=-798213116.8 actor_loss=0.3185 critic_loss=1302598479313487.7500 entropy=18.4539 approx_kl=0.0004 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 178800] reward=-1435271104.6 actor_loss=0.3050 critic_loss=3535773581903280.5000 entropy=18.4634 approx_kl=0.0000 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 178800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-8016125.9 mean_steps=15.8
|
|
[Episode 178810] reward=-1226670773.0 actor_loss=0.2941 critic_loss=2420423400247205.0000 entropy=18.4575 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1146 front_blocked=0
|
|
[Episode 178820] reward=-132776866.8 actor_loss=0.2084 critic_loss=397210671877.6889 entropy=18.4635 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 178820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542810.1 mean_steps=13.4
|
|
[Episode 178830] reward=-125682261.1 actor_loss=0.3271 critic_loss=107682724327.6190 entropy=18.4696 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 178840] reward=-134645304.4 actor_loss=0.2792 critic_loss=334450914733.4194 entropy=18.4793 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 178840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-620687.7 mean_steps=12.5
|
|
[Episode 178850] reward=-390485870.4 actor_loss=0.3294 critic_loss=200728762751385.5938 entropy=18.4747 approx_kl=-0.0004 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 178860] reward=-126961229.8 actor_loss=0.2088 critic_loss=101609315805.8667 entropy=18.4855 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 178860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530942.7 mean_steps=14.3
|
|
[Episode 178870] reward=-131233417.9 actor_loss=0.2938 critic_loss=112007839744.0000 entropy=18.4706 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 178880] reward=-128460860.4 actor_loss=0.2025 critic_loss=98542997603.9024 entropy=18.4581 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 178880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-381387.0 mean_steps=16.1
|
|
[Episode 178890] reward=-120325794.3 actor_loss=0.3058 critic_loss=108054586254.2222 entropy=18.4440 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 178900] reward=-301539649.0 actor_loss=0.2401 critic_loss=106448022102198.0469 entropy=18.4263 approx_kl=0.0008 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 178900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-604967.6 mean_steps=12.7
|
|
[Episode 178910] reward=-128678643.1 actor_loss=0.2041 critic_loss=153544127710.6087 entropy=18.4094 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 178920] reward=-127668399.2 actor_loss=0.2843 critic_loss=99784839259.0222 entropy=18.3909 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 178920] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-322581.0 mean_steps=16.4
|
|
[Episode 178930] reward=-137259099.9 actor_loss=0.2672 critic_loss=686711100757.3334 entropy=18.3840 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 178940] reward=-125308934.8 actor_loss=0.2866 critic_loss=103833532558.8837 entropy=18.3817 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 178940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500896.9 mean_steps=15.0
|
|
[Episode 178950] reward=-151304288.0 actor_loss=0.2535 critic_loss=2429496607812.2666 entropy=18.3959 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 178960] reward=-121052829.5 actor_loss=0.3649 critic_loss=90623034800.3556 entropy=18.3822 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 178960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-447263.9 mean_steps=13.8
|
|
[Episode 178970] reward=-126560813.3 actor_loss=0.2777 critic_loss=139849919692.8000 entropy=18.3600 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 178980] reward=-130347293.2 actor_loss=0.2397 critic_loss=235457260469.0732 entropy=18.3546 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 178980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-582162.2 mean_steps=12.8
|
|
[Episode 178990] reward=-125253051.4 actor_loss=0.2921 critic_loss=99944885088.7111 entropy=18.3174 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 179000] reward=-120348877.6 actor_loss=0.3062 critic_loss=94359480707.4595 entropy=18.3062 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 179000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400642.5 mean_steps=16.1
|
|
[Episode 179010] reward=-129369329.1 actor_loss=0.2311 critic_loss=103816338057.3659 entropy=18.3066 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 179020] reward=-121199298.0 actor_loss=0.3743 critic_loss=93402811144.8276 entropy=18.2857 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 179020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-368665.6 mean_steps=16.9
|
|
[Episode 179030] reward=-125922932.1 actor_loss=0.3116 critic_loss=97334841252.9778 entropy=18.2818 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 179040] reward=-126473014.3 actor_loss=0.2138 critic_loss=103808954641.0667 entropy=18.2592 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 179040] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-609734.2 mean_steps=11.9
|
|
[Episode 179050] reward=-136173948.3 actor_loss=0.3190 critic_loss=1319380316400.9412 entropy=18.2376 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 179060] reward=-127931273.9 actor_loss=0.3350 critic_loss=100367522929.7778 entropy=18.2302 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 179060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466730.5 mean_steps=14.7
|
|
[Episode 179070] reward=-131941427.1 actor_loss=0.1913 critic_loss=240277508915.2000 entropy=18.2382 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 179080] reward=-123429905.4 actor_loss=0.3383 critic_loss=107596409162.3226 entropy=18.2290 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 179080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511907.9 mean_steps=14.3
|
|
[Episode 179090] reward=-130643352.8 actor_loss=0.4270 critic_loss=500086425008.3555 entropy=18.2145 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 179100] reward=-122174376.3 actor_loss=0.2527 critic_loss=91145082743.4667 entropy=18.1957 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 179100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501133.7 mean_steps=12.9
|
|
[Episode 179110] reward=-120347126.7 actor_loss=0.2554 critic_loss=85081134057.2444 entropy=18.1854 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 179120] reward=-126453399.4 actor_loss=0.2524 critic_loss=94324560457.1429 entropy=18.1771 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 179120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461419.3 mean_steps=14.4
|
|
[Episode 179130] reward=-122346152.6 actor_loss=0.2747 critic_loss=92988434568.5333 entropy=18.1623 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 179140] reward=-121483324.7 actor_loss=0.2796 critic_loss=86784092296.5333 entropy=18.1560 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 179140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460534.7 mean_steps=14.3
|
|
[Episode 179150] reward=-122177566.7 actor_loss=0.3377 critic_loss=158102087270.4000 entropy=18.1418 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 179160] reward=-119471737.6 actor_loss=0.3289 critic_loss=83024167470.5455 entropy=18.1057 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 179160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436376.2 mean_steps=15.1
|
|
[Episode 179170] reward=-123545173.5 actor_loss=0.3295 critic_loss=104021061176.8889 entropy=18.1024 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 179180] reward=-210145664.5 actor_loss=0.2883 critic_loss=17766689852620.8008 entropy=18.1047 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 179180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563552.1 mean_steps=12.2
|
|
[Episode 179190] reward=-124364989.4 actor_loss=0.1768 critic_loss=88619464021.3333 entropy=18.1371 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 179200] reward=-122750794.1 actor_loss=0.2497 critic_loss=95578861112.8889 entropy=18.1368 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 179200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-657492.6 mean_steps=12.2
|
|
[Episode 179210] reward=-118313650.5 actor_loss=0.2811 critic_loss=89306794666.6667 entropy=18.1462 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 179220] reward=-125287758.9 actor_loss=0.3885 critic_loss=92037806307.5556 entropy=18.1282 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 179220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471909.6 mean_steps=13.8
|
|
[Episode 179230] reward=-393274800.5 actor_loss=0.8593 critic_loss=189265254666612.3750 entropy=18.1115 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 179240] reward=-172551483.8 actor_loss=0.3432 critic_loss=8716644469236.6221 entropy=18.1006 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 179240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-104972780.2 mean_steps=138.8
|
|
[Episode 179250] reward=-140654328.3 actor_loss=0.2263 critic_loss=983947811953.7778 entropy=18.0990 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 179260] reward=-122828474.3 actor_loss=0.3092 critic_loss=85674683232.7111 entropy=18.0895 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 179260] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-376508.1 mean_steps=16.9
|
|
[Episode 179270] reward=-324897065.9 actor_loss=0.2331 critic_loss=90204973086219.3750 entropy=18.0902 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 179280] reward=-121259224.9 actor_loss=0.3682 critic_loss=87554289390.9333 entropy=18.0845 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 179280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410090.7 mean_steps=15.3
|
|
[Episode 179290] reward=-123203624.2 actor_loss=0.2993 critic_loss=86391520733.8667 entropy=18.0771 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 179300] reward=-122695640.4 actor_loss=0.2880 critic_loss=91092612732.5405 entropy=18.0616 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 179300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465049.3 mean_steps=15.0
|
|
[Episode 179310] reward=-119484595.0 actor_loss=0.3880 critic_loss=88769888920.2162 entropy=18.0606 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 179320] reward=-119577779.6 actor_loss=0.2201 critic_loss=97511916430.2222 entropy=18.0661 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 179320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539874.7 mean_steps=13.3
|
|
[Episode 179330] reward=-123401207.5 actor_loss=0.3174 critic_loss=90189177196.0889 entropy=18.0564 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 179340] reward=-118592611.3 actor_loss=0.2874 critic_loss=87697660245.3333 entropy=18.0382 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 179340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433357.7 mean_steps=15.3
|
|
[Episode 179350] reward=-122862525.4 actor_loss=0.2876 critic_loss=88714363699.2000 entropy=18.0383 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 179360] reward=-125279067.5 actor_loss=0.1449 critic_loss=86986471716.5714 entropy=18.0374 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 179360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481056.4 mean_steps=14.9
|
|
[Episode 179370] reward=-123237130.2 actor_loss=0.2419 critic_loss=96107567331.5556 entropy=18.0229 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 179380] reward=-118237689.1 actor_loss=0.3115 critic_loss=92108099857.0667 entropy=18.0123 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 179380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501081.8 mean_steps=13.2
|
|
[Episode 179390] reward=-123971833.5 actor_loss=0.2105 critic_loss=88405389767.1111 entropy=17.9974 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 179400] reward=-127777989.4 actor_loss=0.2260 critic_loss=92993901621.8947 entropy=17.9852 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 179400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468252.3 mean_steps=14.8
|
|
[Episode 179410] reward=-122748231.3 actor_loss=0.3161 critic_loss=126173326099.6923 entropy=17.9813 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 179420] reward=-118426772.7 actor_loss=0.4480 critic_loss=92514071645.0909 entropy=17.9852 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Eval 179420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490536.4 mean_steps=15.2
|
|
[Episode 179430] reward=-118398430.8 actor_loss=0.2940 critic_loss=80676821037.5111 entropy=17.9847 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 179440] reward=-121516050.3 actor_loss=0.2971 critic_loss=88818696507.0769 entropy=17.9783 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 179440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447444.7 mean_steps=15.2
|
|
[Episode 179450] reward=-122096594.6 actor_loss=0.2432 critic_loss=83622642574.2222 entropy=17.9765 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 179460] reward=-125883140.0 actor_loss=0.2856 critic_loss=92378494614.5882 entropy=17.9861 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 179460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510572.1 mean_steps=13.4
|
|
[Episode 179470] reward=-119741480.0 actor_loss=0.3024 critic_loss=103380736834.3704 entropy=17.9978 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 179480] reward=-121116817.9 actor_loss=0.2739 critic_loss=89276298581.3333 entropy=17.9860 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 179480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-423168.5 mean_steps=14.5
|
|
[Episode 179490] reward=-120986469.3 actor_loss=0.3371 critic_loss=82002148283.7333 entropy=17.9894 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 179500] reward=-119331423.0 actor_loss=0.2709 critic_loss=98440858737.7778 entropy=17.9955 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 179500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-472366.8 mean_steps=12.8
|
|
[Episode 179510] reward=-125135460.9 actor_loss=0.3359 critic_loss=96713847694.2222 entropy=17.9841 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 179520] reward=-121379007.1 actor_loss=0.2523 critic_loss=90332739851.1304 entropy=17.9739 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 179520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483881.3 mean_steps=13.8
|
|
[Episode 179530] reward=-122075876.2 actor_loss=0.3111 critic_loss=88864735838.8148 entropy=17.9681 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 179540] reward=-115650081.3 actor_loss=0.3559 critic_loss=95564410470.4000 entropy=17.9608 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 179540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-507354.5 mean_steps=14.6
|
|
[Episode 179550] reward=-118708529.8 actor_loss=0.3832 critic_loss=91657804860.2353 entropy=17.9666 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 179560] reward=-126258535.0 actor_loss=0.1967 critic_loss=93540764750.7692 entropy=17.9668 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 179560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-396718.0 mean_steps=16.1
|
|
[Episode 179570] reward=-119637703.1 actor_loss=0.2714 critic_loss=86179252516.5714 entropy=17.9580 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 179580] reward=-122573292.2 actor_loss=0.2135 critic_loss=83513588053.3333 entropy=17.9527 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 179580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402012.7 mean_steps=15.5
|
|
[Episode 179590] reward=-122140809.1 actor_loss=0.2645 critic_loss=87760858862.9333 entropy=17.9430 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 179600] reward=-116103946.6 actor_loss=0.3081 critic_loss=79249330508.1081 entropy=17.9431 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 179600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443996.1 mean_steps=15.3
|
|
[Episode 179610] reward=-121566617.4 actor_loss=0.3216 critic_loss=92758520012.8000 entropy=17.9456 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 179620] reward=-124253155.7 actor_loss=0.2692 critic_loss=91649690757.5652 entropy=17.9512 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 179620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475108.0 mean_steps=14.8
|
|
[Episode 179630] reward=-122869317.6 actor_loss=0.3199 critic_loss=91248764741.8182 entropy=17.9361 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 179640] reward=-122631133.7 actor_loss=0.2618 critic_loss=88105111961.6000 entropy=17.9334 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 179640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518192.3 mean_steps=13.4
|
|
[Episode 179650] reward=-116118928.5 actor_loss=0.2688 critic_loss=86502444904.2963 entropy=17.9113 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 179660] reward=-116284155.3 actor_loss=0.2555 critic_loss=87291212534.5185 entropy=17.9094 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 179660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427381.4 mean_steps=14.6
|
|
[Episode 179670] reward=-118493923.5 actor_loss=0.2570 critic_loss=86889524428.8000 entropy=17.8857 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 179680] reward=-124001332.8 actor_loss=0.3208 critic_loss=90262479038.5116 entropy=17.8767 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 179680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541210.1 mean_steps=13.8
|
|
[Episode 179690] reward=-116215444.2 actor_loss=0.2954 critic_loss=79984464616.7273 entropy=17.8741 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 179700] reward=-120626958.4 actor_loss=0.2700 critic_loss=85976074103.4667 entropy=17.8713 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 179700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459038.3 mean_steps=15.6
|
|
[Episode 179710] reward=-123945575.4 actor_loss=0.1776 critic_loss=110937753144.8889 entropy=17.8674 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 179720] reward=-117967444.1 actor_loss=0.3352 critic_loss=95541857657.2632 entropy=17.8580 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 179720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442049.9 mean_steps=14.1
|
|
[Episode 179730] reward=-118140645.0 actor_loss=0.3645 critic_loss=83431923165.8667 entropy=17.8517 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 179740] reward=-121979779.2 actor_loss=0.2404 critic_loss=85835597637.8182 entropy=17.8418 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 179740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-625967.4 mean_steps=12.2
|
|
[Episode 179750] reward=-120275106.2 actor_loss=0.3988 critic_loss=92274243265.4222 entropy=17.8212 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 179760] reward=-117791837.3 actor_loss=0.2101 critic_loss=82941053914.0741 entropy=17.8177 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 179760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-473200.6 mean_steps=15.7
|
|
[Episode 179770] reward=-117748359.3 actor_loss=0.3013 critic_loss=85346396779.1628 entropy=17.8041 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 179780] reward=-121567984.9 actor_loss=0.2371 critic_loss=85683135738.3111 entropy=17.8087 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 179780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506245.5 mean_steps=13.9
|
|
[Episode 179790] reward=-118752699.4 actor_loss=0.3166 critic_loss=84640713932.8000 entropy=17.8026 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 179800] reward=-122421163.7 actor_loss=0.2291 critic_loss=83917113844.6222 entropy=17.7944 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 179800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-570726.2 mean_steps=14.2
|
|
[Episode 179810] reward=-124927835.2 actor_loss=0.2295 critic_loss=90631323648.0000 entropy=17.7987 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 179820] reward=-122259290.5 actor_loss=0.2665 critic_loss=136934501454.7692 entropy=17.8006 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 179820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-467112.6 mean_steps=17.9
|
|
[Episode 179830] reward=-114592596.3 actor_loss=0.2654 critic_loss=85574532792.3200 entropy=17.7912 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 179840] reward=-121931146.5 actor_loss=0.2687 critic_loss=97420540061.5385 entropy=17.7721 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 179840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535893.3 mean_steps=13.2
|
|
[Episode 179850] reward=-121136428.0 actor_loss=0.2508 critic_loss=104864728795.4286 entropy=17.7776 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 179860] reward=-117606547.1 actor_loss=0.2683 critic_loss=77270035456.0000 entropy=17.7811 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 179860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-755060.4 mean_steps=16.1
|
|
[Episode 179870] reward=-116691812.6 actor_loss=0.3312 critic_loss=83829752925.0909 entropy=17.7958 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 179880] reward=-116311848.2 actor_loss=0.2116 critic_loss=84960490549.8947 entropy=17.7928 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 179880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426573.6 mean_steps=15.2
|
|
[Episode 179890] reward=-119031465.7 actor_loss=0.2679 critic_loss=82153152954.8108 entropy=17.7881 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 179900] reward=-115109357.3 actor_loss=0.2685 critic_loss=76632128170.6667 entropy=17.7796 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 179900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452775.2 mean_steps=15.9
|
|
[Episode 179910] reward=-116501433.3 actor_loss=0.4095 critic_loss=83840018432.0000 entropy=17.7659 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 179920] reward=-121578964.7 actor_loss=0.3130 critic_loss=85728356028.6316 entropy=17.7664 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 179920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-655444.5 mean_steps=12.9
|
|
[Episode 179930] reward=-115724615.3 actor_loss=0.2772 critic_loss=82420946944.0000 entropy=17.7660 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 179940] reward=-119547454.2 actor_loss=0.3270 critic_loss=86020420812.8000 entropy=17.7733 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 179940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538114.5 mean_steps=14.3
|
|
[Episode 179950] reward=-167040651.9 actor_loss=0.2543 critic_loss=6829805427461.6885 entropy=17.7877 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 179960] reward=-108643639.9 actor_loss=0.3849 critic_loss=83244857929.1429 entropy=17.7951 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 179960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610987.8 mean_steps=12.5
|
|
[Episode 179970] reward=-115637469.8 actor_loss=0.3859 critic_loss=79424287767.8139 entropy=17.7817 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 179980] reward=-120365709.2 actor_loss=0.2595 critic_loss=89100218467.9024 entropy=17.8118 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 179980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-369048.4 mean_steps=15.8
|
|
[Episode 179990] reward=-115953990.5 actor_loss=0.2761 critic_loss=92025761659.8710 entropy=17.8109 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 180000] reward=-120300866.7 actor_loss=0.2079 critic_loss=84858100485.6889 entropy=17.8107 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 180000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-867456.2 mean_steps=18.8
|
|
[Episode 180010] reward=-122623562.3 actor_loss=0.3104 critic_loss=85863638454.8571 entropy=17.7970 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 180020] reward=-118462523.7 actor_loss=0.3491 critic_loss=81883137365.3333 entropy=17.8012 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 180020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-651523.6 mean_steps=12.2
|
|
[Episode 180030] reward=-118304868.2 actor_loss=0.2364 critic_loss=83846644311.4146 entropy=17.7856 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 180040] reward=-116910288.0 actor_loss=0.3366 critic_loss=83886597120.0000 entropy=17.7906 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 180040] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-300260.4 mean_steps=17.4
|
|
[Episode 180050] reward=-115766333.1 actor_loss=0.3052 critic_loss=92258012598.8571 entropy=17.7858 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 180060] reward=-119995657.3 actor_loss=0.4111 critic_loss=84786043877.7436 entropy=17.7884 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 180060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-534744.5 mean_steps=12.5
|
|
[Episode 180070] reward=-121996790.9 actor_loss=0.3136 critic_loss=276820415222.5185 entropy=17.7909 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 180080] reward=-122469279.6 actor_loss=0.3387 critic_loss=907158768459.2941 entropy=17.7875 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 180080] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-6199041.0 mean_steps=27.8
|
|
[Episode 180090] reward=-139293048.0 actor_loss=0.2573 critic_loss=858442969907.2000 entropy=17.7966 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 180100] reward=-115974451.0 actor_loss=0.2607 critic_loss=80511800115.2000 entropy=17.8078 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 180100] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-657996.6 mean_steps=11.2
|
|
[Episode 180110] reward=-116726737.9 actor_loss=0.3505 critic_loss=85296184815.4839 entropy=17.8161 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 180120] reward=-115894077.5 actor_loss=0.3460 critic_loss=88182693467.8974 entropy=17.8375 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 180120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-543379.4 mean_steps=14.2
|
|
[Episode 180130] reward=-119973999.4 actor_loss=0.2505 critic_loss=86878477399.7714 entropy=17.8487 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 180140] reward=-334987724.1 actor_loss=0.2946 critic_loss=159344905869357.5000 entropy=17.8677 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 180140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-339304.6 mean_steps=15.7
|
|
[Episode 180150] reward=-158106634.7 actor_loss=0.3577 critic_loss=7580763122710.7559 entropy=17.8887 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 180160] reward=-120989407.6 actor_loss=0.2870 critic_loss=87357500740.6829 entropy=17.8838 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 180160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-645079.0 mean_steps=12.2
|
|
[Episode 180170] reward=-116356566.1 actor_loss=0.3300 critic_loss=134047832016.3721 entropy=17.8917 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 180180] reward=-739273087.0 actor_loss=0.2736 critic_loss=777720934654134.0000 entropy=17.8972 approx_kl=-0.0019 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 180180] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-683507.7 mean_steps=12.2
|
|
[Episode 180190] reward=-169234454.0 actor_loss=0.2361 critic_loss=4325577968392.8276 entropy=17.9122 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 180200] reward=-121586669.1 actor_loss=0.3176 critic_loss=88212897360.8421 entropy=17.9241 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 180200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-595408.5 mean_steps=12.7
|
|
[Episode 180210] reward=-120741524.1 actor_loss=0.3541 critic_loss=98837452800.0000 entropy=17.9418 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 180220] reward=-117699496.6 actor_loss=0.4681 critic_loss=94301168084.1143 entropy=17.9419 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 180220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476219.1 mean_steps=13.9
|
|
[Episode 180230] reward=-120947328.4 actor_loss=0.2831 critic_loss=86614345780.5128 entropy=17.9478 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 180240] reward=-117957358.2 actor_loss=0.3040 critic_loss=81735408298.6667 entropy=17.9438 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 180240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-624036.3 mean_steps=11.9
|
|
[Episode 180250] reward=-115792234.7 actor_loss=0.2974 critic_loss=91741763155.3488 entropy=17.9577 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 180260] reward=-120386337.0 actor_loss=0.2516 critic_loss=87735698773.3333 entropy=17.9464 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 180260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571246.2 mean_steps=13.7
|
|
[Episode 180270] reward=-114139740.0 actor_loss=0.2996 critic_loss=84171795842.8445 entropy=17.9365 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 180280] reward=-118697833.3 actor_loss=0.3133 critic_loss=92260735203.5556 entropy=17.9243 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 180280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-428693.1 mean_steps=14.3
|
|
[Episode 180290] reward=-118447884.4 actor_loss=0.2458 critic_loss=80474278675.6923 entropy=17.9175 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 180300] reward=-120606202.4 actor_loss=0.3189 critic_loss=89711310893.5111 entropy=17.9179 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 180300] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-573289.1 mean_steps=11.4
|
|
[Episode 180310] reward=-119078611.7 actor_loss=0.3612 critic_loss=89042219827.2000 entropy=17.8989 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 180320] reward=-116382307.4 actor_loss=0.3168 critic_loss=93436353194.6667 entropy=17.9201 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 180320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463022.0 mean_steps=14.7
|
|
[Episode 180330] reward=-111173407.3 actor_loss=0.3905 critic_loss=83317917497.8065 entropy=17.9255 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 180340] reward=-123304868.9 actor_loss=0.1627 critic_loss=89471366667.3778 entropy=17.9062 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 180340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491309.0 mean_steps=13.8
|
|
[Episode 180350] reward=-123498504.6 actor_loss=0.1908 critic_loss=87758869299.2000 entropy=17.9214 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 180360] reward=-118785926.4 actor_loss=0.3296 critic_loss=196936342966.8571 entropy=17.9374 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 180360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-658364.2 mean_steps=12.4
|
|
[Episode 180370] reward=-153021377.0 actor_loss=0.2891 critic_loss=3600840553372.9033 entropy=17.9539 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 180380] reward=-122040005.6 actor_loss=0.3062 critic_loss=87379549297.7778 entropy=17.9549 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 180380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465977.1 mean_steps=14.7
|
|
[Episode 180390] reward=-421241544.0 actor_loss=0.4079 critic_loss=196162789041598.3438 entropy=17.9660 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 180400] reward=-120405762.4 actor_loss=0.2633 critic_loss=83582685574.0952 entropy=17.9414 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 180400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-84890103.9 mean_steps=47.0
|
|
[Episode 180410] reward=-121538384.7 actor_loss=0.3316 critic_loss=88665163002.3111 entropy=17.9348 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 180420] reward=-521149324.6 actor_loss=0.2324 critic_loss=498268557960806.3750 entropy=17.9238 approx_kl=0.0003 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 180420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467545.8 mean_steps=14.8
|
|
[Episode 180430] reward=-118736660.1 actor_loss=0.2604 critic_loss=85009920000.0000 entropy=17.9107 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 180440] reward=-113974682.1 actor_loss=0.3431 critic_loss=83661395831.4667 entropy=17.9053 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 180440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-369560.7 mean_steps=15.9
|
|
[Episode 180450] reward=-119434788.2 actor_loss=0.3404 critic_loss=89101667896.8889 entropy=17.9108 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 180460] reward=-123573015.9 actor_loss=0.2272 critic_loss=112705763514.1818 entropy=17.9059 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 180460] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-719412.8 mean_steps=11.9
|
|
[Episode 180470] reward=-120272461.3 actor_loss=0.2105 critic_loss=89807749120.0000 entropy=17.8947 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 180480] reward=-160310735.2 actor_loss=0.4121 critic_loss=7743165431808.0000 entropy=17.9018 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 180480] success_rate=0.400 qp_infeasible_rate=0.550 mean_return=-124785289.8 mean_steps=173.9
|
|
[Episode 180490] reward=-114623583.2 actor_loss=0.2479 critic_loss=84113398533.6889 entropy=17.8921 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 180500] reward=-440900480.8 actor_loss=0.3045 critic_loss=315242424921110.7500 entropy=17.8796 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 180500] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-299955.0 mean_steps=17.6
|
|
[Episode 180510] reward=-121491306.5 actor_loss=0.2855 critic_loss=89114240269.4737 entropy=17.8649 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 180520] reward=-120776852.2 actor_loss=0.2717 critic_loss=101212122316.8000 entropy=17.8548 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 180520] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-625024.3 mean_steps=11.1
|
|
[Episode 180530] reward=-118223520.9 actor_loss=0.2179 critic_loss=79845469485.1765 entropy=17.8516 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 180540] reward=-114195718.1 actor_loss=0.2965 critic_loss=79624362337.1035 entropy=17.8391 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 180540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576999.0 mean_steps=13.2
|
|
[Episode 180550] reward=-122476423.3 actor_loss=0.2994 critic_loss=95399605713.4545 entropy=17.8397 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 180560] reward=-117106925.7 actor_loss=0.3230 critic_loss=81050710940.9032 entropy=17.8421 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 180560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566872.1 mean_steps=13.6
|
|
[Episode 180570] reward=-122459065.9 actor_loss=0.1774 critic_loss=85471746821.6889 entropy=17.8468 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 180580] reward=-122430726.0 actor_loss=0.2172 critic_loss=85481897000.9600 entropy=17.8339 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 180580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441878.3 mean_steps=15.6
|
|
[Episode 180590] reward=-116518509.3 actor_loss=0.2247 critic_loss=78168208361.2444 entropy=17.8478 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 180600] reward=-128553348.7 actor_loss=0.2575 critic_loss=376967064939.3549 entropy=17.8263 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 180600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-435306.6 mean_steps=16.2
|
|
[Episode 180610] reward=-117997113.0 actor_loss=0.2449 critic_loss=93350994193.0667 entropy=17.8272 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 180620] reward=-120473780.8 actor_loss=0.2987 critic_loss=84633823666.4242 entropy=17.8233 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 180620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424190.2 mean_steps=14.8
|
|
[Episode 180630] reward=-123163918.8 actor_loss=0.3511 critic_loss=98342607046.1935 entropy=17.7990 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 180640] reward=-126479957.8 actor_loss=0.3331 critic_loss=334328957428.6222 entropy=17.7874 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 180640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-31641753.8 mean_steps=44.2
|
|
[Episode 180650] reward=-116901108.4 actor_loss=0.2064 critic_loss=84267520819.2000 entropy=17.8167 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 180660] reward=-116898147.2 actor_loss=0.2974 critic_loss=83655491193.9048 entropy=17.8145 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 180660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438948.3 mean_steps=14.2
|
|
[Episode 180670] reward=-119491119.9 actor_loss=0.2997 critic_loss=112404728490.6667 entropy=17.8148 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 180680] reward=-129705964.9 actor_loss=0.3465 critic_loss=1083691789334.7556 entropy=17.8312 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 180680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-339900.8 mean_steps=15.8
|
|
[Episode 180690] reward=-119568711.4 actor_loss=0.3476 critic_loss=86744616686.9333 entropy=17.8606 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 180700] reward=-138154832.5 actor_loss=0.3807 critic_loss=1361007787425.1853 entropy=17.8657 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 180700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-472395.7 mean_steps=15.7
|
|
[Episode 180710] reward=-111574604.6 actor_loss=0.4039 critic_loss=79850181868.3077 entropy=17.8593 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 180720] reward=-122180375.8 actor_loss=0.2911 critic_loss=98342235886.9333 entropy=17.8545 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 180720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-512684.7 mean_steps=14.9
|
|
[Episode 180730] reward=-121466502.3 actor_loss=0.2923 critic_loss=89997937178.9474 entropy=17.8777 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 180740] reward=-115645246.2 actor_loss=0.3600 critic_loss=78173458432.0000 entropy=17.8811 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 180740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-628996.3 mean_steps=11.7
|
|
[Episode 180750] reward=-118281693.4 actor_loss=0.2571 critic_loss=81092551214.5455 entropy=17.8640 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 180760] reward=-121206647.0 actor_loss=0.2470 critic_loss=85056362541.5111 entropy=17.8701 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 180760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566209.4 mean_steps=13.4
|
|
[Episode 180770] reward=-120644331.5 actor_loss=0.3304 critic_loss=92966614357.3333 entropy=17.8608 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 180780] reward=-118206508.7 actor_loss=0.2723 critic_loss=82869072023.7037 entropy=17.8637 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 180780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-436213.7 mean_steps=14.6
|
|
[Episode 180790] reward=-119173893.8 actor_loss=0.3108 critic_loss=89031342237.5385 entropy=17.8495 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 180800] reward=-153704025.0 actor_loss=0.2807 critic_loss=6122617385779.2002 entropy=17.8571 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 180800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514837.2 mean_steps=14.2
|
|
[Episode 180810] reward=-118060533.3 actor_loss=0.3426 critic_loss=79160534266.3111 entropy=17.8594 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 180820] reward=-118559572.2 actor_loss=0.3275 critic_loss=84728834730.6667 entropy=17.8417 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 180820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-626030.7 mean_steps=12.2
|
|
[Episode 180830] reward=-134917644.0 actor_loss=0.3366 critic_loss=918814254694.4000 entropy=17.8352 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 180840] reward=-123717927.4 actor_loss=0.2969 critic_loss=88085369105.0667 entropy=17.8264 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 180840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454447.9 mean_steps=15.6
|
|
[Episode 180850] reward=-121807513.6 actor_loss=0.2564 critic_loss=83263384171.1628 entropy=17.8319 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 180860] reward=-123464778.0 actor_loss=0.2844 critic_loss=107489549042.5263 entropy=17.8359 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 180860] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-327208.6 mean_steps=16.6
|
|
[Episode 180870] reward=-117415589.7 actor_loss=0.3127 critic_loss=82725086608.6956 entropy=17.8321 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 180880] reward=-118887358.3 actor_loss=0.2525 critic_loss=86514064315.7333 entropy=17.8548 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 180880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560704.7 mean_steps=13.2
|
|
[Episode 180890] reward=-140336478.2 actor_loss=0.2532 critic_loss=2563361174210.2070 entropy=17.8432 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 180900] reward=-119180278.5 actor_loss=0.2823 critic_loss=82200113737.1429 entropy=17.8353 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 180900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-461086.6 mean_steps=15.4
|
|
[Episode 180910] reward=-117283926.4 actor_loss=0.2818 critic_loss=81166214718.4390 entropy=17.8297 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 180920] reward=-116816295.9 actor_loss=0.3126 critic_loss=80097958661.6889 entropy=17.8183 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 180920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-523542.4 mean_steps=14.7
|
|
[Episode 180930] reward=-209354330.2 actor_loss=0.3467 critic_loss=30137277865437.8672 entropy=17.8076 approx_kl=0.0004 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 180940] reward=-118403392.8 actor_loss=0.2535 critic_loss=86145859911.6800 entropy=17.7999 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 180940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453910.5 mean_steps=15.4
|
|
[Episode 180950] reward=-123392850.5 actor_loss=0.2836 critic_loss=87604318559.0857 entropy=17.8042 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 180960] reward=-119760356.8 actor_loss=0.3213 critic_loss=82876789919.2889 entropy=17.7966 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 180960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-516523.3 mean_steps=12.8
|
|
[Episode 180970] reward=-121187102.6 actor_loss=0.2821 critic_loss=85511564902.4000 entropy=17.7988 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 180980] reward=-113142489.5 actor_loss=0.4568 critic_loss=79659745644.0889 entropy=17.8011 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 180980] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-319252.8 mean_steps=16.8
|
|
[Episode 180990] reward=-117106419.6 actor_loss=0.3143 critic_loss=77598184789.3333 entropy=17.7972 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 181000] reward=-116818983.9 actor_loss=0.3058 critic_loss=83931502455.4667 entropy=17.7983 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 181000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-521808.7 mean_steps=13.2
|
|
[Episode 181010] reward=-119047912.9 actor_loss=0.2608 critic_loss=82003464927.1795 entropy=17.8155 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 181020] reward=-111410024.7 actor_loss=0.4407 critic_loss=71457230745.6000 entropy=17.8023 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 181020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-640063.6 mean_steps=12.8
|
|
[Episode 181030] reward=-260451361.7 actor_loss=0.2203 critic_loss=67433048653459.9141 entropy=17.8052 approx_kl=-0.0000 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 181040] reward=-118042986.9 actor_loss=0.3010 critic_loss=83266695395.5556 entropy=17.8157 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 181040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495710.0 mean_steps=14.1
|
|
[Episode 181050] reward=-119850902.1 actor_loss=0.3660 critic_loss=86362353129.7391 entropy=17.8227 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 181060] reward=-118427524.8 actor_loss=0.3768 critic_loss=90876311233.4222 entropy=17.8235 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 181060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438301.6 mean_steps=15.3
|
|
[Episode 181070] reward=-119065889.5 actor_loss=0.3574 critic_loss=119929070567.0244 entropy=17.8404 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 181080] reward=-118060324.5 actor_loss=0.3656 critic_loss=81440907264.0000 entropy=17.8445 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 181080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-357408.2 mean_steps=15.6
|
|
[Episode 181090] reward=-116346167.8 actor_loss=0.1998 critic_loss=78236444672.0000 entropy=17.8366 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 181100] reward=-122071377.8 actor_loss=0.3515 critic_loss=83368858601.2444 entropy=17.8159 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 181100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572138.0 mean_steps=12.7
|
|
[Episode 181110] reward=-156122039.6 actor_loss=0.3694 critic_loss=6770672556805.6885 entropy=17.8094 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 181120] reward=-114527578.3 actor_loss=0.2841 critic_loss=78713834651.1515 entropy=17.7914 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 181120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414246.7 mean_steps=15.3
|
|
[Episode 181130] reward=-115420775.5 actor_loss=0.3244 critic_loss=81201777868.8000 entropy=17.7882 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 181140] reward=-115201237.0 actor_loss=0.2953 critic_loss=83238631287.4667 entropy=17.7767 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 181140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516603.5 mean_steps=14.3
|
|
[Episode 181150] reward=-119990257.6 actor_loss=0.3024 critic_loss=84864566211.7647 entropy=17.7637 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 181160] reward=-117818374.9 actor_loss=0.2658 critic_loss=84381320131.7647 entropy=17.7659 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 181160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512287.5 mean_steps=14.4
|
|
[Episode 181170] reward=-123918283.8 actor_loss=0.1718 critic_loss=86334936041.2444 entropy=17.7617 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 181180] reward=-118108777.1 actor_loss=0.2839 critic_loss=94612213444.9231 entropy=17.7578 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 181180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-429286.6 mean_steps=16.4
|
|
[Episode 181190] reward=-122557850.1 actor_loss=0.2166 critic_loss=94041313652.3636 entropy=17.7614 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 181200] reward=-118816468.8 actor_loss=0.2731 critic_loss=81236963132.9524 entropy=17.7647 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 181200] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-271209.1 mean_steps=17.6
|
|
[Episode 181210] reward=-116687972.8 actor_loss=0.2738 critic_loss=80220863634.2857 entropy=17.7550 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 181220] reward=-116927800.5 actor_loss=0.4609 critic_loss=80686607018.6667 entropy=17.7598 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 181220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524447.9 mean_steps=14.1
|
|
[Episode 181230] reward=-116935211.6 actor_loss=0.2873 critic_loss=78787879003.0222 entropy=17.7593 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 181240] reward=-115629567.4 actor_loss=0.3040 critic_loss=78696193200.5517 entropy=17.7501 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 181240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-509637.5 mean_steps=14.8
|
|
[Episode 181250] reward=-117102197.6 actor_loss=0.3978 critic_loss=79199366826.6667 entropy=17.7466 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 181260] reward=-123778322.6 actor_loss=0.2491 critic_loss=85511242898.2857 entropy=17.7295 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 181260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470035.0 mean_steps=14.2
|
|
[Episode 181270] reward=-134442686.2 actor_loss=0.3450 critic_loss=1380072846751.1353 entropy=17.7156 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 181280] reward=-117320975.4 actor_loss=0.3855 critic_loss=145885905920.0000 entropy=17.7223 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 181280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513731.1 mean_steps=13.9
|
|
[Episode 181290] reward=-125097419.7 actor_loss=0.1840 critic_loss=166448066128.8421 entropy=17.7139 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 181300] reward=-124964156.9 actor_loss=0.2490 critic_loss=89063990377.9310 entropy=17.7192 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 181300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-442326.7 mean_steps=16.1
|
|
[Episode 181310] reward=-119537568.8 actor_loss=0.3063 critic_loss=89878922705.4545 entropy=17.7260 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 181320] reward=-114954496.8 actor_loss=0.2794 critic_loss=82882840113.5484 entropy=17.7210 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 181320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-398246.5 mean_steps=15.4
|
|
[Episode 181330] reward=-118418870.0 actor_loss=0.3468 critic_loss=81512459041.3913 entropy=17.7207 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 181340] reward=-111455593.6 actor_loss=0.4411 critic_loss=77554715343.5676 entropy=17.7249 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 181340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473988.2 mean_steps=14.8
|
|
[Episode 181350] reward=-122002634.3 actor_loss=0.3112 critic_loss=84814877855.2889 entropy=17.7277 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 181360] reward=-117867975.7 actor_loss=0.3264 critic_loss=79884719263.2889 entropy=17.7193 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 181360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517316.0 mean_steps=13.9
|
|
[Episode 181370] reward=-123764845.0 actor_loss=0.2152 critic_loss=86498783045.8182 entropy=17.7297 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 181380] reward=-119514780.2 actor_loss=0.3267 critic_loss=81990522379.3778 entropy=17.6918 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 181380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-513384.2 mean_steps=14.6
|
|
[Episode 181390] reward=-117036832.6 actor_loss=0.4094 critic_loss=122443985897.2444 entropy=17.6984 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 181400] reward=-121373584.9 actor_loss=0.2920 critic_loss=81516319488.0000 entropy=17.6996 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 181400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569816.1 mean_steps=13.4
|
|
[Episode 181410] reward=-120458736.3 actor_loss=0.2799 critic_loss=83308341202.4889 entropy=17.6824 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 181420] reward=-117829486.7 actor_loss=0.3700 critic_loss=79472113163.3778 entropy=17.6907 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 181420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559058.4 mean_steps=13.2
|
|
[Episode 181430] reward=-116351408.8 actor_loss=0.3263 critic_loss=74327559338.6667 entropy=17.6714 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 181440] reward=-114893840.0 actor_loss=0.3860 critic_loss=80267081541.8182 entropy=17.6601 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 181440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441097.9 mean_steps=15.6
|
|
[Episode 181450] reward=-119515225.8 actor_loss=0.3677 critic_loss=81961276355.7647 entropy=17.6504 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 181460] reward=-116675448.9 actor_loss=0.4027 critic_loss=75802301599.2889 entropy=17.6498 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 181460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577084.9 mean_steps=12.7
|
|
[Episode 181470] reward=-113331817.7 actor_loss=0.3534 critic_loss=77095493524.2105 entropy=17.6335 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 181480] reward=-122206545.5 actor_loss=0.2664 critic_loss=82166734643.2000 entropy=17.6289 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 181480] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-716705.1 mean_steps=12.6
|
|
[Episode 181490] reward=-117389433.5 actor_loss=0.3504 critic_loss=75300855949.2414 entropy=17.6168 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 181500] reward=-117704118.1 actor_loss=0.1999 critic_loss=78899922745.8065 entropy=17.6104 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 181500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-586033.9 mean_steps=11.7
|
|
[Episode 181510] reward=-116454368.9 actor_loss=0.2680 critic_loss=78524265358.2222 entropy=17.6073 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 181520] reward=-120643075.9 actor_loss=0.2519 critic_loss=85907411688.7273 entropy=17.6100 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 181520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555070.0 mean_steps=13.2
|
|
[Episode 181530] reward=-118061285.7 actor_loss=0.2993 critic_loss=86672196431.4483 entropy=17.6085 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 181540] reward=-122977808.7 actor_loss=0.2428 critic_loss=86826873554.8235 entropy=17.6068 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 181540] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-423163.4 mean_steps=17.1
|
|
[Episode 181550] reward=-124449350.6 actor_loss=0.2904 critic_loss=154926607018.6667 entropy=17.5966 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 181560] reward=-125042401.0 actor_loss=0.2855 critic_loss=95987034112.0000 entropy=17.5946 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 181560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-404372.6 mean_steps=15.8
|
|
[Episode 181570] reward=-114689643.2 actor_loss=0.3024 critic_loss=75172214453.6774 entropy=17.5992 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 181580] reward=-123796723.1 actor_loss=0.2550 critic_loss=84900689540.7407 entropy=17.5988 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 181580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590341.0 mean_steps=13.9
|
|
[Episode 181590] reward=-114630040.3 actor_loss=0.3034 critic_loss=75355357976.7742 entropy=17.5912 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 181600] reward=-119567881.0 actor_loss=0.2574 critic_loss=87520613717.3333 entropy=17.5859 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 181600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619004.0 mean_steps=12.7
|
|
[Episode 181610] reward=-115148075.7 actor_loss=0.3407 critic_loss=78970210258.4889 entropy=17.5792 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 181620] reward=-117854294.4 actor_loss=0.2850 critic_loss=78817513372.0976 entropy=17.5706 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 181620] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-339342.6 mean_steps=17.2
|
|
[Episode 181630] reward=-117169099.6 actor_loss=0.3437 critic_loss=80654228992.0000 entropy=17.5810 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 181640] reward=-112847578.7 actor_loss=0.3165 critic_loss=76632217307.4286 entropy=17.5946 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 181640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487069.4 mean_steps=14.8
|
|
[Episode 181650] reward=-109293422.9 actor_loss=0.2703 critic_loss=75755387562.6667 entropy=17.5839 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 181660] reward=-115633018.1 actor_loss=0.3306 critic_loss=76366859811.7209 entropy=17.5923 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 181660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532803.5 mean_steps=14.1
|
|
[Episode 181670] reward=-116316094.0 actor_loss=0.3229 critic_loss=83702285016.1778 entropy=17.5905 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 181680] reward=-115130180.6 actor_loss=0.2341 critic_loss=73954304000.0000 entropy=17.5931 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 181680] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-323640.8 mean_steps=16.5
|
|
[Episode 181690] reward=-111681841.7 actor_loss=0.3521 critic_loss=78375216096.9697 entropy=17.5889 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 181700] reward=-117146336.5 actor_loss=0.2774 critic_loss=80232456874.6667 entropy=17.5767 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 181700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-389630.8 mean_steps=17.0
|
|
[Episode 181710] reward=-116055534.3 actor_loss=0.3819 critic_loss=79022795875.0968 entropy=17.5896 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 181720] reward=-119566290.2 actor_loss=0.2034 critic_loss=77322883600.5161 entropy=17.5744 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 181720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526754.7 mean_steps=14.1
|
|
[Episode 181730] reward=-119527442.2 actor_loss=0.2541 critic_loss=93708434870.8571 entropy=17.5586 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 181740] reward=-118004089.8 actor_loss=0.2240 critic_loss=81334040762.1818 entropy=17.5507 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 181740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-403887.9 mean_steps=15.4
|
|
[Episode 181750] reward=-121962998.4 actor_loss=0.3230 critic_loss=81008417177.6000 entropy=17.5610 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 181760] reward=-122427735.9 actor_loss=0.2682 critic_loss=116755740262.4000 entropy=17.5612 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 181760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379304.7 mean_steps=15.8
|
|
[Episode 181770] reward=-116307698.8 actor_loss=0.3201 critic_loss=85113841371.4286 entropy=17.5560 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 181780] reward=-121695318.7 actor_loss=0.3512 critic_loss=185701714056.5333 entropy=17.5634 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 181780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501640.1 mean_steps=14.1
|
|
[Episode 181790] reward=-119994337.5 actor_loss=0.2986 critic_loss=185109995520.0000 entropy=17.5524 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 181800] reward=-120383903.2 actor_loss=0.3361 critic_loss=93131018695.1111 entropy=17.5392 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 181800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-471230.5 mean_steps=15.3
|
|
[Episode 181810] reward=-113901953.2 actor_loss=0.3705 critic_loss=77466068764.4444 entropy=17.5419 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 181820] reward=-112191720.7 actor_loss=0.3064 critic_loss=75385764554.4186 entropy=17.5571 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 181820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-620538.6 mean_steps=12.0
|
|
[Episode 181830] reward=-120371147.1 actor_loss=0.2557 critic_loss=80121280124.5405 entropy=17.5750 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 181840] reward=-122917294.1 actor_loss=0.3468 critic_loss=91604017620.1143 entropy=17.5730 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 181840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552804.6 mean_steps=14.2
|
|
[Episode 181850] reward=-120286100.7 actor_loss=0.2247 critic_loss=78622393490.2857 entropy=17.5703 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 181860] reward=-116477633.7 actor_loss=0.3644 critic_loss=80623581304.4706 entropy=17.5845 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 181860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-529440.1 mean_steps=14.8
|
|
[Episode 181870] reward=-114071239.1 actor_loss=0.3894 critic_loss=77468823017.7391 entropy=17.5828 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 181880] reward=-121071152.8 actor_loss=0.2134 critic_loss=86879118677.3333 entropy=17.5833 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 181880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-515010.1 mean_steps=14.9
|
|
[Episode 181890] reward=-113525221.6 actor_loss=0.3853 critic_loss=80046069077.3333 entropy=17.5743 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 181900] reward=-115916621.8 actor_loss=0.3112 critic_loss=77582909767.6800 entropy=17.5718 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 181900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-454404.5 mean_steps=15.6
|
|
[Episode 181910] reward=-119506042.3 actor_loss=0.2829 critic_loss=80164607098.8800 entropy=17.5818 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 181920] reward=-120907581.5 actor_loss=0.2937 critic_loss=80508464206.7692 entropy=17.5820 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 181920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-585308.0 mean_steps=13.4
|
|
[Episode 181930] reward=-115687705.1 actor_loss=0.2764 critic_loss=77634452386.9091 entropy=17.5822 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 181940] reward=-119718948.2 actor_loss=0.3015 critic_loss=81840636723.2000 entropy=17.5718 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 181940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-618945.8 mean_steps=11.8
|
|
[Episode 181950] reward=-116267565.0 actor_loss=0.2509 critic_loss=75928726186.6667 entropy=17.5672 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 181960] reward=-108438416.8 actor_loss=0.3747 critic_loss=74909672501.8947 entropy=17.5675 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 181960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450416.0 mean_steps=14.8
|
|
[Episode 181970] reward=-122142782.4 actor_loss=0.3340 critic_loss=81072323347.6923 entropy=17.5513 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 181980] reward=-122621007.2 actor_loss=0.3579 critic_loss=80365568963.7647 entropy=17.5558 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 181980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-396191.7 mean_steps=16.2
|
|
[Episode 181990] reward=-116207014.4 actor_loss=0.3323 critic_loss=73283546368.0000 entropy=17.5551 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 182000] reward=-117392727.9 actor_loss=0.3355 critic_loss=78833845134.2222 entropy=17.5588 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 182000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528102.7 mean_steps=13.2
|
|
[Episode 182010] reward=-120459132.8 actor_loss=0.2413 critic_loss=80509979696.7619 entropy=17.5620 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 182020] reward=-119297385.2 actor_loss=0.2515 critic_loss=79260877115.0769 entropy=17.5651 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 182020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464880.9 mean_steps=14.6
|
|
[Episode 182030] reward=-118413051.6 actor_loss=0.2505 critic_loss=85769935803.7333 entropy=17.5636 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 182040] reward=-119559730.1 actor_loss=0.1398 critic_loss=83293925831.1111 entropy=17.5591 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 182040] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-361750.7 mean_steps=16.4
|
|
[Episode 182050] reward=-112463252.8 actor_loss=0.3570 critic_loss=77877621191.1111 entropy=17.5594 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 182060] reward=-121354024.6 actor_loss=0.2830 critic_loss=79998641977.8065 entropy=17.5588 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 182060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487456.8 mean_steps=14.8
|
|
[Episode 182070] reward=-120188340.9 actor_loss=0.2718 critic_loss=83185932337.9512 entropy=17.5592 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 182080] reward=-116772568.3 actor_loss=0.2439 critic_loss=79790576493.7143 entropy=17.5650 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 182080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438059.1 mean_steps=15.3
|
|
[Episode 182090] reward=-118833929.8 actor_loss=0.2932 critic_loss=79152687058.4889 entropy=17.5635 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 182100] reward=-120563046.8 actor_loss=0.2700 critic_loss=83015713002.0571 entropy=17.5682 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 182100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-396709.5 mean_steps=16.3
|
|
[Episode 182110] reward=-112194636.7 actor_loss=0.2895 critic_loss=74483994419.2000 entropy=17.5529 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 182120] reward=-119457045.3 actor_loss=0.3273 critic_loss=80991514624.0000 entropy=17.5633 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 182120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-434008.0 mean_steps=13.6
|
|
[Episode 182130] reward=-120195714.1 actor_loss=0.4500 critic_loss=80719950139.0769 entropy=17.5516 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 182140] reward=-119806322.7 actor_loss=0.2103 critic_loss=82919493120.0000 entropy=17.5519 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 182140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500994.1 mean_steps=14.2
|
|
[Episode 182150] reward=-116538194.0 actor_loss=0.1852 critic_loss=80085071872.0000 entropy=17.5388 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 182160] reward=-119338054.3 actor_loss=0.3179 critic_loss=77993609603.4595 entropy=17.5285 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 182160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-423516.4 mean_steps=15.9
|
|
[Episode 182170] reward=-115196077.5 actor_loss=0.2861 critic_loss=80144483942.4000 entropy=17.5310 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 182180] reward=-118291195.1 actor_loss=0.3232 critic_loss=76064314957.5758 entropy=17.5272 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 182180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-366906.7 mean_steps=14.9
|
|
[Episode 182190] reward=-113393188.1 actor_loss=0.2987 critic_loss=73923753630.8965 entropy=17.5203 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 182200] reward=-119593345.4 actor_loss=0.1807 critic_loss=78756769047.2727 entropy=17.5302 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 182200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488327.7 mean_steps=14.2
|
|
[Episode 182210] reward=-117381904.0 actor_loss=0.3367 critic_loss=77287884572.4444 entropy=17.5335 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 182220] reward=-118014462.4 actor_loss=0.3320 critic_loss=81429694805.3333 entropy=17.5384 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 182220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495537.8 mean_steps=15.2
|
|
[Episode 182230] reward=-114695433.5 actor_loss=0.3333 critic_loss=78339461939.2000 entropy=17.5347 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 182240] reward=-115294690.5 actor_loss=0.2570 critic_loss=72265867264.0000 entropy=17.5382 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 182240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-598658.2 mean_steps=13.7
|
|
[Episode 182250] reward=-115513234.8 actor_loss=0.2552 critic_loss=76668212819.3488 entropy=17.5378 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 182260] reward=-116173878.3 actor_loss=0.2913 critic_loss=78248620851.2000 entropy=17.5332 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 182260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482221.6 mean_steps=14.7
|
|
[Episode 182270] reward=-113915032.5 actor_loss=0.3365 critic_loss=79322309376.0000 entropy=17.5360 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 182280] reward=-119809590.1 actor_loss=0.2057 critic_loss=80999657699.5556 entropy=17.5332 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 182280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491270.4 mean_steps=13.9
|
|
[Episode 182290] reward=-118945863.6 actor_loss=0.3247 critic_loss=89676982954.6667 entropy=17.5334 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 182300] reward=-115387033.7 actor_loss=0.3695 critic_loss=75066406912.0000 entropy=17.5283 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 182300] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-645461.2 mean_steps=11.2
|
|
[Episode 182310] reward=-117916501.2 actor_loss=0.2446 critic_loss=82025043580.5405 entropy=17.5305 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 182320] reward=-115082907.9 actor_loss=0.2754 critic_loss=75232484352.0000 entropy=17.5282 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 182320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550304.3 mean_steps=13.4
|
|
[Episode 182330] reward=-117489113.3 actor_loss=0.2018 critic_loss=78780974573.0370 entropy=17.5265 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 182340] reward=-114718354.0 actor_loss=0.4262 critic_loss=72993773478.9565 entropy=17.5115 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 182340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-622646.2 mean_steps=13.9
|
|
[Episode 182350] reward=-120923517.5 actor_loss=0.3586 critic_loss=80191096939.7895 entropy=17.4966 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 182360] reward=-113990436.8 actor_loss=0.3137 critic_loss=75806691881.5135 entropy=17.5010 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 182360] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-722612.3 mean_steps=11.6
|
|
[Episode 182370] reward=-123713164.7 actor_loss=0.3313 critic_loss=84411853710.2222 entropy=17.4956 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 182380] reward=-119441400.7 actor_loss=0.2748 critic_loss=77916041697.8824 entropy=17.4950 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 182380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473078.8 mean_steps=14.7
|
|
[Episode 182390] reward=-121086918.2 actor_loss=0.3259 critic_loss=81015291422.1176 entropy=17.5139 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 182400] reward=-122994925.4 actor_loss=0.2831 critic_loss=85449850489.9048 entropy=17.5035 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 182400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476884.3 mean_steps=14.6
|
|
[Episode 182410] reward=-117639471.0 actor_loss=0.2588 critic_loss=77333880441.9048 entropy=17.5060 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 182420] reward=-119124316.0 actor_loss=0.2149 critic_loss=82367484119.5789 entropy=17.5179 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 182420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-485227.3 mean_steps=15.6
|
|
[Episode 182430] reward=-116886027.8 actor_loss=0.4174 critic_loss=88572924550.7368 entropy=17.5213 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 182440] reward=-115711546.1 actor_loss=0.4432 critic_loss=87875148003.5556 entropy=17.5238 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 182440] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623326.8 mean_steps=11.9
|
|
[Episode 182450] reward=-116202009.3 actor_loss=0.3507 critic_loss=80343181312.0000 entropy=17.5110 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 182460] reward=-119692330.6 actor_loss=0.2042 critic_loss=78424884838.4000 entropy=17.4994 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 182460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-560335.0 mean_steps=14.3
|
|
[Episode 182470] reward=-116740315.5 actor_loss=0.2892 critic_loss=78192953685.3333 entropy=17.4982 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 182480] reward=-115695173.9 actor_loss=0.2515 critic_loss=79560473258.6667 entropy=17.4922 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 182480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477798.2 mean_steps=14.6
|
|
[Episode 182490] reward=-121931803.7 actor_loss=0.3208 critic_loss=84449286826.6667 entropy=17.4951 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 182500] reward=-118414560.5 actor_loss=0.2713 critic_loss=78445292384.7111 entropy=17.4835 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 182500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438037.7 mean_steps=15.7
|
|
[Episode 182510] reward=-115580475.8 actor_loss=0.2265 critic_loss=81642937299.4783 entropy=17.4799 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 182520] reward=-120662201.3 actor_loss=0.2595 critic_loss=80367588252.9032 entropy=17.4677 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 182520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525095.0 mean_steps=14.2
|
|
[Episode 182530] reward=-115794850.1 actor_loss=0.2772 critic_loss=76871490721.6842 entropy=17.4866 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 182540] reward=-116503860.3 actor_loss=0.2732 critic_loss=77613822710.5185 entropy=17.4772 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 182540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-472640.8 mean_steps=13.8
|
|
[Episode 182550] reward=-118160335.1 actor_loss=0.3444 critic_loss=75366158966.1538 entropy=17.4762 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 182560] reward=-116490817.6 actor_loss=0.3851 critic_loss=79360046865.8605 entropy=17.4753 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 182560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514085.0 mean_steps=13.8
|
|
[Episode 182570] reward=-118305756.9 actor_loss=0.3764 critic_loss=79131340093.7931 entropy=17.4691 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 182580] reward=-113330915.7 actor_loss=0.3415 critic_loss=73951120588.8000 entropy=17.4519 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 182580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-449034.5 mean_steps=13.6
|
|
[Episode 182590] reward=-119984878.3 actor_loss=0.2935 critic_loss=83258724807.1111 entropy=17.4387 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 182600] reward=-124136954.8 actor_loss=0.3081 critic_loss=80679044055.0400 entropy=17.4325 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 182600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-443131.0 mean_steps=16.1
|
|
[Episode 182610] reward=-115175362.9 actor_loss=0.3160 critic_loss=75358781440.0000 entropy=17.4393 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 182620] reward=-111185204.7 actor_loss=0.3766 critic_loss=72909921591.6522 entropy=17.4436 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 182620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-527897.0 mean_steps=13.4
|
|
[Episode 182630] reward=-116094662.8 actor_loss=0.3565 critic_loss=79098944443.7333 entropy=17.4541 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 182640] reward=-121277711.1 actor_loss=0.2964 critic_loss=83014599884.8000 entropy=17.4598 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 182640] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-411384.4 mean_steps=16.3
|
|
[Episode 182650] reward=-119700071.3 actor_loss=0.2050 critic_loss=77883739574.8571 entropy=17.4520 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 182660] reward=-114482201.5 actor_loss=0.2810 critic_loss=71624006780.8781 entropy=17.4484 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 182660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405118.2 mean_steps=15.1
|
|
[Episode 182670] reward=-119745198.6 actor_loss=0.1898 critic_loss=78190296632.8889 entropy=17.4540 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 182680] reward=-114687563.6 actor_loss=0.3987 critic_loss=76138612326.4000 entropy=17.4667 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 182680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-702199.8 mean_steps=12.7
|
|
[Episode 182690] reward=-122086728.0 actor_loss=0.3111 critic_loss=85440627416.1778 entropy=17.4716 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 182700] reward=-122289007.0 actor_loss=0.2613 critic_loss=83383891533.5758 entropy=17.4760 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 182700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456081.9 mean_steps=14.7
|
|
[Episode 182710] reward=-119839309.6 actor_loss=0.3509 critic_loss=81483532288.0000 entropy=17.4805 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 182720] reward=-117727411.8 actor_loss=0.2036 critic_loss=78887831688.5333 entropy=17.4774 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 182720] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-334961.0 mean_steps=16.6
|
|
[Episode 182730] reward=-119360014.9 actor_loss=0.3141 critic_loss=80928645120.0000 entropy=17.4831 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 182740] reward=-118960690.6 actor_loss=0.2417 critic_loss=81053475098.4828 entropy=17.4881 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 182740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-613505.7 mean_steps=13.8
|
|
[Episode 182750] reward=-117029835.4 actor_loss=0.3086 critic_loss=80701955614.1176 entropy=17.4970 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 182760] reward=-117717624.0 actor_loss=0.2060 critic_loss=81296505896.9600 entropy=17.5025 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 182760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-630558.1 mean_steps=12.8
|
|
[Episode 182770] reward=-116146754.3 actor_loss=0.3314 critic_loss=77355529069.7143 entropy=17.5005 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 182780] reward=-116367322.2 actor_loss=0.2867 critic_loss=78179689170.8235 entropy=17.4982 approx_kl=0.0115 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 182780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-367034.9 mean_steps=14.6
|
|
[Episode 182790] reward=-120062940.7 actor_loss=0.2738 critic_loss=82794960668.4444 entropy=17.5020 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 182800] reward=-117614387.2 actor_loss=0.3061 critic_loss=83728327619.7647 entropy=17.5085 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 182800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-659115.1 mean_steps=12.2
|
|
[Episode 182810] reward=-121084975.1 actor_loss=0.2522 critic_loss=78483128832.0000 entropy=17.5203 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 182820] reward=-115455924.0 actor_loss=0.3384 critic_loss=77869039119.5152 entropy=17.5264 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 182820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-709507.7 mean_steps=12.5
|
|
[Episode 182830] reward=-115101064.3 actor_loss=0.3905 critic_loss=76907214165.3333 entropy=17.5335 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 182840] reward=-119325601.2 actor_loss=0.2681 critic_loss=76960086016.0000 entropy=17.5258 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 182840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-662705.5 mean_steps=11.9
|
|
[Episode 182850] reward=-123465387.9 actor_loss=0.3488 critic_loss=86598564522.6667 entropy=17.5215 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 182860] reward=-119166303.5 actor_loss=0.3595 critic_loss=81926929950.1176 entropy=17.5140 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 182860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-607018.9 mean_steps=13.5
|
|
[Episode 182870] reward=-119672766.0 actor_loss=0.2574 critic_loss=81096686145.6410 entropy=17.5268 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 182880] reward=-114352297.6 actor_loss=0.4066 critic_loss=77307273606.0952 entropy=17.5339 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 182880] success_rate=0.800 qp_infeasible_rate=0.200 mean_return=-199355.3 mean_steps=20.0
|
|
[Episode 182890] reward=-119490154.7 actor_loss=0.2750 critic_loss=79107030698.6667 entropy=17.5257 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 182900] reward=-116814715.9 actor_loss=0.4251 critic_loss=80105242965.3333 entropy=17.5312 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 182900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465450.6 mean_steps=15.3
|
|
[Episode 182910] reward=-110815356.0 actor_loss=0.3845 critic_loss=73410619462.6207 entropy=17.5227 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 182920] reward=-122586612.0 actor_loss=0.2240 critic_loss=81948618342.4000 entropy=17.5198 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 182920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489217.2 mean_steps=13.7
|
|
[Episode 182930] reward=-116456794.0 actor_loss=0.4337 critic_loss=83057011916.8000 entropy=17.5179 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 182940] reward=-120958641.6 actor_loss=0.3091 critic_loss=407670410444.8000 entropy=17.5190 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 182940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476130.0 mean_steps=14.5
|
|
[Episode 182950] reward=-114307826.5 actor_loss=0.3471 critic_loss=74408987033.6000 entropy=17.5284 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 182960] reward=-121000679.5 actor_loss=0.3518 critic_loss=86451424334.7692 entropy=17.5364 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 182960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441238.9 mean_steps=14.6
|
|
[Episode 182970] reward=-114265799.8 actor_loss=0.2689 critic_loss=73629975256.1778 entropy=17.5227 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 182980] reward=-115212162.4 actor_loss=0.2634 critic_loss=78018010680.8889 entropy=17.5231 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 182980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528644.2 mean_steps=13.7
|
|
[Episode 182990] reward=-120565738.2 actor_loss=0.2602 critic_loss=86443340498.8235 entropy=17.5215 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 183000] reward=-118123441.0 actor_loss=0.2825 critic_loss=79450672640.0000 entropy=17.5281 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 183000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-520295.7 mean_steps=14.7
|
|
[Episode 183010] reward=-115357343.3 actor_loss=0.2640 critic_loss=73515663815.1111 entropy=17.5368 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 183020] reward=-118016079.6 actor_loss=0.3559 critic_loss=82834916966.4000 entropy=17.5391 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 183020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610707.2 mean_steps=12.4
|
|
[Episode 183030] reward=-118133088.9 actor_loss=0.2285 critic_loss=77436449972.7059 entropy=17.5352 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 183040] reward=-117011231.4 actor_loss=0.2903 critic_loss=84448698740.3636 entropy=17.5226 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 183040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-455317.9 mean_steps=13.8
|
|
[Episode 183050] reward=-119720515.0 actor_loss=0.2589 critic_loss=80170784312.8889 entropy=17.5203 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 183060] reward=-113895495.9 actor_loss=0.3249 critic_loss=85195934234.9474 entropy=17.5144 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 183060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-587139.9 mean_steps=11.9
|
|
[Episode 183070] reward=-123252643.6 actor_loss=0.2438 critic_loss=82436801786.3111 entropy=17.5349 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 183080] reward=-115967335.3 actor_loss=0.3071 critic_loss=75505215351.4667 entropy=17.5474 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 183080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483161.1 mean_steps=13.8
|
|
[Episode 183090] reward=-115652861.1 actor_loss=0.2957 critic_loss=73956192434.0870 entropy=17.5489 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 183100] reward=-112258557.0 actor_loss=0.4096 critic_loss=74620358179.7209 entropy=17.5431 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 183100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571419.3 mean_steps=13.3
|
|
[Episode 183110] reward=-110742752.6 actor_loss=0.3909 critic_loss=76258090461.8667 entropy=17.5411 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 183120] reward=-116860219.8 actor_loss=0.2915 critic_loss=75779431628.8000 entropy=17.5534 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 183120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-561156.8 mean_steps=14.4
|
|
[Episode 183130] reward=-117454707.4 actor_loss=0.2782 critic_loss=74391927314.9630 entropy=17.5545 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 183140] reward=-117303536.5 actor_loss=0.2535 critic_loss=80101726208.0000 entropy=17.5362 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 183140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523181.6 mean_steps=14.2
|
|
[Episode 183150] reward=-112991050.1 actor_loss=0.3225 critic_loss=83946841305.2121 entropy=17.5296 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 183160] reward=-118900873.3 actor_loss=0.3776 critic_loss=81203497824.7111 entropy=17.5133 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 183160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584565.6 mean_steps=12.7
|
|
[Episode 183170] reward=-116745806.4 actor_loss=0.3096 critic_loss=79240646780.1212 entropy=17.5115 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 183180] reward=-113653549.9 actor_loss=0.2842 critic_loss=73717810884.9231 entropy=17.5232 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 183180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468898.3 mean_steps=13.6
|
|
[Episode 183190] reward=-110224478.1 actor_loss=0.4354 critic_loss=73988088490.6667 entropy=17.5264 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 183200] reward=-119525935.9 actor_loss=0.2049 critic_loss=80206686890.6667 entropy=17.5305 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 183200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580702.3 mean_steps=13.1
|
|
[Episode 183210] reward=-117558513.2 actor_loss=0.3027 critic_loss=75560854960.3556 entropy=17.5312 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 183220] reward=-116449546.4 actor_loss=0.3075 critic_loss=75210100432.5926 entropy=17.5265 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 183220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430555.0 mean_steps=15.2
|
|
[Episode 183230] reward=-117843489.9 actor_loss=0.3473 critic_loss=81184620800.0000 entropy=17.5277 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 183240] reward=-115045753.3 actor_loss=0.2928 critic_loss=74822617941.3333 entropy=17.5186 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 183240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433684.0 mean_steps=14.6
|
|
[Episode 183250] reward=-115850545.2 actor_loss=0.3441 critic_loss=77109147577.3793 entropy=17.5262 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 183260] reward=-117673323.3 actor_loss=0.2657 critic_loss=81686385198.5455 entropy=17.5231 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 183260] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-380111.7 mean_steps=16.6
|
|
[Episode 183270] reward=-121921833.2 actor_loss=0.2243 critic_loss=80807656414.9677 entropy=17.5248 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 183280] reward=-113231732.9 actor_loss=0.2955 critic_loss=73094463860.3636 entropy=17.5335 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 183280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476621.5 mean_steps=14.8
|
|
[Episode 183290] reward=-117883770.4 actor_loss=0.2652 critic_loss=80982131507.2000 entropy=17.5428 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 183300] reward=-118872100.8 actor_loss=0.3039 critic_loss=80847308800.0000 entropy=17.5351 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 183300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-539986.1 mean_steps=14.9
|
|
[Episode 183310] reward=-111355504.8 actor_loss=0.3541 critic_loss=73461890070.7556 entropy=17.5449 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 183320] reward=-121008461.7 actor_loss=0.2165 critic_loss=81661451791.5152 entropy=17.5509 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 183320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467073.4 mean_steps=13.7
|
|
[Episode 183330] reward=-114962582.6 actor_loss=0.2950 critic_loss=76298275203.4595 entropy=17.5496 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 183340] reward=-115342430.6 actor_loss=0.2341 critic_loss=78885116859.7333 entropy=17.5434 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 183340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503663.0 mean_steps=14.8
|
|
[Episode 183350] reward=-123048738.3 actor_loss=0.2400 critic_loss=82504631500.8000 entropy=17.5558 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 183360] reward=-110932685.9 actor_loss=0.2007 critic_loss=76713610854.4000 entropy=17.5478 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 183360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439099.0 mean_steps=14.9
|
|
[Episode 183370] reward=-113990479.4 actor_loss=0.2887 critic_loss=75948692965.0526 entropy=17.5437 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 183380] reward=-118899690.6 actor_loss=0.2383 critic_loss=83120392378.1818 entropy=17.5430 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 183380] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-587507.8 mean_steps=11.8
|
|
[Episode 183390] reward=-115915635.5 actor_loss=0.3164 critic_loss=75938657962.6667 entropy=17.5525 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 183400] reward=-113658175.8 actor_loss=0.2818 critic_loss=72564660321.5238 entropy=17.5649 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 183400] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-651926.3 mean_steps=11.2
|
|
[Episode 183410] reward=-111805821.5 actor_loss=0.2595 critic_loss=72707900633.2121 entropy=17.5624 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 183420] reward=-117175026.7 actor_loss=0.3107 critic_loss=227055206400.0000 entropy=17.5710 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 183420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458456.6 mean_steps=15.6
|
|
[Episode 183430] reward=-119864036.3 actor_loss=0.2918 critic_loss=81054098465.0323 entropy=17.5580 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 183440] reward=-111630975.6 actor_loss=0.3492 critic_loss=71364863193.2121 entropy=17.5667 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 183440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591856.6 mean_steps=13.8
|
|
[Episode 183450] reward=-119942034.0 actor_loss=0.2463 critic_loss=81494964859.5862 entropy=17.5658 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 183460] reward=-113748919.5 actor_loss=0.2715 critic_loss=71893071701.3333 entropy=17.5572 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 183460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566870.4 mean_steps=13.4
|
|
[Episode 183470] reward=-110603907.5 actor_loss=0.3782 critic_loss=74473689352.2581 entropy=17.5595 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 183480] reward=-115141319.9 actor_loss=0.2864 critic_loss=76321622817.3913 entropy=17.5649 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 183480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-409645.2 mean_steps=14.4
|
|
[Episode 183490] reward=-118208329.5 actor_loss=0.3317 critic_loss=83956393210.3111 entropy=17.5634 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 183500] reward=-115367957.5 actor_loss=0.3980 critic_loss=74027462656.0000 entropy=17.5504 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 183500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597443.6 mean_steps=12.8
|
|
[Episode 183510] reward=-118736969.8 actor_loss=0.3381 critic_loss=75515233280.0000 entropy=17.5645 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 183520] reward=-124020775.4 actor_loss=0.2508 critic_loss=88009940081.7778 entropy=17.5550 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 183520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-597727.5 mean_steps=13.6
|
|
[Episode 183530] reward=-117109870.3 actor_loss=0.2951 critic_loss=78191596885.3333 entropy=17.5393 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 183540] reward=-114065749.9 actor_loss=0.2690 critic_loss=74104689459.2000 entropy=17.5429 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 183540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491497.5 mean_steps=13.9
|
|
[Episode 183550] reward=-116948026.0 actor_loss=0.2777 critic_loss=76833131520.0000 entropy=17.5369 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 183560] reward=-113698811.6 actor_loss=0.3497 critic_loss=80043628877.3954 entropy=17.5373 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 183560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497294.5 mean_steps=13.8
|
|
[Episode 183570] reward=-118580605.3 actor_loss=0.3126 critic_loss=79923012858.3111 entropy=17.5476 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 183580] reward=-116285532.2 actor_loss=0.1959 critic_loss=69459886503.7241 entropy=17.5550 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 183580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551343.4 mean_steps=12.3
|
|
[Episode 183590] reward=-118633427.1 actor_loss=0.2493 critic_loss=77540894562.4615 entropy=17.5520 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 183600] reward=-113825213.2 actor_loss=0.3079 critic_loss=77841333725.8667 entropy=17.5558 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 183600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-523248.7 mean_steps=15.9
|
|
[Episode 183610] reward=-112441415.2 actor_loss=0.3153 critic_loss=75803697152.0000 entropy=17.5681 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 183620] reward=-113081033.4 actor_loss=0.3405 critic_loss=76289636459.7895 entropy=17.5702 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 183620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423645.2 mean_steps=15.2
|
|
[Episode 183630] reward=-117365267.7 actor_loss=0.3455 critic_loss=81640646332.6316 entropy=17.5790 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 183640] reward=-117600852.5 actor_loss=0.3433 critic_loss=77154558225.0667 entropy=17.5678 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 183640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-599476.3 mean_steps=13.6
|
|
[Episode 183650] reward=-113420680.8 actor_loss=0.3689 critic_loss=77066923446.8571 entropy=17.5769 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 183660] reward=-114739591.2 actor_loss=0.3162 critic_loss=82977642375.5294 entropy=17.5888 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 183660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422909.3 mean_steps=15.1
|
|
[Episode 183670] reward=-124005011.9 actor_loss=0.2364 critic_loss=85425324359.6800 entropy=17.5685 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 183680] reward=-111803923.5 actor_loss=0.3038 critic_loss=78701553994.3226 entropy=17.5644 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 183680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-385443.7 mean_steps=15.2
|
|
[Episode 183690] reward=-118308631.8 actor_loss=0.3657 critic_loss=82034956970.6667 entropy=17.5569 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 183700] reward=-114181123.1 actor_loss=0.2816 critic_loss=75372006240.7111 entropy=17.5486 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 183700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544134.4 mean_steps=13.8
|
|
[Episode 183710] reward=-114173082.1 actor_loss=0.3825 critic_loss=77353666413.7143 entropy=17.5592 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 183720] reward=-115913108.1 actor_loss=0.2787 critic_loss=77777407180.8000 entropy=17.5699 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 183720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496258.4 mean_steps=13.9
|
|
[Episode 183730] reward=-116881741.8 actor_loss=0.2000 critic_loss=77366963950.9333 entropy=17.5714 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 183740] reward=-118065193.5 actor_loss=0.3922 critic_loss=79437724672.0000 entropy=17.5559 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 183740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-554864.9 mean_steps=14.4
|
|
[Episode 183750] reward=-113049522.5 actor_loss=0.4849 critic_loss=70568644881.0667 entropy=17.5617 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1523 front_blocked=0
|
|
[Episode 183760] reward=-120651521.6 actor_loss=0.2286 critic_loss=79723493083.4286 entropy=17.5493 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 183760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-639383.7 mean_steps=12.4
|
|
[Episode 183770] reward=-122217693.6 actor_loss=0.2397 critic_loss=81037722380.1905 entropy=17.5407 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 183780] reward=-117219282.1 actor_loss=0.3684 critic_loss=80178293164.6512 entropy=17.5415 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 183780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548017.8 mean_steps=14.2
|
|
[Episode 183790] reward=-113416246.8 actor_loss=0.2810 critic_loss=75546544992.7111 entropy=17.5331 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 183800] reward=-115989838.6 actor_loss=0.2730 critic_loss=73778377788.2353 entropy=17.5332 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 183800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571402.7 mean_steps=12.5
|
|
[Episode 183810] reward=-114740887.2 actor_loss=0.3573 critic_loss=75822416630.5185 entropy=17.5262 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 183820] reward=-114500669.2 actor_loss=0.3244 critic_loss=72047006281.1429 entropy=17.5253 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 183820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573455.2 mean_steps=13.3
|
|
[Episode 183830] reward=-113820457.6 actor_loss=0.3148 critic_loss=70102431810.0645 entropy=17.5098 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 183840] reward=-116124506.4 actor_loss=0.3758 critic_loss=98149786841.2121 entropy=17.5171 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 183840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-412681.0 mean_steps=15.8
|
|
[Episode 183850] reward=-117971159.3 actor_loss=0.3246 critic_loss=80315426718.4762 entropy=17.5144 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 183860] reward=-118989058.9 actor_loss=0.2746 critic_loss=78600994998.0444 entropy=17.5028 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 183860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409975.5 mean_steps=16.1
|
|
[Episode 183870] reward=-115178951.0 actor_loss=0.3922 critic_loss=75738299338.1053 entropy=17.5096 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 183880] reward=-116348567.1 actor_loss=0.2593 critic_loss=76253740056.9756 entropy=17.5079 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 183880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-625185.5 mean_steps=12.9
|
|
[Episode 183890] reward=-110869287.3 actor_loss=0.3386 critic_loss=72300200521.1429 entropy=17.5092 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 183900] reward=-119782435.9 actor_loss=0.3529 critic_loss=82669352401.4545 entropy=17.5064 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 183900] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-740485.3 mean_steps=11.8
|
|
[Episode 183910] reward=-114595758.4 actor_loss=0.2772 critic_loss=76904279271.2258 entropy=17.5166 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 183920] reward=-122835320.2 actor_loss=0.2041 critic_loss=88719592995.7209 entropy=17.5003 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 183920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542111.2 mean_steps=13.4
|
|
[Episode 183930] reward=-117833140.0 actor_loss=0.3332 critic_loss=77408804955.0222 entropy=17.5322 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 183940] reward=-121612675.6 actor_loss=0.2215 critic_loss=80384885760.0000 entropy=17.5161 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 183940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575761.3 mean_steps=13.4
|
|
[Episode 183950] reward=-122030740.2 actor_loss=0.3210 critic_loss=85190489247.2889 entropy=17.5037 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 183960] reward=-119323646.6 actor_loss=0.3673 critic_loss=83089659611.4286 entropy=17.5098 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 183960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433002.4 mean_steps=15.1
|
|
[Episode 183970] reward=-121439402.4 actor_loss=0.2289 critic_loss=74560184037.5172 entropy=17.5085 approx_kl=0.0115 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 183980] reward=-120656479.4 actor_loss=0.2920 critic_loss=79698816891.8710 entropy=17.5081 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 183980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432561.7 mean_steps=15.2
|
|
[Episode 183990] reward=-115737624.1 actor_loss=0.3228 critic_loss=74976566804.4800 entropy=17.5065 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 184000] reward=-115914179.1 actor_loss=0.2429 critic_loss=76752398540.8000 entropy=17.5143 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 184000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409643.0 mean_steps=16.2
|
|
[Episode 184010] reward=-125941148.4 actor_loss=0.3039 critic_loss=255365504477.8667 entropy=17.5073 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 184020] reward=-117346207.9 actor_loss=0.3143 critic_loss=75194322053.5652 entropy=17.5005 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 184020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461302.4 mean_steps=14.6
|
|
[Episode 184030] reward=-113435158.5 actor_loss=0.3913 critic_loss=81601906096.3556 entropy=17.5163 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 184040] reward=-121560164.1 actor_loss=0.2929 critic_loss=81318228480.0000 entropy=17.5435 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 184040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-364640.4 mean_steps=16.1
|
|
[Episode 184050] reward=-117312741.0 actor_loss=0.3707 critic_loss=80348756878.2222 entropy=17.5502 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 184060] reward=-122070299.1 actor_loss=0.2064 critic_loss=85415526664.2581 entropy=17.5619 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 184060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535000.8 mean_steps=14.2
|
|
[Episode 184070] reward=-125547839.9 actor_loss=0.2565 critic_loss=103500172024.6857 entropy=17.5463 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 184080] reward=-115080219.0 actor_loss=0.3054 critic_loss=80765524559.6444 entropy=17.5439 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 184080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434687.7 mean_steps=15.1
|
|
[Episode 184090] reward=-124148697.2 actor_loss=0.2856 critic_loss=83704224153.6000 entropy=17.5051 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 184100] reward=-120546704.0 actor_loss=0.3181 critic_loss=83321579656.5333 entropy=17.4958 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 184100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576662.5 mean_steps=14.0
|
|
[Episode 184110] reward=-119002643.7 actor_loss=0.3382 critic_loss=85522576725.3333 entropy=17.4978 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 184120] reward=-118737943.8 actor_loss=0.3102 critic_loss=85073161966.9333 entropy=17.5066 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 184120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-415325.5 mean_steps=15.2
|
|
[Episode 184130] reward=-120754464.5 actor_loss=0.3145 critic_loss=78683716539.7333 entropy=17.4913 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 184140] reward=-119142520.8 actor_loss=0.3753 critic_loss=81015931843.7647 entropy=17.4917 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 184140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-513213.6 mean_steps=15.2
|
|
[Episode 184150] reward=-114023454.5 actor_loss=0.3673 critic_loss=81206606597.6889 entropy=17.4908 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 184160] reward=-111453616.1 actor_loss=0.3020 critic_loss=77936351165.9355 entropy=17.4918 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 184160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-557964.4 mean_steps=12.2
|
|
[Episode 184170] reward=-118560129.3 actor_loss=0.2342 critic_loss=80180922305.9394 entropy=17.4909 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 184180] reward=-115478838.7 actor_loss=0.4139 critic_loss=77311436438.5882 entropy=17.4823 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 184180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-652788.5 mean_steps=12.9
|
|
[Episode 184190] reward=-122513007.4 actor_loss=0.2703 critic_loss=83687693042.5263 entropy=17.4924 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 184200] reward=-118514142.6 actor_loss=0.2932 critic_loss=84530658048.0000 entropy=17.4881 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 184200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469529.0 mean_steps=13.8
|
|
[Episode 184210] reward=-120894795.6 actor_loss=0.3141 critic_loss=77934717849.6000 entropy=17.4874 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 184220] reward=-110908750.5 actor_loss=0.4049 critic_loss=76599469360.4324 entropy=17.4840 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 184220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-529226.5 mean_steps=14.9
|
|
[Episode 184230] reward=-119894475.5 actor_loss=0.3820 critic_loss=77352091283.9111 entropy=17.4965 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 184240] reward=-117838221.0 actor_loss=0.3387 critic_loss=74805812565.3333 entropy=17.4821 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 184240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559790.4 mean_steps=12.6
|
|
[Episode 184250] reward=-120205458.8 actor_loss=0.3063 critic_loss=82552853014.2609 entropy=17.4654 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 184260] reward=-117084756.8 actor_loss=0.3423 critic_loss=76634112182.0444 entropy=17.4742 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 184260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-494695.4 mean_steps=15.1
|
|
[Episode 184270] reward=-119059526.4 actor_loss=0.3182 critic_loss=85048680652.8000 entropy=17.4829 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 184280] reward=-119039815.2 actor_loss=0.3457 critic_loss=99165378470.9565 entropy=17.4871 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 184280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-545730.3 mean_steps=14.3
|
|
[Episode 184290] reward=-114423502.9 actor_loss=0.3170 critic_loss=80474834224.4324 entropy=17.4985 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 184300] reward=-120945101.6 actor_loss=0.3148 critic_loss=83204055768.1778 entropy=17.5020 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 184300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462678.6 mean_steps=13.7
|
|
[Episode 184310] reward=-122258218.7 actor_loss=0.2450 critic_loss=82594384659.6923 entropy=17.5006 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 184320] reward=-119575621.4 actor_loss=0.2721 critic_loss=85160362287.4074 entropy=17.5141 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 184320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419183.3 mean_steps=15.4
|
|
[Episode 184330] reward=-123771855.4 actor_loss=0.3792 critic_loss=134314219110.4000 entropy=17.5216 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 184340] reward=-114943735.6 actor_loss=0.3553 critic_loss=78813151010.5946 entropy=17.5452 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 184340] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-832384.5 mean_steps=10.4
|
|
[Episode 184350] reward=-115370585.1 actor_loss=0.4311 critic_loss=80346773827.3684 entropy=17.5496 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 184360] reward=-121376165.2 actor_loss=0.3563 critic_loss=137818466683.2592 entropy=17.5489 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 184360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542333.9 mean_steps=14.3
|
|
[Episode 184370] reward=-120441937.1 actor_loss=0.3805 critic_loss=146907887021.4193 entropy=17.5454 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 184380] reward=-126649918.0 actor_loss=0.2012 critic_loss=423772281605.6889 entropy=17.5406 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 184380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-389317.8 mean_steps=16.0
|
|
[Episode 184390] reward=-122125333.2 actor_loss=0.3213 critic_loss=144582109985.3913 entropy=17.5528 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 184400] reward=-119697035.8 actor_loss=0.3475 critic_loss=117997822407.1111 entropy=17.5701 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 184400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-525185.5 mean_steps=15.0
|
|
[Episode 184410] reward=-122156858.5 actor_loss=0.3089 critic_loss=81807198344.5333 entropy=17.5573 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 184420] reward=-121108412.2 actor_loss=0.2051 critic_loss=90624858219.7895 entropy=17.5540 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 184420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501537.7 mean_steps=13.7
|
|
[Episode 184430] reward=-124085781.7 actor_loss=0.2720 critic_loss=240860191446.7097 entropy=17.5470 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 184440] reward=-115083035.5 actor_loss=0.3250 critic_loss=75740566505.2444 entropy=17.5395 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 184440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-511252.3 mean_steps=13.4
|
|
[Episode 184450] reward=-113147686.2 actor_loss=0.3072 critic_loss=73649822196.6222 entropy=17.5349 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 184460] reward=-122148428.6 actor_loss=0.3592 critic_loss=83187179962.8108 entropy=17.5499 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 184460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547811.2 mean_steps=14.2
|
|
[Episode 184470] reward=-113463135.2 actor_loss=0.4077 critic_loss=78458770265.3023 entropy=17.5422 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 184480] reward=-123743504.3 actor_loss=0.2361 critic_loss=88430961487.4483 entropy=17.5462 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 184480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-550021.8 mean_steps=12.5
|
|
[Episode 184490] reward=-113838345.7 actor_loss=0.2529 critic_loss=75697495595.8857 entropy=17.5464 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 184500] reward=-116263681.5 actor_loss=0.2950 critic_loss=77958052819.4783 entropy=17.5384 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 184500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-605813.3 mean_steps=13.6
|
|
[Episode 184510] reward=-119545749.4 actor_loss=0.2420 critic_loss=86869126050.9091 entropy=17.5530 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 184520] reward=-120534098.7 actor_loss=0.2463 critic_loss=88945247118.2222 entropy=17.5575 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 184520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479367.1 mean_steps=14.6
|
|
[Episode 184530] reward=-119133480.7 actor_loss=0.2795 critic_loss=84510975937.9394 entropy=17.5527 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 184540] reward=-117736685.1 actor_loss=0.3786 critic_loss=77577158407.7576 entropy=17.5536 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 184540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394665.3 mean_steps=16.1
|
|
[Episode 184550] reward=-117006361.4 actor_loss=0.2670 critic_loss=84664116370.2857 entropy=17.5519 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 184560] reward=-123705763.0 actor_loss=0.2458 critic_loss=95389406061.7143 entropy=17.5497 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 184560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-356440.8 mean_steps=15.6
|
|
[Episode 184570] reward=-117775641.2 actor_loss=0.2580 critic_loss=93576620605.4400 entropy=17.5410 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 184580] reward=-107302061.7 actor_loss=0.3361 critic_loss=73052151147.3548 entropy=17.5437 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 184580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452417.1 mean_steps=14.8
|
|
[Episode 184590] reward=-117831694.4 actor_loss=0.3034 critic_loss=91077165338.4828 entropy=17.5528 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 184600] reward=-118611082.6 actor_loss=0.3873 critic_loss=90151152403.6923 entropy=17.5483 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 184600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-612831.0 mean_steps=12.8
|
|
[Episode 184610] reward=-118111338.7 actor_loss=0.2606 critic_loss=75251738781.5385 entropy=17.5425 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 184620] reward=-113092357.3 actor_loss=0.3717 critic_loss=79138657765.0526 entropy=17.5467 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 184620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563051.1 mean_steps=13.5
|
|
[Episode 184630] reward=-116136523.4 actor_loss=0.2520 critic_loss=78578128798.4762 entropy=17.5503 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 184640] reward=-122417088.0 actor_loss=0.2631 critic_loss=87787056176.7619 entropy=17.5387 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 184640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480210.4 mean_steps=14.7
|
|
[Episode 184650] reward=-114900947.0 actor_loss=0.3188 critic_loss=77630450892.8000 entropy=17.5459 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 184660] reward=-113870792.9 actor_loss=0.2878 critic_loss=76283066122.2400 entropy=17.5583 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 184660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469720.8 mean_steps=14.7
|
|
[Episode 184670] reward=-117233204.7 actor_loss=0.3917 critic_loss=77620982670.2222 entropy=17.5686 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 184680] reward=-118370223.8 actor_loss=0.3793 critic_loss=79111087640.3810 entropy=17.5776 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 184680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485129.3 mean_steps=13.8
|
|
[Episode 184690] reward=-117353127.4 actor_loss=0.2935 critic_loss=76358735341.0370 entropy=17.5607 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 184700] reward=-118775334.2 actor_loss=0.3378 critic_loss=81823024345.2121 entropy=17.5568 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 184700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424868.0 mean_steps=15.3
|
|
[Episode 184710] reward=-118363751.2 actor_loss=0.1888 critic_loss=82026702555.4286 entropy=17.5431 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 184720] reward=-117358617.6 actor_loss=0.3246 critic_loss=118222361395.2000 entropy=17.5390 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 184720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579489.6 mean_steps=12.6
|
|
[Episode 184730] reward=-116513081.3 actor_loss=0.3954 critic_loss=78356432985.0435 entropy=17.5596 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 184740] reward=-118735620.4 actor_loss=0.3055 critic_loss=82782995294.3158 entropy=17.5452 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 184740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-412003.4 mean_steps=15.2
|
|
[Episode 184750] reward=-116775500.7 actor_loss=0.3508 critic_loss=81950133881.9048 entropy=17.5362 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 184760] reward=-124945453.5 actor_loss=0.1974 critic_loss=97928003584.0000 entropy=17.5311 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 184760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440925.5 mean_steps=15.1
|
|
[Episode 184770] reward=-122181865.7 actor_loss=0.3330 critic_loss=98990639513.6000 entropy=17.5328 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 184780] reward=-116905123.3 actor_loss=0.2647 critic_loss=85684584448.0000 entropy=17.5372 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 184780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515610.0 mean_steps=13.1
|
|
[Episode 184790] reward=-123174147.1 actor_loss=0.2880 critic_loss=213349041584.3556 entropy=17.5458 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 184800] reward=-117024918.4 actor_loss=0.3876 critic_loss=97010245632.0000 entropy=17.5505 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 184800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420811.9 mean_steps=15.7
|
|
[Episode 184810] reward=-119022964.1 actor_loss=0.3859 critic_loss=104958873915.0769 entropy=17.5442 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 184820] reward=-120664564.4 actor_loss=0.2240 critic_loss=102171945432.6154 entropy=17.5403 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 184820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-483650.8 mean_steps=16.1
|
|
[Episode 184830] reward=-116864934.9 actor_loss=0.3287 critic_loss=134614743226.1818 entropy=17.5374 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 184840] reward=-118404663.6 actor_loss=0.2737 critic_loss=84280650885.5652 entropy=17.5563 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 184840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445513.8 mean_steps=14.6
|
|
[Episode 184850] reward=-126538637.6 actor_loss=0.3348 critic_loss=232683054226.2857 entropy=17.5566 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 184860] reward=-111318644.6 actor_loss=0.3571 critic_loss=73789068447.2889 entropy=17.5731 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 184860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437039.1 mean_steps=14.6
|
|
[Episode 184870] reward=-117226967.4 actor_loss=0.3501 critic_loss=241362040832.0000 entropy=17.5857 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 184880] reward=-118514851.3 actor_loss=0.2476 critic_loss=88308702321.7778 entropy=17.5844 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 184880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-633393.5 mean_steps=12.9
|
|
[Episode 184890] reward=-109920107.4 actor_loss=0.4098 critic_loss=71379303655.2258 entropy=17.5699 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 184900] reward=-123393649.9 actor_loss=0.2541 critic_loss=78432073675.4872 entropy=17.5512 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 184900] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-300659.8 mean_steps=17.4
|
|
[Episode 184910] reward=-115695505.3 actor_loss=0.3222 critic_loss=80443996101.4857 entropy=17.5247 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 184920] reward=-114325540.8 actor_loss=0.3100 critic_loss=83253581771.4872 entropy=17.5384 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 184920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510740.2 mean_steps=13.2
|
|
[Episode 184930] reward=-117728259.9 actor_loss=0.2710 critic_loss=75402833704.4211 entropy=17.5363 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 184940] reward=-118486637.9 actor_loss=0.2067 critic_loss=77327890022.4000 entropy=17.5585 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 184940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391093.1 mean_steps=15.9
|
|
[Episode 184950] reward=-119447039.4 actor_loss=0.2577 critic_loss=80290005760.0000 entropy=17.5631 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 184960] reward=-114601701.6 actor_loss=0.2611 critic_loss=74559156315.0222 entropy=17.5617 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 184960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590018.8 mean_steps=12.7
|
|
[Episode 184970] reward=-114352135.4 actor_loss=0.3832 critic_loss=77326323985.0667 entropy=17.5645 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 184980] reward=-121547830.6 actor_loss=0.3937 critic_loss=110459440090.0741 entropy=17.5625 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 184980] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-348247.3 mean_steps=16.8
|
|
[Episode 184990] reward=-396201676.0 actor_loss=0.2448 critic_loss=234121949674063.6562 entropy=17.5802 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 185000] reward=-117420625.0 actor_loss=0.3281 critic_loss=99970632362.6667 entropy=17.5863 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 185000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-461479.6 mean_steps=13.8
|
|
[Episode 185010] reward=-122277817.5 actor_loss=0.2836 critic_loss=182447072177.2308 entropy=17.5806 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 185020] reward=-117294598.3 actor_loss=0.2935 critic_loss=85450272604.1600 entropy=17.5820 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 185020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575937.7 mean_steps=13.7
|
|
[Episode 185030] reward=-121531391.8 actor_loss=0.3375 critic_loss=85224176935.8222 entropy=17.5865 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 185040] reward=-119746526.3 actor_loss=0.3402 critic_loss=96175792518.0952 entropy=17.5754 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 185040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484488.6 mean_steps=13.8
|
|
[Episode 185050] reward=-114827148.2 actor_loss=0.2570 critic_loss=88236235889.7778 entropy=17.5838 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 185060] reward=-112998698.3 actor_loss=0.4012 critic_loss=83952077917.0909 entropy=17.5929 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 185060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465398.9 mean_steps=15.7
|
|
[Episode 185070] reward=-114972051.6 actor_loss=0.3155 critic_loss=80971503160.8889 entropy=17.5944 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 185080] reward=-118246569.1 actor_loss=0.3217 critic_loss=87567115729.4545 entropy=17.6012 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 185080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469782.7 mean_steps=14.1
|
|
[Episode 185090] reward=-114898429.5 actor_loss=0.3949 critic_loss=111144128401.2973 entropy=17.5954 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 185100] reward=-116754529.4 actor_loss=0.3202 critic_loss=80800462772.1481 entropy=17.5998 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 185100] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-616288.9 mean_steps=12.1
|
|
[Episode 185110] reward=-119330938.8 actor_loss=0.2666 critic_loss=98082509437.1555 entropy=17.5972 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 185120] reward=-121861415.3 actor_loss=0.3684 critic_loss=97135347848.5333 entropy=17.5995 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 185120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462021.6 mean_steps=13.7
|
|
[Episode 185130] reward=-142864981.0 actor_loss=0.2205 critic_loss=1740605953861.8181 entropy=17.6080 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 185140] reward=-118093471.0 actor_loss=0.3510 critic_loss=99361194299.0769 entropy=17.6075 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 185140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507116.1 mean_steps=13.9
|
|
[Episode 185150] reward=-114244000.9 actor_loss=0.4130 critic_loss=77237680640.0000 entropy=17.6081 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 185160] reward=-118729811.7 actor_loss=0.2974 critic_loss=85124147579.2593 entropy=17.6099 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 185160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547409.2 mean_steps=13.2
|
|
[Episode 185170] reward=-114301575.6 actor_loss=0.2819 critic_loss=74782476834.1333 entropy=17.6194 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 185180] reward=-120820609.4 actor_loss=0.2631 critic_loss=83346290688.0000 entropy=17.6215 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 185180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-602410.4 mean_steps=13.8
|
|
[Episode 185190] reward=-114407823.3 actor_loss=0.3164 critic_loss=83042805229.0370 entropy=17.6079 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 185200] reward=-118489962.2 actor_loss=0.2761 critic_loss=79481359564.8000 entropy=17.6063 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 185200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465597.1 mean_steps=15.6
|
|
[Episode 185210] reward=-119615014.0 actor_loss=0.2387 critic_loss=76292106989.2683 entropy=17.6123 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 185220] reward=-117709092.8 actor_loss=0.2918 critic_loss=88069832118.8571 entropy=17.6066 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 185220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511917.8 mean_steps=14.2
|
|
[Episode 185230] reward=-123803072.9 actor_loss=0.1515 critic_loss=84716317997.1765 entropy=17.6093 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 185240] reward=-113611872.1 actor_loss=0.4072 critic_loss=80692990814.3158 entropy=17.6068 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 185240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-503891.3 mean_steps=15.7
|
|
[Episode 185250] reward=-116653340.8 actor_loss=0.2126 critic_loss=129350220653.7143 entropy=17.6102 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 185260] reward=-118248772.5 actor_loss=0.3742 critic_loss=86374407827.9111 entropy=17.6029 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 185260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551025.7 mean_steps=13.4
|
|
[Episode 185270] reward=-121967566.4 actor_loss=0.3298 critic_loss=91803144960.0000 entropy=17.6122 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 185280] reward=-121424313.0 actor_loss=0.4077 critic_loss=97161918171.4286 entropy=17.6095 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 185280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440073.0 mean_steps=14.2
|
|
[Episode 185290] reward=-121677059.3 actor_loss=0.2814 critic_loss=83356186214.4000 entropy=17.6124 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 185300] reward=-120156569.3 actor_loss=0.3086 critic_loss=154801304429.7143 entropy=17.6134 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 185300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409019.2 mean_steps=14.8
|
|
[Episode 185310] reward=-117117085.5 actor_loss=0.3773 critic_loss=100650304059.5349 entropy=17.6147 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 185320] reward=-117342804.1 actor_loss=0.2743 critic_loss=82450066016.8649 entropy=17.5954 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 185320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-585933.8 mean_steps=13.7
|
|
[Episode 185330] reward=-117283557.6 actor_loss=0.3306 critic_loss=77386884437.3333 entropy=17.6104 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 185340] reward=-117548987.7 actor_loss=0.3291 critic_loss=92204936396.8000 entropy=17.6234 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 185340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580880.7 mean_steps=12.8
|
|
[Episode 185350] reward=-117897433.1 actor_loss=0.3090 critic_loss=73080374665.8462 entropy=17.6247 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 185360] reward=-127346769.7 actor_loss=0.2892 critic_loss=599810829577.4814 entropy=17.6287 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 185360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414308.5 mean_steps=15.2
|
|
[Episode 185370] reward=-128824170.9 actor_loss=0.3015 critic_loss=386430024947.8095 entropy=17.6398 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 185380] reward=-112470132.2 actor_loss=0.3562 critic_loss=143625924483.8788 entropy=17.6367 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 185380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477143.2 mean_steps=14.8
|
|
[Episode 185390] reward=-119202681.5 actor_loss=0.3590 critic_loss=120587178439.1111 entropy=17.6501 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 185400] reward=-122517971.1 actor_loss=0.2382 critic_loss=82361827692.0889 entropy=17.6489 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 185400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536905.9 mean_steps=13.4
|
|
[Episode 185410] reward=-119231652.4 actor_loss=0.3264 critic_loss=113643697408.0000 entropy=17.6435 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 185420] reward=-129507933.9 actor_loss=0.2872 critic_loss=124623555584.0000 entropy=17.6418 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 185420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566657.5 mean_steps=13.3
|
|
[Episode 185430] reward=-118117464.1 actor_loss=0.2723 critic_loss=79450313426.8235 entropy=17.6365 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 185440] reward=-119859640.6 actor_loss=0.2680 critic_loss=80844027904.0000 entropy=17.6434 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 185440] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-300976.9 mean_steps=17.1
|
|
[Episode 185450] reward=-116677352.8 actor_loss=0.3438 critic_loss=76261743664.7619 entropy=17.6430 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 185460] reward=-124204222.8 actor_loss=0.2685 critic_loss=106223114132.2105 entropy=17.6518 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 185460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549536.5 mean_steps=14.1
|
|
[Episode 185470] reward=-119252288.8 actor_loss=0.2795 critic_loss=81148358896.9412 entropy=17.6506 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 185480] reward=-116883871.9 actor_loss=0.2165 critic_loss=105353427353.6000 entropy=17.6572 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 185480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458089.9 mean_steps=14.6
|
|
[Episode 185490] reward=-117841469.9 actor_loss=0.3411 critic_loss=83750342283.6364 entropy=17.6502 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 185500] reward=-114221395.5 actor_loss=0.2923 critic_loss=78542003116.9730 entropy=17.6483 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 185500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-481061.1 mean_steps=15.7
|
|
[Episode 185510] reward=-116643604.0 actor_loss=0.3216 critic_loss=78904089600.0000 entropy=17.6563 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 185520] reward=-120093462.0 actor_loss=0.1999 critic_loss=84297875887.1579 entropy=17.6414 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 185520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-597948.9 mean_steps=13.7
|
|
[Episode 185530] reward=-117651807.7 actor_loss=0.3421 critic_loss=79106912347.0222 entropy=17.6217 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 185540] reward=-118586927.3 actor_loss=0.3468 critic_loss=87199459828.6222 entropy=17.6211 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 185540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-494551.0 mean_steps=16.1
|
|
[Episode 185550] reward=-120922099.1 actor_loss=0.2318 critic_loss=159264024937.4118 entropy=17.6264 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 185560] reward=-116358734.5 actor_loss=0.2339 critic_loss=73942627917.5758 entropy=17.6148 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 185560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-570140.2 mean_steps=12.4
|
|
[Episode 185570] reward=-113380116.3 actor_loss=0.3096 critic_loss=154420824498.4243 entropy=17.6141 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 185580] reward=-115685246.2 actor_loss=0.2347 critic_loss=76379397895.7576 entropy=17.6135 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 185580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462930.9 mean_steps=14.6
|
|
[Episode 185590] reward=-115329596.7 actor_loss=0.3273 critic_loss=84454454613.3333 entropy=17.6142 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 185600] reward=-115154727.6 actor_loss=0.2593 critic_loss=76752994918.4000 entropy=17.6026 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 185600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379769.1 mean_steps=16.1
|
|
[Episode 185610] reward=-120909272.5 actor_loss=0.2725 critic_loss=76952138262.2609 entropy=17.5608 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 185620] reward=-120815625.6 actor_loss=0.3081 critic_loss=79666367891.3939 entropy=17.5694 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 185620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477650.7 mean_steps=14.2
|
|
[Episode 185630] reward=-118041402.9 actor_loss=0.3582 critic_loss=72748933120.0000 entropy=17.5643 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 185640] reward=-114569871.1 actor_loss=0.2861 critic_loss=72017052922.3111 entropy=17.5746 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 185640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436731.8 mean_steps=15.6
|
|
[Episode 185650] reward=-119155782.6 actor_loss=0.2480 critic_loss=76022797956.7407 entropy=17.5679 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 185660] reward=-122181700.5 actor_loss=0.1478 critic_loss=80687403125.0286 entropy=17.5749 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 185660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-619881.0 mean_steps=12.1
|
|
[Episode 185670] reward=-118475009.0 actor_loss=0.2057 critic_loss=82141899337.1429 entropy=17.5722 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 185680] reward=-124185443.1 actor_loss=0.2345 critic_loss=83431872132.7407 entropy=17.5721 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 185680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-489859.7 mean_steps=12.8
|
|
[Episode 185690] reward=-115867025.9 actor_loss=0.3695 critic_loss=78514675384.3200 entropy=17.5742 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 185700] reward=-114448659.7 actor_loss=0.3154 critic_loss=74546231751.1111 entropy=17.5696 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 185700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-336873.8 mean_steps=16.7
|
|
[Episode 185710] reward=-117090991.6 actor_loss=0.2639 critic_loss=81851310638.5455 entropy=17.5654 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 185720] reward=-113946406.9 actor_loss=0.3362 critic_loss=78621477546.6667 entropy=17.5785 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 185720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458485.5 mean_steps=14.6
|
|
[Episode 185730] reward=-116556048.1 actor_loss=0.3494 critic_loss=74690070528.0000 entropy=17.5701 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 185740] reward=-3104115618.3 actor_loss=0.3257 critic_loss=28431819495265620.0000 entropy=17.5879 approx_kl=-0.0017 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 185740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-569782.2 mean_steps=12.6
|
|
[Episode 185750] reward=-124183128.5 actor_loss=0.2585 critic_loss=84000412794.8800 entropy=17.6024 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 185760] reward=-112209306.5 actor_loss=0.3581 critic_loss=72831602915.5556 entropy=17.5986 approx_kl=0.0045 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 185760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-546918.2 mean_steps=14.2
|
|
[Episode 185770] reward=-115166770.2 actor_loss=0.2626 critic_loss=77073949081.6000 entropy=17.5936 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 185780] reward=-110160952.1 actor_loss=0.4679 critic_loss=81423185627.4286 entropy=17.5878 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 185780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479229.9 mean_steps=14.7
|
|
[Episode 185790] reward=-113936134.6 actor_loss=0.3424 critic_loss=77475588388.5714 entropy=17.5863 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 185800] reward=-117630699.6 actor_loss=0.3885 critic_loss=94726812294.7368 entropy=17.5837 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 185800] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-265677.2 mean_steps=17.1
|
|
[Episode 185810] reward=-116513230.8 actor_loss=0.3403 critic_loss=80859476340.3636 entropy=17.5815 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 185820] reward=-120345194.4 actor_loss=0.1789 critic_loss=89787256246.8571 entropy=17.5786 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 185820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482412.1 mean_steps=13.8
|
|
[Episode 185830] reward=-115963632.6 actor_loss=0.2762 critic_loss=77751211885.7143 entropy=17.5655 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 185840] reward=-117618991.9 actor_loss=0.3237 critic_loss=79802855175.7576 entropy=17.5654 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 185840] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-305012.4 mean_steps=17.1
|
|
[Episode 185850] reward=-108908820.8 actor_loss=0.3212 critic_loss=77142645089.1035 entropy=17.5673 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 185860] reward=-119288205.8 actor_loss=0.2495 critic_loss=84088263518.3158 entropy=17.5819 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 185860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530356.3 mean_steps=13.9
|
|
[Episode 185870] reward=-116605062.6 actor_loss=0.2895 critic_loss=76799468001.8824 entropy=17.5804 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 185880] reward=-120123908.8 actor_loss=0.2365 critic_loss=76449031314.2857 entropy=17.5623 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 185880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555619.7 mean_steps=12.2
|
|
[Episode 185890] reward=-119456005.4 actor_loss=0.3005 critic_loss=86554881228.8000 entropy=17.5818 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 185900] reward=-115845487.4 actor_loss=0.3061 critic_loss=85803868160.0000 entropy=17.5882 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 185900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447826.4 mean_steps=15.3
|
|
[Episode 185910] reward=-120740050.8 actor_loss=0.2735 critic_loss=77239077546.6667 entropy=17.5903 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 185920] reward=-111791180.2 actor_loss=0.3827 critic_loss=73137974528.0000 entropy=17.5926 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 185920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566327.9 mean_steps=13.3
|
|
[Episode 185930] reward=-119588851.6 actor_loss=0.2658 critic_loss=80633581443.8788 entropy=17.5961 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 185940] reward=-122211842.3 actor_loss=0.2749 critic_loss=399177456025.6000 entropy=17.5873 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 185940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-632783.3 mean_steps=11.8
|
|
[Episode 185950] reward=-113965567.8 actor_loss=0.2415 critic_loss=74340356642.1333 entropy=17.5958 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 185960] reward=-119785426.6 actor_loss=0.2924 critic_loss=83651785841.7778 entropy=17.5957 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 185960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-358412.5 mean_steps=16.9
|
|
[Episode 185970] reward=-117568165.1 actor_loss=0.4092 critic_loss=81041665148.8781 entropy=17.5952 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 185980] reward=-119331916.5 actor_loss=0.3156 critic_loss=78796147712.0000 entropy=17.5914 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 185980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518787.6 mean_steps=14.4
|
|
[Episode 185990] reward=-116893149.6 actor_loss=0.1700 critic_loss=78456692004.5714 entropy=17.5893 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 186000] reward=-123556520.5 actor_loss=0.3581 critic_loss=85259102617.6000 entropy=17.6049 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 186000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481365.7 mean_steps=14.8
|
|
[Episode 186010] reward=-117604839.5 actor_loss=0.3011 critic_loss=79242888533.3333 entropy=17.6136 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 186020] reward=-117669122.5 actor_loss=0.4320 critic_loss=80811600117.7600 entropy=17.6112 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1517 front_blocked=0
|
|
[Eval 186020] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-729178.5 mean_steps=11.6
|
|
[Episode 186030] reward=-125538334.6 actor_loss=0.3625 critic_loss=352668783957.3333 entropy=17.6138 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 186040] reward=-117897018.0 actor_loss=0.2917 critic_loss=79384163906.7826 entropy=17.6006 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 186040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529169.0 mean_steps=13.4
|
|
[Episode 186050] reward=-117002900.7 actor_loss=0.2696 critic_loss=75275166134.8571 entropy=17.6114 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 186060] reward=-119504125.9 actor_loss=0.3343 critic_loss=119123715011.7647 entropy=17.6199 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 186060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-500985.9 mean_steps=15.9
|
|
[Episode 186070] reward=-121334959.1 actor_loss=0.1799 critic_loss=78264377344.0000 entropy=17.6137 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 186080] reward=-117655663.4 actor_loss=0.3106 critic_loss=83934038152.5333 entropy=17.6118 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 186080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409882.5 mean_steps=15.4
|
|
[Episode 186090] reward=-112698078.6 actor_loss=0.3812 critic_loss=93054021200.8421 entropy=17.6078 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 186100] reward=-123399325.4 actor_loss=0.2530 critic_loss=85673216341.3333 entropy=17.6023 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 186100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386691.1 mean_steps=16.0
|
|
[Episode 186110] reward=-118117462.8 actor_loss=0.3153 critic_loss=85581092181.3333 entropy=17.6075 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 186120] reward=-122552656.4 actor_loss=0.1959 critic_loss=145803966464.0000 entropy=17.6149 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 186120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641634.0 mean_steps=11.9
|
|
[Episode 186130] reward=-115637772.8 actor_loss=0.3676 critic_loss=83428168438.5185 entropy=17.5986 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 186140] reward=-117723877.9 actor_loss=0.2794 critic_loss=84657264981.3333 entropy=17.5946 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 186140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-652325.0 mean_steps=12.9
|
|
[Episode 186150] reward=-111971662.0 actor_loss=0.3472 critic_loss=71011675171.3103 entropy=17.5827 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 186160] reward=-118914364.7 actor_loss=0.2962 critic_loss=90758468403.2000 entropy=17.5793 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 186160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-301460.7 mean_steps=16.2
|
|
[Episode 186170] reward=-121233522.9 actor_loss=0.3567 critic_loss=82150227626.6667 entropy=17.5538 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 186180] reward=-123195520.3 actor_loss=0.2012 critic_loss=95969109333.3333 entropy=17.5384 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 186180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599519.0 mean_steps=12.6
|
|
[Episode 186190] reward=-118347189.9 actor_loss=0.2833 critic_loss=85150706158.3448 entropy=17.5441 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 186200] reward=-114877973.0 actor_loss=0.3516 critic_loss=72270552019.4783 entropy=17.5261 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 186200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-612141.4 mean_steps=14.7
|
|
[Episode 186210] reward=-119513444.6 actor_loss=0.2723 critic_loss=78834148175.4483 entropy=17.5321 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 186220] reward=-119738155.8 actor_loss=0.3168 critic_loss=81366566791.5294 entropy=17.5339 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 186220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-444412.2 mean_steps=14.8
|
|
[Episode 186230] reward=-118954542.6 actor_loss=0.2625 critic_loss=142934726923.1304 entropy=17.5215 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 186240] reward=-115761238.7 actor_loss=0.3171 critic_loss=73389708051.6923 entropy=17.5159 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 186240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495040.6 mean_steps=15.3
|
|
[Episode 186250] reward=-117980226.8 actor_loss=0.4116 critic_loss=78312451465.8462 entropy=17.5155 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 186260] reward=-133157627.9 actor_loss=0.4126 critic_loss=1638142614921.8462 entropy=17.5346 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 186260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469573.3 mean_steps=13.7
|
|
[Episode 186270] reward=-118576728.3 actor_loss=0.3976 critic_loss=100673788988.2353 entropy=17.5451 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 186280] reward=-119039059.8 actor_loss=0.2171 critic_loss=156394195502.5454 entropy=17.5437 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 186280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-414663.6 mean_steps=17.3
|
|
[Episode 186290] reward=-156157683.7 actor_loss=0.2618 critic_loss=6347352528213.3330 entropy=17.5396 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 186300] reward=-118024625.0 actor_loss=0.2778 critic_loss=82147587501.4194 entropy=17.5453 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 186300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-607744.8 mean_steps=12.8
|
|
[Episode 186310] reward=-118404513.5 actor_loss=0.3324 critic_loss=75588761190.4000 entropy=17.5594 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 186320] reward=-117581367.3 actor_loss=0.3050 critic_loss=82733386787.3103 entropy=17.5572 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 186320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417584.2 mean_steps=15.4
|
|
[Episode 186330] reward=-116229694.7 actor_loss=0.1566 critic_loss=89473837501.2174 entropy=17.5703 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 186340] reward=-120365779.2 actor_loss=0.2812 critic_loss=75916606048.8649 entropy=17.5790 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 186340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439361.0 mean_steps=14.5
|
|
[Episode 186350] reward=-118993732.1 actor_loss=0.2589 critic_loss=81245587228.4444 entropy=17.5733 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 186360] reward=-121015489.5 actor_loss=0.1991 critic_loss=93908721264.3902 entropy=17.5765 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 186360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536444.3 mean_steps=13.4
|
|
[Episode 186370] reward=-119831233.5 actor_loss=0.2423 critic_loss=84984532016.7619 entropy=17.5881 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 186380] reward=-119597903.5 actor_loss=0.1992 critic_loss=77258103921.7778 entropy=17.5973 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 186380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-538704.7 mean_steps=15.6
|
|
[Episode 186390] reward=-125158585.2 actor_loss=0.2801 critic_loss=99876025685.3333 entropy=17.6023 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 186400] reward=-114313154.0 actor_loss=0.2270 critic_loss=83509749917.5385 entropy=17.5878 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 186400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-594513.4 mean_steps=12.6
|
|
[Episode 186410] reward=-114286929.6 actor_loss=0.3446 critic_loss=86685951590.4000 entropy=17.6027 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 186420] reward=-117378518.9 actor_loss=0.3172 critic_loss=77738186979.5556 entropy=17.6053 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 186420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471514.4 mean_steps=14.4
|
|
[Episode 186430] reward=-121184765.1 actor_loss=0.3062 critic_loss=79777666748.6316 entropy=17.6026 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 186440] reward=-118486739.0 actor_loss=0.2649 critic_loss=77736600826.3111 entropy=17.6008 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 186440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479733.6 mean_steps=15.0
|
|
[Episode 186450] reward=-120955879.0 actor_loss=0.3164 critic_loss=151277732336.4849 entropy=17.6013 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 186460] reward=-112452741.8 actor_loss=0.3221 critic_loss=79478349095.8222 entropy=17.5846 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 186460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542538.9 mean_steps=13.7
|
|
[Episode 186470] reward=-118111056.9 actor_loss=0.2112 critic_loss=78303173327.5676 entropy=17.5747 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 186480] reward=-120675564.9 actor_loss=0.2922 critic_loss=85871307434.6667 entropy=17.5744 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 186480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507456.6 mean_steps=14.1
|
|
[Episode 186490] reward=-111147152.4 actor_loss=0.3721 critic_loss=68647703302.9189 entropy=17.5732 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 186500] reward=-118761572.4 actor_loss=0.2737 critic_loss=77228248086.7556 entropy=17.5893 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 186500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-649379.0 mean_steps=12.8
|
|
[Episode 186510] reward=-127030888.0 actor_loss=0.2568 critic_loss=296294186097.7778 entropy=17.5835 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 186520] reward=-117907118.2 actor_loss=0.1921 critic_loss=77322310451.2000 entropy=17.5919 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 186520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-502325.1 mean_steps=13.4
|
|
[Episode 186530] reward=-117508484.4 actor_loss=0.2416 critic_loss=78758157653.3333 entropy=17.6007 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 186540] reward=-116847332.7 actor_loss=0.2112 critic_loss=85681817554.4889 entropy=17.6073 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 186540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-623569.5 mean_steps=13.1
|
|
[Episode 186550] reward=-115603214.3 actor_loss=0.3244 critic_loss=79725054452.6222 entropy=17.6320 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 186560] reward=-121246602.9 actor_loss=0.3190 critic_loss=79499423470.9333 entropy=17.6151 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 186560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469855.8 mean_steps=14.9
|
|
[Episode 186570] reward=-112618453.7 actor_loss=0.3888 critic_loss=84127322567.1111 entropy=17.6297 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 186580] reward=-118298382.0 actor_loss=0.3130 critic_loss=85079407379.6923 entropy=17.6521 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 186580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532257.7 mean_steps=14.2
|
|
[Episode 186590] reward=-117256996.3 actor_loss=0.2847 critic_loss=81779507200.0000 entropy=17.6507 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 186600] reward=-122204110.5 actor_loss=0.3511 critic_loss=126216767849.4118 entropy=17.6518 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 186600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-500108.7 mean_steps=13.1
|
|
[Episode 186610] reward=-2225714192.5 actor_loss=1.5313 critic_loss=10328273570846038.0000 entropy=17.6691 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 186620] reward=-29564594219.7 actor_loss=2.3850 critic_loss=1361364542833825536.0000 entropy=17.6824 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1139 front_blocked=0
|
|
[Eval 186620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-2722675186.3 mean_steps=26.6
|
|
[Episode 186630] reward=-37362916244.0 actor_loss=0.8800 critic_loss=2130791351138939392.0000 entropy=17.6870 approx_kl=0.0009 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 186640] reward=-64724764653.0 actor_loss=1.0867 critic_loss=3523261193982108672.0000 entropy=17.6904 approx_kl=0.0098 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 186640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-595643.3 mean_steps=12.8
|
|
[Episode 186650] reward=-111722627.4 actor_loss=0.3560 critic_loss=75099608642.7826 entropy=17.7385 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 186660] reward=-116871128.5 actor_loss=0.3227 critic_loss=84401529924.2667 entropy=17.7263 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 186660] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-645601.4 mean_steps=11.3
|
|
[Episode 186670] reward=-113708770.6 actor_loss=0.2691 critic_loss=80814521639.8222 entropy=17.7227 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 186680] reward=-119344795.7 actor_loss=0.3526 critic_loss=108777146322.4889 entropy=17.7483 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 186680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566558.0 mean_steps=13.6
|
|
[Episode 186690] reward=-228723218.8 actor_loss=0.2766 critic_loss=40677024639658.6641 entropy=17.7602 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 186700] reward=-137109462.1 actor_loss=0.3503 critic_loss=892012658688.0000 entropy=17.7661 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 186700] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-312056.8 mean_steps=17.6
|
|
[Episode 186710] reward=-133386602.2 actor_loss=0.2684 critic_loss=1015383711926.0444 entropy=17.7716 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 186720] reward=-118851300.3 actor_loss=0.2832 critic_loss=85963452871.1111 entropy=17.7656 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 186720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523118.7 mean_steps=14.1
|
|
[Episode 186730] reward=-118485957.4 actor_loss=0.2284 critic_loss=76380252296.5333 entropy=17.7487 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 186740] reward=-114989697.4 actor_loss=0.3736 critic_loss=81144913604.9231 entropy=17.7465 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 186740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587002.5 mean_steps=14.0
|
|
[Episode 186750] reward=-117044764.7 actor_loss=0.2932 critic_loss=92654187935.1351 entropy=17.7366 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 186760] reward=-114609415.1 actor_loss=0.3895 critic_loss=80074428142.9333 entropy=17.7416 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 186760] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-795346.2 mean_steps=10.2
|
|
[Episode 186770] reward=-120997980.4 actor_loss=0.2485 critic_loss=86389401736.5333 entropy=17.7451 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 186780] reward=-117658109.6 actor_loss=0.4279 critic_loss=86185314105.8065 entropy=17.7366 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 186780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475820.0 mean_steps=14.1
|
|
[Episode 186790] reward=-119447876.4 actor_loss=0.3157 critic_loss=84763775720.7273 entropy=17.7615 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 186800] reward=-121941703.4 actor_loss=0.2662 critic_loss=95249312828.2353 entropy=17.7717 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 186800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-351932.2 mean_steps=15.8
|
|
[Episode 186810] reward=-117765345.0 actor_loss=0.3065 critic_loss=85591442285.7143 entropy=17.7668 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 186820] reward=-116657022.4 actor_loss=0.2727 critic_loss=82428597134.2222 entropy=17.7687 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 186820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435375.4 mean_steps=15.3
|
|
[Episode 186830] reward=-115223550.4 actor_loss=0.3311 critic_loss=78507562949.4857 entropy=17.7740 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 186840] reward=-4810188722.9 actor_loss=1.6371 critic_loss=47373871717856776.0000 entropy=17.7716 approx_kl=-0.0015 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 186840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-676060.4 mean_steps=12.3
|
|
[Episode 186850] reward=-119478955.6 actor_loss=0.2152 critic_loss=84507640366.5455 entropy=17.7646 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 186860] reward=-120398586.9 actor_loss=0.3493 critic_loss=87604559189.3333 entropy=17.7643 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 186860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485171.8 mean_steps=14.6
|
|
[Episode 186870] reward=-120917363.4 actor_loss=0.3143 critic_loss=88199887310.4516 entropy=17.7612 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 186880] reward=-116267222.7 actor_loss=0.2614 critic_loss=83787971203.6572 entropy=17.7570 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 186880] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-353560.3 mean_steps=17.9
|
|
[Episode 186890] reward=-111934014.6 actor_loss=0.2274 critic_loss=72863935829.3333 entropy=17.7314 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 186900] reward=-117545044.6 actor_loss=0.2328 critic_loss=80442896501.0286 entropy=17.7419 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 186900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385023.3 mean_steps=15.8
|
|
[Episode 186910] reward=-117152125.3 actor_loss=0.3154 critic_loss=84097373277.0909 entropy=17.7228 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 186920] reward=-115039995.8 actor_loss=0.2349 critic_loss=76004085760.0000 entropy=17.7186 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 186920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497419.6 mean_steps=15.0
|
|
[Episode 186930] reward=-120311946.0 actor_loss=0.2779 critic_loss=79257525725.8667 entropy=17.6970 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 186940] reward=-121049047.2 actor_loss=0.2776 critic_loss=92201536898.8445 entropy=17.6968 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 186940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475950.5 mean_steps=15.2
|
|
[Episode 186950] reward=-122659014.4 actor_loss=0.2501 critic_loss=84096850944.0000 entropy=17.6940 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 186960] reward=-117077470.6 actor_loss=0.2366 critic_loss=78097230981.5652 entropy=17.6736 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 186960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587495.8 mean_steps=13.8
|
|
[Episode 186970] reward=-118246092.6 actor_loss=0.3173 critic_loss=78290931438.9333 entropy=17.6803 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 186980] reward=-118377402.0 actor_loss=0.2878 critic_loss=90702282752.0000 entropy=17.6940 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 186980] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-668237.1 mean_steps=11.2
|
|
[Episode 186990] reward=-121404190.0 actor_loss=0.3559 critic_loss=81747503905.3913 entropy=17.7049 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 187000] reward=-113454093.2 actor_loss=0.2244 critic_loss=73593196982.8571 entropy=17.6985 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 187000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437363.1 mean_steps=15.2
|
|
[Episode 187010] reward=-118736989.5 actor_loss=0.2268 critic_loss=82337946191.6444 entropy=17.7007 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 187020] reward=-113247908.2 actor_loss=0.2538 critic_loss=79418100589.7143 entropy=17.6945 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 187020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559282.5 mean_steps=13.2
|
|
[Episode 187030] reward=-113462643.7 actor_loss=0.2069 critic_loss=75965980672.0000 entropy=17.6897 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 187040] reward=-119850881.1 actor_loss=0.2238 critic_loss=85016123454.0606 entropy=17.6894 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 187040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487875.4 mean_steps=14.9
|
|
[Episode 187050] reward=-124033652.6 actor_loss=0.2176 critic_loss=152085169883.4286 entropy=17.6985 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 187060] reward=-121247287.8 actor_loss=0.3292 critic_loss=81192718062.9333 entropy=17.7058 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 187060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437104.7 mean_steps=15.5
|
|
[Episode 187070] reward=-121490400.3 actor_loss=0.2379 critic_loss=80907993088.0000 entropy=17.7075 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 187080] reward=-115860376.8 actor_loss=0.2770 critic_loss=85240019712.0000 entropy=17.7028 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 187080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-514479.6 mean_steps=15.4
|
|
[Episode 187090] reward=-121106202.9 actor_loss=0.3701 critic_loss=87078782384.3556 entropy=17.7080 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 187100] reward=-118385873.1 actor_loss=0.2641 critic_loss=77978638848.0000 entropy=17.7279 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 187100] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-628483.9 mean_steps=12.1
|
|
[Episode 187110] reward=-121987384.3 actor_loss=0.4657 critic_loss=366346398378.6667 entropy=17.7228 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 187120] reward=-121656479.8 actor_loss=0.3053 critic_loss=83455945750.7556 entropy=17.7179 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 187120] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-348343.4 mean_steps=16.9
|
|
[Episode 187130] reward=-116939748.2 actor_loss=0.3130 critic_loss=79632678729.9556 entropy=17.7083 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 187140] reward=-121232401.8 actor_loss=0.3952 critic_loss=85063321190.4000 entropy=17.6959 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 187140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470583.7 mean_steps=13.4
|
|
[Episode 187150] reward=-119946603.1 actor_loss=0.2838 critic_loss=81288640375.4667 entropy=17.6923 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 187160] reward=-123011515.8 actor_loss=0.3076 critic_loss=142558881200.3556 entropy=17.6769 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 187160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-496971.7 mean_steps=15.1
|
|
[Episode 187170] reward=-119044189.6 actor_loss=0.2815 critic_loss=81144861127.1111 entropy=17.6619 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 187180] reward=-115318592.7 actor_loss=0.2662 critic_loss=80328745870.2222 entropy=17.6684 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 187180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-458144.6 mean_steps=13.3
|
|
[Episode 187190] reward=-117369842.7 actor_loss=0.3554 critic_loss=80253432718.2222 entropy=17.6743 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 187200] reward=-115042748.8 actor_loss=0.3139 critic_loss=76642130292.3636 entropy=17.6730 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 187200] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-685459.0 mean_steps=11.6
|
|
[Episode 187210] reward=-120317665.2 actor_loss=0.2638 critic_loss=80716211175.0244 entropy=17.6933 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 187220] reward=-111675508.6 actor_loss=0.3098 critic_loss=80017172980.6222 entropy=17.7037 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 187220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-495029.7 mean_steps=13.2
|
|
[Episode 187230] reward=-120433600.4 actor_loss=0.2573 critic_loss=80561762304.0000 entropy=17.6987 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 187240] reward=-120353664.0 actor_loss=0.2686 critic_loss=80956018793.0256 entropy=17.6899 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 187240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-472147.5 mean_steps=13.1
|
|
[Episode 187250] reward=-118093653.7 actor_loss=0.4027 critic_loss=84048519987.2000 entropy=17.6961 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 187260] reward=-119540208.9 actor_loss=0.3375 critic_loss=80575084544.0000 entropy=17.6945 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 187260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525529.6 mean_steps=13.1
|
|
[Episode 187270] reward=-112750580.4 actor_loss=0.4008 critic_loss=79048084593.7778 entropy=17.6975 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 187280] reward=-117459526.6 actor_loss=0.2650 critic_loss=79750175464.7273 entropy=17.6981 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 187280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-417877.8 mean_steps=16.2
|
|
[Episode 187290] reward=-116712031.0 actor_loss=0.2511 critic_loss=77194102859.8519 entropy=17.6860 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 187300] reward=-119946500.2 actor_loss=0.3065 critic_loss=83121173740.3077 entropy=17.6741 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 187300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-519075.5 mean_steps=15.8
|
|
[Episode 187310] reward=-121435726.2 actor_loss=0.3307 critic_loss=87461847768.1778 entropy=17.6757 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 187320] reward=-120639618.8 actor_loss=0.3319 critic_loss=100719707932.4444 entropy=17.6776 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 187320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482213.3 mean_steps=14.7
|
|
[Episode 187330] reward=-122975539.5 actor_loss=0.2706 critic_loss=325591919086.3448 entropy=17.6858 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 187340] reward=-114771386.5 actor_loss=0.3530 critic_loss=84318990982.7368 entropy=17.6958 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 187340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419267.7 mean_steps=15.5
|
|
[Episode 187350] reward=-115596249.0 actor_loss=0.2667 critic_loss=76277122295.1724 entropy=17.6933 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 187360] reward=-116213022.1 actor_loss=0.3865 critic_loss=76866279131.4286 entropy=17.6927 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 187360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-399267.8 mean_steps=16.1
|
|
[Episode 187370] reward=-122204044.1 actor_loss=0.2979 critic_loss=89741898932.7059 entropy=17.6796 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 187380] reward=-114951518.8 actor_loss=0.3496 critic_loss=77291232369.7778 entropy=17.6785 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 187380] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-344127.3 mean_steps=16.3
|
|
[Episode 187390] reward=-120002403.8 actor_loss=0.3032 critic_loss=80808917765.6889 entropy=17.6707 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 187400] reward=-119156722.8 actor_loss=0.2797 critic_loss=82776076196.9778 entropy=17.6746 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 187400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428027.7 mean_steps=15.6
|
|
[Episode 187410] reward=-112454986.5 actor_loss=0.2614 critic_loss=73936947328.0000 entropy=17.6658 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 187420] reward=-119721569.1 actor_loss=0.3218 critic_loss=84967938625.6410 entropy=17.6540 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 187420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487057.0 mean_steps=14.2
|
|
[Episode 187430] reward=-116721764.4 actor_loss=0.3617 critic_loss=76794422325.8947 entropy=17.6404 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 187440] reward=-115994170.3 actor_loss=0.2445 critic_loss=91643438080.0000 entropy=17.6506 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 187440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569238.1 mean_steps=13.2
|
|
[Episode 187450] reward=-116565554.0 actor_loss=0.1866 critic_loss=77313359052.8000 entropy=17.6585 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 187460] reward=-118474201.9 actor_loss=0.3044 critic_loss=78470675849.8462 entropy=17.6519 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 187460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-498167.5 mean_steps=12.8
|
|
[Episode 187470] reward=-111768062.0 actor_loss=0.4782 critic_loss=75828065858.7826 entropy=17.6264 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 187480] reward=-120294677.3 actor_loss=0.2589 critic_loss=80379145966.9333 entropy=17.6229 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 187480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391111.9 mean_steps=16.1
|
|
[Episode 187490] reward=-123831204.6 actor_loss=0.2358 critic_loss=83898501227.7895 entropy=17.6320 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 187500] reward=-119449055.8 actor_loss=0.3038 critic_loss=83609452544.0000 entropy=17.6160 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 187500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-641519.8 mean_steps=13.3
|
|
[Episode 187510] reward=-118623127.5 actor_loss=0.3140 critic_loss=80913074688.0000 entropy=17.6204 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 187520] reward=-121051392.3 actor_loss=0.2936 critic_loss=83783237632.0000 entropy=17.6257 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 187520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454760.2 mean_steps=15.2
|
|
[Episode 187530] reward=-116945345.7 actor_loss=0.2667 critic_loss=75898477226.6667 entropy=17.6213 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 187540] reward=-121180955.0 actor_loss=0.2334 critic_loss=86595279912.9600 entropy=17.6253 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 187540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-617474.4 mean_steps=13.6
|
|
[Episode 187550] reward=-117881315.2 actor_loss=0.2522 critic_loss=80291770764.3871 entropy=17.6180 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 187560] reward=-117875036.2 actor_loss=0.2999 critic_loss=78983112890.1818 entropy=17.6240 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 187560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-527712.5 mean_steps=12.8
|
|
[Episode 187570] reward=-120968503.2 actor_loss=0.1833 critic_loss=78071324007.7838 entropy=17.6341 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 187580] reward=-125601317.0 actor_loss=0.1621 critic_loss=85337549770.1053 entropy=17.6286 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 187580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541938.1 mean_steps=14.0
|
|
[Episode 187590] reward=-112847084.0 actor_loss=0.3340 critic_loss=75651230440.7273 entropy=17.6396 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 187600] reward=-121756275.0 actor_loss=0.3740 critic_loss=83086172842.6667 entropy=17.6372 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 187600] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-599152.9 mean_steps=11.7
|
|
[Episode 187610] reward=-116452061.5 actor_loss=0.3202 critic_loss=75012670364.9032 entropy=17.6347 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 187620] reward=-145658574.4 actor_loss=0.2726 critic_loss=3088199108697.0435 entropy=17.6258 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 187620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-580973.8 mean_steps=14.3
|
|
[Episode 187630] reward=-120816476.2 actor_loss=0.2634 critic_loss=201315904307.2000 entropy=17.6312 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 187640] reward=-118367470.3 actor_loss=0.3137 critic_loss=83168648045.7143 entropy=17.6253 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 187640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-407545.7 mean_steps=15.2
|
|
[Episode 187650] reward=-253977553.1 actor_loss=0.2989 critic_loss=59545467486208.0000 entropy=17.6383 approx_kl=0.0045 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 187660] reward=-111986648.4 actor_loss=0.2639 critic_loss=84845296613.7436 entropy=17.6304 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 187660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581507.8 mean_steps=12.8
|
|
[Episode 187670] reward=-121294242.2 actor_loss=0.3517 critic_loss=377569597121.4222 entropy=17.6336 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 187680] reward=-115083849.2 actor_loss=0.3182 critic_loss=73300958735.5152 entropy=17.6294 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 187680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523216.5 mean_steps=14.1
|
|
[Episode 187690] reward=-119956356.3 actor_loss=0.3265 critic_loss=112420072106.6667 entropy=17.6444 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 187700] reward=-123485115.4 actor_loss=0.2477 critic_loss=101509196003.5556 entropy=17.6489 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 187700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587958.6 mean_steps=13.7
|
|
[Episode 187710] reward=-117945148.9 actor_loss=0.3652 critic_loss=77964684492.8000 entropy=17.6492 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 187720] reward=-121187715.6 actor_loss=0.3060 critic_loss=83020558336.0000 entropy=17.6459 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 187720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-450754.0 mean_steps=16.2
|
|
[Episode 187730] reward=-118103585.8 actor_loss=0.3074 critic_loss=77417484561.0667 entropy=17.6437 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 187740] reward=-117607330.5 actor_loss=0.3010 critic_loss=79991369728.0000 entropy=17.6382 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 187740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581767.0 mean_steps=13.6
|
|
[Episode 187750] reward=-118363565.8 actor_loss=0.3189 critic_loss=80110212647.3846 entropy=17.6354 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 187760] reward=-121220884.6 actor_loss=0.3717 critic_loss=86148120576.0000 entropy=17.6438 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 187760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-639432.6 mean_steps=13.1
|
|
[Episode 187770] reward=-122774788.5 actor_loss=0.2149 critic_loss=81075905772.3077 entropy=17.6552 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 187780] reward=-116139931.1 actor_loss=0.4081 critic_loss=77487273797.8182 entropy=17.6598 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 187780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581129.7 mean_steps=13.8
|
|
[Episode 187790] reward=-115051001.0 actor_loss=0.3313 critic_loss=76849050846.6087 entropy=17.6670 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 187800] reward=-120719542.0 actor_loss=0.3277 critic_loss=82261508336.9412 entropy=17.6614 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 187800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409283.5 mean_steps=15.3
|
|
[Episode 187810] reward=-118365901.1 actor_loss=0.2807 critic_loss=77934679702.5882 entropy=17.6617 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 187820] reward=-122526035.7 actor_loss=0.2136 critic_loss=82793045360.6400 entropy=17.6658 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 187820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465775.8 mean_steps=15.8
|
|
[Episode 187830] reward=-119314423.4 actor_loss=0.3011 critic_loss=84646559744.0000 entropy=17.6576 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 187840] reward=-118780518.7 actor_loss=0.2882 critic_loss=77695288934.4000 entropy=17.6595 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 187840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-385607.0 mean_steps=14.8
|
|
[Episode 187850] reward=-116111565.4 actor_loss=0.3049 critic_loss=79073569060.5714 entropy=17.6395 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 187860] reward=-118367977.5 actor_loss=0.3519 critic_loss=74945790862.2222 entropy=17.6386 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 187860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-511100.6 mean_steps=14.9
|
|
[Episode 187870] reward=-115941120.1 actor_loss=0.2785 critic_loss=77117436119.5789 entropy=17.6238 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 187880] reward=-115463276.9 actor_loss=0.2199 critic_loss=71369273058.2326 entropy=17.6435 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 187880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589526.1 mean_steps=12.4
|
|
[Episode 187890] reward=-119032720.6 actor_loss=0.2707 critic_loss=77016859230.8148 entropy=17.6500 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 187900] reward=-115242483.2 actor_loss=0.4237 critic_loss=80380220757.3333 entropy=17.6459 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 187900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378707.2 mean_steps=15.7
|
|
[Episode 187910] reward=-115181462.6 actor_loss=0.3155 critic_loss=74565773409.5238 entropy=17.6374 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 187920] reward=-118533648.5 actor_loss=0.2876 critic_loss=81234038784.0000 entropy=17.6384 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 187920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451882.4 mean_steps=15.3
|
|
[Episode 187930] reward=-122688490.4 actor_loss=0.3013 critic_loss=82756029826.8445 entropy=17.6310 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 187940] reward=-120865168.9 actor_loss=0.3926 critic_loss=85389844480.0000 entropy=17.6159 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 187940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549888.0 mean_steps=14.3
|
|
[Episode 187950] reward=-119829006.3 actor_loss=0.3476 critic_loss=78979207168.0000 entropy=17.6068 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 187960] reward=-121245369.9 actor_loss=0.2212 critic_loss=83494855884.8000 entropy=17.6088 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 187960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552191.9 mean_steps=13.4
|
|
[Episode 187970] reward=-124076531.8 actor_loss=0.2864 critic_loss=84607871410.4242 entropy=17.6039 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 187980] reward=-113816690.9 actor_loss=0.3386 critic_loss=74480645461.3333 entropy=17.6070 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 187980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590405.7 mean_steps=13.6
|
|
[Episode 187990] reward=-115799969.0 actor_loss=0.2939 critic_loss=77619984294.9565 entropy=17.6065 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 188000] reward=-118539791.6 actor_loss=0.3658 critic_loss=76166248857.6000 entropy=17.5966 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 188000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438338.3 mean_steps=15.1
|
|
[Episode 188010] reward=-116437227.7 actor_loss=0.1987 critic_loss=75977478656.0000 entropy=17.5865 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 188020] reward=-118585855.7 actor_loss=0.2356 critic_loss=83219926220.8000 entropy=17.5746 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 188020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590549.6 mean_steps=12.7
|
|
[Episode 188030] reward=-117148297.0 actor_loss=0.3328 critic_loss=77135929202.7586 entropy=17.5782 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 188040] reward=-116312362.9 actor_loss=0.1870 critic_loss=75252447339.7895 entropy=17.5887 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 188040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-512832.8 mean_steps=13.4
|
|
[Episode 188050] reward=-117890435.7 actor_loss=0.2191 critic_loss=73695634227.2000 entropy=17.5906 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 188060] reward=-124893731.9 actor_loss=0.3180 critic_loss=83805081600.0000 entropy=17.5831 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 188060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454138.3 mean_steps=14.6
|
|
[Episode 188070] reward=-121054309.3 actor_loss=0.3469 critic_loss=82268094464.0000 entropy=17.5804 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 188080] reward=-118413025.4 actor_loss=0.2303 critic_loss=75696717824.0000 entropy=17.5743 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 188080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-632683.5 mean_steps=12.6
|
|
[Episode 188090] reward=-121821929.2 actor_loss=0.2699 critic_loss=83804593834.6667 entropy=17.5822 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 188100] reward=-120018010.3 actor_loss=0.2728 critic_loss=76794296469.8537 entropy=17.5943 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 188100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442686.8 mean_steps=14.5
|
|
[Episode 188110] reward=-118118666.0 actor_loss=0.3357 critic_loss=78218258568.5333 entropy=17.5907 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 188120] reward=-120014707.2 actor_loss=0.2573 critic_loss=77621531047.7241 entropy=17.6018 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 188120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-524404.5 mean_steps=14.8
|
|
[Episode 188130] reward=-117825251.6 actor_loss=0.3537 critic_loss=79971695235.6572 entropy=17.6045 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 188140] reward=-117827473.6 actor_loss=0.2716 critic_loss=75332402809.9048 entropy=17.6125 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 188140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-573637.4 mean_steps=12.7
|
|
[Episode 188150] reward=-115352597.4 actor_loss=0.3716 critic_loss=80001936483.0968 entropy=17.6085 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 188160] reward=-123950897.4 actor_loss=0.2604 critic_loss=87147457331.2000 entropy=17.6118 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 188160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462091.5 mean_steps=15.1
|
|
[Episode 188170] reward=-114604737.0 actor_loss=0.5236 critic_loss=80120897536.0000 entropy=17.6217 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1530 front_blocked=0
|
|
[Episode 188180] reward=-116313994.2 actor_loss=0.2944 critic_loss=83685859892.9655 entropy=17.6223 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 188180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461125.1 mean_steps=14.4
|
|
[Episode 188190] reward=-118553947.5 actor_loss=0.2867 critic_loss=85138786048.0000 entropy=17.6340 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 188200] reward=-123839099.8 actor_loss=0.2752 critic_loss=86498298197.3333 entropy=17.6268 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 188200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-563684.6 mean_steps=14.3
|
|
[Episode 188210] reward=-123296496.3 actor_loss=0.2346 critic_loss=79302455022.9333 entropy=17.6315 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 188220] reward=-114129329.7 actor_loss=0.3057 critic_loss=75035926710.0444 entropy=17.6223 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 188220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-521284.0 mean_steps=12.2
|
|
[Episode 188230] reward=-121651342.4 actor_loss=0.2451 critic_loss=80717509868.3077 entropy=17.6250 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 188240] reward=-123415599.7 actor_loss=0.2421 critic_loss=87212631836.4444 entropy=17.6455 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 188240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587400.4 mean_steps=13.3
|
|
[Episode 188250] reward=-119465047.2 actor_loss=0.2107 critic_loss=77691249914.3111 entropy=17.6567 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 188260] reward=-116879806.5 actor_loss=0.3527 critic_loss=75869170541.7143 entropy=17.6554 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 188260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568167.2 mean_steps=13.5
|
|
[Episode 188270] reward=-122669799.6 actor_loss=0.2597 critic_loss=86217117440.0000 entropy=17.6532 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 188280] reward=-119338246.4 actor_loss=0.3568 critic_loss=81462728021.3333 entropy=17.6498 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 188280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-477589.9 mean_steps=12.8
|
|
[Episode 188290] reward=-106249540.7 actor_loss=0.3611 critic_loss=70036081231.6444 entropy=17.6564 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 188300] reward=-115789961.1 actor_loss=0.4058 critic_loss=79723444451.5556 entropy=17.6579 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 188300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447478.7 mean_steps=14.5
|
|
[Episode 188310] reward=-118292060.2 actor_loss=0.3303 critic_loss=75931475057.7778 entropy=17.6504 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 188320] reward=-116854723.4 actor_loss=0.2651 critic_loss=75525533286.4000 entropy=17.6561 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 188320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521263.4 mean_steps=13.8
|
|
[Episode 188330] reward=-119631188.9 actor_loss=0.1768 critic_loss=76818174537.1429 entropy=17.6418 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 188340] reward=-120775360.0 actor_loss=0.4292 critic_loss=78958366811.0222 entropy=17.6349 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1523 front_blocked=0
|
|
[Eval 188340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465264.7 mean_steps=14.3
|
|
[Episode 188350] reward=-117752799.1 actor_loss=0.2002 critic_loss=78041292800.0000 entropy=17.6375 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 188360] reward=-117352813.7 actor_loss=0.3841 critic_loss=86438183789.7143 entropy=17.6348 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 188360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548699.5 mean_steps=14.2
|
|
[Episode 188370] reward=-117604157.2 actor_loss=0.2346 critic_loss=75727218157.0370 entropy=17.6482 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 188380] reward=-119822573.6 actor_loss=0.2838 critic_loss=84327203544.1778 entropy=17.6230 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 188380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-583470.1 mean_steps=12.7
|
|
[Episode 188390] reward=-119710831.3 actor_loss=0.3128 critic_loss=79075268289.4222 entropy=17.6228 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 188400] reward=-114451449.2 actor_loss=0.2794 critic_loss=72760635665.0667 entropy=17.6083 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 188400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572272.2 mean_steps=12.8
|
|
[Episode 188410] reward=-115757271.6 actor_loss=0.2251 critic_loss=75804717800.7273 entropy=17.5946 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 188420] reward=-120998425.1 actor_loss=0.2504 critic_loss=81626185181.8667 entropy=17.5902 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 188420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-475013.9 mean_steps=15.2
|
|
[Episode 188430] reward=-118946078.2 actor_loss=0.4030 critic_loss=79048851456.0000 entropy=17.5920 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 188440] reward=-118044802.4 actor_loss=0.2146 critic_loss=78934559402.6667 entropy=17.6111 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 188440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470204.9 mean_steps=14.7
|
|
[Episode 188450] reward=-118305929.0 actor_loss=0.3368 critic_loss=77483295278.5455 entropy=17.5944 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 188460] reward=-110813200.1 actor_loss=0.4229 critic_loss=71507855405.5111 entropy=17.5971 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 188460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-583374.9 mean_steps=13.4
|
|
[Episode 188470] reward=-122776035.2 actor_loss=0.3017 critic_loss=84796316482.3704 entropy=17.6060 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 188480] reward=-117197734.9 actor_loss=0.3940 critic_loss=78869300110.2222 entropy=17.6178 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 188480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549882.3 mean_steps=13.2
|
|
[Episode 188490] reward=-115158770.5 actor_loss=0.3541 critic_loss=82755650810.3111 entropy=17.6140 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 188500] reward=-115720660.3 actor_loss=0.2931 critic_loss=76645136611.5556 entropy=17.6066 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 188500] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-320156.7 mean_steps=16.5
|
|
[Episode 188510] reward=-118707057.6 actor_loss=0.2740 critic_loss=76735747044.3243 entropy=17.5966 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 188520] reward=-120337184.7 actor_loss=0.2880 critic_loss=82697416612.9778 entropy=17.5865 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 188520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-358426.3 mean_steps=15.7
|
|
[Episode 188530] reward=-113902861.2 actor_loss=0.2731 critic_loss=77244279102.5778 entropy=17.5904 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 188540] reward=-112436148.8 actor_loss=0.2940 critic_loss=73441950600.9302 entropy=17.5799 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 188540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537893.0 mean_steps=14.2
|
|
[Episode 188550] reward=-120280839.5 actor_loss=0.2474 critic_loss=76417552748.0889 entropy=17.5711 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 188560] reward=-113368004.1 actor_loss=0.4172 critic_loss=69260891932.4444 entropy=17.5771 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 188560] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-644642.0 mean_steps=11.2
|
|
[Episode 188570] reward=-115961224.8 actor_loss=0.3638 critic_loss=76937189239.4667 entropy=17.5812 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 188580] reward=-120716370.5 actor_loss=0.3756 critic_loss=78716751234.8445 entropy=17.5592 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 188580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570576.6 mean_steps=13.9
|
|
[Episode 188590] reward=-119672235.6 actor_loss=0.3049 critic_loss=79264615264.7111 entropy=17.5430 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 188600] reward=-118418666.1 actor_loss=0.3077 critic_loss=75615313737.9556 entropy=17.5391 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 188600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544973.8 mean_steps=13.4
|
|
[Episode 188610] reward=-118619257.3 actor_loss=0.3717 critic_loss=78054966886.4000 entropy=17.5367 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 188620] reward=-123448128.7 actor_loss=0.2898 critic_loss=79852407694.2222 entropy=17.5372 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 188620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589745.3 mean_steps=12.6
|
|
[Episode 188630] reward=-121322587.3 actor_loss=0.2275 critic_loss=78354786804.6222 entropy=17.5395 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 188640] reward=-119284639.5 actor_loss=0.3131 critic_loss=79796353079.3513 entropy=17.5341 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 188640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549111.5 mean_steps=13.3
|
|
[Episode 188650] reward=-117625913.0 actor_loss=0.3134 critic_loss=74720667940.5714 entropy=17.5243 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 188660] reward=-114878957.5 actor_loss=0.3598 critic_loss=72375601106.4889 entropy=17.5285 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 188660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-580696.2 mean_steps=13.7
|
|
[Episode 188670] reward=-120159629.2 actor_loss=0.2512 critic_loss=79951126528.0000 entropy=17.5237 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 188680] reward=-117600380.9 actor_loss=0.3488 critic_loss=77642293065.9556 entropy=17.5224 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 188680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486347.4 mean_steps=14.6
|
|
[Episode 188690] reward=-122312999.7 actor_loss=0.2417 critic_loss=79469890491.7333 entropy=17.5193 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 188700] reward=-115839218.7 actor_loss=0.3491 critic_loss=75988185978.4348 entropy=17.5267 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 188700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-474263.7 mean_steps=15.3
|
|
[Episode 188710] reward=-116116149.3 actor_loss=0.2760 critic_loss=75351042978.9091 entropy=17.5093 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 188720] reward=-116643261.8 actor_loss=0.2449 critic_loss=74465772758.3256 entropy=17.5180 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 188720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459635.2 mean_steps=15.2
|
|
[Episode 188730] reward=-114107033.7 actor_loss=0.2476 critic_loss=76144439660.0889 entropy=17.5278 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 188740] reward=-120163270.5 actor_loss=0.2976 critic_loss=77401559677.1555 entropy=17.5250 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 188740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586775.9 mean_steps=12.7
|
|
[Episode 188750] reward=-113940613.2 actor_loss=0.4185 critic_loss=74360382259.2000 entropy=17.5213 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 188760] reward=-112743711.0 actor_loss=0.3146 critic_loss=72137115997.6585 entropy=17.5210 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 188760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471590.4 mean_steps=14.8
|
|
[Episode 188770] reward=-118339391.6 actor_loss=0.2724 critic_loss=78502337623.7714 entropy=17.5359 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 188780] reward=-120654300.9 actor_loss=0.2709 critic_loss=82081692435.6923 entropy=17.5447 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 188780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433585.9 mean_steps=15.2
|
|
[Episode 188790] reward=-118518961.8 actor_loss=0.3372 critic_loss=77028481280.0000 entropy=17.5375 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 188800] reward=-119315253.6 actor_loss=0.2837 critic_loss=168440803819.5200 entropy=17.5303 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 188800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-630360.6 mean_steps=13.0
|
|
[Episode 188810] reward=-116204256.4 actor_loss=0.2323 critic_loss=74673774592.0000 entropy=17.5330 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 188820] reward=-117705374.3 actor_loss=0.3603 critic_loss=74449049026.5600 entropy=17.5225 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 188820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-359418.1 mean_steps=16.9
|
|
[Episode 188830] reward=-112162923.3 actor_loss=0.2669 critic_loss=72052685482.6667 entropy=17.5264 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 188840] reward=-115993221.1 actor_loss=0.2112 critic_loss=73069765089.8824 entropy=17.5140 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 188840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574786.1 mean_steps=13.4
|
|
[Episode 188850] reward=-117644834.3 actor_loss=0.3199 critic_loss=73739883520.0000 entropy=17.4894 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 188860] reward=-116289401.9 actor_loss=0.2975 critic_loss=75073691648.0000 entropy=17.4881 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 188860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-556967.1 mean_steps=14.2
|
|
[Episode 188870] reward=-125244877.6 actor_loss=0.2132 critic_loss=87119607700.2105 entropy=17.4851 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 188880] reward=-119276609.7 actor_loss=0.2129 critic_loss=78564137227.1304 entropy=17.4902 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 188880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414537.2 mean_steps=15.1
|
|
[Episode 188890] reward=-118056191.8 actor_loss=0.2432 critic_loss=75026725858.7429 entropy=17.4839 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 188900] reward=-120155149.6 actor_loss=0.3168 critic_loss=78149089172.2105 entropy=17.4747 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 188900] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-278906.5 mean_steps=16.8
|
|
[Episode 188910] reward=-122904595.6 actor_loss=0.2662 critic_loss=80967976598.5882 entropy=17.4689 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 188920] reward=-115191901.8 actor_loss=0.2807 critic_loss=76109250195.9111 entropy=17.4658 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 188920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557448.2 mean_steps=13.3
|
|
[Episode 188930] reward=-118071524.1 actor_loss=0.1746 critic_loss=77970665745.0667 entropy=17.4608 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 188940] reward=-119123219.6 actor_loss=0.2999 critic_loss=78005645312.0000 entropy=17.4709 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 188940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550850.5 mean_steps=14.1
|
|
[Episode 188950] reward=-118710854.0 actor_loss=0.4151 critic_loss=75300400742.4000 entropy=17.4682 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 188960] reward=-117650192.1 actor_loss=0.2660 critic_loss=74470719277.9487 entropy=17.4731 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 188960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-591240.2 mean_steps=12.7
|
|
[Episode 188970] reward=-119280809.7 actor_loss=0.2805 critic_loss=80091269907.6923 entropy=17.4735 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 188980] reward=-116618176.7 actor_loss=0.2256 critic_loss=78071117378.7826 entropy=17.4732 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 188980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-478541.6 mean_steps=15.2
|
|
[Episode 188990] reward=-117767249.8 actor_loss=0.3303 critic_loss=77613745152.0000 entropy=17.4778 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 189000] reward=-123485706.6 actor_loss=0.2739 critic_loss=82001854181.5172 entropy=17.4726 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 189000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513416.8 mean_steps=14.0
|
|
[Episode 189010] reward=-121146273.3 actor_loss=0.2908 critic_loss=97027346432.0000 entropy=17.4808 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 189020] reward=-120717856.5 actor_loss=0.3291 critic_loss=76525106932.8696 entropy=17.4802 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 189020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-684042.6 mean_steps=14.2
|
|
[Episode 189030] reward=-119729128.8 actor_loss=0.3644 critic_loss=78068813177.2632 entropy=17.4855 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 189040] reward=-117652567.9 actor_loss=0.3227 critic_loss=102883755845.8182 entropy=17.4856 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 189040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-528915.0 mean_steps=16.1
|
|
[Episode 189050] reward=-118041929.3 actor_loss=0.2647 critic_loss=82474521067.5200 entropy=17.4843 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 189060] reward=-121755790.5 actor_loss=0.2323 critic_loss=77396582673.0667 entropy=17.4838 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 189060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-394235.5 mean_steps=14.8
|
|
[Episode 189070] reward=-121866727.6 actor_loss=0.2495 critic_loss=87085748224.0000 entropy=17.4841 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 189080] reward=-119249976.9 actor_loss=0.3000 critic_loss=86105450142.8965 entropy=17.4834 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 189080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571119.1 mean_steps=13.6
|
|
[Episode 189090] reward=-114487626.1 actor_loss=0.4277 critic_loss=74612790784.0000 entropy=17.4935 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 189100] reward=-124941406.5 actor_loss=0.2887 critic_loss=133830162090.6667 entropy=17.5009 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 189100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400535.1 mean_steps=15.8
|
|
[Episode 189110] reward=-124538546.3 actor_loss=0.2432 critic_loss=85735464304.6400 entropy=17.5157 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 189120] reward=-120041895.6 actor_loss=0.2507 critic_loss=78264341117.1555 entropy=17.5276 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 189120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-361131.9 mean_steps=14.7
|
|
[Episode 189130] reward=-119854997.1 actor_loss=0.2443 critic_loss=78728462053.5172 entropy=17.5190 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 189140] reward=-119263094.0 actor_loss=0.2865 critic_loss=88564750534.1935 entropy=17.5101 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 189140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-503808.1 mean_steps=13.2
|
|
[Episode 189150] reward=-120219256.0 actor_loss=0.2050 critic_loss=76723215109.6889 entropy=17.5138 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 189160] reward=-114468887.0 actor_loss=0.2340 critic_loss=76530624188.6316 entropy=17.5217 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 189160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420526.6 mean_steps=15.3
|
|
[Episode 189170] reward=-120482056.2 actor_loss=0.2601 critic_loss=79522297270.8571 entropy=17.5221 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 189180] reward=-124156273.3 actor_loss=0.2485 critic_loss=130875407902.1176 entropy=17.5353 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 189180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548572.9 mean_steps=14.1
|
|
[Episode 189190] reward=-110340641.8 actor_loss=0.2665 critic_loss=68909799196.4444 entropy=17.5543 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 189200] reward=-121125641.5 actor_loss=0.2371 critic_loss=80465363375.1579 entropy=17.5414 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 189200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-595107.2 mean_steps=14.7
|
|
[Episode 189210] reward=-125080697.5 actor_loss=0.2278 critic_loss=85516046791.1111 entropy=17.5393 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 189220] reward=-117626042.9 actor_loss=0.1870 critic_loss=77450098005.3333 entropy=17.5403 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 189220] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-640436.7 mean_steps=11.9
|
|
[Episode 189230] reward=-115489222.9 actor_loss=0.3697 critic_loss=74878163297.1035 entropy=17.5512 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 189240] reward=-113635912.6 actor_loss=0.2811 critic_loss=77253633365.3333 entropy=17.5313 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 189240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-456186.8 mean_steps=15.4
|
|
[Episode 189250] reward=-119287219.4 actor_loss=0.1737 critic_loss=88795774765.9487 entropy=17.5351 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 189260] reward=-125562012.0 actor_loss=0.2579 critic_loss=133143641019.7333 entropy=17.5404 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 189260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401591.3 mean_steps=15.5
|
|
[Episode 189270] reward=-116458905.0 actor_loss=0.3844 critic_loss=77531595093.3333 entropy=17.5320 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 189280] reward=-117504833.1 actor_loss=0.2709 critic_loss=72671800433.7778 entropy=17.5290 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 189280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453559.7 mean_steps=14.4
|
|
[Episode 189290] reward=-123171736.9 actor_loss=0.2602 critic_loss=82470797858.1333 entropy=17.5439 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 189300] reward=-116118443.1 actor_loss=0.3148 critic_loss=72976671744.0000 entropy=17.5246 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 189300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-445939.8 mean_steps=16.1
|
|
[Episode 189310] reward=-113858651.0 actor_loss=0.3112 critic_loss=73348182016.0000 entropy=17.5338 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 189320] reward=-115733567.9 actor_loss=0.2859 critic_loss=75299478771.8095 entropy=17.5410 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 189320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461719.4 mean_steps=14.6
|
|
[Episode 189330] reward=-121340317.6 actor_loss=0.2590 critic_loss=90641786969.0435 entropy=17.5509 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 189340] reward=-115828609.3 actor_loss=0.2320 critic_loss=77730151277.7143 entropy=17.5531 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 189340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-480546.1 mean_steps=12.8
|
|
[Episode 189350] reward=-116924953.9 actor_loss=0.3031 critic_loss=77908978736.7619 entropy=17.5501 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 189360] reward=-117828310.8 actor_loss=0.2776 critic_loss=74404174754.9091 entropy=17.5603 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 189360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439558.8 mean_steps=14.3
|
|
[Episode 189370] reward=-121983662.4 actor_loss=0.3333 critic_loss=78628744630.8571 entropy=17.5525 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 189380] reward=-124314699.2 actor_loss=0.2138 critic_loss=80094399799.6522 entropy=17.5431 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 189380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556851.3 mean_steps=13.2
|
|
[Episode 189390] reward=-118310663.6 actor_loss=0.2439 critic_loss=76775919616.0000 entropy=17.5445 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 189400] reward=-118308747.7 actor_loss=0.3462 critic_loss=81561719496.3478 entropy=17.5437 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 189400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-633905.7 mean_steps=12.8
|
|
[Episode 189410] reward=-119105364.3 actor_loss=0.3337 critic_loss=73636124330.6667 entropy=17.5307 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 189420] reward=-115514371.3 actor_loss=0.3396 critic_loss=70783576649.1429 entropy=17.5359 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 189420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480510.3 mean_steps=14.4
|
|
[Episode 189430] reward=-122244758.4 actor_loss=0.2468 critic_loss=76866003821.7143 entropy=17.5351 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 189440] reward=-121421496.3 actor_loss=0.3403 critic_loss=78760800679.7241 entropy=17.5396 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 189440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532848.1 mean_steps=13.1
|
|
[Episode 189450] reward=-121135137.7 actor_loss=0.3483 critic_loss=76408807424.0000 entropy=17.5245 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 189460] reward=-121612893.1 actor_loss=0.2620 critic_loss=78357051512.4706 entropy=17.5180 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 189460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507072.1 mean_steps=14.2
|
|
[Episode 189470] reward=-118772360.3 actor_loss=0.3970 critic_loss=81182204372.1143 entropy=17.5117 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 189480] reward=-120628307.6 actor_loss=0.3487 critic_loss=83128276855.4667 entropy=17.5089 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 189480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508069.3 mean_steps=13.8
|
|
[Episode 189490] reward=-117953600.3 actor_loss=0.1838 critic_loss=77432647895.5789 entropy=17.5094 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 189500] reward=-118436288.8 actor_loss=0.2855 critic_loss=79425636890.9474 entropy=17.5130 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 189500] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-416824.8 mean_steps=16.8
|
|
[Episode 189510] reward=-117942264.2 actor_loss=0.3930 critic_loss=76121631012.5714 entropy=17.5186 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 189520] reward=-117207508.7 actor_loss=0.3753 critic_loss=109240495497.8462 entropy=17.5112 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 189520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-422486.9 mean_steps=13.9
|
|
[Episode 189530] reward=-126642008.7 actor_loss=0.3210 critic_loss=82079779108.5714 entropy=17.5138 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 189540] reward=-123275216.2 actor_loss=0.3239 critic_loss=85002881385.4118 entropy=17.5145 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 189540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-489784.4 mean_steps=15.3
|
|
[Episode 189550] reward=-122305270.7 actor_loss=0.2388 critic_loss=78359385639.3846 entropy=17.5176 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 189560] reward=-117310902.1 actor_loss=0.4143 critic_loss=80665601228.8000 entropy=17.5186 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 189560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553942.6 mean_steps=13.0
|
|
[Episode 189570] reward=-118149607.5 actor_loss=0.4336 critic_loss=83711954534.4000 entropy=17.5138 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 189580] reward=-111834202.3 actor_loss=0.3966 critic_loss=78728190049.5238 entropy=17.5181 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 189580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536791.3 mean_steps=12.9
|
|
[Episode 189590] reward=-115956547.7 actor_loss=0.3947 critic_loss=78048873520.7619 entropy=17.5144 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 189600] reward=-115736349.0 actor_loss=0.4022 critic_loss=81842762434.2069 entropy=17.5238 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 189600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480715.6 mean_steps=14.6
|
|
[Episode 189610] reward=-114449455.4 actor_loss=0.4739 critic_loss=80250609336.3200 entropy=17.5186 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 189620] reward=-121075982.5 actor_loss=0.3172 critic_loss=80987868321.6842 entropy=17.5264 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 189620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-520762.3 mean_steps=15.8
|
|
[Episode 189630] reward=-120042385.4 actor_loss=0.2373 critic_loss=88373253188.2667 entropy=17.5238 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 189640] reward=-115202408.3 actor_loss=8.3397 critic_loss=82461402812.6316 entropy=17.5186 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 189640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476708.0 mean_steps=13.7
|
|
[Episode 189650] reward=-123517209.3 actor_loss=0.3741 critic_loss=80803906749.6296 entropy=17.5272 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 189660] reward=-113909464.8 actor_loss=0.3128 critic_loss=76064543712.9697 entropy=17.5194 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 189660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-564877.9 mean_steps=12.6
|
|
[Episode 189670] reward=-118432185.1 actor_loss=0.3220 critic_loss=77761539107.3103 entropy=17.5026 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 189680] reward=-113168926.4 actor_loss=0.4023 critic_loss=84652267257.4359 entropy=17.5021 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 189680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538164.6 mean_steps=13.2
|
|
[Episode 189690] reward=-122415364.4 actor_loss=0.2854 critic_loss=112449800979.6923 entropy=17.5097 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 189700] reward=-116460876.5 actor_loss=0.2872 critic_loss=73813061632.0000 entropy=17.5003 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 189700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-508206.2 mean_steps=14.8
|
|
[Episode 189710] reward=-119801261.0 actor_loss=0.2641 critic_loss=77180212224.0000 entropy=17.4948 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 189720] reward=-120780683.2 actor_loss=0.3048 critic_loss=77445665951.2889 entropy=17.5124 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 189720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529453.2 mean_steps=13.8
|
|
[Episode 189730] reward=-117086883.5 actor_loss=0.3757 critic_loss=80229952950.8571 entropy=17.5116 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 189740] reward=-117353335.3 actor_loss=0.2435 critic_loss=74531796751.0588 entropy=17.5121 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 189740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-595269.9 mean_steps=13.8
|
|
[Episode 189750] reward=-119266088.3 actor_loss=0.2312 critic_loss=76077274890.2400 entropy=17.4969 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 189760] reward=-115880524.1 actor_loss=0.2890 critic_loss=86586502972.9524 entropy=17.4797 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 189760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-417936.1 mean_steps=14.1
|
|
[Episode 189770] reward=-116781707.6 actor_loss=0.3379 critic_loss=75747058974.7200 entropy=17.4600 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 189780] reward=-113705031.8 actor_loss=0.3572 critic_loss=72418059605.3333 entropy=17.4574 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 189780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623668.6 mean_steps=12.2
|
|
[Episode 189790] reward=-115207396.1 actor_loss=0.3123 critic_loss=76524038826.6667 entropy=17.4366 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 189800] reward=-118048216.8 actor_loss=0.3554 critic_loss=78849510346.1053 entropy=17.4190 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 189800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-505582.1 mean_steps=15.2
|
|
[Episode 189810] reward=-116488073.1 actor_loss=0.3101 critic_loss=75328660880.6956 entropy=17.4145 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 189820] reward=-114831810.7 actor_loss=0.2283 critic_loss=71287136529.0667 entropy=17.4144 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 189820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566158.7 mean_steps=12.4
|
|
[Episode 189830] reward=-122549429.5 actor_loss=0.2946 critic_loss=78775363902.5778 entropy=17.4161 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 189840] reward=-117949285.6 actor_loss=0.2554 critic_loss=73306219861.3333 entropy=17.4186 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 189840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458275.8 mean_steps=15.4
|
|
[Episode 189850] reward=-112853361.5 actor_loss=0.3244 critic_loss=71816214664.5333 entropy=17.4132 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 189860] reward=-121852314.0 actor_loss=0.2569 critic_loss=80028562130.8235 entropy=17.4113 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 189860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-596585.8 mean_steps=13.6
|
|
[Episode 189870] reward=-117459172.7 actor_loss=0.3106 critic_loss=69761190229.3333 entropy=17.4132 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 189880] reward=-117072348.2 actor_loss=0.3124 critic_loss=76599697408.0000 entropy=17.4152 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 189880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504160.9 mean_steps=14.8
|
|
[Episode 189890] reward=-119019371.6 actor_loss=0.3029 critic_loss=82513018880.0000 entropy=17.4132 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 189900] reward=-115659533.7 actor_loss=0.3277 critic_loss=79389675852.1081 entropy=17.4214 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 189900] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-289596.6 mean_steps=17.1
|
|
[Episode 189910] reward=-117402427.6 actor_loss=0.3760 critic_loss=75142557801.0256 entropy=17.4302 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 189920] reward=-118289608.7 actor_loss=0.2474 critic_loss=75685128553.4118 entropy=17.4389 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 189920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-425547.4 mean_steps=14.8
|
|
[Episode 189930] reward=-117981911.4 actor_loss=0.2713 critic_loss=73446479979.7895 entropy=17.4384 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 189940] reward=-119435079.4 actor_loss=0.3425 critic_loss=82364389376.0000 entropy=17.4447 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 189940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-448546.9 mean_steps=13.8
|
|
[Episode 189950] reward=-120211770.3 actor_loss=0.2587 critic_loss=72923851252.6222 entropy=17.4356 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 189960] reward=-117948400.7 actor_loss=0.2085 critic_loss=77751432892.6316 entropy=17.4036 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 189960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459366.7 mean_steps=14.3
|
|
[Episode 189970] reward=-116412806.0 actor_loss=0.2751 critic_loss=77697202068.2105 entropy=17.4007 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 189980] reward=-122035798.4 actor_loss=0.2968 critic_loss=79805278746.9474 entropy=17.3867 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 189980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540902.7 mean_steps=12.9
|
|
[Episode 189990] reward=-118617036.3 actor_loss=0.3915 critic_loss=91183266394.3529 entropy=17.3939 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 190000] reward=-116528397.8 actor_loss=0.3931 critic_loss=78213388101.8182 entropy=17.3982 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 190000] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-401951.3 mean_steps=16.8
|
|
[Episode 190010] reward=-117311715.3 actor_loss=0.2796 critic_loss=76005091328.0000 entropy=17.4116 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 190020] reward=-120913661.4 actor_loss=0.2770 critic_loss=82199262330.8800 entropy=17.4177 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 190020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-568508.5 mean_steps=12.4
|
|
[Episode 190030] reward=-116461171.3 actor_loss=0.3765 critic_loss=74316443192.8889 entropy=17.4005 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 190040] reward=-117873119.1 actor_loss=0.2377 critic_loss=74369830137.0811 entropy=17.4007 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 190040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-555535.5 mean_steps=13.8
|
|
[Episode 190050] reward=-116467709.0 actor_loss=0.2223 critic_loss=73676054831.4074 entropy=17.3991 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 190060] reward=-116327461.1 actor_loss=0.3882 critic_loss=78356038314.6667 entropy=17.3942 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 190060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480333.4 mean_steps=14.7
|
|
[Episode 190070] reward=-121005071.7 actor_loss=0.3284 critic_loss=82557882368.0000 entropy=17.3982 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 190080] reward=-117675482.0 actor_loss=0.2571 critic_loss=73357669814.8571 entropy=17.4103 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 190080] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-255624.6 mean_steps=18.7
|
|
[Episode 190090] reward=-117176320.5 actor_loss=0.3226 critic_loss=73674783857.7778 entropy=17.4187 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 190100] reward=-119549712.9 actor_loss=0.2055 critic_loss=76023267874.1333 entropy=17.4266 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 190100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439602.5 mean_steps=14.4
|
|
[Episode 190110] reward=-119408137.2 actor_loss=0.3642 critic_loss=74303797970.8235 entropy=17.4164 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 190120] reward=-121889604.2 actor_loss=0.2148 critic_loss=80067975655.6190 entropy=17.4146 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 190120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575396.8 mean_steps=13.5
|
|
[Episode 190130] reward=-124831398.1 actor_loss=0.3372 critic_loss=78449075685.0526 entropy=17.4178 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 190140] reward=-120928565.1 actor_loss=0.2340 critic_loss=79191988873.3659 entropy=17.4103 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 190140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-565356.8 mean_steps=14.2
|
|
[Episode 190150] reward=-112438940.5 actor_loss=0.3147 critic_loss=74835514079.1795 entropy=17.4085 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 190160] reward=-119889672.7 actor_loss=0.3294 critic_loss=76666867570.7586 entropy=17.4000 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 190160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535557.0 mean_steps=13.0
|
|
[Episode 190170] reward=-115560579.9 actor_loss=0.2652 critic_loss=78032159047.6800 entropy=17.3963 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 190180] reward=-121138465.1 actor_loss=0.2976 critic_loss=94821853086.4762 entropy=17.3850 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 190180] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-330576.1 mean_steps=17.4
|
|
[Episode 190190] reward=-122541482.5 actor_loss=0.2947 critic_loss=79830024192.0000 entropy=17.3775 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 190200] reward=-116704382.1 actor_loss=0.3402 critic_loss=94371189711.2381 entropy=17.3783 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 190200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-603225.0 mean_steps=11.7
|
|
[Episode 190210] reward=-118837976.2 actor_loss=0.3766 critic_loss=78556246662.7368 entropy=17.3787 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 190220] reward=-122936942.3 actor_loss=0.2184 critic_loss=93809724494.7692 entropy=17.3848 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 190220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394972.6 mean_steps=15.9
|
|
[Episode 190230] reward=-116655141.6 actor_loss=0.2409 critic_loss=75415084782.9333 entropy=17.3879 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 190240] reward=-116365820.3 actor_loss=0.2985 critic_loss=73304422894.3448 entropy=17.3814 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 190240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409505.7 mean_steps=14.9
|
|
[Episode 190250] reward=-119341078.7 actor_loss=0.2960 critic_loss=76645855323.0222 entropy=17.3877 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 190260] reward=-117348488.3 actor_loss=0.2488 critic_loss=116729996083.2000 entropy=17.3895 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 190260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581152.0 mean_steps=12.8
|
|
[Episode 190270] reward=-114984795.4 actor_loss=0.3671 critic_loss=76207890863.1579 entropy=17.4003 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 190280] reward=-115150723.9 actor_loss=0.2113 critic_loss=77461028083.8095 entropy=17.4075 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 190280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-527858.9 mean_steps=13.2
|
|
[Episode 190290] reward=-117674873.9 actor_loss=0.3475 critic_loss=81304745140.7059 entropy=17.4031 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 190300] reward=-122031443.8 actor_loss=0.3086 critic_loss=80498788165.8182 entropy=17.4053 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 190300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-472780.7 mean_steps=15.4
|
|
[Episode 190310] reward=-124255008.0 actor_loss=0.2889 critic_loss=87038018446.2222 entropy=17.4037 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 190320] reward=-117347759.8 actor_loss=0.3545 critic_loss=75112792506.8108 entropy=17.4010 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 190320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-393345.8 mean_steps=15.8
|
|
[Episode 190330] reward=-117490880.0 actor_loss=0.3243 critic_loss=78655342364.4444 entropy=17.4136 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 190340] reward=-111052479.8 actor_loss=0.3257 critic_loss=73155048369.2308 entropy=17.4144 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 190340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-424052.0 mean_steps=13.2
|
|
[Episode 190350] reward=-114782684.1 actor_loss=0.2057 critic_loss=73382989970.2857 entropy=17.4135 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 190360] reward=-116327075.0 actor_loss=0.2837 critic_loss=80234400426.6667 entropy=17.4218 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 190360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-513005.1 mean_steps=12.2
|
|
[Episode 190370] reward=-115571359.6 actor_loss=0.2254 critic_loss=84987872051.2000 entropy=17.4283 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 190380] reward=-117133196.6 actor_loss=0.3285 critic_loss=78527700992.0000 entropy=17.4333 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 190380] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-720538.1 mean_steps=12.5
|
|
[Episode 190390] reward=-125213642.4 actor_loss=0.2283 critic_loss=84897965260.8000 entropy=17.4356 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 190400] reward=-119479446.4 actor_loss=0.2919 critic_loss=76798075270.0952 entropy=17.4347 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 190400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471250.9 mean_steps=13.7
|
|
[Episode 190410] reward=-116397588.6 actor_loss=0.3328 critic_loss=78321072355.5556 entropy=17.4223 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 190420] reward=-113902798.9 actor_loss=0.3596 critic_loss=70773926206.5778 entropy=17.4168 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 190420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509453.0 mean_steps=14.2
|
|
[Episode 190430] reward=-117984309.9 actor_loss=0.2981 critic_loss=79293714116.9231 entropy=17.4301 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 190440] reward=-120596508.2 actor_loss=0.3245 critic_loss=82978642602.6667 entropy=17.4380 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 190440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-546549.3 mean_steps=14.2
|
|
[Episode 190450] reward=-118991116.5 actor_loss=0.3279 critic_loss=79335816147.4783 entropy=17.4370 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 190460] reward=-119653957.5 actor_loss=0.3310 critic_loss=80642933126.0952 entropy=17.4423 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 190460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517947.4 mean_steps=12.6
|
|
[Episode 190470] reward=-116570522.4 actor_loss=0.3673 critic_loss=122089209225.8462 entropy=17.4327 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 190480] reward=-112008763.5 actor_loss=0.3223 critic_loss=71700412962.1333 entropy=17.4282 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 190480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442541.7 mean_steps=14.2
|
|
[Episode 190490] reward=-110029523.6 actor_loss=0.3770 critic_loss=71956050970.2564 entropy=17.4279 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 190500] reward=-121324234.9 actor_loss=0.2801 critic_loss=78093871006.4762 entropy=17.4342 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 190500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-480015.1 mean_steps=15.7
|
|
[Episode 190510] reward=-117572884.6 actor_loss=0.2222 critic_loss=85036160037.9259 entropy=17.4361 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 190520] reward=-120039778.0 actor_loss=0.2981 critic_loss=105074529075.2000 entropy=17.4368 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 190520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-337276.5 mean_steps=16.7
|
|
[Episode 190530] reward=-111428715.2 actor_loss=0.3456 critic_loss=67389750218.1053 entropy=17.4299 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 190540] reward=-116771942.9 actor_loss=0.3328 critic_loss=77482737664.0000 entropy=17.4299 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 190540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-522370.9 mean_steps=13.3
|
|
[Episode 190550] reward=-119566733.5 actor_loss=0.2619 critic_loss=75596598980.9231 entropy=17.4215 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 190560] reward=-115676833.7 actor_loss=0.2521 critic_loss=75098833773.7143 entropy=17.4170 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 190560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581127.1 mean_steps=13.1
|
|
[Episode 190570] reward=-115694262.1 actor_loss=0.2700 critic_loss=79180486726.6207 entropy=17.4052 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 190580] reward=-120637068.2 actor_loss=0.3825 critic_loss=78171358890.6667 entropy=17.4097 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 190580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-577847.2 mean_steps=14.4
|
|
[Episode 190590] reward=-116803741.1 actor_loss=0.2585 critic_loss=80291461722.3529 entropy=17.4088 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 190600] reward=-123384746.1 actor_loss=0.2530 critic_loss=79262400030.1176 entropy=17.4111 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 190600] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-714298.4 mean_steps=11.7
|
|
[Episode 190610] reward=-112758253.4 actor_loss=0.3088 critic_loss=91715241106.2857 entropy=17.4124 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 190620] reward=-115034852.0 actor_loss=0.2865 critic_loss=76417357141.3333 entropy=17.4061 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 190620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435626.1 mean_steps=15.3
|
|
[Episode 190630] reward=-117521413.0 actor_loss=0.2663 critic_loss=79475845658.9474 entropy=17.4028 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 190640] reward=-117711666.3 actor_loss=0.3007 critic_loss=77760543439.5676 entropy=17.4062 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 190640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549742.7 mean_steps=14.1
|
|
[Episode 190650] reward=-119072279.0 actor_loss=0.2917 critic_loss=88093499695.4074 entropy=17.4113 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 190660] reward=-120150053.2 actor_loss=0.2881 critic_loss=86337551123.6923 entropy=17.4129 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 190660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493899.7 mean_steps=14.5
|
|
[Episode 190670] reward=-121557243.3 actor_loss=0.2819 critic_loss=87645223842.9091 entropy=17.4082 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 190680] reward=-112853353.3 actor_loss=0.2888 critic_loss=72024940544.0000 entropy=17.3985 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 190680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530709.3 mean_steps=14.0
|
|
[Episode 190690] reward=-121057081.2 actor_loss=0.3089 critic_loss=77615513600.0000 entropy=17.3920 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 190700] reward=-122071974.5 actor_loss=0.3032 critic_loss=85948483829.7600 entropy=17.3863 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 190700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-568270.7 mean_steps=14.6
|
|
[Episode 190710] reward=-120801678.5 actor_loss=0.2432 critic_loss=78980013367.6522 entropy=17.3901 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 190720] reward=-113699502.8 actor_loss=0.4484 critic_loss=75672792996.9778 entropy=17.3855 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 190720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558398.3 mean_steps=13.5
|
|
[Episode 190730] reward=-117884641.5 actor_loss=0.1655 critic_loss=74582238901.6774 entropy=17.3761 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 190740] reward=-116472783.6 actor_loss=0.2966 critic_loss=78949021286.4000 entropy=17.3734 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 190740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-479469.1 mean_steps=15.8
|
|
[Episode 190750] reward=-119851929.9 actor_loss=0.3181 critic_loss=80533736288.7111 entropy=17.3737 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 190760] reward=-115914582.8 actor_loss=0.3059 critic_loss=72958966603.2941 entropy=17.3552 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 190760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436809.0 mean_steps=15.3
|
|
[Episode 190770] reward=-112808578.5 actor_loss=0.2910 critic_loss=74057304655.6444 entropy=17.3583 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 190780] reward=-116880968.9 actor_loss=0.3380 critic_loss=84702393002.6667 entropy=17.3557 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 190780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421213.4 mean_steps=15.2
|
|
[Episode 190790] reward=-119454262.0 actor_loss=0.3922 critic_loss=82325046476.8000 entropy=17.3533 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 190800] reward=-117165007.3 actor_loss=0.2422 critic_loss=80597260076.1379 entropy=17.3480 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 190800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511528.7 mean_steps=13.9
|
|
[Episode 190810] reward=-118152204.1 actor_loss=0.4536 critic_loss=75242728354.9091 entropy=17.3359 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 190820] reward=-116617888.1 actor_loss=0.3744 critic_loss=77254491136.0000 entropy=17.3338 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 190820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505811.5 mean_steps=13.2
|
|
[Episode 190830] reward=-118120837.4 actor_loss=0.3072 critic_loss=75052176545.6842 entropy=17.3408 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 190840] reward=-117273904.6 actor_loss=0.3021 critic_loss=73601449425.4545 entropy=17.3509 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 190840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-504517.3 mean_steps=13.6
|
|
[Episode 190850] reward=-125486078.5 actor_loss=0.2962 critic_loss=249052895004.4445 entropy=17.3642 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 190860] reward=-113892484.8 actor_loss=0.2869 critic_loss=78943213613.5111 entropy=17.3694 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 190860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423701.8 mean_steps=15.4
|
|
[Episode 190870] reward=-115913517.1 actor_loss=0.3479 critic_loss=75800825036.8000 entropy=17.3622 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 190880] reward=-115071660.7 actor_loss=0.3633 critic_loss=73262943475.8095 entropy=17.3690 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 190880] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-620727.7 mean_steps=12.0
|
|
[Episode 190890] reward=-114682410.7 actor_loss=0.2547 critic_loss=82118460633.2121 entropy=17.3697 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 190900] reward=-116905032.6 actor_loss=0.2788 critic_loss=76971954508.1081 entropy=17.3714 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 190900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-636917.4 mean_steps=11.9
|
|
[Episode 190910] reward=-125044721.9 actor_loss=0.2606 critic_loss=84759078157.4737 entropy=17.3676 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 190920] reward=-118362539.9 actor_loss=0.3773 critic_loss=80033051648.0000 entropy=17.3776 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 190920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553390.9 mean_steps=13.5
|
|
[Episode 190930] reward=-116079329.2 actor_loss=0.4057 critic_loss=83814505403.7333 entropy=17.3854 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 190940] reward=-116058811.2 actor_loss=0.3426 critic_loss=72382870869.3333 entropy=17.3895 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 190940] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-280996.1 mean_steps=17.8
|
|
[Episode 190950] reward=-119761049.3 actor_loss=0.2647 critic_loss=104901880490.6667 entropy=17.3997 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 190960] reward=-121002970.4 actor_loss=0.3032 critic_loss=140340868827.4286 entropy=17.3973 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 190960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-531629.0 mean_steps=16.0
|
|
[Episode 190970] reward=-121457418.6 actor_loss=0.2983 critic_loss=87748121110.2609 entropy=17.4017 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 190980] reward=-118451924.8 actor_loss=0.2731 critic_loss=76616020878.2222 entropy=17.4077 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 190980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-599394.2 mean_steps=13.8
|
|
[Episode 190990] reward=-121545767.8 actor_loss=0.3002 critic_loss=78513619425.8824 entropy=17.4129 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 191000] reward=-123442918.7 actor_loss=0.2749 critic_loss=80494152089.6000 entropy=17.4184 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 191000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-496073.1 mean_steps=13.2
|
|
[Episode 191010] reward=-118287610.6 actor_loss=0.1987 critic_loss=78530819803.4286 entropy=17.4116 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 191020] reward=-119116901.2 actor_loss=0.3512 critic_loss=77956842455.0400 entropy=17.4198 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 191020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434543.1 mean_steps=15.2
|
|
[Episode 191030] reward=-121179017.2 actor_loss=0.3150 critic_loss=81999368192.0000 entropy=17.4109 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 191040] reward=-120442273.4 actor_loss=0.2531 critic_loss=216820996505.6000 entropy=17.4130 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 191040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557194.8 mean_steps=13.3
|
|
[Episode 191050] reward=-116853421.9 actor_loss=0.3030 critic_loss=77246763008.0000 entropy=17.4097 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 191060] reward=-121821021.6 actor_loss=0.2403 critic_loss=77628855940.7407 entropy=17.4093 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 191060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-542055.9 mean_steps=15.3
|
|
[Episode 191070] reward=-115062015.8 actor_loss=0.3861 critic_loss=74274867200.0000 entropy=17.4170 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 191080] reward=-118158471.6 actor_loss=0.2725 critic_loss=76171373961.8462 entropy=17.4202 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 191080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-561737.3 mean_steps=12.6
|
|
[Episode 191090] reward=-120353583.7 actor_loss=0.2956 critic_loss=80085935445.3333 entropy=17.4227 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 191100] reward=-118403072.8 actor_loss=0.3428 critic_loss=73502753374.8148 entropy=17.4238 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 191100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-464725.7 mean_steps=15.5
|
|
[Episode 191110] reward=-117927454.6 actor_loss=0.3039 critic_loss=76852553355.6364 entropy=17.4370 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 191120] reward=-114964195.0 actor_loss=0.3528 critic_loss=77963513856.0000 entropy=17.4359 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 191120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-592161.2 mean_steps=11.7
|
|
[Episode 191130] reward=-121992438.8 actor_loss=0.3166 critic_loss=78951499912.5333 entropy=17.4493 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 191140] reward=-119174597.9 actor_loss=0.1405 critic_loss=78863254572.5217 entropy=17.4534 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 191140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558664.5 mean_steps=13.4
|
|
[Episode 191150] reward=-117160279.8 actor_loss=0.3565 critic_loss=74228886641.7778 entropy=17.4527 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 191160] reward=-117562345.9 actor_loss=0.3737 critic_loss=89224179471.0588 entropy=17.4406 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 191160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-495693.5 mean_steps=13.2
|
|
[Episode 191170] reward=-115872451.2 actor_loss=0.2799 critic_loss=77986695168.0000 entropy=17.4420 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 191180] reward=-117812688.8 actor_loss=0.2716 critic_loss=76016684624.8421 entropy=17.4424 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 191180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574174.9 mean_steps=13.3
|
|
[Episode 191190] reward=-118479099.9 actor_loss=0.2310 critic_loss=78527741952.0000 entropy=17.4421 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 191200] reward=-119503036.5 actor_loss=0.3388 critic_loss=80908443648.0000 entropy=17.4533 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 191200] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-326603.8 mean_steps=16.8
|
|
[Episode 191210] reward=-122169984.3 actor_loss=0.1794 critic_loss=92938591928.3200 entropy=17.4485 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 191220] reward=-116887420.4 actor_loss=0.2844 critic_loss=74960687104.0000 entropy=17.4464 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 191220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463738.7 mean_steps=15.8
|
|
[Episode 191230] reward=-118941644.5 actor_loss=0.2533 critic_loss=74599334707.2000 entropy=17.4416 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 191240] reward=-112336314.0 actor_loss=0.2975 critic_loss=70680644142.5455 entropy=17.4450 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 191240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402815.1 mean_steps=15.0
|
|
[Episode 191250] reward=-137631239.0 actor_loss=0.2416 critic_loss=1097551646479.0588 entropy=17.4408 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 191260] reward=-116030831.4 actor_loss=0.2263 critic_loss=87692315852.8000 entropy=17.4464 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 191260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-641390.8 mean_steps=13.2
|
|
[Episode 191270] reward=-124122086.7 actor_loss=0.2983 critic_loss=739841125677.1765 entropy=17.4567 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 191280] reward=-118102929.9 actor_loss=0.1698 critic_loss=74299503957.3333 entropy=17.4559 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 191280] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-683559.8 mean_steps=10.3
|
|
[Episode 191290] reward=-114028342.1 actor_loss=0.3497 critic_loss=75182338582.2609 entropy=17.4545 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 191300] reward=-115187922.4 actor_loss=0.3327 critic_loss=77862646650.4348 entropy=17.4522 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 191300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416367.6 mean_steps=15.2
|
|
[Episode 191310] reward=-119820261.0 actor_loss=0.2004 critic_loss=74203499799.2727 entropy=17.4532 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 191320] reward=-117004657.3 actor_loss=0.2257 critic_loss=77443365683.2000 entropy=17.4480 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 191320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553018.7 mean_steps=14.4
|
|
[Episode 191330] reward=-119763087.5 actor_loss=0.2986 critic_loss=78114753042.9630 entropy=17.4336 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 191340] reward=-116407449.3 actor_loss=0.4743 critic_loss=75099151671.6522 entropy=17.4477 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1556 front_blocked=0
|
|
[Eval 191340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-611970.6 mean_steps=12.8
|
|
[Episode 191350] reward=-123526979.8 actor_loss=0.2686 critic_loss=80940302336.0000 entropy=17.4410 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 191360] reward=-120277888.1 actor_loss=0.2430 critic_loss=77006931230.7200 entropy=17.4363 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 191360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-437780.1 mean_steps=16.3
|
|
[Episode 191370] reward=-121165216.3 actor_loss=0.2757 critic_loss=79354840405.3333 entropy=17.4375 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 191380] reward=-122947294.5 actor_loss=0.2564 critic_loss=80065146507.6364 entropy=17.4338 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 191380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473397.5 mean_steps=14.8
|
|
[Episode 191390] reward=-115539043.3 actor_loss=0.2536 critic_loss=75424356761.6000 entropy=17.4282 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 191400] reward=-118620412.8 actor_loss=0.2119 critic_loss=76863615191.5789 entropy=17.4321 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 191400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-529144.7 mean_steps=14.8
|
|
[Episode 191410] reward=-122436952.7 actor_loss=0.2066 critic_loss=78764278877.0909 entropy=17.4310 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 191420] reward=-116535610.6 actor_loss=0.3121 critic_loss=76449783808.0000 entropy=17.4257 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 191420] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-678450.2 mean_steps=11.2
|
|
[Episode 191430] reward=-116095970.0 actor_loss=0.2825 critic_loss=77688283439.4074 entropy=17.4211 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 191440] reward=-119728988.3 actor_loss=0.2940 critic_loss=79086034944.0000 entropy=17.4270 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 191440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563764.8 mean_steps=13.4
|
|
[Episode 191450] reward=-114314999.8 actor_loss=0.3404 critic_loss=81259344532.6452 entropy=17.4343 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 191460] reward=-118518359.2 actor_loss=0.2559 critic_loss=81099630342.2439 entropy=17.4333 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 191460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-461004.7 mean_steps=16.4
|
|
[Episode 191470] reward=-123303672.8 actor_loss=0.2576 critic_loss=81135273301.3333 entropy=17.4311 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 191480] reward=-107030586.4 actor_loss=0.4454 critic_loss=74056686126.5455 entropy=17.4282 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 191480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-619886.4 mean_steps=14.0
|
|
[Episode 191490] reward=-118585913.4 actor_loss=0.3185 critic_loss=73757795328.0000 entropy=17.4208 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 191500] reward=-113767705.2 actor_loss=0.2476 critic_loss=68446471119.2381 entropy=17.4279 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 191500] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-757043.2 mean_steps=11.6
|
|
[Episode 191510] reward=-110633872.7 actor_loss=0.3154 critic_loss=71624422253.7143 entropy=17.4295 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 191520] reward=-119637067.5 actor_loss=0.2819 critic_loss=80782402182.7368 entropy=17.4281 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 191520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421769.8 mean_steps=15.6
|
|
[Episode 191530] reward=-113474846.9 actor_loss=0.3179 critic_loss=70622130517.3333 entropy=17.4436 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 191540] reward=-118613349.4 actor_loss=0.3297 critic_loss=133670728300.6061 entropy=17.4390 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 191540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580782.9 mean_steps=12.2
|
|
[Episode 191550] reward=-117415269.3 actor_loss=0.3661 critic_loss=81112676215.4667 entropy=17.4398 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 191560] reward=-120438580.3 actor_loss=0.2171 critic_loss=81878644922.1818 entropy=17.4579 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 191560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530639.2 mean_steps=14.2
|
|
[Episode 191570] reward=-114175155.0 actor_loss=0.2879 critic_loss=77855347003.0769 entropy=17.4476 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 191580] reward=-123425340.7 actor_loss=0.3180 critic_loss=493988642816.0000 entropy=17.4338 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 191580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488139.9 mean_steps=15.1
|
|
[Episode 191590] reward=-113498078.6 actor_loss=0.3065 critic_loss=114088497060.9778 entropy=17.4409 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 191600] reward=-115496111.3 actor_loss=0.2803 critic_loss=76539960706.8445 entropy=17.4341 approx_kl=0.0103 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 191600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402267.3 mean_steps=15.8
|
|
[Episode 191610] reward=-116870840.0 actor_loss=0.4045 critic_loss=78218534912.0000 entropy=17.4279 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 191620] reward=-121532782.9 actor_loss=0.2509 critic_loss=79962887145.2444 entropy=17.4265 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 191620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569276.2 mean_steps=13.6
|
|
[Episode 191630] reward=-122274793.2 actor_loss=0.3296 critic_loss=114029811350.5882 entropy=17.4489 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 191640] reward=-121228087.2 actor_loss=0.2956 critic_loss=86052185936.4571 entropy=17.4397 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 191640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477401.4 mean_steps=14.7
|
|
[Episode 191650] reward=-122561161.6 actor_loss=0.2800 critic_loss=80939581849.6000 entropy=17.4376 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 191660] reward=-119018133.3 actor_loss=0.2432 critic_loss=75711139657.9556 entropy=17.4505 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 191660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-499801.6 mean_steps=14.7
|
|
[Episode 191670] reward=-116358693.0 actor_loss=0.2084 critic_loss=77454672281.6000 entropy=17.4588 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 191680] reward=-117429413.7 actor_loss=0.3101 critic_loss=76880910199.4667 entropy=17.4511 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 191680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453764.9 mean_steps=15.8
|
|
[Episode 191690] reward=-119589345.3 actor_loss=0.2031 critic_loss=73106342434.1333 entropy=17.4392 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 191700] reward=-112184281.3 actor_loss=0.3502 critic_loss=74572587736.1778 entropy=17.4474 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 191700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-520383.8 mean_steps=15.3
|
|
[Episode 191710] reward=-115494972.0 actor_loss=0.3262 critic_loss=75015233425.2973 entropy=17.4356 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 191720] reward=-114879771.6 actor_loss=0.4059 critic_loss=70945440972.8000 entropy=17.4266 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 191720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571748.1 mean_steps=13.4
|
|
[Episode 191730] reward=-120817152.4 actor_loss=0.2682 critic_loss=82939823900.4444 entropy=17.4439 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 191740] reward=-120406857.7 actor_loss=0.2534 critic_loss=75818150843.7333 entropy=17.4367 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 191740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397405.1 mean_steps=15.1
|
|
[Episode 191750] reward=-113592054.4 actor_loss=0.3309 critic_loss=75806416531.9111 entropy=17.4542 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 191760] reward=-117852252.4 actor_loss=0.3022 critic_loss=80768751678.0606 entropy=17.4538 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 191760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530781.8 mean_steps=13.7
|
|
[Episode 191770] reward=-120205286.3 actor_loss=0.2713 critic_loss=76969937578.6667 entropy=17.4500 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 191780] reward=-123594629.4 actor_loss=0.2261 critic_loss=82411135407.1579 entropy=17.4281 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 191780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487626.8 mean_steps=13.8
|
|
[Episode 191790] reward=-122935508.9 actor_loss=0.4184 critic_loss=99246239744.0000 entropy=17.4236 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 191800] reward=-123894522.9 actor_loss=0.3186 critic_loss=274260224682.6667 entropy=17.4314 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 191800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525142.6 mean_steps=14.2
|
|
[Episode 191810] reward=-116847614.8 actor_loss=0.3314 critic_loss=114840867108.5714 entropy=17.4231 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 191820] reward=-112084403.9 actor_loss=0.3629 critic_loss=71532076807.7576 entropy=17.4224 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 191820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-343401.4 mean_steps=16.8
|
|
[Episode 191830] reward=-118462315.0 actor_loss=0.3408 critic_loss=79670131565.7143 entropy=17.4180 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 191840] reward=-113870902.5 actor_loss=0.4502 critic_loss=79214160058.1818 entropy=17.4170 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Eval 191840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610594.0 mean_steps=12.8
|
|
[Episode 191850] reward=-110815625.9 actor_loss=0.4390 critic_loss=74162333988.5714 entropy=17.4094 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 191860] reward=-115834235.0 actor_loss=0.3011 critic_loss=70754825741.8378 entropy=17.4203 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 191860] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-336275.5 mean_steps=16.6
|
|
[Episode 191870] reward=-112203403.8 actor_loss=0.3582 critic_loss=77203290298.1818 entropy=17.4254 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 191880] reward=-124511808.7 actor_loss=0.2718 critic_loss=87501831208.9600 entropy=17.4291 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 191880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520662.2 mean_steps=14.1
|
|
[Episode 191890] reward=-121029539.4 actor_loss=0.2256 critic_loss=81556446576.6400 entropy=17.4337 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 191900] reward=-118179644.5 actor_loss=0.3004 critic_loss=79876831156.1481 entropy=17.4350 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 191900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483035.4 mean_steps=14.1
|
|
[Episode 191910] reward=-122044776.0 actor_loss=0.3172 critic_loss=78551222499.5556 entropy=17.4258 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 191920] reward=-114253856.6 actor_loss=0.2427 critic_loss=73383886848.0000 entropy=17.4257 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 191920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466104.9 mean_steps=14.7
|
|
[Episode 191930] reward=-119724020.2 actor_loss=0.2887 critic_loss=75570804599.4667 entropy=17.4225 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 191940] reward=-122208687.1 actor_loss=0.3151 critic_loss=78873071030.8571 entropy=17.4050 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 191940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492466.9 mean_steps=14.8
|
|
[Episode 191950] reward=-117757684.5 actor_loss=0.3509 critic_loss=79993978669.9487 entropy=17.4048 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 191960] reward=-120571749.5 actor_loss=0.2590 critic_loss=81724846899.2000 entropy=17.4214 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 191960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540984.4 mean_steps=14.3
|
|
[Episode 191970] reward=-117507314.1 actor_loss=0.2522 critic_loss=76258118602.1053 entropy=17.4201 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 191980] reward=-117643010.2 actor_loss=0.3640 critic_loss=75054531925.3333 entropy=17.4276 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 191980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-659746.5 mean_steps=12.4
|
|
[Episode 191990] reward=-121523646.8 actor_loss=0.3070 critic_loss=82941174752.9697 entropy=17.4272 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 192000] reward=-116400262.1 actor_loss=0.2744 critic_loss=71224187172.5714 entropy=17.4393 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 192000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-456768.5 mean_steps=13.6
|
|
[Episode 192010] reward=-121704761.0 actor_loss=0.2731 critic_loss=78858704523.6364 entropy=17.4303 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 192020] reward=-110647724.1 actor_loss=0.3906 critic_loss=71144711980.1379 entropy=17.4200 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 192020] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-292669.4 mean_steps=16.9
|
|
[Episode 192030] reward=-117739088.1 actor_loss=0.4405 critic_loss=77208269619.2000 entropy=17.4172 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 192040] reward=-116428693.5 actor_loss=0.3747 critic_loss=73025882794.6667 entropy=17.4114 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 192040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442182.1 mean_steps=15.6
|
|
[Episode 192050] reward=-119210458.6 actor_loss=0.3474 critic_loss=79259390130.0870 entropy=17.4155 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 192060] reward=-121644218.6 actor_loss=0.2228 critic_loss=77417145594.3111 entropy=17.4177 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 192060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-638158.6 mean_steps=12.1
|
|
[Episode 192070] reward=-122426307.4 actor_loss=0.2657 critic_loss=81057080617.2903 entropy=17.4296 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 192080] reward=-129382949.3 actor_loss=0.2998 critic_loss=612255442534.4000 entropy=17.4233 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 192080] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-393001.7 mean_steps=17.4
|
|
[Episode 192090] reward=-120316790.9 actor_loss=0.2800 critic_loss=79246072763.7333 entropy=17.4293 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 192100] reward=-118621490.7 actor_loss=0.2180 critic_loss=81175264597.3333 entropy=17.4410 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 192100] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-374422.4 mean_steps=16.8
|
|
[Episode 192110] reward=-116738388.7 actor_loss=0.3027 critic_loss=75740304813.4194 entropy=17.4474 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 192120] reward=-116646101.2 actor_loss=0.3291 critic_loss=73975372185.6000 entropy=17.4487 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 192120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509626.0 mean_steps=14.2
|
|
[Episode 192130] reward=-119608698.1 actor_loss=0.3076 critic_loss=79227955411.8621 entropy=17.4564 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 192140] reward=-116814273.2 actor_loss=0.3381 critic_loss=76652801401.2632 entropy=17.4645 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 192140] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-636824.6 mean_steps=12.0
|
|
[Episode 192150] reward=-107229359.2 actor_loss=0.3598 critic_loss=64481139643.7333 entropy=17.4585 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 192160] reward=-119167105.2 actor_loss=0.2384 critic_loss=72505656813.0370 entropy=17.4540 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 192160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-374392.9 mean_steps=16.9
|
|
[Episode 192170] reward=-120836326.4 actor_loss=0.3171 critic_loss=79806602907.8261 entropy=17.4480 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 192180] reward=-123055433.1 actor_loss=0.1913 critic_loss=108053427365.1613 entropy=17.4583 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 192180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-541034.9 mean_steps=12.4
|
|
[Episode 192190] reward=-119308471.8 actor_loss=0.3210 critic_loss=77154000151.2727 entropy=17.4748 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 192200] reward=-110987775.0 actor_loss=0.3471 critic_loss=73538128851.4783 entropy=17.4722 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 192200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422804.3 mean_steps=15.4
|
|
[Episode 192210] reward=-120802181.3 actor_loss=0.2682 critic_loss=78506412077.5111 entropy=17.4827 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 192220] reward=-114293646.9 actor_loss=0.3571 critic_loss=73185311905.6842 entropy=17.4853 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 192220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590693.0 mean_steps=13.1
|
|
[Episode 192230] reward=-116743601.7 actor_loss=0.2514 critic_loss=73547392037.9259 entropy=17.5060 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 192240] reward=-117900222.2 actor_loss=0.1842 critic_loss=74983118165.3333 entropy=17.4903 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 192240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-584254.3 mean_steps=13.8
|
|
[Episode 192250] reward=-114372300.4 actor_loss=0.3709 critic_loss=74936116292.2667 entropy=17.4745 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 192260] reward=-119739825.4 actor_loss=0.3331 critic_loss=74995180794.3111 entropy=17.4651 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 192260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-569061.5 mean_steps=12.8
|
|
[Episode 192270] reward=-115588395.7 actor_loss=0.2890 critic_loss=71267844915.2000 entropy=17.4723 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 192280] reward=-113883703.8 actor_loss=0.3473 critic_loss=70854660864.0000 entropy=17.4763 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 192280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445371.1 mean_steps=15.2
|
|
[Episode 192290] reward=-115807439.3 actor_loss=0.2819 critic_loss=75479871305.9556 entropy=17.4613 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 192300] reward=-115582855.5 actor_loss=0.3390 critic_loss=73839020532.6222 entropy=17.4754 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 192300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602743.5 mean_steps=12.8
|
|
[Episode 192310] reward=-120070960.3 actor_loss=0.2925 critic_loss=75522658486.0444 entropy=17.4776 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 192320] reward=-121490829.5 actor_loss=0.2441 critic_loss=77762513032.5333 entropy=17.4638 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 192320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-639544.3 mean_steps=13.1
|
|
[Episode 192330] reward=-116571252.9 actor_loss=0.2550 critic_loss=73741528365.1765 entropy=17.4584 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 192340] reward=-114618219.0 actor_loss=0.2514 critic_loss=73672393416.3478 entropy=17.4583 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 192340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-625455.8 mean_steps=12.2
|
|
[Episode 192350] reward=-118276203.1 actor_loss=0.3790 critic_loss=75554502763.7895 entropy=17.4481 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 192360] reward=-121542981.4 actor_loss=0.2795 critic_loss=77484048384.0000 entropy=17.4489 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 192360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-416943.6 mean_steps=16.1
|
|
[Episode 192370] reward=-124251862.2 actor_loss=0.2974 critic_loss=78482299172.5714 entropy=17.4463 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 192380] reward=-114912027.2 actor_loss=0.3596 critic_loss=72588196613.6889 entropy=17.4463 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 192380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-611714.9 mean_steps=13.8
|
|
[Episode 192390] reward=-116529513.6 actor_loss=0.2220 critic_loss=71347380082.7586 entropy=17.4429 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 192400] reward=-113937681.6 actor_loss=0.3667 critic_loss=76710034944.0000 entropy=17.4409 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 192400] success_rate=0.750 qp_infeasible_rate=0.250 mean_return=-151551.0 mean_steps=19.2
|
|
[Episode 192410] reward=-119922218.6 actor_loss=0.3541 critic_loss=76926319235.6572 entropy=17.4452 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 192420] reward=-119536125.1 actor_loss=0.2370 critic_loss=80164578918.4000 entropy=17.4471 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 192420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433018.5 mean_steps=15.5
|
|
[Episode 192430] reward=-120281240.1 actor_loss=0.3162 critic_loss=75317939827.6129 entropy=17.4539 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 192440] reward=-115843277.4 actor_loss=0.1804 critic_loss=74321264640.0000 entropy=17.4786 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 192440] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-670920.2 mean_steps=12.4
|
|
[Episode 192450] reward=-124090259.9 actor_loss=0.1627 critic_loss=83972123761.7778 entropy=17.4924 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 192460] reward=-122771401.1 actor_loss=0.2838 critic_loss=83336310077.7931 entropy=17.5013 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 192460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-460176.0 mean_steps=13.8
|
|
[Episode 192470] reward=-120685857.3 actor_loss=0.2519 critic_loss=79574653505.6410 entropy=17.5028 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 192480] reward=-115573734.4 actor_loss=0.3139 critic_loss=73306403560.7273 entropy=17.5152 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 192480] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-290821.7 mean_steps=17.2
|
|
[Episode 192490] reward=-115201075.8 actor_loss=0.3271 critic_loss=73836228061.8667 entropy=17.5067 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 192500] reward=-119881312.7 actor_loss=0.2011 critic_loss=78905825810.9630 entropy=17.4983 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 192500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-667713.4 mean_steps=12.1
|
|
[Episode 192510] reward=-121098111.9 actor_loss=0.2527 critic_loss=80277495808.0000 entropy=17.5050 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 192520] reward=-117061786.8 actor_loss=0.3585 critic_loss=80894061847.2727 entropy=17.5059 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 192520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-546754.4 mean_steps=12.4
|
|
[Episode 192530] reward=-118546969.5 actor_loss=0.2766 critic_loss=78395395537.4545 entropy=17.5171 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 192540] reward=-118067828.9 actor_loss=0.3045 critic_loss=80203207680.0000 entropy=17.5186 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 192540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500441.5 mean_steps=14.8
|
|
[Episode 192550] reward=-118401698.2 actor_loss=0.3182 critic_loss=75521239332.5714 entropy=17.5178 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 192560] reward=-119995302.8 actor_loss=0.3171 critic_loss=78536973107.2000 entropy=17.5151 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 192560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-472275.8 mean_steps=13.8
|
|
[Episode 192570] reward=-119862221.5 actor_loss=0.3034 critic_loss=72622378370.8445 entropy=17.5178 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 192580] reward=-116407642.1 actor_loss=0.2694 critic_loss=74166624529.0667 entropy=17.4958 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 192580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-398115.6 mean_steps=15.2
|
|
[Episode 192590] reward=-118103321.2 actor_loss=0.2801 critic_loss=80782756977.7778 entropy=17.4799 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 192600] reward=-126015748.7 actor_loss=0.1582 critic_loss=83920250197.3333 entropy=17.4971 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 192600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488665.1 mean_steps=14.8
|
|
[Episode 192610] reward=-120899555.4 actor_loss=0.3457 critic_loss=78079019690.6667 entropy=17.5019 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 192620] reward=-116728632.7 actor_loss=0.2534 critic_loss=74760022202.1818 entropy=17.5133 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 192620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441096.7 mean_steps=15.6
|
|
[Episode 192630] reward=-118705769.2 actor_loss=0.2307 critic_loss=74948489216.0000 entropy=17.5293 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 192640] reward=-121218120.3 actor_loss=0.3477 critic_loss=81247839573.3333 entropy=17.5317 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 192640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-509383.8 mean_steps=14.8
|
|
[Episode 192650] reward=-112625764.4 actor_loss=0.3187 critic_loss=68405763218.2857 entropy=17.5354 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 192660] reward=-128436881.0 actor_loss=0.3505 critic_loss=192530338611.2000 entropy=17.5484 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 192660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430801.9 mean_steps=15.2
|
|
[Episode 192670] reward=-121203091.3 actor_loss=0.3126 critic_loss=79077390612.7568 entropy=17.5471 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 192680] reward=-118734375.2 actor_loss=0.2973 critic_loss=78910629293.4194 entropy=17.5573 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 192680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-366859.9 mean_steps=14.8
|
|
[Episode 192690] reward=-117524728.0 actor_loss=0.2376 critic_loss=76748522837.3333 entropy=17.5691 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 192700] reward=-117079919.6 actor_loss=0.3185 critic_loss=76093789184.0000 entropy=17.5742 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 192700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501933.9 mean_steps=12.8
|
|
[Episode 192710] reward=-112607235.2 actor_loss=0.3561 critic_loss=77080935264.7111 entropy=17.5605 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 192720] reward=-123413394.7 actor_loss=0.2461 critic_loss=77942512571.7333 entropy=17.5561 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 192720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-464958.9 mean_steps=13.7
|
|
[Episode 192730] reward=-119147842.8 actor_loss=0.3255 critic_loss=80262545035.6364 entropy=17.5555 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 192740] reward=-113349859.3 actor_loss=0.2816 critic_loss=67169739662.2222 entropy=17.5516 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 192740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492131.3 mean_steps=14.1
|
|
[Episode 192750] reward=-118979849.7 actor_loss=0.3536 critic_loss=76735938808.2424 entropy=17.5548 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 192760] reward=-120496769.3 actor_loss=0.2912 critic_loss=83575467853.9130 entropy=17.5543 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 192760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-594488.7 mean_steps=13.8
|
|
[Episode 192770] reward=-115891068.5 actor_loss=0.2588 critic_loss=75302708277.8947 entropy=17.5366 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 192780] reward=-123017305.1 actor_loss=0.2620 critic_loss=81159294051.0968 entropy=17.5346 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 192780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427548.3 mean_steps=15.4
|
|
[Episode 192790] reward=-117189580.1 actor_loss=0.2590 critic_loss=75355268437.3333 entropy=17.5351 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 192800] reward=-114626474.1 actor_loss=0.3607 critic_loss=77098019157.3333 entropy=17.5405 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 192800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-587382.3 mean_steps=12.7
|
|
[Episode 192810] reward=-118982676.4 actor_loss=0.3261 critic_loss=71512628062.3158 entropy=17.5512 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 192820] reward=-114764091.8 actor_loss=0.3779 critic_loss=74516095906.9091 entropy=17.5530 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 192820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551769.8 mean_steps=13.6
|
|
[Episode 192830] reward=-119032383.9 actor_loss=0.3471 critic_loss=79360646090.1053 entropy=17.5660 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 192840] reward=-113648834.6 actor_loss=0.2128 critic_loss=70730169093.6889 entropy=17.5572 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 192840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479404.7 mean_steps=15.0
|
|
[Episode 192850] reward=-115945705.0 actor_loss=0.3255 critic_loss=75127774685.8667 entropy=17.5682 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 192860] reward=-114328029.9 actor_loss=0.2908 critic_loss=71349433794.5600 entropy=17.5636 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 192860] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-707239.9 mean_steps=9.6
|
|
[Episode 192870] reward=-118494622.0 actor_loss=0.2789 critic_loss=75059571580.7179 entropy=17.5612 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 192880] reward=-117838288.8 actor_loss=0.3383 critic_loss=74151024526.2222 entropy=17.5552 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 192880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392108.3 mean_steps=15.9
|
|
[Episode 192890] reward=-117015906.5 actor_loss=0.3737 critic_loss=76844630016.0000 entropy=17.5531 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 192900] reward=-118225589.4 actor_loss=0.2274 critic_loss=78858298514.2857 entropy=17.5461 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 192900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565189.0 mean_steps=13.2
|
|
[Episode 192910] reward=-119627275.7 actor_loss=0.2412 critic_loss=84107336448.0000 entropy=17.5395 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 192920] reward=-115900351.0 actor_loss=0.3699 critic_loss=73705596648.7273 entropy=17.5388 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 192920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-607091.6 mean_steps=13.0
|
|
[Episode 192930] reward=-118276810.3 actor_loss=0.3198 critic_loss=78101369323.5200 entropy=17.5288 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 192940] reward=-121484459.6 actor_loss=0.3248 critic_loss=78558333771.2941 entropy=17.5275 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 192940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541731.5 mean_steps=14.1
|
|
[Episode 192950] reward=-116881613.3 actor_loss=0.3447 critic_loss=71924967168.0000 entropy=17.5297 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 192960] reward=-115159534.0 actor_loss=0.2983 critic_loss=73944345804.8000 entropy=17.5299 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 192960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490064.0 mean_steps=13.9
|
|
[Episode 192970] reward=-122388630.6 actor_loss=0.2600 critic_loss=87410834031.3044 entropy=17.5321 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 192980] reward=-121525316.3 actor_loss=0.3265 critic_loss=110067637527.2727 entropy=17.5295 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 192980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624122.1 mean_steps=13.1
|
|
[Episode 192990] reward=-113632846.6 actor_loss=0.3548 critic_loss=75962513180.4444 entropy=17.5406 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 193000] reward=-118358211.3 actor_loss=0.2504 critic_loss=77498767193.9460 entropy=17.5467 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 193000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603290.5 mean_steps=12.8
|
|
[Episode 193010] reward=-121508542.2 actor_loss=0.3248 critic_loss=78750479018.6667 entropy=17.5387 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 193020] reward=-113635124.0 actor_loss=0.3416 critic_loss=78760552857.6000 entropy=17.5495 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 193020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-367039.2 mean_steps=14.7
|
|
[Episode 193030] reward=-114013829.8 actor_loss=0.3144 critic_loss=72023545540.9231 entropy=17.5501 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 193040] reward=-120013989.9 actor_loss=0.3156 critic_loss=102686874337.2800 entropy=17.5620 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 193040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402124.1 mean_steps=15.2
|
|
[Episode 193050] reward=-168771013.6 actor_loss=0.2140 critic_loss=6663815636946.4893 entropy=17.5682 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 193060] reward=-116458522.3 actor_loss=0.2582 critic_loss=76376520704.0000 entropy=17.5887 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 193060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536148.2 mean_steps=13.2
|
|
[Episode 193070] reward=-3531405690.8 actor_loss=2.0675 critic_loss=19777189631929004.0000 entropy=17.5939 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 193080] reward=-128179686.0 actor_loss=0.2510 critic_loss=612217655719.7241 entropy=17.5953 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 193080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434137.6 mean_steps=15.2
|
|
[Episode 193090] reward=-117377948.8 actor_loss=0.3139 critic_loss=99781919201.8824 entropy=17.5927 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 193100] reward=-110157676.0 actor_loss=0.4366 critic_loss=75371605105.7778 entropy=17.5943 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 193100] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-388150.1 mean_steps=16.6
|
|
[Episode 193110] reward=-120086896.8 actor_loss=0.2679 critic_loss=75430577834.6667 entropy=17.5919 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 193120] reward=-114094322.8 actor_loss=0.2841 critic_loss=69358900838.4000 entropy=17.5884 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 193120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450341.9 mean_steps=14.4
|
|
[Episode 193130] reward=-162653733.5 actor_loss=0.3649 critic_loss=6771875691681.6846 entropy=17.5954 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 193140] reward=-122071893.9 actor_loss=0.2424 critic_loss=78857312431.5429 entropy=17.6097 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 193140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529029.5 mean_steps=14.3
|
|
[Episode 193150] reward=-110841662.0 actor_loss=0.3545 critic_loss=76932955249.7778 entropy=17.6083 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 193160] reward=-116068117.8 actor_loss=0.3409 critic_loss=80334711685.1200 entropy=17.5978 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 193160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449715.1 mean_steps=15.7
|
|
[Episode 193170] reward=-118431351.8 actor_loss=0.1804 critic_loss=79827540195.5556 entropy=17.5859 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 193180] reward=-118371208.7 actor_loss=0.2598 critic_loss=80684511342.7027 entropy=17.5979 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 193180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424863.4 mean_steps=15.3
|
|
[Episode 193190] reward=-113499517.5 actor_loss=0.3293 critic_loss=74426095047.1111 entropy=17.5845 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 193200] reward=-117809601.3 actor_loss=0.2362 critic_loss=74729177816.1778 entropy=17.5877 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 193200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453846.1 mean_steps=14.7
|
|
[Episode 193210] reward=-120311210.1 actor_loss=0.2549 critic_loss=79629215694.0488 entropy=17.5793 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 193220] reward=-115954810.5 actor_loss=0.3613 critic_loss=82478168109.5111 entropy=17.5694 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 193220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-608512.4 mean_steps=12.6
|
|
[Episode 193230] reward=-118151844.4 actor_loss=0.2596 critic_loss=76249882532.9778 entropy=17.5777 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 193240] reward=-113033889.1 actor_loss=0.3662 critic_loss=78266468176.4571 entropy=17.5730 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 193240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-618955.1 mean_steps=13.8
|
|
[Episode 193250] reward=-124653912.7 actor_loss=0.3298 critic_loss=301163979183.1579 entropy=17.5651 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 193260] reward=-118935089.7 actor_loss=0.2879 critic_loss=76215929108.7568 entropy=17.5690 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 193260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-472118.0 mean_steps=15.2
|
|
[Episode 193270] reward=-113725026.7 actor_loss=0.3253 critic_loss=78236797486.5455 entropy=17.5710 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 193280] reward=-114732493.7 actor_loss=0.2367 critic_loss=76733656678.4000 entropy=17.5793 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 193280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538723.5 mean_steps=14.6
|
|
[Episode 193290] reward=-111434222.0 actor_loss=0.4566 critic_loss=70117818758.0952 entropy=17.5619 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 193300] reward=-119012162.3 actor_loss=0.3149 critic_loss=80765761536.0000 entropy=17.5478 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 193300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535052.0 mean_steps=13.1
|
|
[Episode 193310] reward=-123102225.2 actor_loss=0.2504 critic_loss=80999909189.8182 entropy=17.5392 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 193320] reward=-5238372417.5 actor_loss=0.1736 critic_loss=16695516058353664.0000 entropy=17.5391 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 193320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-604927.2 mean_steps=12.8
|
|
[Episode 193330] reward=-119351379.3 actor_loss=0.2697 critic_loss=76334038552.3810 entropy=17.5480 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 193340] reward=-115511884.7 actor_loss=0.3489 critic_loss=74188191841.5238 entropy=17.5450 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 193340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514516.8 mean_steps=13.8
|
|
[Episode 193350] reward=-163891701.5 actor_loss=0.3374 critic_loss=8054915302104.1777 entropy=17.5647 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 193360] reward=-114379778.8 actor_loss=0.2904 critic_loss=72212933290.6667 entropy=17.5641 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 193360] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-369416.0 mean_steps=16.6
|
|
[Episode 193370] reward=-116308948.7 actor_loss=0.2695 critic_loss=70653940622.2222 entropy=17.5575 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 193380] reward=-137903557.7 actor_loss=0.2780 critic_loss=1505065128053.0286 entropy=17.5533 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 193380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435071.3 mean_steps=14.2
|
|
[Episode 193390] reward=-120495574.7 actor_loss=0.2775 critic_loss=79380215398.4000 entropy=17.5736 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 193400] reward=-113813082.8 actor_loss=0.2970 critic_loss=75716288694.0444 entropy=17.5787 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 193400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-388208.9 mean_steps=14.9
|
|
[Episode 193410] reward=-116341176.7 actor_loss=0.3198 critic_loss=72562942498.1333 entropy=17.5772 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 193420] reward=-119578351.3 actor_loss=0.3418 critic_loss=97625132409.2632 entropy=17.5732 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 193420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465099.0 mean_steps=15.2
|
|
[Episode 193430] reward=-120209823.3 actor_loss=0.2648 critic_loss=73759545844.6222 entropy=17.5525 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 193440] reward=-116000533.7 actor_loss=0.2616 critic_loss=95378881285.6889 entropy=17.5667 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 193440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-442830.9 mean_steps=13.6
|
|
[Episode 193450] reward=-117376507.0 actor_loss=0.2083 critic_loss=74812592376.2424 entropy=17.5696 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 193460] reward=-205608370.9 actor_loss=1.1819 critic_loss=23529775680026.9492 entropy=17.5754 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 193460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-565399.6 mean_steps=12.6
|
|
[Episode 193470] reward=-175875333.0 actor_loss=0.3584 critic_loss=12603396872262.6211 entropy=17.5722 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 193480] reward=-117532337.3 actor_loss=0.2215 critic_loss=79334598246.4000 entropy=17.5755 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 193480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-647707.0 mean_steps=13.2
|
|
[Episode 193490] reward=-113519888.1 actor_loss=0.2807 critic_loss=73867953421.4737 entropy=17.5783 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 193500] reward=-123083084.5 actor_loss=0.2105 critic_loss=103335151977.4118 entropy=17.5935 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 193500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-584194.7 mean_steps=13.7
|
|
[Episode 193510] reward=-115487348.0 actor_loss=0.3303 critic_loss=78672085473.8824 entropy=17.5799 approx_kl=0.0043 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 193520] reward=-242998035.1 actor_loss=2.6176 critic_loss=47531829144234.6641 entropy=17.5775 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 193520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581987.5 mean_steps=13.4
|
|
[Episode 193530] reward=-145142327.9 actor_loss=0.3190 critic_loss=4174897569974.0444 entropy=17.5870 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 193540] reward=-124295194.7 actor_loss=0.4491 critic_loss=393951218892.8000 entropy=17.6129 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 193540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448974.4 mean_steps=15.3
|
|
[Episode 193550] reward=-120141548.6 actor_loss=0.3296 critic_loss=89330863995.8710 entropy=17.6206 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 193560] reward=-120425193.2 actor_loss=0.2716 critic_loss=76846180281.3793 entropy=17.6143 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 193560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579175.4 mean_steps=13.0
|
|
[Episode 193570] reward=-114173128.8 actor_loss=0.2787 critic_loss=89036596656.3556 entropy=17.6150 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 193580] reward=-118894021.6 actor_loss=0.3341 critic_loss=81289412243.9111 entropy=17.6297 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 193580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533910.4 mean_steps=14.4
|
|
[Episode 193590] reward=-119769144.0 actor_loss=0.2341 critic_loss=90382697176.1778 entropy=17.6270 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 193600] reward=-119784041.2 actor_loss=0.2347 critic_loss=76294921758.1176 entropy=17.6474 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 193600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401066.4 mean_steps=16.1
|
|
[Episode 193610] reward=-145181053.5 actor_loss=0.2623 critic_loss=3102374851015.1113 entropy=17.6452 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 193620] reward=-122456253.3 actor_loss=0.2880 critic_loss=82456709120.0000 entropy=17.6495 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 193620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-520641.0 mean_steps=13.1
|
|
[Episode 193630] reward=-123333540.0 actor_loss=0.2680 critic_loss=86487010931.6129 entropy=17.6429 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 193640] reward=-118812893.9 actor_loss=0.3537 critic_loss=82497056768.0000 entropy=17.6402 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 193640] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-421629.8 mean_steps=16.1
|
|
[Episode 193650] reward=-122197867.0 actor_loss=0.3786 critic_loss=317057805880.8889 entropy=17.6400 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 193660] reward=-121089372.9 actor_loss=0.3336 critic_loss=87963582160.5926 entropy=17.6469 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 193660] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-347655.8 mean_steps=16.7
|
|
[Episode 193670] reward=-113026661.6 actor_loss=0.3401 critic_loss=75757838982.7368 entropy=17.6432 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 193680] reward=-121513166.7 actor_loss=0.3427 critic_loss=80730311728.7619 entropy=17.6465 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 193680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-620020.9 mean_steps=12.8
|
|
[Episode 193690] reward=-119923669.2 actor_loss=0.3336 critic_loss=77601372205.5111 entropy=17.6388 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 193700] reward=-118076665.4 actor_loss=0.2931 critic_loss=71910785297.0667 entropy=17.6315 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 193700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488682.3 mean_steps=14.8
|
|
[Episode 193710] reward=-121565131.0 actor_loss=0.2696 critic_loss=80403499508.6222 entropy=17.6219 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 193720] reward=-121636366.3 actor_loss=0.2943 critic_loss=76909204184.1778 entropy=17.6310 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 193720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390877.7 mean_steps=15.1
|
|
[Episode 193730] reward=-119151837.9 actor_loss=0.2868 critic_loss=74006012086.0444 entropy=17.6126 approx_kl=0.0012 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 193740] reward=-426231243.5 actor_loss=2.4533 critic_loss=284644413565898.1250 entropy=17.6248 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 193740] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-718298.4 mean_steps=10.7
|
|
[Episode 193750] reward=-801645420.5 actor_loss=2.6312 critic_loss=747400556917555.2500 entropy=17.6288 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1126 front_blocked=0
|
|
[Episode 193760] reward=-1691256401.4 actor_loss=0.3642 critic_loss=3848463730710937.5000 entropy=17.6452 approx_kl=0.0045 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 193760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-507828.1 mean_steps=12.2
|
|
[Episode 193770] reward=-2657242176.0 actor_loss=1.0102 critic_loss=15217542339680302.0000 entropy=17.6211 approx_kl=-0.0003 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 193780] reward=-121363370.8 actor_loss=0.4235 critic_loss=124635292558.2222 entropy=17.6171 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 193780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517600.3 mean_steps=14.0
|
|
[Episode 193790] reward=-180756755.3 actor_loss=0.2823 critic_loss=12256949136406.7559 entropy=17.6297 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 193800] reward=-117627675.5 actor_loss=0.2424 critic_loss=84826015516.4444 entropy=17.6558 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 193800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-351375.4 mean_steps=15.6
|
|
[Episode 193810] reward=-143591428.6 actor_loss=0.3286 critic_loss=2126580827204.2666 entropy=17.6552 approx_kl=0.0042 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 193820] reward=-125084108.3 actor_loss=0.2989 critic_loss=140865713720.8889 entropy=17.6521 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 193820] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-1134100.1 mean_steps=19.6
|
|
[Episode 193830] reward=-118055442.6 actor_loss=0.3838 critic_loss=80919267464.5333 entropy=17.6768 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 193840] reward=-119565948.2 actor_loss=0.1798 critic_loss=88709709277.8667 entropy=17.6764 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 193840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483078.7 mean_steps=14.2
|
|
[Episode 193850] reward=-115870978.7 actor_loss=0.2285 critic_loss=80084313744.4103 entropy=17.6822 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 193860] reward=-128823836.6 actor_loss=0.3494 critic_loss=520868542691.5555 entropy=17.6944 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 193860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520808.7 mean_steps=13.6
|
|
[Episode 193870] reward=-120069043.2 actor_loss=0.2089 critic_loss=77935475825.7778 entropy=17.7140 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 193880] reward=-114579331.2 actor_loss=0.3475 critic_loss=75776279256.1778 entropy=17.6976 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 193880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468820.3 mean_steps=14.6
|
|
[Episode 193890] reward=-111359856.2 actor_loss=0.2680 critic_loss=74094689575.8222 entropy=17.7049 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 193900] reward=-132209976.4 actor_loss=0.3428 critic_loss=768481076370.2858 entropy=17.7042 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 193900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-549468.1 mean_steps=11.4
|
|
[Episode 193910] reward=-122324962.9 actor_loss=0.2780 critic_loss=96621131825.9512 entropy=17.7027 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 193920] reward=-118231085.1 actor_loss=0.2942 critic_loss=79529764272.3556 entropy=17.6955 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 193920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-485329.4 mean_steps=12.9
|
|
[Episode 193930] reward=-2894236451.8 actor_loss=0.1934 critic_loss=8962183022475127.0000 entropy=17.7043 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1022 front_blocked=0
|
|
[Episode 193940] reward=-124508566.4 actor_loss=0.3563 critic_loss=406735809006.3448 entropy=17.7234 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 193940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483204.4 mean_steps=14.8
|
|
[Episode 193950] reward=-120241297.1 actor_loss=0.1932 critic_loss=102519385760.9143 entropy=17.7426 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 193960] reward=-120385592.2 actor_loss=0.2823 critic_loss=84489557581.5758 entropy=17.7369 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 193960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431582.6 mean_steps=14.9
|
|
[Episode 193970] reward=-115125546.8 actor_loss=0.1510 critic_loss=70923564282.3111 entropy=17.7338 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 193980] reward=-129871695.3 actor_loss=0.4152 critic_loss=1064315238107.4286 entropy=17.7171 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 193980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519915.6 mean_steps=13.9
|
|
[Episode 193990] reward=-124133915.7 actor_loss=0.2239 critic_loss=84646577402.3111 entropy=17.7027 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 194000] reward=-114374393.7 actor_loss=0.3180 critic_loss=81158354176.0000 entropy=17.6942 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 194000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581173.3 mean_steps=13.7
|
|
[Episode 194010] reward=-119479424.8 actor_loss=0.3223 critic_loss=75071112078.2222 entropy=17.6920 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 194020] reward=-115924033.2 actor_loss=0.2222 critic_loss=77502789203.3488 entropy=17.7004 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 194020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487313.0 mean_steps=14.0
|
|
[Episode 194030] reward=-121002425.1 actor_loss=0.3012 critic_loss=76966085352.7273 entropy=17.7040 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 194040] reward=-121146227.6 actor_loss=0.3363 critic_loss=81565606518.1538 entropy=17.7094 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 194040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541893.2 mean_steps=13.9
|
|
[Episode 194050] reward=-117148013.1 actor_loss=0.3231 critic_loss=82687733387.6364 entropy=17.7041 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 194060] reward=-141287196.2 actor_loss=0.3798 critic_loss=2255923559355.7334 entropy=17.6972 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 194060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581119.8 mean_steps=13.6
|
|
[Episode 194070] reward=-119199980.5 actor_loss=0.2931 critic_loss=79503680451.7647 entropy=17.6905 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 194080] reward=-119285828.8 actor_loss=0.3011 critic_loss=83739429728.7111 entropy=17.6840 approx_kl=0.0107 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 194080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-388426.6 mean_steps=15.1
|
|
[Episode 194090] reward=-117180969.2 actor_loss=0.3955 critic_loss=78843314176.0000 entropy=17.6794 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 194100] reward=-118802942.8 actor_loss=0.2470 critic_loss=111649915904.0000 entropy=17.6823 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 194100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-447127.7 mean_steps=13.8
|
|
[Episode 194110] reward=-117253885.9 actor_loss=0.1979 critic_loss=76389337600.0000 entropy=17.6823 approx_kl=0.0115 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 194120] reward=-117253240.4 actor_loss=0.2374 critic_loss=88301374873.6000 entropy=17.6810 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 194120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484854.6 mean_steps=13.7
|
|
[Episode 194130] reward=-156935742.5 actor_loss=0.2073 critic_loss=5115837181619.8916 entropy=17.6892 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 194140] reward=-119120630.5 actor_loss=0.1927 critic_loss=76866980189.6585 entropy=17.6947 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 194140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599300.9 mean_steps=12.9
|
|
[Episode 194150] reward=-123993320.9 actor_loss=0.2983 critic_loss=172637480401.4546 entropy=17.6965 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 194160] reward=-138807750.7 actor_loss=0.2519 critic_loss=1090099635814.4000 entropy=17.6788 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 194160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-645223.8 mean_steps=12.2
|
|
[Episode 194170] reward=-124398993.8 actor_loss=0.2200 critic_loss=100354967844.5714 entropy=17.6739 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 194180] reward=-115277980.8 actor_loss=0.2442 critic_loss=76634888055.4667 entropy=17.6664 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 194180] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-650862.9 mean_steps=11.3
|
|
[Episode 194190] reward=-120628277.0 actor_loss=0.3871 critic_loss=76896634002.2857 entropy=17.6626 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 194200] reward=-118038957.4 actor_loss=0.3411 critic_loss=78311872102.4000 entropy=17.6579 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 194200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392089.1 mean_steps=16.4
|
|
[Episode 194210] reward=-119888297.0 actor_loss=0.3330 critic_loss=78175053414.4000 entropy=17.6625 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 194220] reward=-124352628.3 actor_loss=0.3275 critic_loss=83454179009.4222 entropy=17.6732 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 194220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454061.2 mean_steps=14.4
|
|
[Episode 194230] reward=-119485265.4 actor_loss=0.3306 critic_loss=86915802180.2667 entropy=17.6794 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 194240] reward=-117219783.3 actor_loss=0.3025 critic_loss=74217396815.6444 entropy=17.6622 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 194240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-338768.0 mean_steps=14.7
|
|
[Episode 194250] reward=-118060351.3 actor_loss=0.2490 critic_loss=77975792480.7111 entropy=17.6537 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 194260] reward=-120361732.4 actor_loss=0.2925 critic_loss=82318581339.8974 entropy=17.6639 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 194260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-496078.7 mean_steps=14.5
|
|
[Episode 194270] reward=-122896218.7 actor_loss=0.2633 critic_loss=80654380236.8000 entropy=17.6705 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 194280] reward=-116313002.8 actor_loss=0.2784 critic_loss=75035778252.8000 entropy=17.6649 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 194280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-603316.0 mean_steps=13.4
|
|
[Episode 194290] reward=-704931854.9 actor_loss=0.3483 critic_loss=995120935460317.8750 entropy=17.6694 approx_kl=0.0003 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 194300] reward=-121711653.2 actor_loss=0.2917 critic_loss=159421474560.0000 entropy=17.6964 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 194300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-514066.6 mean_steps=14.7
|
|
[Episode 194310] reward=-113799944.7 actor_loss=0.2617 critic_loss=113010975630.2222 entropy=17.7133 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 194320] reward=-122752835.5 actor_loss=0.2154 critic_loss=85661368137.9556 entropy=17.7136 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 194320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555988.8 mean_steps=12.6
|
|
[Episode 194330] reward=-118205930.1 actor_loss=0.2455 critic_loss=80411902862.2222 entropy=17.7244 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 194340] reward=-123685072.4 actor_loss=0.2470 critic_loss=82799929223.5294 entropy=17.7311 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 194340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-656695.5 mean_steps=12.2
|
|
[Episode 194350] reward=-115915127.8 actor_loss=0.3970 critic_loss=81412822853.8182 entropy=17.7199 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 194360] reward=-124959445.0 actor_loss=0.1486 critic_loss=119054829977.6000 entropy=17.7119 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 194360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424689.7 mean_steps=14.1
|
|
[Episode 194370] reward=-125431751.8 actor_loss=0.2587 critic_loss=186476999111.1111 entropy=17.7406 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 194380] reward=-224397196.4 actor_loss=0.3035 critic_loss=32923926862961.7773 entropy=17.7544 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 194380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532880.9 mean_steps=13.2
|
|
[Episode 194390] reward=-122956791.5 actor_loss=0.2487 critic_loss=88832029127.1111 entropy=17.7502 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 194400] reward=-121676657.0 actor_loss=0.2712 critic_loss=109768774269.1555 entropy=17.7346 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 194400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454536.7 mean_steps=14.5
|
|
[Episode 194410] reward=-123913325.3 actor_loss=0.2706 critic_loss=89536453381.6889 entropy=17.7154 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 194420] reward=-122942566.1 actor_loss=0.2137 critic_loss=80322114901.3333 entropy=17.7116 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 194420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440404.9 mean_steps=14.4
|
|
[Episode 194430] reward=-117865686.0 actor_loss=0.3256 critic_loss=73161049247.2889 entropy=17.7054 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 194440] reward=-115945686.3 actor_loss=0.3208 critic_loss=81342427044.9778 entropy=17.7170 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 194440] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-349812.0 mean_steps=17.9
|
|
[Episode 194450] reward=-121678348.0 actor_loss=0.1457 critic_loss=82514052670.4390 entropy=17.7080 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 194460] reward=-122485497.3 actor_loss=0.3492 critic_loss=156992180387.8400 entropy=17.7044 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 194460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585617.2 mean_steps=12.8
|
|
[Episode 194470] reward=-117345697.4 actor_loss=0.2335 critic_loss=78179591145.2444 entropy=17.7151 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 194480] reward=-122301822.3 actor_loss=0.3639 critic_loss=111380520517.1892 entropy=17.7156 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 194480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-579719.6 mean_steps=13.7
|
|
[Episode 194490] reward=-122782477.0 actor_loss=0.2420 critic_loss=77936384449.5610 entropy=17.6942 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 194500] reward=-117786874.6 actor_loss=0.3288 critic_loss=80551212009.2444 entropy=17.7009 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 194500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545979.4 mean_steps=13.2
|
|
[Episode 194510] reward=-122925605.7 actor_loss=0.3467 critic_loss=180636876253.8667 entropy=17.6913 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 194520] reward=-123802847.0 actor_loss=0.4670 critic_loss=202087123293.6585 entropy=17.6855 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Eval 194520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546183.8 mean_steps=13.2
|
|
[Episode 194530] reward=-122987082.3 actor_loss=0.2429 critic_loss=77215643306.6667 entropy=17.6812 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 194540] reward=-121748800.1 actor_loss=0.2612 critic_loss=80770837665.6842 entropy=17.6653 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 194540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430568.6 mean_steps=14.4
|
|
[Episode 194550] reward=-121582076.5 actor_loss=0.3222 critic_loss=85106217472.0000 entropy=17.6369 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 194560] reward=-121940562.0 actor_loss=0.2476 critic_loss=76019849992.8276 entropy=17.6308 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 194560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447000.3 mean_steps=15.8
|
|
[Episode 194570] reward=-123748932.2 actor_loss=0.3233 critic_loss=126641564973.1765 entropy=17.6338 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 194580] reward=-116449661.9 actor_loss=0.4174 critic_loss=74298625774.9333 entropy=17.6474 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 194580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501229.9 mean_steps=12.8
|
|
[Episode 194590] reward=-117561248.2 actor_loss=0.2431 critic_loss=76550910057.0256 entropy=17.6610 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 194600] reward=-128123688.6 actor_loss=0.2428 critic_loss=99383599616.0000 entropy=17.6693 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 194600] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-630676.5 mean_steps=12.3
|
|
[Episode 194610] reward=-123462785.3 actor_loss=0.2493 critic_loss=80882444788.6222 entropy=17.6637 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 194620] reward=-114906652.8 actor_loss=0.4003 critic_loss=75196070934.7556 entropy=17.6765 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 194620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-454108.3 mean_steps=15.4
|
|
[Episode 194630] reward=-120495865.8 actor_loss=0.1895 critic_loss=77212039001.9460 entropy=17.6752 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 194640] reward=-292168658.4 actor_loss=0.2813 critic_loss=52875078379578.5156 entropy=17.6792 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 194640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547033.5 mean_steps=13.2
|
|
[Episode 194650] reward=-137818185.4 actor_loss=0.2048 critic_loss=1147637320908.8000 entropy=17.6675 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 194660] reward=-118883042.7 actor_loss=0.3201 critic_loss=79526365593.6000 entropy=17.6685 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 194660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499757.3 mean_steps=14.1
|
|
[Episode 194670] reward=-123110624.1 actor_loss=0.2092 critic_loss=116903545675.2941 entropy=17.6704 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 194680] reward=-116066176.1 actor_loss=0.3040 critic_loss=75955493637.6889 entropy=17.6719 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 194680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-496491.8 mean_steps=14.6
|
|
[Episode 194690] reward=-121590908.1 actor_loss=0.2673 critic_loss=116928921903.4074 entropy=17.6615 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 194700] reward=-126591953.0 actor_loss=0.3397 critic_loss=300601206374.4000 entropy=17.6482 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 194700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-375611.5 mean_steps=16.8
|
|
[Episode 194710] reward=-114391146.5 actor_loss=0.2508 critic_loss=73669999365.6889 entropy=17.6324 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 194720] reward=-117629843.4 actor_loss=0.2656 critic_loss=74749303102.5778 entropy=17.6185 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 194720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549241.2 mean_steps=13.5
|
|
[Episode 194730] reward=-123824360.4 actor_loss=0.2887 critic_loss=80446363693.5111 entropy=17.6166 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 194740] reward=-140184777.2 actor_loss=0.3149 critic_loss=1696334442951.1111 entropy=17.6157 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 194740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471389.9 mean_steps=14.8
|
|
[Episode 194750] reward=-113238458.2 actor_loss=0.3126 critic_loss=69779475296.7111 entropy=17.6410 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 194760] reward=-122163461.5 actor_loss=0.2956 critic_loss=76525551518.4762 entropy=17.6334 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 194760] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-276051.6 mean_steps=17.5
|
|
[Episode 194770] reward=-120430491.3 actor_loss=0.3609 critic_loss=74699214392.8889 entropy=17.6186 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 194780] reward=-120264499.9 actor_loss=0.2259 critic_loss=76997730474.6667 entropy=17.6081 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 194780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-530579.4 mean_steps=15.4
|
|
[Episode 194790] reward=-119574283.8 actor_loss=0.2277 critic_loss=75383587544.1778 entropy=17.6007 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 194800] reward=-120156339.5 actor_loss=0.3334 critic_loss=79288570171.0769 entropy=17.6057 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 194800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-536860.8 mean_steps=12.6
|
|
[Episode 194810] reward=-1376413476.0 actor_loss=0.3141 critic_loss=3209592266747631.0000 entropy=17.5900 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 194820] reward=-117459904.0 actor_loss=0.3721 critic_loss=74282959503.3600 entropy=17.5797 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 194820] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-257914.5 mean_steps=17.9
|
|
[Episode 194830] reward=-120272839.0 actor_loss=0.3059 critic_loss=76320663552.0000 entropy=17.5808 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 194840] reward=-126270066.4 actor_loss=0.3423 critic_loss=862106691426.4615 entropy=17.5896 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 194840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-502049.9 mean_steps=16.0
|
|
[Episode 194850] reward=-375650305.9 actor_loss=0.3487 critic_loss=129967374916461.7188 entropy=17.5984 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 194860] reward=-125315310.7 actor_loss=0.3249 critic_loss=135188253354.6667 entropy=17.5899 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 194860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457367.0 mean_steps=14.6
|
|
[Episode 194870] reward=-31301313686.9 actor_loss=0.3366 critic_loss=1551500797975904512.0000 entropy=17.6076 approx_kl=-0.0003 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 194880] reward=-126378725.8 actor_loss=0.2915 critic_loss=275725638656.0000 entropy=17.6169 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 194880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-428474.8 mean_steps=14.3
|
|
[Episode 194890] reward=-125863723.4 actor_loss=0.2630 critic_loss=257641693184.0000 entropy=17.6248 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 194900] reward=-117584500.6 actor_loss=0.3752 critic_loss=102476339926.7097 entropy=17.6292 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 194900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-424680.6 mean_steps=16.3
|
|
[Episode 194910] reward=-120496530.8 actor_loss=0.2703 critic_loss=77811626569.1429 entropy=17.6476 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 194920] reward=-121648247.9 actor_loss=0.3196 critic_loss=142613132341.8947 entropy=17.6654 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 194920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398577.8 mean_steps=15.8
|
|
[Episode 194930] reward=-124001483.6 actor_loss=0.2455 critic_loss=78513733632.0000 entropy=17.6883 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 194940] reward=-229707433.9 actor_loss=0.2915 critic_loss=37020435375718.3984 entropy=17.6809 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 194940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435861.4 mean_steps=14.6
|
|
[Episode 194950] reward=-200213468.9 actor_loss=0.2126 critic_loss=20894631953112.1797 entropy=17.7069 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 194960] reward=-119799623.6 actor_loss=0.3515 critic_loss=77265708268.3077 entropy=17.7029 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 194960] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-765033.3 mean_steps=10.8
|
|
[Episode 194970] reward=-121857191.3 actor_loss=0.2802 critic_loss=86000830621.5385 entropy=17.6965 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 194980] reward=-122300715.1 actor_loss=0.3223 critic_loss=78493956710.4000 entropy=17.7065 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 194980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-346256.6 mean_steps=16.1
|
|
[Episode 194990] reward=-176727277.4 actor_loss=0.3302 critic_loss=12393202301883.7324 entropy=17.6946 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 195000] reward=-115803159.2 actor_loss=0.3548 critic_loss=72843811596.1905 entropy=17.7133 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 195000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507228.7 mean_steps=13.9
|
|
[Episode 195010] reward=-144322304.7 actor_loss=0.2248 critic_loss=2270895823621.6890 entropy=17.7178 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 195020] reward=-119242811.4 actor_loss=0.2682 critic_loss=76292774465.6410 entropy=17.7255 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 195020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560717.9 mean_steps=13.1
|
|
[Episode 195030] reward=-120995620.9 actor_loss=0.2522 critic_loss=79568194036.6222 entropy=17.7206 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 195040] reward=-112945628.9 actor_loss=0.3264 critic_loss=71506807156.3636 entropy=17.7112 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 195040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513211.2 mean_steps=13.0
|
|
[Episode 195050] reward=-116913277.6 actor_loss=0.2771 critic_loss=75780971087.6444 entropy=17.7096 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 195060] reward=-117059576.2 actor_loss=0.3448 critic_loss=75605641122.9091 entropy=17.7146 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 195060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526405.3 mean_steps=14.0
|
|
[Episode 195070] reward=-116977273.5 actor_loss=0.2907 critic_loss=79189062912.0000 entropy=17.7134 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 195080] reward=-162211205.6 actor_loss=0.1977 critic_loss=6326990656125.1553 entropy=17.7309 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 195080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533720.2 mean_steps=14.7
|
|
[Episode 195090] reward=-316114025.2 actor_loss=0.3753 critic_loss=125124840434346.6719 entropy=17.7436 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 195100] reward=-35258502274.7 actor_loss=1.5431 critic_loss=1939636349606542336.0000 entropy=17.7646 approx_kl=-0.0029 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 195100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459842.6 mean_steps=15.1
|
|
[Episode 195110] reward=-630040055.9 actor_loss=0.2260 critic_loss=734187296391168.0000 entropy=17.7610 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 195120] reward=-119784892.9 actor_loss=0.3271 critic_loss=77317023698.4889 entropy=17.7593 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 195120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434030.5 mean_steps=15.3
|
|
[Episode 195130] reward=-121233453.8 actor_loss=0.3272 critic_loss=79784537292.8000 entropy=17.7511 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 195140] reward=-122912632.1 actor_loss=0.3136 critic_loss=81537786718.3158 entropy=17.7517 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 195140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597966.4 mean_steps=12.5
|
|
[Episode 195150] reward=-118674985.1 actor_loss=0.3128 critic_loss=99366677861.2093 entropy=17.7500 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 195160] reward=-188846785.4 actor_loss=0.5261 critic_loss=21291008174762.6680 entropy=17.7613 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 195160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-411340.1 mean_steps=15.9
|
|
[Episode 195170] reward=-118585511.1 actor_loss=0.2994 critic_loss=78655064200.5333 entropy=17.7767 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 195180] reward=-119392975.2 actor_loss=0.2659 critic_loss=81343622166.7556 entropy=17.7971 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 195180] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-704709.9 mean_steps=11.8
|
|
[Episode 195190] reward=-118374299.1 actor_loss=0.3330 critic_loss=81097704704.0000 entropy=17.7858 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 195200] reward=-375007652.1 actor_loss=0.3379 critic_loss=208831654390078.5625 entropy=17.7802 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 195200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-456405.6 mean_steps=16.6
|
|
[Episode 195210] reward=-7868161389.7 actor_loss=0.2552 critic_loss=72982658858090496.0000 entropy=17.7744 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1009 front_blocked=0
|
|
[Episode 195220] reward=-120464778.2 actor_loss=0.3320 critic_loss=82063376725.3333 entropy=17.7973 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 195220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501084.1 mean_steps=15.1
|
|
[Episode 195230] reward=-119242937.6 actor_loss=0.3403 critic_loss=82823447552.0000 entropy=17.8145 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 195240] reward=-116686506.3 actor_loss=0.5662 critic_loss=78378904893.7931 entropy=17.8054 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 195240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476379.5 mean_steps=14.8
|
|
[Episode 195250] reward=-116174156.0 actor_loss=0.2638 critic_loss=77777153228.8000 entropy=17.8141 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 195260] reward=-117324031.9 actor_loss=0.2889 critic_loss=77892727772.6897 entropy=17.8128 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 195260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512733.0 mean_steps=13.8
|
|
[Episode 195270] reward=-127046796.7 actor_loss=0.3237 critic_loss=408552298023.3846 entropy=17.8442 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 195280] reward=-119296783.2 actor_loss=0.2981 critic_loss=80032321080.8889 entropy=17.8443 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 195280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-453639.8 mean_steps=13.5
|
|
[Episode 195290] reward=-117472534.9 actor_loss=0.3281 critic_loss=75934607049.6970 entropy=17.8287 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 195300] reward=-121074545.8 actor_loss=0.2522 critic_loss=81221862120.7273 entropy=17.8219 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 195300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-405140.3 mean_steps=14.3
|
|
[Episode 195310] reward=-114990011.9 actor_loss=0.2872 critic_loss=74677456010.3784 entropy=17.8344 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 195320] reward=-328952076.5 actor_loss=0.3572 critic_loss=133311516073000.9531 entropy=17.8300 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 195320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-580684.7 mean_steps=13.3
|
|
[Episode 195330] reward=-118216664.0 actor_loss=0.2703 critic_loss=79765562800.3556 entropy=17.8280 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 195340] reward=-121187967.2 actor_loss=0.3345 critic_loss=81910094258.4242 entropy=17.8377 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 195340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-354222.1 mean_steps=16.1
|
|
[Episode 195350] reward=-118978501.3 actor_loss=0.3950 critic_loss=75533145047.0400 entropy=17.8435 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 195360] reward=-123019247.1 actor_loss=0.2496 critic_loss=82678956347.0769 entropy=17.8261 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 195360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-618177.4 mean_steps=13.7
|
|
[Episode 195370] reward=-120345389.8 actor_loss=0.3375 critic_loss=80277589280.8205 entropy=17.8171 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 195380] reward=-112722826.5 actor_loss=0.3478 critic_loss=71865733120.0000 entropy=17.7876 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 195380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483182.7 mean_steps=13.6
|
|
[Episode 195390] reward=-118880256.9 actor_loss=0.3047 critic_loss=78187222630.4000 entropy=17.7674 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 195400] reward=-116240662.9 actor_loss=0.2869 critic_loss=79647318835.2000 entropy=17.7544 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 195400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556734.7 mean_steps=13.6
|
|
[Episode 195410] reward=-121911872.3 actor_loss=0.3172 critic_loss=83705244796.8781 entropy=17.7474 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 195420] reward=-120256108.9 actor_loss=0.3143 critic_loss=76409176974.2222 entropy=17.7414 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 195420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-510682.5 mean_steps=16.1
|
|
[Episode 195430] reward=-118304205.3 actor_loss=0.2109 critic_loss=82407720277.3333 entropy=17.7379 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 195440] reward=-119097305.2 actor_loss=0.2452 critic_loss=75585892352.0000 entropy=17.7385 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 195440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502286.5 mean_steps=15.0
|
|
[Episode 195450] reward=-121666827.0 actor_loss=0.2684 critic_loss=78631004754.5806 entropy=17.7311 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 195460] reward=-117343012.1 actor_loss=0.3922 critic_loss=74480554276.5714 entropy=17.7217 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 195460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450214.2 mean_steps=15.7
|
|
[Episode 195470] reward=-119590088.9 actor_loss=0.2915 critic_loss=77472577598.0606 entropy=17.7242 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 195480] reward=-119852313.5 actor_loss=0.2912 critic_loss=75995447569.0667 entropy=17.7168 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 195480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-479776.9 mean_steps=15.8
|
|
[Episode 195490] reward=-119918887.4 actor_loss=0.3568 critic_loss=77080830589.1555 entropy=17.6952 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 195500] reward=-117660004.6 actor_loss=0.3290 critic_loss=77543856640.0000 entropy=17.6933 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 195500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493809.9 mean_steps=15.0
|
|
[Episode 195510] reward=-118260145.4 actor_loss=0.3891 critic_loss=76738245339.4286 entropy=17.6748 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 195520] reward=-119228216.4 actor_loss=0.3432 critic_loss=75107654839.7949 entropy=17.6791 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 195520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-354716.4 mean_steps=17.1
|
|
[Episode 195530] reward=-122070349.3 actor_loss=0.3656 critic_loss=78046815144.2286 entropy=17.7004 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 195540] reward=-115617831.0 actor_loss=0.3622 critic_loss=75654625024.0000 entropy=17.6877 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 195540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-438563.8 mean_steps=13.4
|
|
[Episode 195550] reward=-116247951.4 actor_loss=0.2519 critic_loss=134481070398.5778 entropy=17.7013 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 195560] reward=-5611163700.0 actor_loss=0.3103 critic_loss=51282333383081416.0000 entropy=17.6992 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 195560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509125.6 mean_steps=14.2
|
|
[Episode 195570] reward=-25750146370.3 actor_loss=0.0582 critic_loss=229639636799396992.0000 entropy=17.7281 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 195580] reward=-137004127.4 actor_loss=0.2821 critic_loss=1269173768564.3635 entropy=17.7356 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 195580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613928.0 mean_steps=12.8
|
|
[Episode 195590] reward=-122538828.9 actor_loss=0.3058 critic_loss=78837959475.2000 entropy=17.7448 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 195600] reward=-17312392816.5 actor_loss=0.7634 critic_loss=242457440390380000.0000 entropy=17.7404 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 195600] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-354836.1 mean_steps=17.2
|
|
[Episode 195610] reward=-123008282.8 actor_loss=0.3045 critic_loss=84773811100.9032 entropy=17.7354 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 195620] reward=-123778062.5 actor_loss=0.2843 critic_loss=83022171260.8781 entropy=17.7423 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 195620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481956.7 mean_steps=14.8
|
|
[Episode 195630] reward=-6506655168.1 actor_loss=1.3954 critic_loss=65245041180733168.0000 entropy=17.7390 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 195640] reward=-56350559327.9 actor_loss=0.1227 critic_loss=1939974742635184128.0000 entropy=17.7391 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 195640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-468057.8 mean_steps=15.4
|
|
[Episode 195650] reward=-117862533.3 actor_loss=0.1689 critic_loss=77005270220.8000 entropy=17.7513 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 195660] reward=-120096221.0 actor_loss=0.1858 critic_loss=81617275210.3226 entropy=17.7471 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 195660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-676975.7 mean_steps=12.2
|
|
[Episode 195670] reward=-121540517.7 actor_loss=0.2466 critic_loss=75880938259.6923 entropy=17.7475 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 195680] reward=-122152813.8 actor_loss=0.3509 critic_loss=82351413195.4872 entropy=17.7468 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 195680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-393307.0 mean_steps=14.9
|
|
[Episode 195690] reward=-126689356.4 actor_loss=0.2530 critic_loss=85147044051.8621 entropy=17.7576 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 195700] reward=-117868638.6 actor_loss=0.3676 critic_loss=74639892297.9556 entropy=17.7376 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 195700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520620.6 mean_steps=14.2
|
|
[Episode 195710] reward=-119937894.2 actor_loss=0.3401 critic_loss=78574655715.5556 entropy=17.7398 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 195720] reward=-118805303.5 actor_loss=0.2329 critic_loss=80868151387.0222 entropy=17.7126 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 195720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447729.9 mean_steps=15.3
|
|
[Episode 195730] reward=-125100995.6 actor_loss=0.2147 critic_loss=86488445019.0222 entropy=17.6899 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 195740] reward=-122086049.5 actor_loss=0.1860 critic_loss=77654204598.0444 entropy=17.6809 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 195740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-648391.1 mean_steps=12.3
|
|
[Episode 195750] reward=-106941515500.4 actor_loss=16.8730 critic_loss=5222067104270110720.0000 entropy=17.6828 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Episode 195760] reward=-121752954.3 actor_loss=0.3256 critic_loss=82998659572.6222 entropy=17.6904 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 195760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500924.4 mean_steps=13.7
|
|
[Episode 195770] reward=-22212301885.7 actor_loss=0.2354 critic_loss=777253482830577152.0000 entropy=17.6908 approx_kl=0.0036 kl_stop=1 intervention_rate=0.1126 front_blocked=0
|
|
[Episode 195780] reward=-118819544.0 actor_loss=0.3078 critic_loss=77112223334.4000 entropy=17.6946 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 195780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-377110.8 mean_steps=14.8
|
|
[Episode 195790] reward=-124903216.8 actor_loss=0.2940 critic_loss=84573320657.4545 entropy=17.7000 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 195800] reward=-117513313.8 actor_loss=0.2925 critic_loss=75475110400.0000 entropy=17.6948 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 195800] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-239013.8 mean_steps=18.6
|
|
[Episode 195810] reward=-57800705305.6 actor_loss=0.0299 critic_loss=1033438354837165440.0000 entropy=17.6898 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 195820] reward=-53563479257.5 actor_loss=1.4751 critic_loss=1039619679405065984.0000 entropy=17.6914 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Eval 195820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500377.2 mean_steps=14.9
|
|
[Episode 195830] reward=-128759547.8 actor_loss=0.3234 critic_loss=187126196724.6222 entropy=17.6876 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 195840] reward=-120424709.6 actor_loss=0.2805 critic_loss=79270142888.2286 entropy=17.6895 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 195840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518048.7 mean_steps=12.8
|
|
[Episode 195850] reward=-120281943.9 actor_loss=0.3290 critic_loss=78048764632.1778 entropy=17.6909 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 195860] reward=-35714517447.3 actor_loss=0.1513 critic_loss=594717782105391104.0000 entropy=17.6952 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 195860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-491256.9 mean_steps=13.2
|
|
[Episode 195870] reward=-116637779.3 actor_loss=0.3045 critic_loss=78336777495.2727 entropy=17.6961 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 195880] reward=-119427562.1 actor_loss=0.2952 critic_loss=78206930163.8095 entropy=17.6947 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 195880] success_rate=0.450 qp_infeasible_rate=0.500 mean_return=-8661207705.4 mean_steps=174.8
|
|
[Episode 195890] reward=-58457283348.8 actor_loss=3.6972 critic_loss=1643146038298242560.0000 entropy=17.7352 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Episode 195900] reward=-82744938465.3 actor_loss=0.5017 critic_loss=1708479884213470976.0000 entropy=17.7373 approx_kl=0.0001 kl_stop=0 intervention_rate=0.0117 front_blocked=0
|
|
[Eval 195900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516406.2 mean_steps=14.1
|
|
[Episode 195910] reward=-76462751458.7 actor_loss=-0.1408 critic_loss=1171611837706572288.0000 entropy=17.7417 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0059 front_blocked=0
|
|
[Episode 195920] reward=-123128043.7 actor_loss=0.1409 critic_loss=82404721270.1538 entropy=17.7439 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 195920] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-335882.1 mean_steps=16.9
|
|
[Episode 195930] reward=-15670636778.1 actor_loss=0.1392 critic_loss=123735048895675152.0000 entropy=17.7361 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 195940] reward=-47382744266.6 actor_loss=-0.0032 critic_loss=784811904190958848.0000 entropy=17.7359 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 195940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-398378.8 mean_steps=14.2
|
|
[Episode 195950] reward=-72298998827.4 actor_loss=-0.0945 critic_loss=1659359156429593600.0000 entropy=17.7435 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0228 front_blocked=0
|
|
[Episode 195960] reward=-120740438.0 actor_loss=0.3536 critic_loss=83099482269.5385 entropy=17.7601 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 195960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448764.2 mean_steps=15.1
|
|
[Episode 195970] reward=-18198407703.7 actor_loss=0.4281 critic_loss=547631494767143296.0000 entropy=17.7646 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1035 front_blocked=0
|
|
[Episode 195980] reward=-25242558356.1 actor_loss=30.9525 critic_loss=564594410499604480.0000 entropy=17.7672 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 195980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-519586.5 mean_steps=13.1
|
|
[Episode 195990] reward=-119097164.6 actor_loss=0.1853 critic_loss=85182691886.5455 entropy=17.7751 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 196000] reward=-23554932305.7 actor_loss=0.5852 critic_loss=445746542513907328.0000 entropy=17.7954 approx_kl=0.0031 kl_stop=0 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 196000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488632.5 mean_steps=14.8
|
|
[Episode 196010] reward=-4655557908.3 actor_loss=0.2887 critic_loss=51801378173193784.0000 entropy=17.7931 approx_kl=-0.0005 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 196020] reward=-119274683.6 actor_loss=0.2892 critic_loss=83350744142.7692 entropy=17.7965 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 196020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-411066.8 mean_steps=15.5
|
|
[Episode 196030] reward=-9492567209.6 actor_loss=0.3196 critic_loss=48382513540517576.0000 entropy=17.8026 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 196040] reward=-169269717.9 actor_loss=0.3288 critic_loss=8309813991469.5107 entropy=17.7836 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 196040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419293.7 mean_steps=15.2
|
|
[Episode 196050] reward=-33363862265.9 actor_loss=6.6809 critic_loss=880220029008647168.0000 entropy=17.7751 approx_kl=0.0043 kl_stop=0 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 196060] reward=-21494673184.9 actor_loss=0.2102 critic_loss=773693753325600384.0000 entropy=17.7842 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Eval 196060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453020.2 mean_steps=15.2
|
|
[Episode 196070] reward=-13047169902.4 actor_loss=0.1494 critic_loss=104061204400649008.0000 entropy=17.7815 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Episode 196080] reward=-35830371089.5 actor_loss=4.3099 critic_loss=567646543109160960.0000 entropy=17.7846 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0397 front_blocked=0
|
|
[Eval 196080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-580811.2 mean_steps=13.2
|
|
[Episode 196090] reward=-29635870787.7 actor_loss=2.5126 critic_loss=217812180865926208.0000 entropy=17.7957 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0339 front_blocked=0
|
|
[Episode 196100] reward=-25916513886.6 actor_loss=1.2511 critic_loss=737198434286520832.0000 entropy=17.8148 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 196100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385674.5 mean_steps=15.6
|
|
[Episode 196110] reward=-39057952929.4 actor_loss=0.9203 critic_loss=718404519446882944.0000 entropy=17.8114 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0436 front_blocked=0
|
|
[Episode 196120] reward=-44363017849.1 actor_loss=11.0587 critic_loss=1374790456641728256.0000 entropy=17.8224 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 196120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536831.5 mean_steps=13.3
|
|
[Episode 196130] reward=-42193630200.8 actor_loss=-0.0057 critic_loss=685256336950165504.0000 entropy=17.8282 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Episode 196140] reward=-30749967638.9 actor_loss=0.0720 critic_loss=624560640807141376.0000 entropy=17.8271 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 196140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-388537.0 mean_steps=15.8
|
|
[Episode 196150] reward=-15292772735.7 actor_loss=52.7402 critic_loss=297568637975801216.0000 entropy=17.8382 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Episode 196160] reward=-123600453.7 actor_loss=0.2332 critic_loss=87018501120.0000 entropy=17.8423 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 196160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398760.3 mean_steps=15.7
|
|
[Episode 196170] reward=-19866236190.2 actor_loss=0.1893 critic_loss=647683948564582016.0000 entropy=17.8581 approx_kl=-0.0024 kl_stop=1 intervention_rate=0.1055 front_blocked=0
|
|
[Episode 196180] reward=-545391236.2 actor_loss=0.3887 critic_loss=564914359614395.7500 entropy=17.8738 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 196180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420117.3 mean_steps=15.1
|
|
[Episode 196190] reward=-10660065028.6 actor_loss=0.2128 critic_loss=211754248417790464.0000 entropy=17.8932 approx_kl=-0.0024 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 196200] reward=-18410480213.6 actor_loss=0.1681 critic_loss=284691464581083392.0000 entropy=17.8958 approx_kl=0.0123 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 196200] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-346494.7 mean_steps=16.4
|
|
[Episode 196210] reward=-16717684789.0 actor_loss=0.8602 critic_loss=403292031179502400.0000 entropy=17.9206 approx_kl=-0.0011 kl_stop=0 intervention_rate=0.1074 front_blocked=0
|
|
[Episode 196220] reward=-127674780.6 actor_loss=0.2778 critic_loss=88975981226.6667 entropy=17.9487 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 196220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578822.7 mean_steps=13.8
|
|
[Episode 196230] reward=-231694394.4 actor_loss=0.3267 critic_loss=56554606588359.1094 entropy=17.9720 approx_kl=0.0005 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 196240] reward=-15555581270.6 actor_loss=0.5887 critic_loss=221418350512771360.0000 entropy=17.9883 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0983 front_blocked=0
|
|
[Eval 196240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-382532.4 mean_steps=15.1
|
|
[Episode 196250] reward=-123589156.3 actor_loss=0.3055 critic_loss=89107756517.0526 entropy=18.0147 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 196260] reward=-125912260.6 actor_loss=0.2910 critic_loss=91624950052.5714 entropy=18.0421 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 196260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575269.3 mean_steps=13.2
|
|
[Episode 196270] reward=-796144474.5 actor_loss=0.3002 critic_loss=975225094493525.3750 entropy=18.0520 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 196280] reward=-680953707.3 actor_loss=0.2812 critic_loss=896184192452380.5000 entropy=18.0729 approx_kl=-0.0035 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 196280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-595316.2 mean_steps=13.1
|
|
[Episode 196290] reward=-8097045144.3 actor_loss=0.1822 critic_loss=117404308670781120.0000 entropy=18.0722 approx_kl=0.0010 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Episode 196300] reward=-28012086079.3 actor_loss=7.5096 critic_loss=309162876230662016.0000 entropy=18.0872 approx_kl=0.0068 kl_stop=0 intervention_rate=0.0521 front_blocked=0
|
|
[Eval 196300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-465524062.2 mean_steps=22.9
|
|
[Episode 196310] reward=-2459119798.4 actor_loss=0.2412 critic_loss=9448445523554760.0000 entropy=18.1033 approx_kl=0.0013 kl_stop=0 intervention_rate=0.1139 front_blocked=0
|
|
[Episode 196320] reward=-133100609.3 actor_loss=0.2412 critic_loss=495940928944.3555 entropy=18.1176 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 196320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-561534.2 mean_steps=14.3
|
|
[Episode 196330] reward=-22889396409.6 actor_loss=0.1512 critic_loss=495422118988448448.0000 entropy=18.1358 approx_kl=-0.0008 kl_stop=0 intervention_rate=0.0977 front_blocked=0
|
|
[Episode 196340] reward=-35253951354.4 actor_loss=0.0890 critic_loss=845474974426190464.0000 entropy=18.1496 approx_kl=-0.0013 kl_stop=0 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 196340] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-284098.8 mean_steps=16.7
|
|
[Episode 196350] reward=-120852410.0 actor_loss=0.3715 critic_loss=103680471950.2222 entropy=18.1549 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 196360] reward=-11908508166.8 actor_loss=0.1721 critic_loss=259194467358780064.0000 entropy=18.1384 approx_kl=-0.0028 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 196360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-678665567.2 mean_steps=24.7
|
|
[Episode 196370] reward=-18638622394.8 actor_loss=0.2405 critic_loss=388060285478417920.0000 entropy=18.1419 approx_kl=-0.0007 kl_stop=0 intervention_rate=0.1068 front_blocked=0
|
|
[Episode 196380] reward=-3592779876.0 actor_loss=0.1686 critic_loss=26484075305684172.0000 entropy=18.1502 approx_kl=0.0008 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Eval 196380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-673373679.9 mean_steps=25.3
|
|
[Episode 196390] reward=-6340154761.4 actor_loss=0.2285 critic_loss=78812539930437216.0000 entropy=18.1490 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 196400] reward=-164250763.9 actor_loss=0.2961 critic_loss=8323293039820.7998 entropy=18.1657 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 196400] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612648351.7 mean_steps=20.5
|
|
[Episode 196410] reward=-123137133.9 actor_loss=0.2927 critic_loss=88858292930.2069 entropy=18.1946 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 196420] reward=-5505081968.9 actor_loss=0.1679 critic_loss=32629955126695748.0000 entropy=18.2165 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Eval 196420] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-244562.8 mean_steps=18.9
|
|
[Episode 196430] reward=-123681323.4 actor_loss=0.3384 critic_loss=83291849124.1026 entropy=18.2311 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 196440] reward=-8983245259.2 actor_loss=0.2479 critic_loss=154520072386155488.0000 entropy=18.2341 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 196440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-532390.2 mean_steps=15.2
|
|
[Episode 196450] reward=-124872240.1 actor_loss=0.2429 critic_loss=89742801826.9091 entropy=18.2302 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 196460] reward=-123406938.7 actor_loss=0.2214 critic_loss=90623506204.4444 entropy=18.2477 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 196460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-514449.0 mean_steps=12.2
|
|
[Episode 196470] reward=-125254647.1 actor_loss=0.3074 critic_loss=92084367616.0000 entropy=18.2392 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 196480] reward=-126748861.9 actor_loss=0.2412 critic_loss=90466478840.6857 entropy=18.2543 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 196480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-496300.5 mean_steps=12.9
|
|
[Episode 196490] reward=-128491594.3 actor_loss=0.1837 critic_loss=129115145830.4000 entropy=18.2587 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 196500] reward=-126550572.1 actor_loss=0.2422 critic_loss=90881970491.0769 entropy=18.2383 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 196500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-605909.7 mean_steps=13.7
|
|
[Episode 196510] reward=-124556415.8 actor_loss=0.3121 critic_loss=88070702039.0400 entropy=18.2404 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 196520] reward=-122505604.2 actor_loss=0.2905 critic_loss=85950897629.8667 entropy=18.2352 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 196520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-631412.8 mean_steps=12.9
|
|
[Episode 196530] reward=-129215155.8 actor_loss=0.1877 critic_loss=91268902274.8445 entropy=18.2454 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 196540] reward=-122323018.3 actor_loss=0.3514 critic_loss=105376752463.4483 entropy=18.2409 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 196540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574875.5 mean_steps=13.3
|
|
[Episode 196550] reward=-126396006.7 actor_loss=0.1725 critic_loss=87959132280.4706 entropy=18.2384 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 196560] reward=-123562016.2 actor_loss=0.2860 critic_loss=84034810600.7273 entropy=18.2390 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 196560] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-665496.3 mean_steps=12.5
|
|
[Episode 196570] reward=-11639891652.4 actor_loss=0.3414 critic_loss=232059156422203072.0000 entropy=18.2586 approx_kl=-0.0025 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 196580] reward=-20235344253.0 actor_loss=0.1786 critic_loss=314263997816649984.0000 entropy=18.2745 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1055 front_blocked=0
|
|
[Eval 196580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-421634.3 mean_steps=16.2
|
|
[Episode 196590] reward=-26833369502.0 actor_loss=0.4223 critic_loss=439403023745431040.0000 entropy=18.2942 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 196600] reward=-32377830464.3 actor_loss=0.1689 critic_loss=464993138748617344.0000 entropy=18.2952 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 196600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-431016.7 mean_steps=13.6
|
|
[Episode 196610] reward=-5932237746.2 actor_loss=0.2572 critic_loss=91226767519383552.0000 entropy=18.3116 approx_kl=-0.0034 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 196620] reward=-19318159479.1 actor_loss=0.1784 critic_loss=377878417631294272.0000 entropy=18.3196 approx_kl=0.0001 kl_stop=0 intervention_rate=0.1042 front_blocked=0
|
|
[Eval 196620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-419992.4 mean_steps=16.4
|
|
[Episode 196630] reward=-21022093775.1 actor_loss=0.2606 critic_loss=291511163848397824.0000 entropy=18.3235 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1029 front_blocked=0
|
|
[Episode 196640] reward=-2663464046.6 actor_loss=0.2065 critic_loss=15696411651083104.0000 entropy=18.3458 approx_kl=-0.0012 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 196640] success_rate=0.200 qp_infeasible_rate=0.750 mean_return=-10443159487.1 mean_steps=171.6
|
|
[Episode 196650] reward=-47701521030.9 actor_loss=0.1147 critic_loss=907683377612678144.0000 entropy=18.3509 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 196660] reward=-128994148.6 actor_loss=0.3161 critic_loss=94190878720.0000 entropy=18.3559 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 196660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402139.6 mean_steps=16.0
|
|
[Episode 196670] reward=-19200008583.2 actor_loss=0.3661 critic_loss=335730937283448384.0000 entropy=18.3530 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0990 front_blocked=0
|
|
[Episode 196680] reward=-204772075.6 actor_loss=1.7470 critic_loss=14705330587160.3809 entropy=18.3588 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 196680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-416121.4 mean_steps=13.9
|
|
[Episode 196690] reward=-51585088064.8 actor_loss=-0.0225 critic_loss=905116319594148992.0000 entropy=18.3672 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Episode 196700] reward=-7643578572.7 actor_loss=0.2975 critic_loss=111807010319816752.0000 entropy=18.3660 approx_kl=-0.0005 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 196700] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-688477.4 mean_steps=12.1
|
|
[Episode 196710] reward=-43039200072.5 actor_loss=0.0113 critic_loss=735606957634142592.0000 entropy=18.3768 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0592 front_blocked=0
|
|
[Episode 196720] reward=-16804659561.2 actor_loss=0.2793 critic_loss=256372390186579104.0000 entropy=18.3989 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1081 front_blocked=0
|
|
[Eval 196720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529238.6 mean_steps=14.1
|
|
[Episode 196730] reward=-30260499076.8 actor_loss=48.3937 critic_loss=469544631028853440.0000 entropy=18.4043 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 196740] reward=-32338769960.2 actor_loss=0.0929 critic_loss=592564040785484544.0000 entropy=18.4035 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 196740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526942.8 mean_steps=13.2
|
|
[Episode 196750] reward=-124650974.5 actor_loss=0.1750 critic_loss=94111759837.8667 entropy=18.4220 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 196760] reward=-129626574.4 actor_loss=0.3129 critic_loss=96864714387.9111 entropy=18.4157 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 196760] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-343611.0 mean_steps=16.4
|
|
[Episode 196770] reward=-129691383.7 actor_loss=0.3229 critic_loss=101446976762.3111 entropy=18.4327 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 196780] reward=-3345392534.4 actor_loss=0.2247 critic_loss=33319211053199452.0000 entropy=18.4298 approx_kl=-0.0016 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 196780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567231.6 mean_steps=13.1
|
|
[Episode 196790] reward=-127254674.1 actor_loss=0.2773 critic_loss=91510848989.8667 entropy=18.4139 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 196800] reward=-129808361.8 actor_loss=0.1837 critic_loss=100576348547.4595 entropy=18.4143 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 196800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-508433.3 mean_steps=13.0
|
|
[Episode 196810] reward=-131019211.6 actor_loss=0.1823 critic_loss=101482023048.5333 entropy=18.4142 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 196820] reward=-129005857.7 actor_loss=0.2783 critic_loss=96800741057.4222 entropy=18.4084 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 196820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602948.2 mean_steps=12.7
|
|
[Episode 196830] reward=-122619273.1 actor_loss=0.3818 critic_loss=92892856816.4848 entropy=18.3957 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 196840] reward=-126057304.2 actor_loss=0.3723 critic_loss=92602215628.8000 entropy=18.3738 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 196840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-706435.7 mean_steps=12.3
|
|
[Episode 196850] reward=-130948421.4 actor_loss=0.3193 critic_loss=96135110656.0000 entropy=18.3696 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 196860] reward=-128074708.0 actor_loss=0.3311 critic_loss=149502024089.6000 entropy=18.3729 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 196860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-584627.5 mean_steps=14.2
|
|
[Episode 196870] reward=-125362869.6 actor_loss=0.2966 critic_loss=94995192581.6889 entropy=18.3895 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 196880] reward=-122831926.5 actor_loss=0.2867 critic_loss=83896345134.5455 entropy=18.3810 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 196880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-383496.2 mean_steps=14.9
|
|
[Episode 196890] reward=-128744358.1 actor_loss=0.2101 critic_loss=137851079884.8000 entropy=18.3617 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 196900] reward=-124631594.8 actor_loss=0.3020 critic_loss=91284710617.2121 entropy=18.3487 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 196900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-604238.8 mean_steps=11.7
|
|
[Episode 196910] reward=-126480881.3 actor_loss=0.2158 critic_loss=177106691646.4390 entropy=18.3503 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 196920] reward=-123938156.3 actor_loss=0.2040 critic_loss=89225165209.6000 entropy=18.3430 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 196920] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-335565.8 mean_steps=16.3
|
|
[Episode 196930] reward=-125469981.5 actor_loss=0.2174 critic_loss=92266011761.7778 entropy=18.3111 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 196940] reward=-129119539.6 actor_loss=0.3524 critic_loss=351416506122.2400 entropy=18.2997 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 196940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519351.7 mean_steps=13.9
|
|
[Episode 196950] reward=-130495787.5 actor_loss=0.3034 critic_loss=93517669262.2222 entropy=18.2914 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 196960] reward=-125131326.7 actor_loss=0.2557 critic_loss=86464093297.7778 entropy=18.2764 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 196960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-519640.6 mean_steps=15.0
|
|
[Episode 196970] reward=-128016286.8 actor_loss=0.2386 critic_loss=88270766808.1778 entropy=18.2650 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 196980] reward=-128524904.4 actor_loss=0.2948 critic_loss=246319719468.5217 entropy=18.2472 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 196980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-387986.4 mean_steps=15.8
|
|
[Episode 196990] reward=-126707964.4 actor_loss=0.1762 critic_loss=89189689051.4286 entropy=18.2331 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 197000] reward=-124231791.1 actor_loss=0.2714 critic_loss=92639014456.8889 entropy=18.2261 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 197000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-561548.9 mean_steps=14.4
|
|
[Episode 197010] reward=-129779106.9 actor_loss=0.3456 critic_loss=91861998498.9091 entropy=18.2135 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 197020] reward=-6126496483.8 actor_loss=0.3010 critic_loss=74455231184135776.0000 entropy=18.2468 approx_kl=0.0013 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 197020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530539.2 mean_steps=14.2
|
|
[Episode 197030] reward=-130832478.4 actor_loss=0.2832 critic_loss=107810140672.0000 entropy=18.2741 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 197040] reward=-130260518.7 actor_loss=0.3216 critic_loss=97536125466.9474 entropy=18.2616 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 197040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532974.8 mean_steps=13.0
|
|
[Episode 197050] reward=-129084674.6 actor_loss=0.2190 critic_loss=96725894065.2308 entropy=18.2773 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 197060] reward=-123368146.5 actor_loss=0.3105 critic_loss=91374576844.8000 entropy=18.2870 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 197060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524146.2 mean_steps=13.2
|
|
[Episode 197070] reward=-125189330.6 actor_loss=0.2520 critic_loss=87049162752.0000 entropy=18.3014 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 197080] reward=-125293707.7 actor_loss=0.2125 critic_loss=86729473319.8222 entropy=18.2896 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 197080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551013.7 mean_steps=13.3
|
|
[Episode 197090] reward=-126714183.8 actor_loss=0.1512 critic_loss=93573912234.6667 entropy=18.2826 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 197100] reward=-127290580.2 actor_loss=0.3149 critic_loss=110046966272.0000 entropy=18.2779 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 197100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-479153.2 mean_steps=15.5
|
|
[Episode 197110] reward=-126252501.2 actor_loss=0.2194 critic_loss=91656814095.5152 entropy=18.2822 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 197120] reward=-127958291.6 actor_loss=0.3004 critic_loss=134417913355.3778 entropy=18.2751 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 197120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-577765.0 mean_steps=15.1
|
|
[Episode 197130] reward=-124300984.7 actor_loss=0.3107 critic_loss=95457937908.6222 entropy=18.2762 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 197140] reward=-126616368.9 actor_loss=0.2940 critic_loss=95695802459.0222 entropy=18.2705 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 197140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392827.7 mean_steps=15.8
|
|
[Episode 197150] reward=-124551673.6 actor_loss=0.3159 critic_loss=86339695781.1613 entropy=18.2588 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 197160] reward=-166984705.4 actor_loss=0.3600 critic_loss=6507256272941.5107 entropy=18.2445 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 197160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-696350.6 mean_steps=12.1
|
|
[Episode 197170] reward=-124098048.6 actor_loss=0.2228 critic_loss=87604087466.6667 entropy=18.2347 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 197180] reward=-124617002.5 actor_loss=0.2440 critic_loss=87214406223.6444 entropy=18.2420 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 197180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410895.2 mean_steps=15.2
|
|
[Episode 197190] reward=-124306157.9 actor_loss=0.2745 critic_loss=92769157848.1778 entropy=18.2482 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 197200] reward=-124839021.3 actor_loss=0.2995 critic_loss=91271946522.4828 entropy=18.2374 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 197200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-406193.0 mean_steps=15.3
|
|
[Episode 197210] reward=-123748996.7 actor_loss=0.3081 critic_loss=92854707313.7778 entropy=18.2208 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 197220] reward=-124413089.7 actor_loss=0.2766 critic_loss=125847905726.3590 entropy=18.2034 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 197220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-612358.3 mean_steps=12.8
|
|
[Episode 197230] reward=-127952242.7 actor_loss=0.3242 critic_loss=91851553035.1304 entropy=18.2113 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 197240] reward=-121312984.1 actor_loss=0.3696 critic_loss=129568594875.7333 entropy=18.1944 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 197240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-616269.4 mean_steps=13.7
|
|
[Episode 197250] reward=-127029895.4 actor_loss=0.3545 critic_loss=88576369225.1429 entropy=18.1857 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 197260] reward=-122131565.6 actor_loss=0.3177 critic_loss=88127932916.6222 entropy=18.1629 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 197260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560525.9 mean_steps=13.5
|
|
[Episode 197270] reward=-123519972.2 actor_loss=0.3218 critic_loss=81854527169.4222 entropy=18.1506 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 197280] reward=-126867550.9 actor_loss=0.1892 critic_loss=95841847588.5714 entropy=18.1296 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 197280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-272815.4 mean_steps=15.8
|
|
[Episode 197290] reward=-1516919119.5 actor_loss=0.2876 critic_loss=4791087238729533.0000 entropy=18.1102 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 197300] reward=-125821070.2 actor_loss=0.2763 critic_loss=89792407763.8621 entropy=18.1254 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 197300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465390.5 mean_steps=14.6
|
|
[Episode 197310] reward=-120095783.1 actor_loss=0.2859 critic_loss=82895586372.2667 entropy=18.1093 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 197320] reward=-124733326.7 actor_loss=0.3775 critic_loss=84804531905.4222 entropy=18.1069 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 197320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540456.5 mean_steps=12.9
|
|
[Episode 197330] reward=-128756768.5 actor_loss=0.2987 critic_loss=89711951689.9556 entropy=18.0952 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 197340] reward=-126803382.7 actor_loss=0.3269 critic_loss=89100534837.8947 entropy=18.0757 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 197340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489391.1 mean_steps=13.8
|
|
[Episode 197350] reward=-123618488.5 actor_loss=0.3447 critic_loss=89418859824.4324 entropy=18.0623 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 197360] reward=-122733495.6 actor_loss=0.2191 critic_loss=90263281117.8667 entropy=18.0618 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 197360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482039.4 mean_steps=14.7
|
|
[Episode 197370] reward=-123239575.7 actor_loss=0.2348 critic_loss=80193331564.0889 entropy=18.0814 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 197380] reward=-121282796.3 actor_loss=0.3179 critic_loss=84528160768.0000 entropy=18.0730 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 197380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580681.8 mean_steps=12.6
|
|
[Episode 197390] reward=-123192122.5 actor_loss=0.2988 critic_loss=91070778208.7111 entropy=18.0661 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 197400] reward=-122194471.5 actor_loss=0.3943 critic_loss=84790409807.6444 entropy=18.0482 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 197400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524946.9 mean_steps=14.0
|
|
[Episode 197410] reward=-121768540.9 actor_loss=0.3080 critic_loss=80271253248.0000 entropy=18.0221 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 197420] reward=-125799084.7 actor_loss=0.3388 critic_loss=87473290899.9111 entropy=17.9909 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 197420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-423333.1 mean_steps=16.2
|
|
[Episode 197430] reward=-115014327.4 actor_loss=0.3648 critic_loss=82295573890.8445 entropy=17.9882 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 197440] reward=-124954658.8 actor_loss=0.3146 critic_loss=93029045341.0909 entropy=17.9722 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 197440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-506267.6 mean_steps=15.1
|
|
[Episode 197450] reward=-124747707.0 actor_loss=0.3034 critic_loss=91291719725.5111 entropy=17.9621 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 197460] reward=-122824550.1 actor_loss=0.3853 critic_loss=86733565756.9524 entropy=17.9654 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 197460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492073.8 mean_steps=14.7
|
|
[Episode 197470] reward=-123121661.9 actor_loss=0.2743 critic_loss=84997483178.6667 entropy=17.9544 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 197480] reward=-126538299.5 actor_loss=0.2814 critic_loss=88635920501.0286 entropy=17.9481 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 197480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409472.0 mean_steps=15.3
|
|
[Episode 197490] reward=-128445729.1 actor_loss=0.2210 critic_loss=88522142697.2444 entropy=17.9386 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 197500] reward=-120697038.8 actor_loss=0.2673 critic_loss=84211369392.3556 entropy=17.9349 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 197500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451441.7 mean_steps=14.7
|
|
[Episode 197510] reward=-120714813.8 actor_loss=0.3121 critic_loss=82209321164.8000 entropy=17.9386 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 197520] reward=-125330001.0 actor_loss=0.2681 critic_loss=85257079603.2000 entropy=17.9368 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 197520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434438.0 mean_steps=15.6
|
|
[Episode 197530] reward=-123553677.6 actor_loss=0.3104 critic_loss=87556577325.5111 entropy=17.9160 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 197540] reward=-117859180.8 actor_loss=0.5384 critic_loss=82601327547.7333 entropy=17.9075 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 197540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-546945.4 mean_steps=12.4
|
|
[Episode 197550] reward=-122023473.8 actor_loss=0.3141 critic_loss=77929400052.8696 entropy=17.8853 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 197560] reward=-118719454.8 actor_loss=0.2845 critic_loss=80548787293.0909 entropy=17.8890 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 197560] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-345149.2 mean_steps=16.6
|
|
[Episode 197570] reward=-121277601.6 actor_loss=0.3260 critic_loss=81692310945.1852 entropy=17.8788 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 197580] reward=-122907069.1 actor_loss=0.3058 critic_loss=81094350555.4286 entropy=17.8687 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 197580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592640.7 mean_steps=12.9
|
|
[Episode 197590] reward=-125432246.1 actor_loss=0.3306 critic_loss=86549786168.8889 entropy=17.8809 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 197600] reward=-129637702.2 actor_loss=0.3704 critic_loss=87298041532.6316 entropy=17.8789 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 197600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506622.3 mean_steps=14.1
|
|
[Episode 197610] reward=-125489656.9 actor_loss=0.2854 critic_loss=85316964443.0222 entropy=17.8789 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 197620] reward=-120830137.1 actor_loss=0.2821 critic_loss=80104218806.0444 entropy=17.8609 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 197620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536462.4 mean_steps=13.8
|
|
[Episode 197630] reward=-121178001.7 actor_loss=0.3593 critic_loss=81643842218.6667 entropy=17.8661 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 197640] reward=-122757798.9 actor_loss=0.2705 critic_loss=76167151081.7391 entropy=17.8506 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 197640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479345.2 mean_steps=14.8
|
|
[Episode 197650] reward=-118621589.2 actor_loss=0.2970 critic_loss=79514721463.7949 entropy=17.8468 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 197660] reward=-116531262.9 actor_loss=0.2762 critic_loss=74219254852.2667 entropy=17.8512 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 197660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-647579.8 mean_steps=13.2
|
|
[Episode 197670] reward=-124145910.3 actor_loss=0.2800 critic_loss=97992707510.8571 entropy=17.8471 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 197680] reward=-118511333.4 actor_loss=0.2883 critic_loss=81524736000.0000 entropy=17.8208 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 197680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572340.6 mean_steps=12.4
|
|
[Episode 197690] reward=-118463940.0 actor_loss=0.2730 critic_loss=76559800456.5333 entropy=17.7856 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 197700] reward=-118768498.3 actor_loss=0.3284 critic_loss=73842467236.1026 entropy=17.7827 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 197700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-583814.4 mean_steps=12.4
|
|
[Episode 197710] reward=-119704404.4 actor_loss=0.2832 critic_loss=218911875618.1333 entropy=17.7608 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 197720] reward=-115303426.8 actor_loss=0.3810 critic_loss=77178709085.0909 entropy=17.7488 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 197720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450783.8 mean_steps=15.4
|
|
[Episode 197730] reward=-118194429.9 actor_loss=0.2725 critic_loss=76097215298.3704 entropy=17.7449 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 197740] reward=-128999688.3 actor_loss=0.3273 critic_loss=524016980614.7368 entropy=17.7405 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 197740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533903.7 mean_steps=13.1
|
|
[Episode 197750] reward=-125390801.2 actor_loss=0.2800 critic_loss=78599762375.1111 entropy=17.7425 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 197760] reward=-118518928.0 actor_loss=0.2830 critic_loss=75284112530.2857 entropy=17.7404 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 197760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454027.1 mean_steps=14.4
|
|
[Episode 197770] reward=-122457215.1 actor_loss=0.3053 critic_loss=160462521623.2727 entropy=17.7266 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 197780] reward=-124012627.7 actor_loss=0.2458 critic_loss=189854615688.5333 entropy=17.7286 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 197780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-537528.0 mean_steps=14.8
|
|
[Episode 197790] reward=-120593316.8 actor_loss=0.3089 critic_loss=106847855047.1111 entropy=17.7353 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 197800] reward=-1362057915.0 actor_loss=0.2634 critic_loss=3354743865984796.5000 entropy=17.7369 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 197800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-686428.1 mean_steps=12.8
|
|
[Episode 197810] reward=-119991048.4 actor_loss=0.3989 critic_loss=102881230475.6364 entropy=17.7341 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 197820] reward=-121063947.5 actor_loss=0.2624 critic_loss=77225783022.9333 entropy=17.7368 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 197820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-559145.0 mean_steps=13.9
|
|
[Episode 197830] reward=-120763560.6 actor_loss=0.2743 critic_loss=88727620266.6667 entropy=17.7372 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 197840] reward=-134700482.0 actor_loss=0.3574 critic_loss=1042879420825.6000 entropy=17.7436 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 197840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-608701.1 mean_steps=11.6
|
|
[Episode 197850] reward=-120549123.8 actor_loss=0.4106 critic_loss=81759361743.5676 entropy=17.7469 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 197860] reward=-126078035.6 actor_loss=0.3249 critic_loss=203789290496.0000 entropy=17.7405 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 197860] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-721064.7 mean_steps=10.7
|
|
[Episode 197870] reward=-122552949.8 actor_loss=0.2757 critic_loss=81567829430.8571 entropy=17.7440 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 197880] reward=-119059514.3 actor_loss=0.2240 critic_loss=81044035531.4872 entropy=17.7528 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 197880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517513.6 mean_steps=14.6
|
|
[Episode 197890] reward=-119341957.8 actor_loss=0.2704 critic_loss=76728152519.1111 entropy=17.7562 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 197900] reward=-116272985.1 actor_loss=0.3057 critic_loss=79678389043.2000 entropy=17.7474 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 197900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446069.2 mean_steps=14.5
|
|
[Episode 197910] reward=-122532558.6 actor_loss=0.2990 critic_loss=82737976320.0000 entropy=17.7444 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 197920] reward=-117256484.4 actor_loss=0.3791 critic_loss=76846201434.3529 entropy=17.7381 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 197920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-671746.0 mean_steps=11.9
|
|
[Episode 197930] reward=-117660964.0 actor_loss=0.3132 critic_loss=78471488762.3111 entropy=17.7263 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 197940] reward=-114171612.8 actor_loss=0.3760 critic_loss=72908648733.7674 entropy=17.7160 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 197940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-443303.4 mean_steps=14.2
|
|
[Episode 197950] reward=-125554712.4 actor_loss=0.3012 critic_loss=81298670250.6667 entropy=17.7151 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 197960] reward=-119351674.6 actor_loss=0.3126 critic_loss=82688618496.0000 entropy=17.7023 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 197960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-588768.0 mean_steps=12.8
|
|
[Episode 197970] reward=-118373473.3 actor_loss=0.2815 critic_loss=86555428272.3556 entropy=17.7032 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 197980] reward=-113192216.3 actor_loss=0.3504 critic_loss=68251985601.4222 entropy=17.7210 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 197980] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-663820.2 mean_steps=11.2
|
|
[Episode 197990] reward=-119985518.3 actor_loss=0.2559 critic_loss=81432429272.1778 entropy=17.7273 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 198000] reward=-111734175.5 actor_loss=0.3511 critic_loss=69892426995.8095 entropy=17.7232 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 198000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491819.3 mean_steps=14.8
|
|
[Episode 198010] reward=-119686278.0 actor_loss=0.2752 critic_loss=76834931624.2286 entropy=17.7225 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 198020] reward=-120265857.2 actor_loss=0.2400 critic_loss=80297585140.6222 entropy=17.7124 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 198020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528276.2 mean_steps=12.9
|
|
[Episode 198030] reward=-118627673.6 actor_loss=0.2355 critic_loss=83765583689.9556 entropy=17.7225 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 198040] reward=-123869755.2 actor_loss=0.3147 critic_loss=201828031829.3333 entropy=17.7136 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 198040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-469784.5 mean_steps=16.4
|
|
[Episode 198050] reward=-120157213.7 actor_loss=0.2326 critic_loss=76882902707.8919 entropy=17.7237 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 198060] reward=-121856520.0 actor_loss=0.2246 critic_loss=79860393574.4000 entropy=17.7331 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 198060] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-693905.9 mean_steps=11.5
|
|
[Episode 198070] reward=-112404201.5 actor_loss=0.4095 critic_loss=74169096448.0000 entropy=17.7317 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 198080] reward=-120385271.2 actor_loss=0.2972 critic_loss=77798100269.1765 entropy=17.7154 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 198080] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-313006.9 mean_steps=16.5
|
|
[Episode 198090] reward=-115255262.8 actor_loss=0.3828 critic_loss=71486338662.4000 entropy=17.6986 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 198100] reward=-120462161.2 actor_loss=0.3163 critic_loss=81763860289.4884 entropy=17.6725 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 198100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-548719.2 mean_steps=12.2
|
|
[Episode 198110] reward=-117313875.9 actor_loss=0.3256 critic_loss=72938708172.8000 entropy=17.6621 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 198120] reward=-110135554.7 actor_loss=0.4211 critic_loss=72632811155.9111 entropy=17.6514 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 198120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491552.8 mean_steps=13.8
|
|
[Episode 198130] reward=-134522686.8 actor_loss=0.2777 critic_loss=1419579350493.8667 entropy=17.6721 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 198140] reward=-118697841.6 actor_loss=0.3407 critic_loss=79170032981.3333 entropy=17.7156 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 198140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554612.2 mean_steps=12.1
|
|
[Episode 198150] reward=-121319711.6 actor_loss=0.2719 critic_loss=132451289770.6667 entropy=17.7105 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 198160] reward=-117999447.2 actor_loss=0.3028 critic_loss=79866068582.4000 entropy=17.7151 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 198160] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-709328.8 mean_steps=10.5
|
|
[Episode 198170] reward=-118485326.2 actor_loss=0.3130 critic_loss=82328857344.0000 entropy=17.7094 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 198180] reward=-120955408.2 actor_loss=0.2567 critic_loss=105601166155.2941 entropy=17.7165 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 198180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540883.6 mean_steps=13.3
|
|
[Episode 198190] reward=-118917809.7 actor_loss=0.3062 critic_loss=82819933616.3556 entropy=17.6876 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 198200] reward=-118265102.8 actor_loss=0.3425 critic_loss=76316830469.6889 entropy=17.6891 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 198200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529538.5 mean_steps=13.0
|
|
[Episode 198210] reward=-119958519.2 actor_loss=0.2787 critic_loss=80112446395.7333 entropy=17.6889 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 198220] reward=-123129453.3 actor_loss=0.2330 critic_loss=80856386969.6000 entropy=17.6818 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 198220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548776.5 mean_steps=13.2
|
|
[Episode 198230] reward=-116044863.9 actor_loss=0.4127 critic_loss=78604992876.0889 entropy=17.6798 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 198240] reward=-121224844.2 actor_loss=0.2117 critic_loss=94408120858.9474 entropy=17.6747 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 198240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-598910.7 mean_steps=11.9
|
|
[Episode 198250] reward=-121936053.7 actor_loss=0.3584 critic_loss=85254307840.0000 entropy=17.6784 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 198260] reward=-117200336.2 actor_loss=0.3259 critic_loss=74867924549.1892 entropy=17.6835 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 198260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-428018.6 mean_steps=14.3
|
|
[Episode 198270] reward=-335940935.1 actor_loss=0.4328 critic_loss=127635550063092.6250 entropy=17.6799 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 198280] reward=-118868020.0 actor_loss=0.3999 critic_loss=79101572437.3333 entropy=17.6743 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 198280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425257.6 mean_steps=15.2
|
|
[Episode 198290] reward=-120214019.8 actor_loss=0.3761 critic_loss=80936262369.2800 entropy=17.6876 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 198300] reward=-119609184.3 actor_loss=0.3177 critic_loss=77989108004.5714 entropy=17.7069 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 198300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553708.4 mean_steps=13.4
|
|
[Episode 198310] reward=-119525816.5 actor_loss=0.2236 critic_loss=78084907190.0444 entropy=17.7282 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 198320] reward=-118048445.4 actor_loss=0.2023 critic_loss=78900401675.3778 entropy=17.7200 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 198320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508531.1 mean_steps=14.2
|
|
[Episode 198330] reward=-119238266.6 actor_loss=0.2633 critic_loss=94061040435.2000 entropy=17.7175 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 198340] reward=-115680996.3 actor_loss=0.2724 critic_loss=80231958300.4444 entropy=17.7109 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 198340] success_rate=0.300 qp_infeasible_rate=0.650 mean_return=-50283032806.5 mean_steps=173.6
|
|
[Episode 198350] reward=-118966757.0 actor_loss=0.3197 critic_loss=77540444754.5806 entropy=17.6929 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 198360] reward=-119908571.4 actor_loss=0.3221 critic_loss=77994635172.9778 entropy=17.7046 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 198360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431490.6 mean_steps=15.6
|
|
[Episode 198370] reward=-121317710.1 actor_loss=0.3945 critic_loss=87081719296.0000 entropy=17.6984 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 198380] reward=-116902171.7 actor_loss=0.3624 critic_loss=81055381094.4000 entropy=17.7073 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 198380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-580763.3 mean_steps=13.9
|
|
[Episode 198390] reward=-124880028.9 actor_loss=0.3012 critic_loss=94770021262.2222 entropy=17.7057 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 198400] reward=-118059492.3 actor_loss=0.3075 critic_loss=94437834023.8222 entropy=17.7159 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 198400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-660864.4 mean_steps=13.1
|
|
[Episode 198410] reward=-121092425.3 actor_loss=0.1768 critic_loss=80325954560.0000 entropy=17.6979 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 198420] reward=-121898690.4 actor_loss=0.2724 critic_loss=87972302574.9333 entropy=17.7063 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 198420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562820.7 mean_steps=13.7
|
|
[Episode 198430] reward=-120570867.6 actor_loss=0.2873 critic_loss=76082864856.1778 entropy=17.6947 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 198440] reward=-116724130.8 actor_loss=0.3512 critic_loss=79043016960.0000 entropy=17.6781 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 198440] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-290088.6 mean_steps=17.1
|
|
[Episode 198450] reward=-118101534.0 actor_loss=0.3437 critic_loss=82910740480.0000 entropy=17.6660 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 198460] reward=-120562622.7 actor_loss=0.2805 critic_loss=90662409947.4286 entropy=17.6661 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 198460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535257.9 mean_steps=13.5
|
|
[Episode 198470] reward=-115975397.9 actor_loss=0.2519 critic_loss=73565217398.1538 entropy=17.6537 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 198480] reward=-122345300.6 actor_loss=0.2324 critic_loss=84504927591.7838 entropy=17.6422 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 198480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478208.4 mean_steps=13.7
|
|
[Episode 198490] reward=-113908002.0 actor_loss=0.3914 critic_loss=71529925868.3077 entropy=17.6496 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 198500] reward=-116995253.5 actor_loss=0.3396 critic_loss=71219095051.3778 entropy=17.6554 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 198500] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-662417.1 mean_steps=11.4
|
|
[Episode 198510] reward=-115560481.2 actor_loss=0.3910 critic_loss=73896963542.4865 entropy=17.6580 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 198520] reward=-125365537.4 actor_loss=0.3653 critic_loss=86959610148.5714 entropy=17.6505 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 198520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536187.5 mean_steps=13.5
|
|
[Episode 198530] reward=-116462144.1 actor_loss=0.3176 critic_loss=78368810598.4000 entropy=17.6419 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 198540] reward=-118954640.9 actor_loss=0.2261 critic_loss=81590945564.4444 entropy=17.6293 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 198540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504588.3 mean_steps=13.8
|
|
[Episode 198550] reward=-117943435.2 actor_loss=0.4334 critic_loss=82862652875.0345 entropy=17.6283 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 198560] reward=-118952680.9 actor_loss=0.2411 critic_loss=77149556105.8462 entropy=17.6236 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 198560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573142.5 mean_steps=13.8
|
|
[Episode 198570] reward=-120703047.6 actor_loss=0.2596 critic_loss=79803990152.5333 entropy=17.6149 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 198580] reward=-114213558.3 actor_loss=0.3816 critic_loss=71560855454.4762 entropy=17.5952 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 198580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-656482.1 mean_steps=12.3
|
|
[Episode 198590] reward=-116947406.9 actor_loss=0.2898 critic_loss=75804171741.8667 entropy=17.6020 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 198600] reward=-123465052.2 actor_loss=0.3279 critic_loss=80151257444.1739 entropy=17.5979 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 198600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440321.9 mean_steps=15.3
|
|
[Episode 198610] reward=-110717232.4 actor_loss=0.3923 critic_loss=68003275358.8148 entropy=17.6018 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 198620] reward=-109649359.6 actor_loss=0.4810 critic_loss=70255044380.4444 entropy=17.6084 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 198620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508833.1 mean_steps=14.0
|
|
[Episode 198630] reward=-116740697.0 actor_loss=0.2951 critic_loss=71768484006.0540 entropy=17.6119 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 198640] reward=-120819394.2 actor_loss=0.3453 critic_loss=202814646954.6667 entropy=17.6195 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 198640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-570542.6 mean_steps=12.3
|
|
[Episode 198650] reward=-114616065.3 actor_loss=0.3952 critic_loss=74017138642.4889 entropy=17.6241 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 198660] reward=-137374190.8 actor_loss=0.3312 critic_loss=1150357233664.0000 entropy=17.6154 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 198660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489691.7 mean_steps=14.8
|
|
[Episode 198670] reward=-120344424.4 actor_loss=0.3245 critic_loss=83455656500.9655 entropy=17.5911 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 198680] reward=-115855194.2 actor_loss=0.3365 critic_loss=69842568988.4444 entropy=17.5745 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 198680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-487293.6 mean_steps=12.9
|
|
[Episode 198690] reward=-114539330.0 actor_loss=0.3296 critic_loss=74914193408.0000 entropy=17.5713 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 198700] reward=-118421575.0 actor_loss=0.2672 critic_loss=79974543086.9333 entropy=17.5789 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 198700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-497136.6 mean_steps=13.2
|
|
[Episode 198710] reward=-118636816.1 actor_loss=0.2842 critic_loss=75079505510.4000 entropy=17.5708 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 198720] reward=-119107443.6 actor_loss=0.3448 critic_loss=77918715221.3333 entropy=17.5635 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 198720] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-680489.7 mean_steps=10.7
|
|
[Episode 198730] reward=-122066663.1 actor_loss=0.3009 critic_loss=76559326939.4286 entropy=17.5611 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 198740] reward=-119883403.1 actor_loss=0.2321 critic_loss=81608422704.4324 entropy=17.5686 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 198740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-658969.8 mean_steps=12.7
|
|
[Episode 198750] reward=-121873050.3 actor_loss=0.2372 critic_loss=76442794294.3030 entropy=17.5799 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 198760] reward=-115534730.5 actor_loss=0.2555 critic_loss=71148648903.1111 entropy=17.5706 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 198760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-554276.0 mean_steps=14.4
|
|
[Episode 198770] reward=-118445862.5 actor_loss=0.2923 critic_loss=94817147991.7714 entropy=17.5638 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 198780] reward=-122530251.3 actor_loss=0.2345 critic_loss=150103139942.4000 entropy=17.5792 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 198780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592934.9 mean_steps=12.7
|
|
[Episode 198790] reward=-118192113.7 actor_loss=0.2285 critic_loss=75273065922.5600 entropy=17.5851 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 198800] reward=-115870538.9 actor_loss=0.2761 critic_loss=78419048372.1481 entropy=17.5860 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 198800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576307.3 mean_steps=13.6
|
|
[Episode 198810] reward=-118857131.4 actor_loss=0.1641 critic_loss=76521960123.3171 entropy=17.5956 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 198820] reward=-117352627.1 actor_loss=0.3117 critic_loss=94992528869.0526 entropy=17.5990 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 198820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576148.2 mean_steps=13.4
|
|
[Episode 198830] reward=-122586063.2 actor_loss=0.1941 critic_loss=79570928128.0000 entropy=17.6006 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 198840] reward=-119119562.1 actor_loss=0.2842 critic_loss=75276548833.2800 entropy=17.6143 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 198840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407709.6 mean_steps=16.4
|
|
[Episode 198850] reward=-121513676.3 actor_loss=0.2191 critic_loss=76521860846.9333 entropy=17.6027 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 198860] reward=-121905206.9 actor_loss=0.2471 critic_loss=79129818112.0000 entropy=17.6030 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 198860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586589.9 mean_steps=12.9
|
|
[Episode 198870] reward=-115475447.6 actor_loss=0.2701 critic_loss=70045824068.2667 entropy=17.5847 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 198880] reward=-117066777.4 actor_loss=0.3532 critic_loss=74310523667.6923 entropy=17.5783 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 198880] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-599442.4 mean_steps=12.3
|
|
[Episode 198890] reward=-114985940.5 actor_loss=0.2749 critic_loss=73715867556.9778 entropy=17.5722 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 198900] reward=-120301694.4 actor_loss=0.2319 critic_loss=75787034350.9333 entropy=17.5669 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 198900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458211.0 mean_steps=14.8
|
|
[Episode 198910] reward=-118485975.9 actor_loss=0.2417 critic_loss=73848558714.8800 entropy=17.5532 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 198920] reward=-118299933.1 actor_loss=0.3051 critic_loss=78332592362.0571 entropy=17.5634 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 198920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-399468.6 mean_steps=16.2
|
|
[Episode 198930] reward=-115804183.1 actor_loss=0.2875 critic_loss=73483909306.1818 entropy=17.5588 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 198940] reward=-114877161.1 actor_loss=0.3223 critic_loss=72217470249.2903 entropy=17.5442 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 198940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-627540.4 mean_steps=11.9
|
|
[Episode 198950] reward=-118094440.5 actor_loss=0.2796 critic_loss=75018823270.4000 entropy=17.5352 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 198960] reward=-124131529.1 actor_loss=0.2733 critic_loss=85457352203.3778 entropy=17.5385 approx_kl=0.0110 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 198960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452204.2 mean_steps=14.8
|
|
[Episode 198970] reward=-120745182.8 actor_loss=0.3692 critic_loss=76368776396.8000 entropy=17.5260 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 198980] reward=-115783119.4 actor_loss=0.2335 critic_loss=74000948428.8000 entropy=17.5199 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 198980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-637104.0 mean_steps=12.4
|
|
[Episode 198990] reward=-119466411.0 actor_loss=0.2925 critic_loss=96222302208.0000 entropy=17.5131 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 199000] reward=-121233723.4 actor_loss=0.2757 critic_loss=78215318272.0000 entropy=17.5102 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 199000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510698.4 mean_steps=13.3
|
|
[Episode 199010] reward=-115219778.1 actor_loss=0.2078 critic_loss=75741281408.0000 entropy=17.5214 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 199020] reward=-117087896.6 actor_loss=0.2511 critic_loss=76954171255.4667 entropy=17.5272 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 199020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-441642.4 mean_steps=16.3
|
|
[Episode 199030] reward=-122358701.9 actor_loss=0.3109 critic_loss=91845682462.7200 entropy=17.5262 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 199040] reward=-117661583.6 actor_loss=0.3629 critic_loss=83316526680.2759 entropy=17.5197 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 199040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-553728.5 mean_steps=12.4
|
|
[Episode 199050] reward=-118075888.4 actor_loss=0.3159 critic_loss=109957403443.2000 entropy=17.5073 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 199060] reward=-116650428.1 actor_loss=0.2725 critic_loss=70423457154.8445 entropy=17.5148 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 199060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436341.7 mean_steps=15.5
|
|
[Episode 199070] reward=-116913361.5 actor_loss=0.2360 critic_loss=72270313517.5111 entropy=17.5029 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 199080] reward=-119935683.8 actor_loss=0.1846 critic_loss=74559211578.5143 entropy=17.5153 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 199080] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-360526.8 mean_steps=16.9
|
|
[Episode 199090] reward=-125293595.5 actor_loss=0.2611 critic_loss=82619415130.3529 entropy=17.5006 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 199100] reward=-118926404.8 actor_loss=0.3416 critic_loss=79460862313.4118 entropy=17.5063 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 199100] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-317202.9 mean_steps=17.4
|
|
[Episode 199110] reward=-119470351.2 actor_loss=0.3362 critic_loss=104099463168.0000 entropy=17.5151 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 199120] reward=-111254429.1 actor_loss=0.2938 critic_loss=63551760050.6047 entropy=17.5121 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 199120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-445253.0 mean_steps=16.5
|
|
[Episode 199130] reward=-115772014.2 actor_loss=0.3440 critic_loss=78146119326.8965 entropy=17.5215 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 199140] reward=-118756897.5 actor_loss=0.2551 critic_loss=79127864353.0323 entropy=17.5166 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 199140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-628355.6 mean_steps=13.1
|
|
[Episode 199150] reward=-120939191.7 actor_loss=0.2703 critic_loss=87330505185.8824 entropy=17.4976 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 199160] reward=-117352281.5 actor_loss=0.3905 critic_loss=84545832004.2667 entropy=17.4914 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 199160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-420062.4 mean_steps=17.4
|
|
[Episode 199170] reward=-114355224.1 actor_loss=0.3359 critic_loss=86637273322.0571 entropy=17.4854 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 199180] reward=-115154103.2 actor_loss=0.3096 critic_loss=86714148782.0800 entropy=17.4899 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 199180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588340.3 mean_steps=13.7
|
|
[Episode 199190] reward=-122476386.4 actor_loss=0.3181 critic_loss=192399753216.0000 entropy=17.4886 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 199200] reward=-118702154.9 actor_loss=0.3075 critic_loss=80512134409.4815 entropy=17.4845 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 199200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474921.4 mean_steps=14.8
|
|
[Episode 199210] reward=-116248814.5 actor_loss=0.2432 critic_loss=84711044710.4000 entropy=17.4914 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 199220] reward=-122541906.7 actor_loss=0.3096 critic_loss=107051062365.0909 entropy=17.4928 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 199220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-600386.4 mean_steps=13.7
|
|
[Episode 199230] reward=-114412299.6 actor_loss=0.4204 critic_loss=128493626604.3077 entropy=17.4831 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 199240] reward=-127224832.2 actor_loss=0.2794 critic_loss=361145625600.0000 entropy=17.4772 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 199240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588596.2 mean_steps=13.3
|
|
[Episode 199250] reward=-114465442.7 actor_loss=0.4133 critic_loss=86766510262.0444 entropy=17.4661 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 199260] reward=-128131687.7 actor_loss=0.3084 critic_loss=387998219195.7333 entropy=17.4645 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 199260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451974.0 mean_steps=14.6
|
|
[Episode 199270] reward=-114793118.9 actor_loss=0.4229 critic_loss=81749610496.0000 entropy=17.4531 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 199280] reward=-118859608.0 actor_loss=0.2130 critic_loss=78526078429.8667 entropy=17.4530 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 199280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-451554.7 mean_steps=15.9
|
|
[Episode 199290] reward=-117756844.1 actor_loss=0.2712 critic_loss=72585910863.6444 entropy=17.4558 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 199300] reward=-114863978.2 actor_loss=0.3040 critic_loss=70870525838.2222 entropy=17.4613 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 199300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394650.5 mean_steps=15.7
|
|
[Episode 199310] reward=-116173519.5 actor_loss=0.3887 critic_loss=69889833096.5333 entropy=17.4624 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 199320] reward=-116654309.3 actor_loss=0.2799 critic_loss=70281713345.4222 entropy=17.4447 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 199320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-558961.2 mean_steps=13.4
|
|
[Episode 199330] reward=-116604661.7 actor_loss=0.3516 critic_loss=85156054285.4737 entropy=17.4358 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 199340] reward=-119611176.4 actor_loss=0.3226 critic_loss=80084973195.6364 entropy=17.4451 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 199340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-357970.4 mean_steps=16.6
|
|
[Episode 199350] reward=-121864622.2 actor_loss=0.2574 critic_loss=115713323281.0667 entropy=17.4476 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 199360] reward=-115848043.1 actor_loss=0.3695 critic_loss=84843972765.5385 entropy=17.4575 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 199360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510156.5 mean_steps=12.9
|
|
[Episode 199370] reward=-116667451.5 actor_loss=0.3321 critic_loss=84911051434.6667 entropy=17.4462 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 199380] reward=-112059330.8 actor_loss=0.3089 critic_loss=74020280669.6585 entropy=17.4534 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 199380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-416530.1 mean_steps=16.2
|
|
[Episode 199390] reward=-116206113.4 actor_loss=0.2921 critic_loss=71361020359.1111 entropy=17.4481 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 199400] reward=-121755197.2 actor_loss=0.2487 critic_loss=204790424296.7273 entropy=17.4408 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 199400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475026.9 mean_steps=14.9
|
|
[Episode 199410] reward=-128078131.7 actor_loss=0.2666 critic_loss=479832602851.5555 entropy=17.4476 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 199420] reward=-121137608.2 actor_loss=0.2946 critic_loss=81228987286.0690 entropy=17.4528 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 199420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-526020.9 mean_steps=15.3
|
|
[Episode 199430] reward=-115785579.5 actor_loss=0.2812 critic_loss=71023804656.9412 entropy=17.4462 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 199440] reward=-2061709842.8 actor_loss=0.2573 critic_loss=9127336206861744.0000 entropy=17.4519 approx_kl=0.0013 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 199440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496591.4 mean_steps=14.1
|
|
[Episode 199450] reward=-115117456.6 actor_loss=0.3067 critic_loss=72528520704.0000 entropy=17.4562 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 199460] reward=-116637936.1 actor_loss=0.2441 critic_loss=75647525410.1333 entropy=17.4586 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 199460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504787.5 mean_steps=13.8
|
|
[Episode 199470] reward=-116550201.1 actor_loss=0.2863 critic_loss=74031405465.6000 entropy=17.4543 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 199480] reward=-115899222.6 actor_loss=0.3850 critic_loss=73279684039.1111 entropy=17.4461 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 199480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519729.6 mean_steps=14.2
|
|
[Episode 199490] reward=-115349392.0 actor_loss=0.4032 critic_loss=72029600426.6667 entropy=17.4455 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 199500] reward=-118797775.6 actor_loss=0.3961 critic_loss=233598584149.3333 entropy=17.4542 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 199500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383170.6 mean_steps=15.8
|
|
[Episode 199510] reward=-115064275.8 actor_loss=0.3157 critic_loss=71604878108.4444 entropy=17.4488 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 199520] reward=-112583495.2 actor_loss=0.3547 critic_loss=78683211548.4444 entropy=17.4357 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 199520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578655.2 mean_steps=12.7
|
|
[Episode 199530] reward=-115495573.4 actor_loss=0.3118 critic_loss=74936818073.6000 entropy=17.4332 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 199540] reward=-112275605.7 actor_loss=0.3033 critic_loss=79880092250.3529 entropy=17.4537 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 199540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573130.2 mean_steps=13.3
|
|
[Episode 199550] reward=-119459723.2 actor_loss=0.3089 critic_loss=77970626998.8571 entropy=17.4519 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 199560] reward=-112521624.4 actor_loss=0.3181 critic_loss=76396992557.5111 entropy=17.4528 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 199560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525801.6 mean_steps=13.3
|
|
[Episode 199570] reward=-122676646.4 actor_loss=0.2356 critic_loss=181387118681.0435 entropy=17.4565 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 199580] reward=-111890298.1 actor_loss=0.4864 critic_loss=69489313233.4545 entropy=17.4473 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1536 front_blocked=0
|
|
[Eval 199580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385850.4 mean_steps=15.7
|
|
[Episode 199590] reward=-115354889.5 actor_loss=0.3310 critic_loss=76884079238.7368 entropy=17.4487 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 199600] reward=-117605635.8 actor_loss=0.3669 critic_loss=72547243622.4000 entropy=17.4567 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 199600] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-626056.6 mean_steps=12.3
|
|
[Episode 199610] reward=-120489610.0 actor_loss=0.3180 critic_loss=75494145732.9231 entropy=17.4729 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 199620] reward=-120205870.9 actor_loss=0.3617 critic_loss=81356847513.6000 entropy=17.4714 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 199620] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-723198.5 mean_steps=10.8
|
|
[Episode 199630] reward=-129928060.2 actor_loss=0.2366 critic_loss=259737354240.0000 entropy=17.4795 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 199640] reward=-120165077.5 actor_loss=0.2761 critic_loss=75170076094.3590 entropy=17.4850 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 199640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-636285.8 mean_steps=13.1
|
|
[Episode 199650] reward=-116216748.5 actor_loss=0.4280 critic_loss=76630587099.4286 entropy=17.4752 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 199660] reward=-117205248.7 actor_loss=0.3352 critic_loss=73277569797.6889 entropy=17.4716 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 199660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-585884.3 mean_steps=13.5
|
|
[Episode 199670] reward=-104669577.1 actor_loss=0.4175 critic_loss=66568048158.1176 entropy=17.4602 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 199680] reward=-115442244.6 actor_loss=0.3230 critic_loss=68824149385.8462 entropy=17.4753 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 199680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433155.7 mean_steps=15.3
|
|
[Episode 199690] reward=-121689021.4 actor_loss=0.2738 critic_loss=91523802348.3077 entropy=17.4630 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 199700] reward=-117463525.3 actor_loss=0.3229 critic_loss=74410397214.1176 entropy=17.4517 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 199700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491270.8 mean_steps=13.7
|
|
[Episode 199710] reward=-120831620.6 actor_loss=0.2956 critic_loss=74073331419.4286 entropy=17.4399 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 199720] reward=-118377691.6 actor_loss=0.2044 critic_loss=72268838775.4667 entropy=17.4411 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 199720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479693.8 mean_steps=13.6
|
|
[Episode 199730] reward=-117354366.7 actor_loss=0.2936 critic_loss=73941637722.3529 entropy=17.4339 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 199740] reward=-120573298.8 actor_loss=0.3877 critic_loss=77330507889.7778 entropy=17.4329 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Eval 199740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539752.5 mean_steps=14.1
|
|
[Episode 199750] reward=-115319039.0 actor_loss=0.2761 critic_loss=78812183847.8222 entropy=17.4257 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 199760] reward=-122868366.9 actor_loss=0.2852 critic_loss=83109796249.6000 entropy=17.4263 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 199760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474135.2 mean_steps=14.9
|
|
[Episode 199770] reward=-119287901.3 actor_loss=0.2540 critic_loss=80155056947.2000 entropy=17.4238 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 199780] reward=-112582136.0 actor_loss=0.3406 critic_loss=78923073851.0769 entropy=17.4289 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 199780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-555196.9 mean_steps=14.0
|
|
[Episode 199790] reward=-115052014.7 actor_loss=0.2327 critic_loss=81400836096.0000 entropy=17.4214 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 199800] reward=-116332794.8 actor_loss=0.2932 critic_loss=77227528533.3333 entropy=17.4215 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 199800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462716.8 mean_steps=14.4
|
|
[Episode 199810] reward=-122920274.7 actor_loss=0.2604 critic_loss=109746992128.0000 entropy=17.4198 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 199820] reward=-118722795.4 actor_loss=0.3211 critic_loss=78489642715.4286 entropy=17.4205 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 199820] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-194102.8 mean_steps=18.1
|
|
[Episode 199830] reward=-112374266.9 actor_loss=0.3469 critic_loss=69115839829.3333 entropy=17.4257 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 199840] reward=-114498040.1 actor_loss=0.4012 critic_loss=72889608084.2105 entropy=17.4203 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 199840] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-647242.3 mean_steps=11.1
|
|
[Episode 199850] reward=-114042858.5 actor_loss=0.3024 critic_loss=71717185859.3684 entropy=17.4249 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 199860] reward=-114690786.1 actor_loss=0.2776 critic_loss=73020334638.5455 entropy=17.4299 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 199860] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-653918.5 mean_steps=11.2
|
|
[Episode 199870] reward=-119132402.4 actor_loss=0.2645 critic_loss=74198975647.2889 entropy=17.4258 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 199880] reward=-119364945.5 actor_loss=0.3221 critic_loss=84050642534.4000 entropy=17.4210 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 199880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-399346.2 mean_steps=15.8
|
|
[Episode 199890] reward=-118843131.8 actor_loss=0.3040 critic_loss=81027092935.1111 entropy=17.4238 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 199900] reward=-117807197.7 actor_loss=0.3218 critic_loss=73640619680.9143 entropy=17.4224 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 199900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-509691.0 mean_steps=13.1
|
|
[Episode 199910] reward=-116637330.1 actor_loss=0.3454 critic_loss=75980914395.4286 entropy=17.4332 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 199920] reward=-110319447.5 actor_loss=0.2572 critic_loss=64436288807.8222 entropy=17.4533 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 199920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-375662.9 mean_steps=15.8
|
|
[Episode 199930] reward=-109931617.3 actor_loss=0.3773 critic_loss=67877041720.8889 entropy=17.4351 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 199940] reward=-122140357.7 actor_loss=0.2721 critic_loss=82242114304.0000 entropy=17.4398 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 199940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442222.4 mean_steps=15.6
|
|
[Episode 199950] reward=-113392741.5 actor_loss=0.3079 critic_loss=67411639409.7778 entropy=17.4251 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 199960] reward=-113868322.0 actor_loss=0.2765 critic_loss=79110068542.5778 entropy=17.4185 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 199960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537310.1 mean_steps=13.1
|
|
[Episode 199970] reward=-116935867.6 actor_loss=0.2222 critic_loss=73538055627.0345 entropy=17.4372 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 199980] reward=-112011101.6 actor_loss=0.3364 critic_loss=70334125670.4000 entropy=17.4347 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 199980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-590753.1 mean_steps=14.4
|
|
[Episode 199990] reward=-117081546.6 actor_loss=0.2886 critic_loss=70764563023.6444 entropy=17.4348 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 200000] reward=-117802876.8 actor_loss=0.3977 critic_loss=72851437522.4889 entropy=17.4534 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 200000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-585101.5 mean_steps=13.5
|