7842 lines
1.1 MiB
7842 lines
1.1 MiB
nohup: ignoring input
|
|
[Episode 10] reward=-72998586.2 actor_loss=0.3017 critic_loss=138080054272.0000 entropy=4.2500 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1009 front_blocked=0
|
|
[Episode 20] reward=-53663304.6 actor_loss=0.1649 critic_loss=125096317052.1212 entropy=4.2560 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 20] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-659751.9 mean_steps=11.1
|
|
[Episode 30] reward=-67596580.2 actor_loss=0.1236 critic_loss=135641300560.8421 entropy=4.2587 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 40] reward=-49633920.3 actor_loss=0.0966 critic_loss=129638047926.0444 entropy=4.2626 approx_kl=0.0079 kl_stop=0 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 40] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442243.9 mean_steps=14.2
|
|
[Episode 50] reward=-47055867.8 actor_loss=0.1068 critic_loss=123775357542.4000 entropy=4.2568 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 60] reward=-46113177.3 actor_loss=0.1068 critic_loss=121823939606.7556 entropy=4.2502 approx_kl=0.0054 kl_stop=0 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 60] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484432.8 mean_steps=13.9
|
|
[Episode 70] reward=-46698905.7 actor_loss=0.0653 critic_loss=124424508211.2000 entropy=4.2628 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 80] reward=-57590511.7 actor_loss=0.0833 critic_loss=128289309114.8108 entropy=4.2758 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 80] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-525250.4 mean_steps=14.2
|
|
[Episode 90] reward=-44219070.8 actor_loss=0.0416 critic_loss=122204111088.9412 entropy=4.2887 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 100] reward=-77302702.0 actor_loss=0.0807 critic_loss=142857841322.6667 entropy=4.2903 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1009 front_blocked=0
|
|
[Eval 100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-598639.7 mean_steps=12.8
|
|
[Episode 110] reward=-46422296.0 actor_loss=0.0606 critic_loss=122285320192.0000 entropy=4.2979 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 120] reward=-50052675.5 actor_loss=0.0932 critic_loss=122218433974.8571 entropy=4.3092 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-375009.1 mean_steps=14.9
|
|
[Episode 130] reward=-42606032.7 actor_loss=0.0601 critic_loss=119346319484.1212 entropy=4.3054 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Episode 140] reward=-53786573.3 actor_loss=0.0747 critic_loss=125960888891.5349 entropy=4.3121 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-636204.8 mean_steps=12.1
|
|
[Episode 150] reward=-62150816.4 actor_loss=0.0688 critic_loss=133417892475.5862 entropy=4.3150 approx_kl=0.0100 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 160] reward=-51206563.5 actor_loss=0.0895 critic_loss=127842947571.5122 entropy=4.3232 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-333812.4 mean_steps=15.2
|
|
[Episode 170] reward=-50081690.4 actor_loss=0.0519 critic_loss=126633011275.8519 entropy=4.3348 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 180] reward=-56362263.5 actor_loss=0.1066 critic_loss=130564375040.0000 entropy=4.3507 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400099.9 mean_steps=14.2
|
|
[Episode 190] reward=-60468271.4 actor_loss=0.1266 critic_loss=129338251166.4762 entropy=4.3570 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 200] reward=-59245290.5 actor_loss=0.0943 critic_loss=132056779811.3103 entropy=4.3576 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502954.8 mean_steps=13.4
|
|
[Episode 210] reward=-43706327.8 actor_loss=0.0994 critic_loss=121341031671.1724 entropy=4.3593 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 220] reward=-41955441.7 actor_loss=0.1056 critic_loss=120536499814.4000 entropy=4.3749 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526870.4 mean_steps=12.5
|
|
[Episode 230] reward=-56673476.9 actor_loss=0.0769 critic_loss=130400351810.7826 entropy=4.3650 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 240] reward=-46037807.9 actor_loss=0.0824 critic_loss=122714100447.1795 entropy=4.3862 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 240] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-333726.9 mean_steps=15.5
|
|
[Episode 250] reward=-44570915.8 actor_loss=0.0533 critic_loss=121985424203.2941 entropy=4.3874 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 260] reward=-67592261.0 actor_loss=0.1019 critic_loss=133293201817.6000 entropy=4.3925 approx_kl=0.0105 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Eval 260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383040.6 mean_steps=15.2
|
|
[Episode 270] reward=-51418332.7 actor_loss=0.0685 critic_loss=123503266560.0000 entropy=4.3916 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 280] reward=-43717392.2 actor_loss=0.0605 critic_loss=119053747159.0400 entropy=4.4039 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-464633.3 mean_steps=13.3
|
|
[Episode 290] reward=-53731592.6 actor_loss=0.0880 critic_loss=129133550055.6190 entropy=4.4206 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 300] reward=-49159773.9 actor_loss=0.0724 critic_loss=125320039992.8889 entropy=4.4136 approx_kl=0.0101 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378976.0 mean_steps=15.2
|
|
[Episode 310] reward=-48312066.2 actor_loss=0.0827 critic_loss=124921762182.0952 entropy=4.4168 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 320] reward=-53376161.7 actor_loss=0.0957 critic_loss=127305752932.1739 entropy=4.4267 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579570.8 mean_steps=12.1
|
|
[Episode 330] reward=-46968497.9 actor_loss=0.0820 critic_loss=120225463356.2353 entropy=4.4278 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 340] reward=-53077549.0 actor_loss=0.0736 critic_loss=125604483072.0000 entropy=4.4324 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-381021.5 mean_steps=15.4
|
|
[Episode 350] reward=-54568524.0 actor_loss=0.0921 critic_loss=128628627683.5556 entropy=4.4389 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 360] reward=-68228970.6 actor_loss=0.0849 critic_loss=133993314862.5455 entropy=4.4480 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421356.2 mean_steps=14.3
|
|
[Episode 370] reward=-45769179.1 actor_loss=0.0783 critic_loss=121957795157.3333 entropy=4.4490 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 380] reward=-53894899.9 actor_loss=0.0935 critic_loss=126139775337.4118 entropy=4.4635 approx_kl=0.0102 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 380] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-251191.5 mean_steps=15.8
|
|
[Episode 390] reward=-40347186.8 actor_loss=0.0946 critic_loss=113778008808.7273 entropy=4.4677 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 400] reward=-56006298.9 actor_loss=0.0960 critic_loss=126746072157.0909 entropy=4.4847 approx_kl=0.0105 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-385721.4 mean_steps=14.1
|
|
[Episode 410] reward=-52433080.8 actor_loss=0.1099 critic_loss=124657345957.6471 entropy=4.4996 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 420] reward=-47826936.5 actor_loss=0.0597 critic_loss=120730056824.4706 entropy=4.5029 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 420] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-699170.2 mean_steps=10.6
|
|
[Episode 430] reward=-41801159.8 actor_loss=0.0660 critic_loss=122075588697.0435 entropy=4.5102 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 440] reward=-51852845.5 actor_loss=0.0697 critic_loss=126575427584.0000 entropy=4.5110 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555929.7 mean_steps=12.9
|
|
[Episode 450] reward=-48562589.4 actor_loss=0.0538 critic_loss=118882009770.6667 entropy=4.5239 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 460] reward=-55234582.7 actor_loss=0.1129 critic_loss=126805633954.9091 entropy=4.5206 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-444017.8 mean_steps=15.4
|
|
[Episode 470] reward=-57283386.9 actor_loss=0.0813 critic_loss=127480594822.0952 entropy=4.5199 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 480] reward=-45270585.6 actor_loss=0.0728 critic_loss=121532391424.0000 entropy=4.5294 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-489264.6 mean_steps=12.4
|
|
[Episode 490] reward=-46300794.5 actor_loss=0.0619 critic_loss=123200349481.2903 entropy=4.5341 approx_kl=0.0100 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 500] reward=-43910019.8 actor_loss=0.0761 critic_loss=117153194831.4483 entropy=4.5294 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-604429.2 mean_steps=11.5
|
|
[Episode 510] reward=-57350371.1 actor_loss=0.0609 critic_loss=130192431706.3529 entropy=4.5404 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 520] reward=-53688062.4 actor_loss=0.0760 critic_loss=125707627315.2000 entropy=4.5543 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-320757.1 mean_steps=15.4
|
|
[Episode 530] reward=-56823920.7 actor_loss=0.0576 critic_loss=130341251229.5385 entropy=4.5522 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 540] reward=-48132661.0 actor_loss=0.0631 critic_loss=124058215936.0000 entropy=4.5613 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474459.6 mean_steps=13.2
|
|
[Episode 550] reward=-47221935.5 actor_loss=0.0753 critic_loss=124212877393.9200 entropy=4.5633 approx_kl=0.0097 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 560] reward=-56484280.6 actor_loss=0.0670 critic_loss=128806450972.4444 entropy=4.5733 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430183.1 mean_steps=14.4
|
|
[Episode 570] reward=-49995150.6 actor_loss=0.0813 critic_loss=120400907702.8571 entropy=4.5912 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 580] reward=-66161305.0 actor_loss=0.0508 critic_loss=134859467264.0000 entropy=4.5949 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 580] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-320377.0 mean_steps=15.4
|
|
[Episode 590] reward=-47910004.0 actor_loss=0.0624 critic_loss=123571607875.3684 entropy=4.6140 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 600] reward=-51850694.6 actor_loss=0.0520 critic_loss=124033825698.9091 entropy=4.6179 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 600] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-641574.6 mean_steps=11.1
|
|
[Episode 610] reward=-61692198.6 actor_loss=0.0718 critic_loss=129761947739.0222 entropy=4.6264 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 620] reward=-64394874.0 actor_loss=0.0828 critic_loss=135451265469.2174 entropy=4.6340 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436043.0 mean_steps=14.2
|
|
[Episode 630] reward=-63710481.2 actor_loss=0.0667 critic_loss=138342157880.8889 entropy=4.6394 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 640] reward=-49855998.0 actor_loss=0.0785 critic_loss=122099866009.6000 entropy=4.6499 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-338261.8 mean_steps=15.4
|
|
[Episode 650] reward=-61077476.6 actor_loss=0.0731 critic_loss=128685489902.9333 entropy=4.6632 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 660] reward=-66995918.8 actor_loss=0.0751 critic_loss=134115317880.4706 entropy=4.6737 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540761.2 mean_steps=12.4
|
|
[Episode 670] reward=-44843363.7 actor_loss=0.0564 critic_loss=122180784128.0000 entropy=4.6820 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 680] reward=-44124394.5 actor_loss=0.0982 critic_loss=121139932943.0588 entropy=4.6908 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480481.8 mean_steps=13.8
|
|
[Episode 690] reward=-50991151.3 actor_loss=0.0791 critic_loss=128689696452.9231 entropy=4.7021 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 700] reward=-57815228.2 actor_loss=0.0670 critic_loss=126640117917.5385 entropy=4.7081 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-608107.6 mean_steps=11.3
|
|
[Episode 710] reward=-57534415.2 actor_loss=0.0509 critic_loss=127584470016.0000 entropy=4.7070 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 720] reward=-53784085.8 actor_loss=0.0767 critic_loss=125846335728.9412 entropy=4.7197 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520095.2 mean_steps=13.3
|
|
[Episode 730] reward=-49154441.9 actor_loss=0.1085 critic_loss=121495267800.6154 entropy=4.7255 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 740] reward=-56145577.6 actor_loss=0.0856 critic_loss=125386126034.8235 entropy=4.7338 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-426957.1 mean_steps=13.8
|
|
[Episode 750] reward=-49157921.6 actor_loss=0.0754 critic_loss=122083749515.6364 entropy=4.7397 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 760] reward=-53667293.2 actor_loss=0.0760 critic_loss=123926680462.2222 entropy=4.7559 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-599224.7 mean_steps=12.9
|
|
[Episode 770] reward=-48659685.1 actor_loss=0.0455 critic_loss=122377802043.0769 entropy=4.7593 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 780] reward=-50780690.8 actor_loss=0.0666 critic_loss=124193601693.5385 entropy=4.7639 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453661.8 mean_steps=13.6
|
|
[Episode 790] reward=-64936869.5 actor_loss=0.0943 critic_loss=135974655317.3333 entropy=4.7687 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0977 front_blocked=0
|
|
[Episode 800] reward=-53383371.7 actor_loss=0.1071 critic_loss=123379683012.9231 entropy=4.7647 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-403145.7 mean_steps=14.9
|
|
[Episode 810] reward=-48200015.8 actor_loss=0.0662 critic_loss=122403719577.6000 entropy=4.7720 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 820] reward=-55990425.0 actor_loss=0.0658 critic_loss=128459384229.6471 entropy=4.7889 approx_kl=0.0097 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-416908.9 mean_steps=15.6
|
|
[Episode 830] reward=-45098262.0 actor_loss=0.0649 critic_loss=120583621409.3913 entropy=4.7931 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 840] reward=-52869078.0 actor_loss=0.0775 critic_loss=124161526232.6154 entropy=4.8025 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-631936.7 mean_steps=11.8
|
|
[Episode 850] reward=-55861209.1 actor_loss=0.0838 critic_loss=124002487864.8889 entropy=4.8060 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 860] reward=-60712468.0 actor_loss=0.0742 critic_loss=130732592090.0741 entropy=4.8154 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 860] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-268618.5 mean_steps=15.5
|
|
[Episode 870] reward=-44006541.8 actor_loss=0.0528 critic_loss=122740209956.5714 entropy=4.8227 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 880] reward=-54778212.9 actor_loss=0.0896 critic_loss=124946205549.7143 entropy=4.8347 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409136.3 mean_steps=14.9
|
|
[Episode 890] reward=-44683055.0 actor_loss=0.0692 critic_loss=123374749468.4444 entropy=4.8421 approx_kl=0.0098 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 900] reward=-56283325.0 actor_loss=0.0695 critic_loss=128001324373.3333 entropy=4.8477 approx_kl=0.0099 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467749.8 mean_steps=13.7
|
|
[Episode 910] reward=-42465155.6 actor_loss=0.0484 critic_loss=120555453480.9600 entropy=4.8575 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 920] reward=-44798283.5 actor_loss=0.0716 critic_loss=117414117376.0000 entropy=4.8627 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433326.1 mean_steps=14.3
|
|
[Episode 930] reward=-53948295.1 actor_loss=0.0632 critic_loss=126252388352.0000 entropy=4.8628 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 940] reward=-44635618.6 actor_loss=0.0807 critic_loss=118977285928.4211 entropy=4.8698 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540156.2 mean_steps=13.6
|
|
[Episode 950] reward=-46682761.8 actor_loss=0.0526 critic_loss=121637163248.9412 entropy=4.8708 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 960] reward=-40770140.0 actor_loss=0.0452 critic_loss=117916856320.0000 entropy=4.8739 approx_kl=0.0098 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 960] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-217438.0 mean_steps=16.1
|
|
[Episode 970] reward=-46426240.7 actor_loss=0.0914 critic_loss=124566997219.5556 entropy=4.8816 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 980] reward=-50487443.2 actor_loss=0.0640 critic_loss=122242542411.2941 entropy=4.8845 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508337.4 mean_steps=13.1
|
|
[Episode 990] reward=-52526183.1 actor_loss=0.0500 critic_loss=123104629853.0909 entropy=4.8911 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 1000] reward=-52166669.9 actor_loss=0.0536 critic_loss=127572502118.4000 entropy=4.8838 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 1000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-491245.6 mean_steps=12.1
|
|
[Episode 1010] reward=-50326575.4 actor_loss=0.0552 critic_loss=121800408502.8571 entropy=4.8869 approx_kl=0.0109 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 1020] reward=-56215026.5 actor_loss=0.0718 critic_loss=131672807316.2105 entropy=4.8975 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 1020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490199.6 mean_steps=13.9
|
|
[Episode 1030] reward=-48440898.7 actor_loss=0.0376 critic_loss=123637407451.4286 entropy=4.9175 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 1040] reward=-62251597.4 actor_loss=0.1034 critic_loss=134940279239.1111 entropy=4.9164 approx_kl=0.0104 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 1040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-404290.5 mean_steps=15.0
|
|
[Episode 1050] reward=-51213887.9 actor_loss=0.0970 critic_loss=124047354217.4118 entropy=4.9220 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 1060] reward=-48330299.9 actor_loss=0.0660 critic_loss=120712244428.8000 entropy=4.9286 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 1060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409924.1 mean_steps=14.2
|
|
[Episode 1070] reward=-49915024.8 actor_loss=0.0772 critic_loss=123853767436.1905 entropy=4.9424 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 1080] reward=-46696373.4 actor_loss=0.0532 critic_loss=121139769070.9333 entropy=4.9394 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 1080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-520272.2 mean_steps=11.8
|
|
[Episode 1090] reward=-56079126.5 actor_loss=0.0821 critic_loss=127425652872.5333 entropy=4.9511 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 1100] reward=-60148571.7 actor_loss=0.0546 critic_loss=132377950021.8182 entropy=4.9630 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 1100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-410335.3 mean_steps=13.3
|
|
[Episode 1110] reward=-55155575.2 actor_loss=0.0718 critic_loss=124707647247.0588 entropy=4.9709 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 1120] reward=-52932036.7 actor_loss=0.0595 critic_loss=128713842331.8261 entropy=4.9836 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 1120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425060.1 mean_steps=14.4
|
|
[Episode 1130] reward=-56095608.9 actor_loss=0.0730 critic_loss=127203219968.0000 entropy=4.9946 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 1140] reward=-57347510.2 actor_loss=0.1094 critic_loss=127794554880.0000 entropy=5.0051 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 1140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486891.6 mean_steps=13.1
|
|
[Episode 1150] reward=-49416311.9 actor_loss=0.0660 critic_loss=123019174518.1538 entropy=5.0136 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 1160] reward=-66028337.8 actor_loss=0.0543 critic_loss=131784529830.9565 entropy=5.0251 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 1160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-326710.3 mean_steps=15.4
|
|
[Episode 1170] reward=-53324782.0 actor_loss=0.0929 critic_loss=127227828224.0000 entropy=5.0236 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 1180] reward=-54982917.0 actor_loss=0.0686 critic_loss=127069949711.0588 entropy=5.0357 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 1180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538851.7 mean_steps=13.7
|
|
[Episode 1190] reward=-39580903.6 actor_loss=0.0661 critic_loss=112677232996.1739 entropy=5.0394 approx_kl=0.0099 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 1200] reward=-45700343.1 actor_loss=0.0529 critic_loss=124833938090.6667 entropy=5.0373 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 1200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-451248.7 mean_steps=13.1
|
|
[Episode 1210] reward=-57256411.4 actor_loss=0.1005 critic_loss=128235054019.7647 entropy=5.0403 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 1220] reward=-51509979.2 actor_loss=0.0717 critic_loss=124548089856.0000 entropy=5.0509 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 1220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448950.9 mean_steps=14.5
|
|
[Episode 1230] reward=-64815360.5 actor_loss=0.0911 critic_loss=134339378029.7143 entropy=5.0692 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 1240] reward=-49635609.3 actor_loss=0.0846 critic_loss=125225627921.0667 entropy=5.0722 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 1240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502372.5 mean_steps=12.8
|
|
[Episode 1250] reward=-44188636.0 actor_loss=0.0768 critic_loss=119677715251.2000 entropy=5.0791 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 1260] reward=-49508779.6 actor_loss=0.0583 critic_loss=122989072232.2963 entropy=5.0855 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 1260] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-640039.3 mean_steps=11.7
|
|
[Episode 1270] reward=-62964799.9 actor_loss=0.0928 critic_loss=131676798619.8261 entropy=5.0967 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 1280] reward=-60996379.5 actor_loss=0.0871 critic_loss=125743380187.4286 entropy=5.1112 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 1280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429937.4 mean_steps=14.2
|
|
[Episode 1290] reward=-50021769.2 actor_loss=0.0779 critic_loss=125852279125.3333 entropy=5.1176 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 1300] reward=-49289072.4 actor_loss=0.0761 critic_loss=123182530244.9231 entropy=5.1326 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 1300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-381000.0 mean_steps=14.1
|
|
[Episode 1310] reward=-49804141.6 actor_loss=0.0666 critic_loss=122503423772.4444 entropy=5.1344 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 1320] reward=-54338796.6 actor_loss=0.0701 critic_loss=127756262576.5517 entropy=5.1306 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 1320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458658.6 mean_steps=14.3
|
|
[Episode 1330] reward=-63912818.8 actor_loss=0.0952 critic_loss=131328620771.5556 entropy=5.1432 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Episode 1340] reward=-53073687.4 actor_loss=0.1006 critic_loss=126879496794.3529 entropy=5.1508 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 1340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551144.9 mean_steps=12.8
|
|
[Episode 1350] reward=-46861028.6 actor_loss=0.0559 critic_loss=122511516360.3478 entropy=5.1654 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 1360] reward=-44578089.0 actor_loss=0.0746 critic_loss=116022504106.6667 entropy=5.1689 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 1360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520783.4 mean_steps=13.2
|
|
[Episode 1370] reward=-56026278.5 actor_loss=0.0594 critic_loss=124577463713.1852 entropy=5.1868 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 1380] reward=-47447530.6 actor_loss=0.0668 critic_loss=121302119046.7368 entropy=5.1956 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 1380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-445316.9 mean_steps=12.8
|
|
[Episode 1390] reward=-52044807.5 actor_loss=0.0656 critic_loss=120879124480.0000 entropy=5.2029 approx_kl=0.0095 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 1400] reward=-45298107.2 actor_loss=0.0429 critic_loss=119380344504.3200 entropy=5.2079 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 1400] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-327415.5 mean_steps=14.9
|
|
[Episode 1410] reward=-60512899.8 actor_loss=0.0425 critic_loss=129239048548.1739 entropy=5.2113 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 1420] reward=-54732106.4 actor_loss=0.0673 critic_loss=128298629012.2105 entropy=5.2204 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 1420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431378.6 mean_steps=13.2
|
|
[Episode 1430] reward=-65731794.9 actor_loss=0.0639 critic_loss=136226406809.6000 entropy=5.2242 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 1440] reward=-54597841.8 actor_loss=0.1042 critic_loss=129503659874.4615 entropy=5.2385 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 1440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440524.5 mean_steps=13.5
|
|
[Episode 1450] reward=-53351059.1 actor_loss=0.0763 critic_loss=120099734621.0909 entropy=5.2444 approx_kl=0.0095 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 1460] reward=-59623620.7 actor_loss=0.0671 critic_loss=131624606573.7143 entropy=5.2547 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 1460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-656165.3 mean_steps=11.6
|
|
[Episode 1470] reward=-64022801.5 actor_loss=0.0777 critic_loss=132515345294.2222 entropy=5.2612 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 1480] reward=-59135802.5 actor_loss=0.1106 critic_loss=126075169611.2941 entropy=5.2651 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Eval 1480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-424140.7 mean_steps=14.8
|
|
[Episode 1490] reward=-46993889.7 actor_loss=0.0724 critic_loss=123577821476.5714 entropy=5.2663 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 1500] reward=-57804527.2 actor_loss=0.0779 critic_loss=130188445137.4545 entropy=5.2793 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 1500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498907.3 mean_steps=12.9
|
|
[Episode 1510] reward=-41109528.5 actor_loss=0.0414 critic_loss=117799376668.4444 entropy=5.2853 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 1520] reward=-41927614.8 actor_loss=0.0666 critic_loss=120115623526.4000 entropy=5.2900 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 1520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-410926.9 mean_steps=13.1
|
|
[Episode 1530] reward=-52949221.6 actor_loss=0.0570 critic_loss=124324159488.0000 entropy=5.2922 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 1540] reward=-58050963.8 actor_loss=0.0554 critic_loss=132056897290.2400 entropy=5.2874 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 1540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-617339.9 mean_steps=12.3
|
|
[Episode 1550] reward=-49178679.7 actor_loss=0.0629 critic_loss=124195752072.5333 entropy=5.2856 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 1560] reward=-41178467.6 actor_loss=0.0828 critic_loss=117855073962.6667 entropy=5.2891 approx_kl=0.0097 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 1560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624294.2 mean_steps=12.3
|
|
[Episode 1570] reward=-45620652.5 actor_loss=0.0723 critic_loss=116498778404.5714 entropy=5.2982 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 1580] reward=-47208027.9 actor_loss=0.0775 critic_loss=120627133644.8000 entropy=5.3023 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1580] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-366150.3 mean_steps=15.2
|
|
[Episode 1590] reward=-52395285.3 actor_loss=0.0719 critic_loss=124861188778.6667 entropy=5.3067 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 1600] reward=-63873217.5 actor_loss=0.0681 critic_loss=133297030680.3810 entropy=5.3096 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1600] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-576377.8 mean_steps=11.1
|
|
[Episode 1610] reward=-50191749.3 actor_loss=0.0757 critic_loss=122547012547.7647 entropy=5.3154 approx_kl=0.0109 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 1620] reward=-71984060.6 actor_loss=0.0813 critic_loss=137758454930.2857 entropy=5.3292 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Eval 1620] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-353674.1 mean_steps=15.9
|
|
[Episode 1630] reward=-63965774.3 actor_loss=0.0757 critic_loss=128789705386.6667 entropy=5.3270 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 1640] reward=-53106096.1 actor_loss=0.0545 critic_loss=127326730532.5714 entropy=5.3245 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 1640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-386520.0 mean_steps=14.0
|
|
[Episode 1650] reward=-56779038.5 actor_loss=0.0896 critic_loss=127133025219.7647 entropy=5.3231 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 1660] reward=-58765442.0 actor_loss=0.0808 critic_loss=125918771712.0000 entropy=5.3227 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 1660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-682718.0 mean_steps=11.9
|
|
[Episode 1670] reward=-62053449.1 actor_loss=0.0635 critic_loss=131275133168.9412 entropy=5.3233 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 1680] reward=-49726960.2 actor_loss=0.0628 critic_loss=125194938660.5714 entropy=5.3269 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 1680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526294.4 mean_steps=12.3
|
|
[Episode 1690] reward=-48097385.5 actor_loss=0.0859 critic_loss=117175851495.6190 entropy=5.3294 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 1700] reward=-47786809.0 actor_loss=0.0706 critic_loss=121594341052.6316 entropy=5.3363 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 1700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492691.6 mean_steps=13.1
|
|
[Episode 1710] reward=-48638459.4 actor_loss=0.0655 critic_loss=120386072791.5789 entropy=5.3349 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 1720] reward=-54549017.1 actor_loss=0.0998 critic_loss=120943239168.0000 entropy=5.3462 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 1720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543718.1 mean_steps=12.8
|
|
[Episode 1730] reward=-53075255.0 actor_loss=0.0671 critic_loss=126477641318.4000 entropy=5.3427 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 1740] reward=-57652317.4 actor_loss=0.0704 critic_loss=128345598882.9091 entropy=5.3445 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 1740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599723.9 mean_steps=12.3
|
|
[Episode 1750] reward=-58018890.7 actor_loss=0.0698 critic_loss=126060673347.3684 entropy=5.3630 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 1760] reward=-57919425.7 actor_loss=0.0487 critic_loss=126673139939.5556 entropy=5.3669 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 1760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432778.2 mean_steps=14.1
|
|
[Episode 1770] reward=-55265240.4 actor_loss=0.0556 critic_loss=123570158871.2727 entropy=5.3771 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 1780] reward=-47803506.0 actor_loss=0.0613 critic_loss=121406901816.8889 entropy=5.3835 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 1780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-636403.3 mean_steps=11.4
|
|
[Episode 1790] reward=-61144283.8 actor_loss=0.0757 critic_loss=131039108763.8261 entropy=5.3986 approx_kl=0.0100 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 1800] reward=-53394050.9 actor_loss=0.0675 critic_loss=124766785536.0000 entropy=5.3997 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 1800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451151.7 mean_steps=14.4
|
|
[Episode 1810] reward=-55595871.3 actor_loss=0.0588 critic_loss=129518106487.4667 entropy=5.3976 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 1820] reward=-48927408.8 actor_loss=0.0355 critic_loss=122573973690.1818 entropy=5.3943 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 1820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-522487.4 mean_steps=12.1
|
|
[Episode 1830] reward=-64994437.7 actor_loss=0.0672 critic_loss=133571438933.3333 entropy=5.4012 approx_kl=0.0099 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 1840] reward=-53725517.7 actor_loss=0.0576 critic_loss=123661685009.0667 entropy=5.4204 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 1840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-621991.6 mean_steps=11.6
|
|
[Episode 1850] reward=-51136283.9 actor_loss=0.0746 critic_loss=123396656878.9333 entropy=5.4370 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 1860] reward=-53111240.9 actor_loss=0.0679 critic_loss=120445360314.1818 entropy=5.4466 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 1860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442426.0 mean_steps=13.6
|
|
[Episode 1870] reward=-40262401.8 actor_loss=0.0463 critic_loss=118764973624.8889 entropy=5.4574 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 1880] reward=-44866848.9 actor_loss=0.0447 critic_loss=117750135739.7333 entropy=5.4596 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 1880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468068.9 mean_steps=12.7
|
|
[Episode 1890] reward=-60527872.4 actor_loss=0.0747 critic_loss=129819893564.9524 entropy=5.4649 approx_kl=0.0102 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 1900] reward=-53277331.1 actor_loss=0.0830 critic_loss=123930410188.8000 entropy=5.4668 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 1900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515893.8 mean_steps=12.3
|
|
[Episode 1910] reward=-47651528.4 actor_loss=0.0553 critic_loss=121196045251.7647 entropy=5.4698 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 1920] reward=-63778981.3 actor_loss=0.0847 critic_loss=135477977088.0000 entropy=5.4792 approx_kl=0.0100 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 1920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459036.4 mean_steps=14.0
|
|
[Episode 1930] reward=-46569590.2 actor_loss=0.0541 critic_loss=121250684245.3333 entropy=5.4925 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 1940] reward=-47666215.5 actor_loss=0.0460 critic_loss=121987912499.2000 entropy=5.4944 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 1940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-557729.3 mean_steps=13.3
|
|
[Episode 1950] reward=-71290411.3 actor_loss=0.0805 critic_loss=139036095938.5600 entropy=5.4970 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 1960] reward=-45824195.3 actor_loss=0.0582 critic_loss=119961156769.6842 entropy=5.4973 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 1960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-545310.8 mean_steps=11.9
|
|
[Episode 1970] reward=-62561995.8 actor_loss=0.0921 critic_loss=128791701865.4118 entropy=5.5091 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 1980] reward=-58051858.8 actor_loss=0.0787 critic_loss=126520387993.6000 entropy=5.5143 approx_kl=0.0102 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 1980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431468.1 mean_steps=13.2
|
|
[Episode 1990] reward=-65283626.6 actor_loss=0.0801 critic_loss=134127516330.6667 entropy=5.5151 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 2000] reward=-59229309.0 actor_loss=0.0681 critic_loss=127130049649.7778 entropy=5.5150 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 2000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524122.8 mean_steps=12.9
|
|
[Episode 2010] reward=-54596232.0 actor_loss=0.0764 critic_loss=120740965677.1765 entropy=5.5240 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 2020] reward=-58086157.4 actor_loss=0.0829 critic_loss=129013109760.0000 entropy=5.5275 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 2020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564077.5 mean_steps=12.5
|
|
[Episode 2030] reward=-54219025.9 actor_loss=0.0640 critic_loss=128521652410.1818 entropy=5.5349 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 2040] reward=-58614234.5 actor_loss=0.1075 critic_loss=128035455795.2000 entropy=5.5398 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 2040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484076.9 mean_steps=13.6
|
|
[Episode 2050] reward=-55922278.5 actor_loss=0.0664 critic_loss=128284885955.7647 entropy=5.5543 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 2060] reward=-55627108.2 actor_loss=0.1057 critic_loss=126244711992.8889 entropy=5.5565 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 2060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521897.2 mean_steps=13.0
|
|
[Episode 2070] reward=-59166313.8 actor_loss=0.0691 critic_loss=123912126464.0000 entropy=5.5664 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 2080] reward=-47058657.2 actor_loss=0.0472 critic_loss=120296501521.0667 entropy=5.5704 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 2080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-629397.2 mean_steps=11.3
|
|
[Episode 2090] reward=-53005608.5 actor_loss=0.0677 critic_loss=124603018519.2727 entropy=5.5808 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 2100] reward=-47790216.3 actor_loss=0.0630 critic_loss=120824129828.5714 entropy=5.5829 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 2100] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-278494.3 mean_steps=16.1
|
|
[Episode 2110] reward=-43842337.8 actor_loss=0.0559 critic_loss=115091411889.2308 entropy=5.5917 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 2120] reward=-43596699.8 actor_loss=0.0524 critic_loss=119212209018.4348 entropy=5.6008 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 2120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531036.3 mean_steps=13.1
|
|
[Episode 2130] reward=-57044803.0 actor_loss=0.0657 critic_loss=125174392711.5294 entropy=5.6090 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 2140] reward=-54491939.9 actor_loss=0.0661 critic_loss=121330810880.0000 entropy=5.6124 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 2140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-345087.8 mean_steps=14.3
|
|
[Episode 2150] reward=-49282883.8 actor_loss=0.0859 critic_loss=119696324765.5385 entropy=5.6165 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 2160] reward=-55079825.5 actor_loss=0.0533 critic_loss=125475479552.0000 entropy=5.6118 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 2160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-521655.4 mean_steps=12.1
|
|
[Episode 2170] reward=-51682578.3 actor_loss=0.0568 critic_loss=123632576804.5714 entropy=5.6120 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 2180] reward=-50020335.4 actor_loss=0.0578 critic_loss=118910512429.1765 entropy=5.6193 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 2180] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-331650.6 mean_steps=15.6
|
|
[Episode 2190] reward=-53706890.3 actor_loss=0.0842 critic_loss=122928294297.6000 entropy=5.6267 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 2200] reward=-47577051.3 actor_loss=0.0615 critic_loss=124569705403.7333 entropy=5.6298 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 2200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437280.8 mean_steps=13.8
|
|
[Episode 2210] reward=-56020259.5 actor_loss=0.0681 critic_loss=122930838771.8095 entropy=5.6392 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 2220] reward=-58043531.3 actor_loss=0.0794 critic_loss=124776720156.4444 entropy=5.6529 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 2220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399930.8 mean_steps=13.9
|
|
[Episode 2230] reward=-46026652.1 actor_loss=0.0386 critic_loss=117146306402.4615 entropy=5.6664 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 2240] reward=-43728121.1 actor_loss=0.0380 critic_loss=119278326897.7778 entropy=5.6842 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 2240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485097.6 mean_steps=13.4
|
|
[Episode 2250] reward=-46766134.2 actor_loss=0.0560 critic_loss=118027375838.6087 entropy=5.6791 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 2260] reward=-49259928.2 actor_loss=0.0653 critic_loss=122330059697.2308 entropy=5.6887 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 2260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-401076.8 mean_steps=13.9
|
|
[Episode 2270] reward=-51707862.9 actor_loss=0.0567 critic_loss=126868030532.2667 entropy=5.6916 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 2280] reward=-50917369.7 actor_loss=0.0687 critic_loss=121718516814.7692 entropy=5.7047 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 2280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493542.7 mean_steps=12.9
|
|
[Episode 2290] reward=-65662731.7 actor_loss=0.0478 critic_loss=136848515072.0000 entropy=5.7043 approx_kl=0.0100 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 2300] reward=-52866089.3 actor_loss=0.0648 critic_loss=125284155938.1333 entropy=5.7171 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 2300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563809.8 mean_steps=12.5
|
|
[Episode 2310] reward=-59584297.9 actor_loss=0.0742 critic_loss=124862720986.0741 entropy=5.7241 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 2320] reward=-45547280.5 actor_loss=0.0509 critic_loss=115496971342.7692 entropy=5.7354 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 2320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451483.2 mean_steps=13.7
|
|
[Episode 2330] reward=-54550217.7 actor_loss=0.0734 critic_loss=120158229299.2000 entropy=5.7417 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 2340] reward=-49445964.1 actor_loss=0.0929 critic_loss=120717082996.3636 entropy=5.7452 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 2340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397963.5 mean_steps=13.9
|
|
[Episode 2350] reward=-41822733.6 actor_loss=0.0589 critic_loss=115053989888.0000 entropy=5.7522 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 2360] reward=-59186246.8 actor_loss=0.0746 critic_loss=128392165785.6000 entropy=5.7539 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 2360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-505350.6 mean_steps=13.8
|
|
[Episode 2370] reward=-49223740.9 actor_loss=0.0336 critic_loss=123399961531.7333 entropy=5.7582 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 2380] reward=-48647864.6 actor_loss=0.0387 critic_loss=121061440625.7778 entropy=5.7571 approx_kl=0.0107 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 2380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-425649.6 mean_steps=13.6
|
|
[Episode 2390] reward=-54606274.0 actor_loss=0.0666 critic_loss=121937474901.3333 entropy=5.7624 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 2400] reward=-47723545.6 actor_loss=0.0604 critic_loss=117984891997.0909 entropy=5.7648 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 2400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491961.3 mean_steps=13.9
|
|
[Episode 2410] reward=-41029741.9 actor_loss=0.0717 critic_loss=116818416298.6667 entropy=5.7752 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 2420] reward=-46435014.6 actor_loss=0.0582 critic_loss=117160464699.0769 entropy=5.7799 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 2420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-593909.5 mean_steps=11.2
|
|
[Episode 2430] reward=-58267478.1 actor_loss=0.0286 critic_loss=127855797452.8000 entropy=5.7922 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 2440] reward=-50833007.1 actor_loss=0.0475 critic_loss=120766527780.5714 entropy=5.8017 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 2440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446555.9 mean_steps=13.5
|
|
[Episode 2450] reward=-46418684.3 actor_loss=0.0757 critic_loss=120126363461.8182 entropy=5.8074 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 2460] reward=-47890628.2 actor_loss=0.0634 critic_loss=117143723716.9231 entropy=5.8152 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 2460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-629484.8 mean_steps=11.2
|
|
[Episode 2470] reward=-58328320.1 actor_loss=0.0764 critic_loss=127853143740.6316 entropy=5.8176 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 2480] reward=-54254777.7 actor_loss=0.0913 critic_loss=122718309814.8571 entropy=5.8253 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Eval 2480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-636676.5 mean_steps=12.2
|
|
[Episode 2490] reward=-50251094.8 actor_loss=0.0635 critic_loss=121231186478.5455 entropy=5.8269 approx_kl=0.0102 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 2500] reward=-58822492.7 actor_loss=0.0881 critic_loss=127297187840.0000 entropy=5.8276 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 2500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467885.5 mean_steps=13.3
|
|
[Episode 2510] reward=-38812720.2 actor_loss=0.0565 critic_loss=111376931418.3529 entropy=5.8359 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 2520] reward=-57869941.1 actor_loss=0.0519 critic_loss=124164118528.0000 entropy=5.8415 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 2520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-464006.6 mean_steps=12.1
|
|
[Episode 2530] reward=-62460796.0 actor_loss=0.0894 critic_loss=129499908778.6667 entropy=5.8429 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 2540] reward=-54432812.8 actor_loss=0.0812 critic_loss=122320874797.1765 entropy=5.8556 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 2540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624755.0 mean_steps=12.2
|
|
[Episode 2550] reward=-49505362.1 actor_loss=0.0476 critic_loss=118994891124.3636 entropy=5.8700 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 2560] reward=-54998862.3 actor_loss=0.0463 critic_loss=122458572572.4444 entropy=5.8761 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 2560] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-367739.3 mean_steps=15.6
|
|
[Episode 2570] reward=-52582366.2 actor_loss=0.0723 critic_loss=117959608506.1818 entropy=5.8831 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 2580] reward=-54587067.4 actor_loss=0.0773 critic_loss=122779860992.0000 entropy=5.8940 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 2580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568072.9 mean_steps=12.8
|
|
[Episode 2590] reward=-41851441.2 actor_loss=0.0674 critic_loss=113538373252.7407 entropy=5.9008 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 2600] reward=-48176231.2 actor_loss=0.0473 critic_loss=119117508608.0000 entropy=5.9065 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 2600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-380847.9 mean_steps=14.8
|
|
[Episode 2610] reward=-58655732.5 actor_loss=0.0726 critic_loss=126780493368.8889 entropy=5.9108 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 2620] reward=-43004360.1 actor_loss=0.0494 critic_loss=114958352384.0000 entropy=5.9156 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 2620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417690.6 mean_steps=13.9
|
|
[Episode 2630] reward=-40354121.3 actor_loss=0.0533 critic_loss=113636621926.4000 entropy=5.9203 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 2640] reward=-37012942.8 actor_loss=0.0411 critic_loss=112323196928.0000 entropy=5.9232 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 2640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-320326.4 mean_steps=15.1
|
|
[Episode 2650] reward=-49660120.4 actor_loss=0.0565 critic_loss=122344435712.0000 entropy=5.9187 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 2660] reward=-42574484.2 actor_loss=0.0445 critic_loss=116789840749.7143 entropy=5.9230 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 2660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-458533.6 mean_steps=12.8
|
|
[Episode 2670] reward=-44059319.9 actor_loss=0.0409 critic_loss=114987164330.6667 entropy=5.9265 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 2680] reward=-46457820.0 actor_loss=0.0458 critic_loss=117283459794.8235 entropy=5.9318 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 2680] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-648032.7 mean_steps=11.2
|
|
[Episode 2690] reward=-57763153.0 actor_loss=0.0876 critic_loss=126983837923.5556 entropy=5.9414 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 2700] reward=-64716977.9 actor_loss=0.0848 critic_loss=131380658176.0000 entropy=5.9505 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 2700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438811.7 mean_steps=14.1
|
|
[Episode 2710] reward=-57503059.3 actor_loss=0.0458 critic_loss=124915508689.4545 entropy=5.9462 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 2720] reward=-57930580.3 actor_loss=0.0817 critic_loss=126038313642.6667 entropy=5.9472 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Eval 2720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460962.9 mean_steps=13.5
|
|
[Episode 2730] reward=-52961479.1 actor_loss=0.0675 critic_loss=122551389992.4211 entropy=5.9596 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 2740] reward=-47949619.3 actor_loss=0.0713 critic_loss=118312956648.7273 entropy=5.9625 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 2740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402932.6 mean_steps=13.8
|
|
[Episode 2750] reward=-51292788.4 actor_loss=0.0786 critic_loss=118480345208.4706 entropy=5.9637 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 2760] reward=-54492448.7 actor_loss=0.0870 critic_loss=119005895611.7333 entropy=5.9701 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 2760] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-379104.2 mean_steps=15.5
|
|
[Episode 2770] reward=-57085844.7 actor_loss=0.0643 critic_loss=122718410069.3333 entropy=5.9796 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 2780] reward=-48664984.5 actor_loss=0.0607 critic_loss=119746480007.5294 entropy=5.9883 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 2780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405511.7 mean_steps=13.8
|
|
[Episode 2790] reward=-48955669.3 actor_loss=0.0474 critic_loss=119445148392.7273 entropy=5.9926 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 2800] reward=-38961234.9 actor_loss=0.0374 critic_loss=113236870212.2667 entropy=5.9931 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 2800] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-364227.6 mean_steps=15.3
|
|
[Episode 2810] reward=-57940270.6 actor_loss=0.0733 critic_loss=126066181188.2667 entropy=5.9962 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 2820] reward=-50117173.3 actor_loss=0.0814 critic_loss=120104604852.7059 entropy=6.0007 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 2820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395979.6 mean_steps=14.6
|
|
[Episode 2830] reward=-60844438.6 actor_loss=0.0857 critic_loss=127108843520.0000 entropy=6.0246 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 2840] reward=-58283887.1 actor_loss=0.0590 critic_loss=124718099757.1765 entropy=6.0325 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 2840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545480.4 mean_steps=12.8
|
|
[Episode 2850] reward=-34084258.3 actor_loss=0.0320 critic_loss=106487198671.2381 entropy=6.0466 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0534 front_blocked=0
|
|
[Episode 2860] reward=-52362116.6 actor_loss=0.0806 critic_loss=119610406619.4286 entropy=6.0573 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 2860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-455460.6 mean_steps=12.2
|
|
[Episode 2870] reward=-56619582.5 actor_loss=0.0552 critic_loss=128466644992.0000 entropy=6.0678 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 2880] reward=-47290718.3 actor_loss=0.0546 critic_loss=115665207296.0000 entropy=6.0821 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 2880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538681.1 mean_steps=13.8
|
|
[Episode 2890] reward=-49629797.4 actor_loss=0.0624 critic_loss=117889839826.8235 entropy=6.0921 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 2900] reward=-32090594.6 actor_loss=0.0472 critic_loss=108572119686.7368 entropy=6.0909 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 2900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434039.0 mean_steps=14.2
|
|
[Episode 2910] reward=-43731605.6 actor_loss=0.0562 critic_loss=113495807317.3333 entropy=6.0977 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 2920] reward=-44449244.5 actor_loss=0.0339 critic_loss=117830321421.4737 entropy=6.0995 approx_kl=0.0104 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Eval 2920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-448672.7 mean_steps=12.9
|
|
[Episode 2930] reward=-49590450.9 actor_loss=0.0659 critic_loss=119248500736.0000 entropy=6.1080 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 2940] reward=-56025702.1 actor_loss=0.0606 critic_loss=125335429120.0000 entropy=6.1080 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 2940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-615387.3 mean_steps=12.3
|
|
[Episode 2950] reward=-35760750.0 actor_loss=0.0230 critic_loss=109678961664.0000 entropy=6.1181 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Episode 2960] reward=-42521271.9 actor_loss=0.0794 critic_loss=113062947659.2941 entropy=6.1227 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 2960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-374048.7 mean_steps=14.4
|
|
[Episode 2970] reward=-47638547.6 actor_loss=0.0500 critic_loss=117355471394.1333 entropy=6.1267 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 2980] reward=-35247320.5 actor_loss=0.0481 critic_loss=110444299729.4545 entropy=6.1248 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0618 front_blocked=0
|
|
[Eval 2980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416666.0 mean_steps=13.9
|
|
[Episode 2990] reward=-61202340.5 actor_loss=0.1199 critic_loss=124686272354.4615 entropy=6.1256 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 3000] reward=-59426021.1 actor_loss=0.0794 critic_loss=129161697280.0000 entropy=6.1394 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 3000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473269.0 mean_steps=12.8
|
|
[Episode 3010] reward=-42624893.7 actor_loss=0.0538 critic_loss=113759629824.0000 entropy=6.1427 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 3020] reward=-56448058.4 actor_loss=0.0887 critic_loss=124091035062.8571 entropy=6.1395 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 3020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430669.2 mean_steps=13.9
|
|
[Episode 3030] reward=-63240780.5 actor_loss=0.0711 critic_loss=127401769518.5455 entropy=6.1420 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 3040] reward=-50397864.1 actor_loss=0.0419 critic_loss=120702951424.0000 entropy=6.1541 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 3040] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-548512.8 mean_steps=11.1
|
|
[Episode 3050] reward=-57245870.9 actor_loss=0.0663 critic_loss=122719312749.7143 entropy=6.1613 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 3060] reward=-53818835.4 actor_loss=0.0586 critic_loss=126066274759.1111 entropy=6.1628 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 3060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550324.5 mean_steps=13.2
|
|
[Episode 3070] reward=-47414869.8 actor_loss=0.0616 critic_loss=117330148010.6667 entropy=6.1739 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 3080] reward=-53149088.5 actor_loss=0.0370 critic_loss=124360134851.0476 entropy=6.1847 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 3080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500500.9 mean_steps=12.7
|
|
[Episode 3090] reward=-44432909.8 actor_loss=0.0465 critic_loss=112806987217.4545 entropy=6.2002 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 3100] reward=-49955319.0 actor_loss=0.0352 critic_loss=119212648448.0000 entropy=6.2153 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 3100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451779.6 mean_steps=13.5
|
|
[Episode 3110] reward=-53687266.1 actor_loss=0.0745 critic_loss=119840503125.3333 entropy=6.2194 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 3120] reward=-47239463.0 actor_loss=0.0408 critic_loss=115461510940.4444 entropy=6.2199 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 3120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-706909.9 mean_steps=12.0
|
|
[Episode 3130] reward=-45340952.7 actor_loss=0.0817 critic_loss=111965580083.2000 entropy=6.2208 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 3140] reward=-54171853.7 actor_loss=0.0488 critic_loss=119635074108.2353 entropy=6.2306 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 3140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422297.6 mean_steps=13.8
|
|
[Episode 3150] reward=-52882577.4 actor_loss=0.0506 critic_loss=122958419606.5882 entropy=6.2386 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 3160] reward=-58595472.1 actor_loss=0.0750 critic_loss=124771749888.0000 entropy=6.2433 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 3160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566857.6 mean_steps=11.8
|
|
[Episode 3170] reward=-58520014.6 actor_loss=0.0732 critic_loss=125803616814.5455 entropy=6.2419 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 3180] reward=-63182665.3 actor_loss=0.0844 critic_loss=126668632436.3636 entropy=6.2501 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 3180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-583110.8 mean_steps=11.9
|
|
[Episode 3190] reward=-47503797.4 actor_loss=0.0655 critic_loss=118081571328.0000 entropy=6.2628 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 3200] reward=-39780198.1 actor_loss=0.0483 critic_loss=111429755426.1333 entropy=6.2696 approx_kl=0.0100 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 3200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536027.0 mean_steps=12.2
|
|
[Episode 3210] reward=-44803312.8 actor_loss=0.0614 critic_loss=113266548105.8462 entropy=6.2777 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 3220] reward=-53544714.6 actor_loss=0.0689 critic_loss=115138734762.6667 entropy=6.2810 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 3220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490675.0 mean_steps=13.3
|
|
[Episode 3230] reward=-57258081.4 actor_loss=0.0803 critic_loss=129585683660.8000 entropy=6.2891 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 3240] reward=-50050833.8 actor_loss=0.0577 critic_loss=116924976332.8000 entropy=6.2924 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 3240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-565876.1 mean_steps=11.1
|
|
[Episode 3250] reward=-39653842.2 actor_loss=0.0321 critic_loss=108300176952.8889 entropy=6.2907 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0618 front_blocked=0
|
|
[Episode 3260] reward=-46425590.7 actor_loss=0.0359 critic_loss=117261505194.6667 entropy=6.2959 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 3260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492071.2 mean_steps=12.8
|
|
[Episode 3270] reward=-39691587.7 actor_loss=0.0431 critic_loss=114108441746.2857 entropy=6.3051 approx_kl=0.0123 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Episode 3280] reward=-45019735.9 actor_loss=0.0382 critic_loss=112090240887.4667 entropy=6.3032 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Eval 3280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555408.2 mean_steps=11.7
|
|
[Episode 3290] reward=-44265856.6 actor_loss=0.0458 critic_loss=112551222385.7778 entropy=6.3139 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 3300] reward=-52319315.0 actor_loss=0.0614 critic_loss=118050071893.3333 entropy=6.3250 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 3300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430229.9 mean_steps=13.9
|
|
[Episode 3310] reward=-53636155.0 actor_loss=0.0872 critic_loss=121581920938.6667 entropy=6.3350 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 3320] reward=-56658939.0 actor_loss=0.0614 critic_loss=118791911424.0000 entropy=6.3427 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 3320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397291.8 mean_steps=13.8
|
|
[Episode 3330] reward=-56068302.6 actor_loss=0.0804 critic_loss=121975876096.0000 entropy=6.3473 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 3340] reward=-60021763.4 actor_loss=0.0606 critic_loss=125208241421.4737 entropy=6.3540 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 3340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504942.4 mean_steps=12.9
|
|
[Episode 3350] reward=-30047687.8 actor_loss=0.0218 critic_loss=106673061456.8421 entropy=6.3643 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0521 front_blocked=0
|
|
[Episode 3360] reward=-45272032.6 actor_loss=0.0566 critic_loss=116658438963.2000 entropy=6.3669 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 3360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-405442.1 mean_steps=13.1
|
|
[Episode 3370] reward=-50814864.8 actor_loss=0.0592 critic_loss=118146717988.5714 entropy=6.3724 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 3380] reward=-53324090.8 actor_loss=0.0492 critic_loss=125187967051.8519 entropy=6.3828 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 3380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489677.2 mean_steps=13.5
|
|
[Episode 3390] reward=-52388861.9 actor_loss=0.0487 critic_loss=122621918916.9231 entropy=6.3787 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 3400] reward=-55278852.5 actor_loss=0.0650 critic_loss=125393751691.6364 entropy=6.3820 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 3400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455186.6 mean_steps=13.6
|
|
[Episode 3410] reward=-45391066.6 actor_loss=0.0543 critic_loss=111116019712.0000 entropy=6.3895 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 3420] reward=-36785577.9 actor_loss=0.0509 critic_loss=107463867703.6522 entropy=6.3920 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0658 front_blocked=0
|
|
[Eval 3420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-560799.0 mean_steps=10.7
|
|
[Episode 3430] reward=-47722724.3 actor_loss=0.0478 critic_loss=118618392712.5333 entropy=6.3953 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 3440] reward=-46892003.7 actor_loss=0.0544 critic_loss=115012109458.2857 entropy=6.4062 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 3440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463152.8 mean_steps=12.6
|
|
[Episode 3450] reward=-47043768.3 actor_loss=0.0613 critic_loss=110451526860.8000 entropy=6.4166 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 3460] reward=-51579551.2 actor_loss=0.0721 critic_loss=119119307532.1905 entropy=6.4131 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 3460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-407085.5 mean_steps=13.8
|
|
[Episode 3470] reward=-55085790.1 actor_loss=0.0775 critic_loss=118211030317.1765 entropy=6.4172 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 3480] reward=-55076678.0 actor_loss=0.0946 critic_loss=118782485740.3077 entropy=6.4256 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 3480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441059.5 mean_steps=13.9
|
|
[Episode 3490] reward=-54489017.5 actor_loss=0.0597 critic_loss=119989294421.3333 entropy=6.4266 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 3500] reward=-53531791.4 actor_loss=0.0669 critic_loss=117888044165.5652 entropy=6.4358 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 3500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-406010.9 mean_steps=14.7
|
|
[Episode 3510] reward=-48491302.9 actor_loss=0.0456 critic_loss=117640113590.8571 entropy=6.4471 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 3520] reward=-44532695.1 actor_loss=0.0735 critic_loss=116609380625.0667 entropy=6.4537 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 3520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439807.3 mean_steps=13.1
|
|
[Episode 3530] reward=-38767625.8 actor_loss=0.0591 critic_loss=109951504624.9412 entropy=6.4574 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Episode 3540] reward=-43278006.9 actor_loss=0.0515 critic_loss=110960192625.7778 entropy=6.4630 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 3540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-621937.4 mean_steps=11.3
|
|
[Episode 3550] reward=-47163371.4 actor_loss=0.0446 critic_loss=118743236280.3200 entropy=6.4696 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 3560] reward=-40910715.5 actor_loss=0.0485 critic_loss=106206169533.2174 entropy=6.4702 approx_kl=0.0104 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 3560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-374930.8 mean_steps=12.9
|
|
[Episode 3570] reward=-37570919.2 actor_loss=0.0487 critic_loss=108285955276.8000 entropy=6.4721 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Episode 3580] reward=-51085378.0 actor_loss=0.0629 critic_loss=121155525563.7333 entropy=6.4832 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 3580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559277.2 mean_steps=12.7
|
|
[Episode 3590] reward=-52502513.6 actor_loss=0.0469 critic_loss=116523186333.5385 entropy=6.4854 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 3600] reward=-55826441.5 actor_loss=0.0758 critic_loss=122674518425.6000 entropy=6.4893 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 3600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449022.8 mean_steps=13.9
|
|
[Episode 3610] reward=-38125856.7 actor_loss=0.0506 critic_loss=106722656687.1579 entropy=6.4850 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Episode 3620] reward=-38073844.7 actor_loss=0.0349 critic_loss=108530830713.2632 entropy=6.4888 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 3620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-472334.7 mean_steps=11.7
|
|
[Episode 3630] reward=-42618015.0 actor_loss=0.0347 critic_loss=112175652408.8889 entropy=6.4963 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Episode 3640] reward=-43191742.8 actor_loss=0.0651 critic_loss=109830565888.0000 entropy=6.5064 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Eval 3640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-495632.5 mean_steps=11.8
|
|
[Episode 3650] reward=-47383466.5 actor_loss=0.0705 critic_loss=117081673081.2632 entropy=6.5045 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 3660] reward=-39649207.6 actor_loss=0.0675 critic_loss=107620273005.7143 entropy=6.5074 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 3660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-648225.4 mean_steps=12.1
|
|
[Episode 3670] reward=-49043173.7 actor_loss=0.0529 critic_loss=118168740352.0000 entropy=6.5059 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 3680] reward=-52227968.8 actor_loss=0.0729 critic_loss=116476282321.4545 entropy=6.5165 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 3680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-503613.1 mean_steps=12.1
|
|
[Episode 3690] reward=-40531573.9 actor_loss=0.0611 critic_loss=107519662957.7143 entropy=6.5299 approx_kl=0.0099 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 3700] reward=-50688328.8 actor_loss=0.0538 critic_loss=119736137591.4667 entropy=6.5331 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 3700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-468087.2 mean_steps=11.8
|
|
[Episode 3710] reward=-47653677.3 actor_loss=0.0651 critic_loss=110664052053.3333 entropy=6.5458 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 3720] reward=-43424391.0 actor_loss=0.0483 critic_loss=113457469597.5385 entropy=6.5473 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Eval 3720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408694.7 mean_steps=13.6
|
|
[Episode 3730] reward=-44518119.2 actor_loss=0.0495 critic_loss=114496756035.3684 entropy=6.5445 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 3740] reward=-48647381.8 actor_loss=0.0351 critic_loss=115209555057.7778 entropy=6.5419 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 3740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512574.1 mean_steps=12.7
|
|
[Episode 3750] reward=-48731138.5 actor_loss=0.0747 critic_loss=113443507501.1765 entropy=6.5431 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 3760] reward=-40349808.6 actor_loss=0.0458 critic_loss=108165383899.4286 entropy=6.5486 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 3760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-551995.5 mean_steps=13.0
|
|
[Episode 3770] reward=-42026972.9 actor_loss=0.0578 critic_loss=109682387051.7895 entropy=6.5550 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Episode 3780] reward=-35283051.7 actor_loss=0.0580 critic_loss=103627273011.2000 entropy=6.5579 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 3780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468555.3 mean_steps=13.4
|
|
[Episode 3790] reward=-46669945.1 actor_loss=0.0668 critic_loss=113860705757.8667 entropy=6.5597 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 3800] reward=-55080020.4 actor_loss=0.0844 critic_loss=120753011097.6000 entropy=6.5545 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 3800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-396893.5 mean_steps=13.7
|
|
[Episode 3810] reward=-46921455.6 actor_loss=0.0366 critic_loss=116603726116.5714 entropy=6.5586 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 3820] reward=-39506151.9 actor_loss=0.0559 critic_loss=108769107968.0000 entropy=6.5607 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 3820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419811.4 mean_steps=13.7
|
|
[Episode 3830] reward=-43703574.6 actor_loss=0.0620 critic_loss=107028484681.1429 entropy=6.5628 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 3840] reward=-57932342.5 actor_loss=0.0724 critic_loss=119913337514.6667 entropy=6.5718 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 3840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534117.1 mean_steps=12.9
|
|
[Episode 3850] reward=-33807444.5 actor_loss=0.0621 critic_loss=103986128896.0000 entropy=6.5731 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 3860] reward=-46378847.9 actor_loss=0.0565 critic_loss=113717480789.3333 entropy=6.5741 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 3860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575936.5 mean_steps=11.6
|
|
[Episode 3870] reward=-39406949.0 actor_loss=0.0325 critic_loss=105990636001.8824 entropy=6.5842 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 3880] reward=-43760499.1 actor_loss=0.0607 critic_loss=108670870089.1429 entropy=6.5825 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 3880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507901.2 mean_steps=12.8
|
|
[Episode 3890] reward=-50642926.0 actor_loss=0.0478 critic_loss=117613506560.0000 entropy=6.5799 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 3900] reward=-55541769.0 actor_loss=0.0865 critic_loss=119852528338.8235 entropy=6.5951 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 3900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468441.7 mean_steps=12.7
|
|
[Episode 3910] reward=-37368241.8 actor_loss=0.0392 critic_loss=104683205973.3333 entropy=6.6019 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Episode 3920] reward=-47189126.6 actor_loss=0.0519 critic_loss=114136229010.2857 entropy=6.6041 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 3920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523102.6 mean_steps=12.8
|
|
[Episode 3930] reward=-47220996.9 actor_loss=0.0669 critic_loss=112632700928.0000 entropy=6.6123 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 3940] reward=-51541338.3 actor_loss=0.0625 critic_loss=117208715264.0000 entropy=6.6158 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 3940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-476620.4 mean_steps=13.9
|
|
[Episode 3950] reward=-52922633.7 actor_loss=0.0516 critic_loss=120994297856.0000 entropy=6.6179 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 3960] reward=-37885440.4 actor_loss=0.0872 critic_loss=106278359203.8400 entropy=6.6232 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 3960] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-650357.5 mean_steps=11.6
|
|
[Episode 3970] reward=-43844985.5 actor_loss=0.0604 critic_loss=109098337894.4000 entropy=6.6413 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 3980] reward=-46751363.7 actor_loss=0.0478 critic_loss=113037412352.0000 entropy=6.6475 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 3980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512770.0 mean_steps=12.9
|
|
[Episode 3990] reward=-49959194.8 actor_loss=0.0685 critic_loss=112287791396.5714 entropy=6.6488 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 4000] reward=-53709250.6 actor_loss=0.0757 critic_loss=121838348846.5455 entropy=6.6645 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 4000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540568.7 mean_steps=12.1
|
|
[Episode 4010] reward=-44387063.7 actor_loss=0.0407 critic_loss=108351199555.3684 entropy=6.6752 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 4020] reward=-45058785.7 actor_loss=0.0514 critic_loss=112349548859.0769 entropy=6.6885 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 4020] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-616723.2 mean_steps=10.6
|
|
[Episode 4030] reward=-42213487.0 actor_loss=0.0498 critic_loss=109363184579.7647 entropy=6.6983 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 4040] reward=-38333612.3 actor_loss=0.0523 critic_loss=106957138602.6667 entropy=6.7041 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 4040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503556.1 mean_steps=13.8
|
|
[Episode 4050] reward=-49221765.7 actor_loss=0.0580 critic_loss=115933027328.0000 entropy=6.7146 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 4060] reward=-53088471.7 actor_loss=0.0599 critic_loss=118829641081.2632 entropy=6.7159 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 4060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-472886.7 mean_steps=12.6
|
|
[Episode 4070] reward=-42815371.0 actor_loss=0.0413 critic_loss=110857685869.7143 entropy=6.7205 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Episode 4080] reward=-49537513.4 actor_loss=0.0526 critic_loss=113138430658.2069 entropy=6.7161 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 4080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523805.1 mean_steps=12.9
|
|
[Episode 4090] reward=-56210640.0 actor_loss=0.0556 critic_loss=121238352457.1429 entropy=6.7187 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 4100] reward=-41383426.5 actor_loss=0.0539 critic_loss=109131494022.7368 entropy=6.7228 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 4100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551151.8 mean_steps=11.7
|
|
[Episode 4110] reward=-42339553.0 actor_loss=0.0777 critic_loss=110352281873.0667 entropy=6.7284 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 4120] reward=-50658983.6 actor_loss=0.0791 critic_loss=119236039601.2308 entropy=6.7328 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 4120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-483880.5 mean_steps=14.6
|
|
[Episode 4130] reward=-45197143.6 actor_loss=0.0620 critic_loss=113585163972.9231 entropy=6.7345 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Episode 4140] reward=-47814893.2 actor_loss=0.0641 critic_loss=110936360082.2857 entropy=6.7339 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 4140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484248.1 mean_steps=13.4
|
|
[Episode 4150] reward=-50353608.5 actor_loss=0.0674 critic_loss=117073419195.7333 entropy=6.7302 approx_kl=0.0098 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 4160] reward=-42011724.6 actor_loss=0.0648 critic_loss=108140103559.5294 entropy=6.7335 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 4160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-647441.8 mean_steps=11.4
|
|
[Episode 4170] reward=-48113379.2 actor_loss=0.0623 critic_loss=115764756480.0000 entropy=6.7416 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 4180] reward=-35535685.7 actor_loss=0.0550 critic_loss=104389282762.1053 entropy=6.7566 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0658 front_blocked=0
|
|
[Eval 4180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-419661.6 mean_steps=14.8
|
|
[Episode 4190] reward=-47115924.1 actor_loss=0.0623 critic_loss=108382135235.7647 entropy=6.7659 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 4200] reward=-43627240.6 actor_loss=0.0714 critic_loss=111737792512.0000 entropy=6.7686 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Eval 4200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-343381.2 mean_steps=14.2
|
|
[Episode 4210] reward=-35767871.8 actor_loss=0.0659 critic_loss=107860114195.6923 entropy=6.7880 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 4220] reward=-35426610.6 actor_loss=0.0493 critic_loss=105116961698.9091 entropy=6.8000 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Eval 4220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466883.9 mean_steps=13.4
|
|
[Episode 4230] reward=-45468607.9 actor_loss=0.0305 critic_loss=109137656490.6667 entropy=6.7981 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 4240] reward=-40616501.5 actor_loss=0.0598 critic_loss=111290064289.1852 entropy=6.8071 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 4240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567643.6 mean_steps=12.8
|
|
[Episode 4250] reward=-36700406.1 actor_loss=0.0456 critic_loss=105115013802.6667 entropy=6.8108 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Episode 4260] reward=-52950249.1 actor_loss=0.0955 critic_loss=119238440870.9565 entropy=6.8195 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 4260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487673.0 mean_steps=13.8
|
|
[Episode 4270] reward=-54353672.5 actor_loss=0.0660 critic_loss=119922094614.2609 entropy=6.8245 approx_kl=0.0100 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 4280] reward=-62926198.2 actor_loss=0.0933 critic_loss=119597833808.8421 entropy=6.8344 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 4280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447973.1 mean_steps=13.2
|
|
[Episode 4290] reward=-53266139.3 actor_loss=0.0577 critic_loss=124060435742.7200 entropy=6.8346 approx_kl=0.0097 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 4300] reward=-49594766.3 actor_loss=0.0632 critic_loss=116236689920.0000 entropy=6.8468 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 4300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-602988.0 mean_steps=12.9
|
|
[Episode 4310] reward=-48151549.2 actor_loss=0.0574 critic_loss=111418559692.8000 entropy=6.8665 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 4320] reward=-34261136.2 actor_loss=0.0521 critic_loss=102432366110.1176 entropy=6.8711 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Eval 4320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422134.7 mean_steps=14.2
|
|
[Episode 4330] reward=-39933161.9 actor_loss=0.0379 critic_loss=106697819204.2667 entropy=6.8771 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 4340] reward=-45743488.8 actor_loss=0.0630 critic_loss=107685738359.4667 entropy=6.8891 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 4340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-369061.3 mean_steps=15.3
|
|
[Episode 4350] reward=-39905560.6 actor_loss=0.0397 critic_loss=109834388626.2857 entropy=6.8931 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 4360] reward=-51576122.4 actor_loss=0.0527 critic_loss=114772284757.3333 entropy=6.9035 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 4360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-617072.3 mean_steps=11.2
|
|
[Episode 4370] reward=-43625276.5 actor_loss=0.0502 critic_loss=104789771150.2222 entropy=6.9032 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 4380] reward=-51230753.4 actor_loss=0.0716 critic_loss=112499210397.5385 entropy=6.9083 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 4380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555223.4 mean_steps=12.8
|
|
[Episode 4390] reward=-33107260.2 actor_loss=0.0361 critic_loss=94135362280.7273 entropy=6.9241 approx_kl=0.0116 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Episode 4400] reward=-40362964.9 actor_loss=0.0417 critic_loss=108512310452.7059 entropy=6.9357 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Eval 4400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-498924.3 mean_steps=12.3
|
|
[Episode 4410] reward=-48610307.2 actor_loss=0.0708 critic_loss=108013010944.0000 entropy=6.9371 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 4420] reward=-49190934.2 actor_loss=0.0584 critic_loss=111537297817.6000 entropy=6.9402 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 4420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490586.7 mean_steps=12.7
|
|
[Episode 4430] reward=-47968519.7 actor_loss=0.0619 critic_loss=110763967692.8000 entropy=6.9350 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 4440] reward=-51020309.0 actor_loss=0.0653 critic_loss=114742361721.9048 entropy=6.9239 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 4440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-598786.1 mean_steps=12.9
|
|
[Episode 4450] reward=-45077886.7 actor_loss=0.0724 critic_loss=105649903686.6207 entropy=6.9238 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 4460] reward=-48743223.3 actor_loss=0.0814 critic_loss=109355436243.8621 entropy=6.9189 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 4460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-500738.7 mean_steps=12.3
|
|
[Episode 4470] reward=-42409111.4 actor_loss=0.0745 critic_loss=104589674788.5714 entropy=6.9248 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 4480] reward=-45604314.0 actor_loss=0.0467 critic_loss=108510286714.4348 entropy=6.9329 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 4480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-365616.9 mean_steps=14.1
|
|
[Episode 4490] reward=-32242113.1 actor_loss=0.0378 critic_loss=97669653065.1429 entropy=6.9468 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Episode 4500] reward=-44378022.1 actor_loss=0.0474 critic_loss=104245972992.0000 entropy=6.9445 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Eval 4500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-369279.7 mean_steps=14.2
|
|
[Episode 4510] reward=-42995482.2 actor_loss=0.0624 critic_loss=107261352345.6000 entropy=6.9569 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 4520] reward=-47714317.8 actor_loss=0.0587 critic_loss=112488953054.6087 entropy=6.9494 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 4520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-301562.8 mean_steps=14.5
|
|
[Episode 4530] reward=-44500955.4 actor_loss=0.0506 critic_loss=110204878848.0000 entropy=6.9411 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 4540] reward=-39777639.7 actor_loss=0.0354 critic_loss=103692861440.0000 entropy=6.9513 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 4540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-436631.4 mean_steps=13.4
|
|
[Episode 4550] reward=-38551408.7 actor_loss=0.0536 critic_loss=103484999387.4286 entropy=6.9612 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 4560] reward=-42673608.0 actor_loss=0.0574 critic_loss=101070948059.4286 entropy=6.9569 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 4560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-405237.9 mean_steps=13.4
|
|
[Episode 4570] reward=-40216837.6 actor_loss=0.0217 critic_loss=103328939212.8000 entropy=6.9604 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Episode 4580] reward=-31179940.0 actor_loss=0.0474 critic_loss=96695013691.0769 entropy=6.9711 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 4580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486856.2 mean_steps=12.8
|
|
[Episode 4590] reward=-37336908.9 actor_loss=0.0598 critic_loss=103335691878.4000 entropy=6.9670 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Episode 4600] reward=-41324927.2 actor_loss=0.0620 critic_loss=98337588292.2667 entropy=6.9684 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 4600] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-632924.4 mean_steps=11.6
|
|
[Episode 4610] reward=-43231772.3 actor_loss=0.0506 critic_loss=106433925939.2000 entropy=6.9725 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 4620] reward=-42871138.9 actor_loss=0.0337 critic_loss=102757924233.8462 entropy=6.9776 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Eval 4620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-570131.4 mean_steps=11.7
|
|
[Episode 4630] reward=-50863596.8 actor_loss=0.0634 critic_loss=111727428015.1579 entropy=6.9854 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 4640] reward=-50876695.4 actor_loss=0.0533 critic_loss=111613432048.9412 entropy=6.9904 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 4640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481505.8 mean_steps=13.3
|
|
[Episode 4650] reward=-48798128.6 actor_loss=0.0866 critic_loss=114037655931.2593 entropy=7.0016 approx_kl=0.0099 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 4660] reward=-47721023.8 actor_loss=0.0457 critic_loss=111362887224.8889 entropy=7.0154 approx_kl=0.0108 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 4660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562780.7 mean_steps=12.2
|
|
[Episode 4670] reward=-39301069.0 actor_loss=0.0411 critic_loss=101095947059.2000 entropy=7.0268 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Episode 4680] reward=-41532639.7 actor_loss=0.0460 critic_loss=103081277440.0000 entropy=7.0232 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0618 front_blocked=0
|
|
[Eval 4680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-541507.0 mean_steps=10.8
|
|
[Episode 4690] reward=-40666992.2 actor_loss=0.0219 critic_loss=106022557816.4706 entropy=7.0342 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 4700] reward=-46154559.2 actor_loss=0.0590 critic_loss=105462778958.7692 entropy=7.0361 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Eval 4700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562639.2 mean_steps=12.6
|
|
[Episode 4710] reward=-53369454.1 actor_loss=0.0715 critic_loss=112623501668.1739 entropy=7.0376 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 4720] reward=-41053036.6 actor_loss=0.0346 critic_loss=104024635904.0000 entropy=7.0572 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Eval 4720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455268.6 mean_steps=14.1
|
|
[Episode 4730] reward=-49608822.2 actor_loss=0.0391 critic_loss=107120768236.3077 entropy=7.0613 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 4740] reward=-47438341.4 actor_loss=0.0463 critic_loss=109408336359.6190 entropy=7.0616 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Eval 4740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478428.8 mean_steps=12.6
|
|
[Episode 4750] reward=-52289509.1 actor_loss=0.0551 critic_loss=116986576440.8889 entropy=7.0673 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 4760] reward=-48853762.2 actor_loss=0.0841 critic_loss=106706575990.1538 entropy=7.0606 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 4760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531155.4 mean_steps=12.9
|
|
[Episode 4770] reward=-52065369.5 actor_loss=0.0864 critic_loss=108553281863.6800 entropy=7.0607 approx_kl=0.0108 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 4780] reward=-31800009.4 actor_loss=0.0089 critic_loss=94175110333.6296 entropy=7.0724 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0482 front_blocked=0
|
|
[Eval 4780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-603480.0 mean_steps=11.0
|
|
[Episode 4790] reward=-41037827.2 actor_loss=0.0610 critic_loss=108488524322.1333 entropy=7.0742 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Episode 4800] reward=-38625805.1 actor_loss=0.0532 critic_loss=103368153380.5714 entropy=7.0800 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 4800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509118.8 mean_steps=12.9
|
|
[Episode 4810] reward=-42656875.0 actor_loss=0.0528 critic_loss=102170587136.0000 entropy=7.0790 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 4820] reward=-54028578.6 actor_loss=0.0525 critic_loss=110746498295.1724 entropy=7.0872 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 4820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535382.0 mean_steps=12.3
|
|
[Episode 4830] reward=-42485338.6 actor_loss=0.0570 critic_loss=105365489436.4444 entropy=7.0889 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Episode 4840] reward=-43357588.3 actor_loss=0.0582 critic_loss=109607101379.7647 entropy=7.0975 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 4840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437278.3 mean_steps=13.2
|
|
[Episode 4850] reward=-27094335.5 actor_loss=0.0279 critic_loss=90305624064.0000 entropy=7.0977 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 4860] reward=-37297504.3 actor_loss=0.0487 critic_loss=103284900750.2222 entropy=7.1076 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 4860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562930.0 mean_steps=12.4
|
|
[Episode 4870] reward=-33578474.4 actor_loss=0.0364 critic_loss=98578617230.2222 entropy=7.1075 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 4880] reward=-37074771.2 actor_loss=0.0417 critic_loss=105269533809.7778 entropy=7.1054 approx_kl=0.0095 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Eval 4880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427917.1 mean_steps=13.9
|
|
[Episode 4890] reward=-47733825.2 actor_loss=0.0484 critic_loss=110619021627.0769 entropy=7.1071 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 4900] reward=-32527618.8 actor_loss=0.0424 critic_loss=90206701056.0000 entropy=7.1100 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Eval 4900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-647884.5 mean_steps=11.3
|
|
[Episode 4910] reward=-36115541.6 actor_loss=0.0158 critic_loss=92927711524.5714 entropy=7.1135 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Episode 4920] reward=-33663295.9 actor_loss=0.0341 critic_loss=97092248462.2222 entropy=7.1149 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Eval 4920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424140.0 mean_steps=13.9
|
|
[Episode 4930] reward=-50912181.6 actor_loss=0.0485 critic_loss=110691682986.6667 entropy=7.1291 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 4940] reward=-53761568.0 actor_loss=0.0493 critic_loss=116669887186.8235 entropy=7.1319 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 4940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-521809.4 mean_steps=11.2
|
|
[Episode 4950] reward=-42104579.1 actor_loss=0.0406 critic_loss=103033010176.0000 entropy=7.1359 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Episode 4960] reward=-46975734.3 actor_loss=0.0506 critic_loss=107996433066.6667 entropy=7.1396 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0658 front_blocked=0
|
|
[Eval 4960] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-581174.9 mean_steps=11.1
|
|
[Episode 4970] reward=-37881497.1 actor_loss=0.0745 critic_loss=103854886696.4211 entropy=7.1397 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 4980] reward=-41649697.1 actor_loss=0.0709 critic_loss=100195890062.2222 entropy=7.1441 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Eval 4980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-438632.8 mean_steps=14.4
|
|
[Episode 4990] reward=-49616597.7 actor_loss=0.0633 critic_loss=110235391317.3333 entropy=7.1417 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Episode 5000] reward=-24195112.7 actor_loss=0.0098 critic_loss=82937879665.7778 entropy=7.1445 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Eval 5000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-353932.1 mean_steps=14.3
|
|
[Episode 5010] reward=-52470136.7 actor_loss=0.0580 critic_loss=113400730322.8235 entropy=7.1471 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 5020] reward=-53669501.3 actor_loss=0.0599 critic_loss=113673422060.3077 entropy=7.1567 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 5020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-613530.4 mean_steps=12.0
|
|
[Episode 5030] reward=-43084497.7 actor_loss=0.0399 critic_loss=111012861440.0000 entropy=7.1634 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 5040] reward=-46336948.0 actor_loss=0.0699 critic_loss=104315275450.1818 entropy=7.1583 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Eval 5040] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-403494.8 mean_steps=15.3
|
|
[Episode 5050] reward=-45390790.9 actor_loss=0.0538 critic_loss=106508459212.8000 entropy=7.1635 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 5060] reward=-47981525.6 actor_loss=0.0513 critic_loss=113698722767.2381 entropy=7.1656 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 5060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530997.7 mean_steps=12.3
|
|
[Episode 5070] reward=-34881704.7 actor_loss=0.0655 critic_loss=96843842981.6471 entropy=7.1613 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 5080] reward=-45048800.2 actor_loss=0.0219 critic_loss=102706065723.0769 entropy=7.1673 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Eval 5080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-351909.1 mean_steps=14.1
|
|
[Episode 5090] reward=-41422994.0 actor_loss=0.0371 critic_loss=103941085184.0000 entropy=7.1694 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Episode 5100] reward=-28400700.1 actor_loss=-0.0119 critic_loss=93723149365.8947 entropy=7.1713 approx_kl=0.0109 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Eval 5100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-374246.6 mean_steps=14.6
|
|
[Episode 5110] reward=-44442230.4 actor_loss=0.0375 critic_loss=111703702291.6923 entropy=7.1761 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Episode 5120] reward=-37664208.1 actor_loss=0.0540 critic_loss=92623304021.3333 entropy=7.1830 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 5120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395472.1 mean_steps=14.3
|
|
[Episode 5130] reward=-51521284.7 actor_loss=0.0493 critic_loss=109921604754.2857 entropy=7.1964 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Episode 5140] reward=-39754838.6 actor_loss=0.0734 critic_loss=100080946959.0588 entropy=7.2096 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 5140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560486.0 mean_steps=11.8
|
|
[Episode 5150] reward=-55133575.7 actor_loss=0.0827 critic_loss=114629162449.4545 entropy=7.2191 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 5160] reward=-44471966.7 actor_loss=0.0461 critic_loss=106819597863.3846 entropy=7.2202 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 5160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581084.7 mean_steps=11.9
|
|
[Episode 5170] reward=-40314543.3 actor_loss=0.0715 critic_loss=94880406459.7333 entropy=7.2217 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Episode 5180] reward=-47431207.7 actor_loss=0.0561 critic_loss=105966719622.7368 entropy=7.2412 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 5180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572714.9 mean_steps=11.7
|
|
[Episode 5190] reward=-38703383.6 actor_loss=0.0212 critic_loss=106933025996.8000 entropy=7.2478 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Episode 5200] reward=-36425886.1 actor_loss=0.0386 critic_loss=100874571190.8571 entropy=7.2405 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0534 front_blocked=0
|
|
[Eval 5200] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-654236.7 mean_steps=11.2
|
|
[Episode 5210] reward=-34651024.1 actor_loss=0.0434 critic_loss=93728343381.3333 entropy=7.2421 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Episode 5220] reward=-39970723.8 actor_loss=0.0482 critic_loss=96602999229.2174 entropy=7.2496 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 5220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496123.3 mean_steps=13.1
|
|
[Episode 5230] reward=-28550601.6 actor_loss=0.0202 critic_loss=86262567634.8235 entropy=7.2572 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0488 front_blocked=0
|
|
[Episode 5240] reward=-42240233.3 actor_loss=0.0557 critic_loss=102852902502.4000 entropy=7.2587 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 5240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-418599.3 mean_steps=14.9
|
|
[Episode 5250] reward=-28995135.6 actor_loss=0.0248 critic_loss=93214982690.1333 entropy=7.2603 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0534 front_blocked=0
|
|
[Episode 5260] reward=-32412425.5 actor_loss=0.0392 critic_loss=87690437518.2222 entropy=7.2580 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Eval 5260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501106.3 mean_steps=13.7
|
|
[Episode 5270] reward=-48930207.3 actor_loss=0.0588 critic_loss=109211966259.2000 entropy=7.2569 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 5280] reward=-36582920.0 actor_loss=0.0313 critic_loss=93824345156.2667 entropy=7.2714 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Eval 5280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-431952.3 mean_steps=11.8
|
|
[Episode 5290] reward=-38619424.4 actor_loss=0.0345 critic_loss=100358931549.0909 entropy=7.2677 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0534 front_blocked=0
|
|
[Episode 5300] reward=-47428037.1 actor_loss=0.0474 critic_loss=110657114646.2609 entropy=7.2621 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 5300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441754.1 mean_steps=13.8
|
|
[Episode 5310] reward=-40905313.7 actor_loss=0.0536 critic_loss=101354352399.0588 entropy=7.2585 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0658 front_blocked=0
|
|
[Episode 5320] reward=-34589554.4 actor_loss=0.0310 critic_loss=92346605999.1579 entropy=7.2715 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 5320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546852.4 mean_steps=12.3
|
|
[Episode 5330] reward=-38994966.3 actor_loss=0.0452 critic_loss=99748695153.7778 entropy=7.2729 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 5340] reward=-45762154.2 actor_loss=0.0490 critic_loss=102257220371.6923 entropy=7.2812 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Eval 5340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447130.5 mean_steps=13.8
|
|
[Episode 5350] reward=-34969189.2 actor_loss=0.0489 critic_loss=94539388928.0000 entropy=7.2858 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Episode 5360] reward=-35713794.3 actor_loss=0.0432 critic_loss=92964915882.6667 entropy=7.2949 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 5360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492427.5 mean_steps=12.9
|
|
[Episode 5370] reward=-38208026.5 actor_loss=0.0429 critic_loss=97194360832.0000 entropy=7.2993 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Episode 5380] reward=-35542700.7 actor_loss=0.0018 critic_loss=94836883456.0000 entropy=7.2922 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Eval 5380] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-256965.3 mean_steps=14.9
|
|
[Episode 5390] reward=-43106893.3 actor_loss=0.0661 critic_loss=102018261736.7273 entropy=7.2923 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 5400] reward=-46870739.4 actor_loss=0.0595 critic_loss=107973328310.8571 entropy=7.2943 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 5400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-412554.4 mean_steps=13.8
|
|
[Episode 5410] reward=-28962234.5 actor_loss=0.0065 critic_loss=87302653831.5294 entropy=7.2953 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Episode 5420] reward=-41106797.2 actor_loss=0.0246 critic_loss=103138975744.0000 entropy=7.3094 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 5420] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-303470.5 mean_steps=14.3
|
|
[Episode 5430] reward=-41501450.6 actor_loss=0.0296 critic_loss=99599814009.2632 entropy=7.3096 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 5440] reward=-40926784.0 actor_loss=0.0588 critic_loss=92734442373.1200 entropy=7.3132 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 5440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-452337.7 mean_steps=12.7
|
|
[Episode 5450] reward=-36032444.0 actor_loss=0.0284 critic_loss=96226223445.3333 entropy=7.3155 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 5460] reward=-42149313.6 actor_loss=0.0545 critic_loss=97507800726.5882 entropy=7.3313 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Eval 5460] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-265802.5 mean_steps=15.7
|
|
[Episode 5470] reward=-46138133.0 actor_loss=0.0570 critic_loss=97107415040.0000 entropy=7.3401 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 5480] reward=-46484353.1 actor_loss=0.0410 critic_loss=104735621510.0952 entropy=7.3457 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Eval 5480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508937.3 mean_steps=12.8
|
|
[Episode 5490] reward=-42529870.9 actor_loss=0.0646 critic_loss=101725604717.7143 entropy=7.3451 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 5500] reward=-35906228.8 actor_loss=0.0314 critic_loss=96048997888.0000 entropy=7.3632 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Eval 5500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491678.6 mean_steps=12.8
|
|
[Episode 5510] reward=-32750329.4 actor_loss=0.0188 critic_loss=93875954145.8824 entropy=7.3566 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Episode 5520] reward=-44693498.6 actor_loss=0.0918 critic_loss=100517579044.5714 entropy=7.3549 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 5520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-465890.8 mean_steps=12.7
|
|
[Episode 5530] reward=-41157960.6 actor_loss=0.0159 critic_loss=100909000996.5714 entropy=7.3615 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 5540] reward=-35588704.6 actor_loss=0.0455 critic_loss=93520530733.1765 entropy=7.3685 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Eval 5540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472536.8 mean_steps=13.2
|
|
[Episode 5550] reward=-31635240.3 actor_loss=0.0373 critic_loss=86412585642.6667 entropy=7.3647 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Episode 5560] reward=-40463029.2 actor_loss=0.0470 critic_loss=101927394304.0000 entropy=7.3605 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0658 front_blocked=0
|
|
[Eval 5560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566278.0 mean_steps=12.0
|
|
[Episode 5570] reward=-29221900.6 actor_loss=0.0189 critic_loss=86750496475.4286 entropy=7.3668 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Episode 5580] reward=-37788733.0 actor_loss=0.0427 critic_loss=97602881024.0000 entropy=7.3707 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Eval 5580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476192.1 mean_steps=12.9
|
|
[Episode 5590] reward=-43827086.8 actor_loss=0.0490 critic_loss=100631571456.0000 entropy=7.3770 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Episode 5600] reward=-44929289.8 actor_loss=0.0244 critic_loss=96467804790.1538 entropy=7.3864 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Eval 5600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542701.7 mean_steps=13.1
|
|
[Episode 5610] reward=-23071356.5 actor_loss=0.0214 critic_loss=87266569898.6667 entropy=7.3946 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 5620] reward=-41671791.7 actor_loss=0.0643 critic_loss=99930396672.0000 entropy=7.3878 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Eval 5620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481594.4 mean_steps=13.9
|
|
[Episode 5630] reward=-39201721.1 actor_loss=0.0496 critic_loss=101285902677.3333 entropy=7.3894 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 5640] reward=-30542045.9 actor_loss=0.0612 critic_loss=89066162283.7895 entropy=7.3966 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Eval 5640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479792.2 mean_steps=12.6
|
|
[Episode 5650] reward=-40680090.6 actor_loss=0.0454 critic_loss=102314895496.5333 entropy=7.4085 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Episode 5660] reward=-29940841.3 actor_loss=-0.0083 critic_loss=87772470923.6364 entropy=7.4108 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0430 front_blocked=0
|
|
[Eval 5660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577277.9 mean_steps=12.0
|
|
[Episode 5670] reward=-29189498.1 actor_loss=0.0218 critic_loss=87419688773.8182 entropy=7.4206 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0508 front_blocked=0
|
|
[Episode 5680] reward=-48495525.0 actor_loss=0.0633 critic_loss=101038013952.0000 entropy=7.4137 approx_kl=0.0101 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 5680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-428554.6 mean_steps=14.4
|
|
[Episode 5690] reward=-33050995.0 actor_loss=0.0167 critic_loss=90700034340.5714 entropy=7.4202 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 5700] reward=-43504460.5 actor_loss=0.0289 critic_loss=101699960832.0000 entropy=7.4175 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 5700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-454918.7 mean_steps=11.7
|
|
[Episode 5710] reward=-24058153.7 actor_loss=0.0128 critic_loss=81062787364.5714 entropy=7.4123 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0436 front_blocked=0
|
|
[Episode 5720] reward=-37624253.7 actor_loss=0.0266 critic_loss=98682486784.0000 entropy=7.4235 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Eval 5720] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-656796.3 mean_steps=11.0
|
|
[Episode 5730] reward=-40626505.9 actor_loss=0.0371 critic_loss=100862202675.2000 entropy=7.4278 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 5740] reward=-43676307.5 actor_loss=0.0401 critic_loss=104052700842.6667 entropy=7.4196 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Eval 5740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529569.2 mean_steps=12.4
|
|
[Episode 5750] reward=-22257509.6 actor_loss=0.0237 critic_loss=82180213097.4118 entropy=7.4297 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0436 front_blocked=0
|
|
[Episode 5760] reward=-42520446.3 actor_loss=0.0455 critic_loss=95426211840.0000 entropy=7.4339 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Eval 5760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463234.2 mean_steps=12.9
|
|
[Episode 5770] reward=-27623684.5 actor_loss=0.0268 critic_loss=83660585041.9200 entropy=7.4275 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Episode 5780] reward=-26838559.7 actor_loss=0.0195 critic_loss=89186610614.8571 entropy=7.4263 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Eval 5780] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-272050.8 mean_steps=15.7
|
|
[Episode 5790] reward=-39263298.7 actor_loss=0.0208 critic_loss=92085694737.0667 entropy=7.4264 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0521 front_blocked=0
|
|
[Episode 5800] reward=-32908783.4 actor_loss=0.0418 critic_loss=93112639146.6667 entropy=7.4201 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Eval 5800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-515702.5 mean_steps=14.0
|
|
[Episode 5810] reward=-44022234.2 actor_loss=0.0678 critic_loss=102152501475.5556 entropy=7.4286 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 5820] reward=-39187038.7 actor_loss=0.0374 critic_loss=89497604336.9412 entropy=7.4214 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 5820] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-702688.8 mean_steps=11.3
|
|
[Episode 5830] reward=-25937544.5 actor_loss=0.0068 critic_loss=78401654374.4000 entropy=7.4328 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0469 front_blocked=0
|
|
[Episode 5840] reward=-38067303.0 actor_loss=0.0277 critic_loss=92032795587.7647 entropy=7.4282 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Eval 5840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413139.3 mean_steps=14.1
|
|
[Episode 5850] reward=-39559137.7 actor_loss=0.0476 critic_loss=96149309819.2593 entropy=7.4283 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Episode 5860] reward=-33638540.1 actor_loss=0.0394 critic_loss=88226691859.6923 entropy=7.4221 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Eval 5860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610623.2 mean_steps=12.2
|
|
[Episode 5870] reward=-34340728.0 actor_loss=0.0351 critic_loss=91244279808.0000 entropy=7.4233 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 5880] reward=-43927652.6 actor_loss=0.0360 critic_loss=105792512409.6000 entropy=7.4196 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 5880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504188.1 mean_steps=13.0
|
|
[Episode 5890] reward=-27397026.2 actor_loss=0.0065 critic_loss=86152180297.1429 entropy=7.4238 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Episode 5900] reward=-43145557.5 actor_loss=0.0218 critic_loss=98676453376.0000 entropy=7.4169 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 5900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398205.8 mean_steps=14.3
|
|
[Episode 5910] reward=-41705079.6 actor_loss=0.0437 critic_loss=97397452924.1212 entropy=7.4141 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Episode 5920] reward=-36472394.2 actor_loss=0.0572 critic_loss=96608418059.1304 entropy=7.4129 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 5920] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-647986.2 mean_steps=10.8
|
|
[Episode 5930] reward=-28516298.0 actor_loss=0.0143 critic_loss=88874126637.1765 entropy=7.4144 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Episode 5940] reward=-39861460.3 actor_loss=0.0207 critic_loss=99833944119.3513 entropy=7.4113 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Eval 5940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408703.0 mean_steps=14.0
|
|
[Episode 5950] reward=-31207970.9 actor_loss=0.0257 critic_loss=93505651671.0400 entropy=7.4184 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0521 front_blocked=0
|
|
[Episode 5960] reward=-42780751.1 actor_loss=0.0655 critic_loss=96598957116.2353 entropy=7.4310 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 5960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422318.8 mean_steps=13.7
|
|
[Episode 5970] reward=-32852108.2 actor_loss=0.0112 critic_loss=89243323050.6667 entropy=7.4351 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Episode 5980] reward=-38692093.6 actor_loss=0.0363 critic_loss=97156612778.6667 entropy=7.4410 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Eval 5980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-389571.0 mean_steps=12.9
|
|
[Episode 5990] reward=-30933986.9 actor_loss=0.0314 critic_loss=86752343162.8800 entropy=7.4438 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Episode 6000] reward=-38883457.4 actor_loss=0.0386 critic_loss=100450832042.6667 entropy=7.4410 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0658 front_blocked=0
|
|
[Eval 6000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-602914.5 mean_steps=11.2
|
|
[Episode 6010] reward=-31395397.9 actor_loss=0.0162 critic_loss=86976566916.7407 entropy=7.4485 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0534 front_blocked=0
|
|
[Episode 6020] reward=-34390303.4 actor_loss=0.0075 critic_loss=93052884546.7826 entropy=7.4632 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 6020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485482.2 mean_steps=13.0
|
|
[Episode 6030] reward=-33312965.0 actor_loss=0.0062 critic_loss=91590429033.4118 entropy=7.4687 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0462 front_blocked=0
|
|
[Episode 6040] reward=-27987285.9 actor_loss=0.0286 critic_loss=81204585103.3600 entropy=7.4721 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Eval 6040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555843.4 mean_steps=12.0
|
|
[Episode 6050] reward=-34201237.6 actor_loss=0.0093 critic_loss=89176473600.0000 entropy=7.4781 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Episode 6060] reward=-21131427.2 actor_loss=0.0124 critic_loss=67590545889.8824 entropy=7.4971 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0417 front_blocked=0
|
|
[Eval 6060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-310951.7 mean_steps=14.1
|
|
[Episode 6070] reward=-36455413.2 actor_loss=0.0279 critic_loss=91605019852.8000 entropy=7.4923 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Episode 6080] reward=-35524526.8 actor_loss=0.0064 critic_loss=90058084592.9412 entropy=7.4977 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 6080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-403873.1 mean_steps=14.0
|
|
[Episode 6090] reward=-43831668.6 actor_loss=0.0180 critic_loss=99781606400.0000 entropy=7.4948 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Episode 6100] reward=-40191030.2 actor_loss=0.0250 critic_loss=93619679016.4211 entropy=7.4904 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Eval 6100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-386231.4 mean_steps=14.2
|
|
[Episode 6110] reward=-34820002.5 actor_loss=0.0307 critic_loss=89070279923.8095 entropy=7.4854 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Episode 6120] reward=-37885968.7 actor_loss=0.0284 critic_loss=92328701952.0000 entropy=7.4837 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Eval 6120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477997.1 mean_steps=13.1
|
|
[Episode 6130] reward=-42592250.1 actor_loss=0.0625 critic_loss=98753878835.2000 entropy=7.4896 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 6140] reward=-29803410.5 actor_loss=0.0165 critic_loss=81335436247.0400 entropy=7.4963 approx_kl=0.0097 kl_stop=1 intervention_rate=0.0488 front_blocked=0
|
|
[Eval 6140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445861.4 mean_steps=13.6
|
|
[Episode 6150] reward=-42084181.4 actor_loss=0.0333 critic_loss=94698126969.9048 entropy=7.5024 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Episode 6160] reward=-40914363.6 actor_loss=0.0373 critic_loss=100642040035.5556 entropy=7.5029 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 6160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-482123.9 mean_steps=12.2
|
|
[Episode 6170] reward=-36887030.4 actor_loss=0.0416 critic_loss=98286625398.1538 entropy=7.5069 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0645 front_blocked=0
|
|
[Episode 6180] reward=-32397531.1 actor_loss=0.0383 critic_loss=88938318701.7143 entropy=7.5043 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 6180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447717.3 mean_steps=13.6
|
|
[Episode 6190] reward=-36129926.2 actor_loss=0.0564 critic_loss=91127755434.6667 entropy=7.5052 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 6200] reward=-34865136.2 actor_loss=0.0342 critic_loss=86987157699.0476 entropy=7.5041 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Eval 6200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462975.9 mean_steps=13.5
|
|
[Episode 6210] reward=-33435097.6 actor_loss=0.0365 critic_loss=91976889548.8000 entropy=7.5031 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Episode 6220] reward=-47796081.6 actor_loss=0.0401 critic_loss=102629333779.6923 entropy=7.5162 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 6220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472799.3 mean_steps=13.7
|
|
[Episode 6230] reward=-43083457.2 actor_loss=0.0371 critic_loss=98651801276.6316 entropy=7.5206 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Episode 6240] reward=-50307394.6 actor_loss=0.0347 critic_loss=104896790528.0000 entropy=7.5115 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Eval 6240] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-365293.4 mean_steps=15.1
|
|
[Episode 6250] reward=-33603797.6 actor_loss=0.0070 critic_loss=85381675235.5556 entropy=7.5130 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0469 front_blocked=0
|
|
[Episode 6260] reward=-45132411.9 actor_loss=0.0340 critic_loss=98626843852.8000 entropy=7.5152 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 6260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-420019.8 mean_steps=14.8
|
|
[Episode 6270] reward=-33321509.2 actor_loss=0.0406 critic_loss=96147103416.3200 entropy=7.5167 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Episode 6280] reward=-24931790.0 actor_loss=0.0183 critic_loss=78846705254.4000 entropy=7.5294 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0449 front_blocked=0
|
|
[Eval 6280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475669.4 mean_steps=12.5
|
|
[Episode 6290] reward=-30089700.6 actor_loss=-0.0132 critic_loss=78896669789.0909 entropy=7.5276 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0410 front_blocked=0
|
|
[Episode 6300] reward=-37592214.8 actor_loss=0.0094 critic_loss=92171995015.5294 entropy=7.5306 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Eval 6300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533278.4 mean_steps=12.2
|
|
[Episode 6310] reward=-34556832.7 actor_loss=0.0306 critic_loss=88861101494.8571 entropy=7.5328 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Episode 6320] reward=-43314658.0 actor_loss=0.0240 critic_loss=96674429715.6923 entropy=7.5427 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0618 front_blocked=0
|
|
[Eval 6320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585794.5 mean_steps=11.8
|
|
[Episode 6330] reward=-39512885.6 actor_loss=0.0701 critic_loss=97767525338.0741 entropy=7.5514 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 6340] reward=-53031005.0 actor_loss=0.0496 critic_loss=111076375405.7143 entropy=7.5528 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 6340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498307.3 mean_steps=12.8
|
|
[Episode 6350] reward=-34003868.4 actor_loss=0.0051 critic_loss=92869079463.7241 entropy=7.5536 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Episode 6360] reward=-44761972.9 actor_loss=0.0544 critic_loss=102066769197.1765 entropy=7.5556 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 6360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471641.0 mean_steps=12.4
|
|
[Episode 6370] reward=-37917060.9 actor_loss=0.0399 critic_loss=97210897203.2000 entropy=7.5614 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 6380] reward=-37070053.7 actor_loss=0.0221 critic_loss=89787563758.9333 entropy=7.5607 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0573 front_blocked=0
|
|
[Eval 6380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571139.8 mean_steps=12.2
|
|
[Episode 6390] reward=-35719550.4 actor_loss=0.0190 critic_loss=89849304726.5882 entropy=7.5653 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Episode 6400] reward=-33968859.4 actor_loss=0.0231 critic_loss=97000644608.0000 entropy=7.5727 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Eval 6400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427855.9 mean_steps=14.1
|
|
[Episode 6410] reward=-30171854.4 actor_loss=0.0231 critic_loss=82267458839.2727 entropy=7.5790 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 6420] reward=-40794013.1 actor_loss=0.0300 critic_loss=99655835930.4828 entropy=7.5815 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 6420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505570.1 mean_steps=13.1
|
|
[Episode 6430] reward=-49583889.0 actor_loss=0.0385 critic_loss=106517111239.1111 entropy=7.5805 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 6440] reward=-35218459.3 actor_loss=0.0486 critic_loss=87562439101.2174 entropy=7.5840 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Eval 6440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471970.0 mean_steps=13.8
|
|
[Episode 6450] reward=-38046867.8 actor_loss=0.0316 critic_loss=89233105964.5217 entropy=7.5976 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 6460] reward=-35822017.6 actor_loss=0.0141 critic_loss=88286208409.6000 entropy=7.5984 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 6460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540966.0 mean_steps=12.2
|
|
[Episode 6470] reward=-27942398.0 actor_loss=0.0198 critic_loss=76669804134.4000 entropy=7.6017 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Episode 6480] reward=-40370688.6 actor_loss=0.0389 critic_loss=93571851776.0000 entropy=7.5888 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Eval 6480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549281.5 mean_steps=13.2
|
|
[Episode 6490] reward=-32424125.5 actor_loss=-0.0023 critic_loss=84397062931.6923 entropy=7.5914 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 6500] reward=-46562107.7 actor_loss=0.0445 critic_loss=100972991283.2000 entropy=7.5893 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Eval 6500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504741.9 mean_steps=13.0
|
|
[Episode 6510] reward=-38939113.9 actor_loss=0.0250 critic_loss=86227629056.0000 entropy=7.5985 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 6520] reward=-28430834.9 actor_loss=0.0100 critic_loss=76029920768.0000 entropy=7.5937 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0404 front_blocked=0
|
|
[Eval 6520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391919.6 mean_steps=14.4
|
|
[Episode 6530] reward=-32582742.1 actor_loss=0.0179 critic_loss=89385265152.0000 entropy=7.6012 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 6540] reward=-32283528.9 actor_loss=0.0135 critic_loss=86996181772.1905 entropy=7.6094 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Eval 6540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539352.2 mean_steps=12.6
|
|
[Episode 6550] reward=-34734542.7 actor_loss=0.0399 critic_loss=92496174731.6364 entropy=7.6111 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Episode 6560] reward=-35836799.2 actor_loss=0.0272 critic_loss=86741547300.5714 entropy=7.6157 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Eval 6560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-485022.8 mean_steps=12.1
|
|
[Episode 6570] reward=-27853951.5 actor_loss=0.0270 critic_loss=80878201969.7778 entropy=7.6143 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Episode 6580] reward=-34652984.6 actor_loss=0.0046 critic_loss=87258732098.7826 entropy=7.6107 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0488 front_blocked=0
|
|
[Eval 6580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-419611.4 mean_steps=13.0
|
|
[Episode 6590] reward=-31847154.7 actor_loss=0.0007 critic_loss=90766744780.8000 entropy=7.6211 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 6600] reward=-42182114.7 actor_loss=0.0672 critic_loss=96606093312.0000 entropy=7.6340 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Eval 6600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477270.7 mean_steps=12.8
|
|
[Episode 6610] reward=-42692364.6 actor_loss=0.0282 critic_loss=94279702648.4706 entropy=7.6344 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Episode 6620] reward=-23910786.8 actor_loss=-0.0050 critic_loss=69051456418.9091 entropy=7.6390 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0436 front_blocked=0
|
|
[Eval 6620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435772.0 mean_steps=13.1
|
|
[Episode 6630] reward=-33957646.3 actor_loss=0.0388 critic_loss=86557226871.4667 entropy=7.6478 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0508 front_blocked=0
|
|
[Episode 6640] reward=-43184256.9 actor_loss=-0.0004 critic_loss=91235792357.0526 entropy=7.6464 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0508 front_blocked=0
|
|
[Eval 6640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465726.6 mean_steps=13.5
|
|
[Episode 6650] reward=-32116698.3 actor_loss=-0.0037 critic_loss=86499865466.4348 entropy=7.6467 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Episode 6660] reward=-26032258.3 actor_loss=-0.0123 critic_loss=74538138785.6842 entropy=7.6580 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0358 front_blocked=0
|
|
[Eval 6660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465624.1 mean_steps=13.3
|
|
[Episode 6670] reward=-22483863.9 actor_loss=-0.0014 critic_loss=77097679075.5556 entropy=7.6538 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0371 front_blocked=0
|
|
[Episode 6680] reward=-42818551.9 actor_loss=0.0287 critic_loss=99404718080.0000 entropy=7.6544 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Eval 6680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-470548.0 mean_steps=14.2
|
|
[Episode 6690] reward=-34383187.0 actor_loss=0.0240 critic_loss=89864077793.8824 entropy=7.6562 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Episode 6700] reward=-36366137.1 actor_loss=0.0285 critic_loss=91750849194.6667 entropy=7.6615 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Eval 6700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-296239.6 mean_steps=14.8
|
|
[Episode 6710] reward=-23049374.6 actor_loss=0.0016 critic_loss=75869785702.4000 entropy=7.6598 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Episode 6720] reward=-34853263.9 actor_loss=0.0140 critic_loss=100103531724.8000 entropy=7.6681 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0488 front_blocked=0
|
|
[Eval 6720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-466741.3 mean_steps=14.2
|
|
[Episode 6730] reward=-44979186.2 actor_loss=0.0360 critic_loss=100461418905.6000 entropy=7.6672 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 6740] reward=-42626800.2 actor_loss=0.0281 critic_loss=95858683740.1600 entropy=7.6683 approx_kl=0.0095 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 6740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-291597.7 mean_steps=14.8
|
|
[Episode 6750] reward=-30951494.3 actor_loss=0.0030 critic_loss=83732540235.2941 entropy=7.6737 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0488 front_blocked=0
|
|
[Episode 6760] reward=-32855076.8 actor_loss=0.0053 critic_loss=87654625495.5789 entropy=7.6901 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0488 front_blocked=0
|
|
[Eval 6760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429792.7 mean_steps=14.1
|
|
[Episode 6770] reward=-21473278.9 actor_loss=0.0096 critic_loss=78859726356.4800 entropy=7.6988 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0430 front_blocked=0
|
|
[Episode 6780] reward=-32068778.8 actor_loss=0.0247 critic_loss=81555050736.9412 entropy=7.6919 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Eval 6780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447918.2 mean_steps=13.1
|
|
[Episode 6790] reward=-27223340.7 actor_loss=-0.0058 critic_loss=68696231772.1600 entropy=7.6967 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0384 front_blocked=0
|
|
[Episode 6800] reward=-26129540.1 actor_loss=0.0020 critic_loss=70173865149.6296 entropy=7.7051 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Eval 6800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-634977.0 mean_steps=12.3
|
|
[Episode 6810] reward=-33538953.4 actor_loss=0.0485 critic_loss=92016050532.1739 entropy=7.7052 approx_kl=0.0097 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Episode 6820] reward=-34728021.5 actor_loss=0.0249 critic_loss=88824585808.8421 entropy=7.7069 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Eval 6820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-639661.3 mean_steps=11.4
|
|
[Episode 6830] reward=-23584222.2 actor_loss=0.0102 critic_loss=73784012214.8571 entropy=7.7160 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0384 front_blocked=0
|
|
[Episode 6840] reward=-25801958.5 actor_loss=0.0084 critic_loss=80522181451.2941 entropy=7.7185 approx_kl=0.0104 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Eval 6840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-393613.1 mean_steps=14.6
|
|
[Episode 6850] reward=-36934741.2 actor_loss=0.0487 critic_loss=96719067648.0000 entropy=7.7100 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Episode 6860] reward=-32125133.7 actor_loss=0.0265 critic_loss=93245298551.4667 entropy=7.7146 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0488 front_blocked=0
|
|
[Eval 6860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465973.3 mean_steps=13.7
|
|
[Episode 6870] reward=-31668543.7 actor_loss=0.0152 critic_loss=88388560896.0000 entropy=7.7153 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0430 front_blocked=0
|
|
[Episode 6880] reward=-25858525.5 actor_loss=-0.0032 critic_loss=83079695701.3333 entropy=7.7192 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0482 front_blocked=0
|
|
[Eval 6880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541208.8 mean_steps=12.3
|
|
[Episode 6890] reward=-36583950.6 actor_loss=0.0270 critic_loss=89419276615.6800 entropy=7.7267 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 6900] reward=-37905296.2 actor_loss=0.0416 critic_loss=90384571671.2727 entropy=7.7328 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 6900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472827.9 mean_steps=13.9
|
|
[Episode 6910] reward=-37814967.5 actor_loss=0.0535 critic_loss=95748764765.0909 entropy=7.7389 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Episode 6920] reward=-45827141.0 actor_loss=0.0339 critic_loss=98994472401.4545 entropy=7.7396 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Eval 6920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524308.8 mean_steps=12.6
|
|
[Episode 6930] reward=-35430306.5 actor_loss=0.0366 critic_loss=88443952206.7692 entropy=7.7502 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0521 front_blocked=0
|
|
[Episode 6940] reward=-37894227.7 actor_loss=0.0420 critic_loss=99956472410.3529 entropy=7.7516 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Eval 6940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518119.9 mean_steps=13.2
|
|
[Episode 6950] reward=-46303192.8 actor_loss=0.0373 critic_loss=96031996024.4706 entropy=7.7577 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0632 front_blocked=0
|
|
[Episode 6960] reward=-38110678.7 actor_loss=0.0151 critic_loss=91068672107.7895 entropy=7.7476 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Eval 6960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536186.3 mean_steps=12.2
|
|
[Episode 6970] reward=-29135706.8 actor_loss=0.0084 critic_loss=87902975426.5600 entropy=7.7454 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 6980] reward=-35541730.8 actor_loss=0.0183 critic_loss=91743053336.3810 entropy=7.7494 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 6980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491555.3 mean_steps=12.8
|
|
[Episode 6990] reward=-35795922.0 actor_loss=0.0285 critic_loss=82392222768.7619 entropy=7.7609 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Episode 7000] reward=-24803324.2 actor_loss=0.0042 critic_loss=81296575977.7391 entropy=7.7768 approx_kl=0.0094 kl_stop=1 intervention_rate=0.0462 front_blocked=0
|
|
[Eval 7000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463964.6 mean_steps=13.4
|
|
[Episode 7010] reward=-34262749.1 actor_loss=0.0126 critic_loss=84738374041.6000 entropy=7.7801 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0508 front_blocked=0
|
|
[Episode 7020] reward=-34100910.2 actor_loss=-0.0003 critic_loss=82810973449.4815 entropy=7.7910 approx_kl=0.0097 kl_stop=1 intervention_rate=0.0514 front_blocked=0
|
|
[Eval 7020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-519994.9 mean_steps=12.3
|
|
[Episode 7030] reward=-39029040.4 actor_loss=0.0253 critic_loss=88743752570.4348 entropy=7.7989 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Episode 7040] reward=-38586030.8 actor_loss=0.0345 critic_loss=93919814451.2000 entropy=7.7953 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Eval 7040] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-290157.4 mean_steps=16.2
|
|
[Episode 7050] reward=-35880546.0 actor_loss=-0.0148 critic_loss=83604412825.6000 entropy=7.8003 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0508 front_blocked=0
|
|
[Episode 7060] reward=-41591517.9 actor_loss=0.0385 critic_loss=93439030613.3333 entropy=7.8164 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0618 front_blocked=0
|
|
[Eval 7060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-364637.4 mean_steps=14.2
|
|
[Episode 7070] reward=-51257471.4 actor_loss=0.0201 critic_loss=105336268390.4000 entropy=7.8146 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0638 front_blocked=0
|
|
[Episode 7080] reward=-40793769.4 actor_loss=0.0252 critic_loss=90418901967.2381 entropy=7.8137 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0625 front_blocked=0
|
|
[Eval 7080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508182.6 mean_steps=13.3
|
|
[Episode 7090] reward=-37299419.0 actor_loss=0.0323 critic_loss=90671783936.0000 entropy=7.8172 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Episode 7100] reward=-45468378.4 actor_loss=0.0454 critic_loss=93158548626.2857 entropy=7.8062 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 7100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-603172.5 mean_steps=12.8
|
|
[Episode 7110] reward=-26478284.5 actor_loss=0.0318 critic_loss=79627262439.6190 entropy=7.8142 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0462 front_blocked=0
|
|
[Episode 7120] reward=-48042824.8 actor_loss=0.0636 critic_loss=107624745369.6000 entropy=7.8102 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Eval 7120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537460.3 mean_steps=12.2
|
|
[Episode 7130] reward=-40047239.1 actor_loss=0.0410 critic_loss=94216031280.7619 entropy=7.8079 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Episode 7140] reward=-35118720.0 actor_loss=0.0200 critic_loss=81131884953.6000 entropy=7.8129 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0521 front_blocked=0
|
|
[Eval 7140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368149.2 mean_steps=14.1
|
|
[Episode 7150] reward=-30764618.6 actor_loss=0.0122 critic_loss=80436865706.6667 entropy=7.8167 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0469 front_blocked=0
|
|
[Episode 7160] reward=-23360059.2 actor_loss=-0.0067 critic_loss=67307012808.3478 entropy=7.8246 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0299 front_blocked=0
|
|
[Eval 7160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-670434.5 mean_steps=11.7
|
|
[Episode 7170] reward=-35821038.3 actor_loss=0.0212 critic_loss=82935180434.2857 entropy=7.8284 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0592 front_blocked=0
|
|
[Episode 7180] reward=-42231440.0 actor_loss=0.0217 critic_loss=91292667904.0000 entropy=7.8327 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Eval 7180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515066.8 mean_steps=12.9
|
|
[Episode 7190] reward=-39224634.1 actor_loss=0.0183 critic_loss=89473742740.2105 entropy=7.8325 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 7200] reward=-32149110.1 actor_loss=0.0307 critic_loss=81157589178.1818 entropy=7.8420 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Eval 7200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541473.8 mean_steps=12.6
|
|
[Episode 7210] reward=-33618716.5 actor_loss=0.0185 critic_loss=84997126826.6667 entropy=7.8510 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Episode 7220] reward=-12776641.2 actor_loss=-0.0093 critic_loss=56276044961.6842 entropy=7.8516 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0293 front_blocked=0
|
|
[Eval 7220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492937.4 mean_steps=12.8
|
|
[Episode 7230] reward=-24933700.4 actor_loss=0.0230 critic_loss=75704752865.2800 entropy=7.8630 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Episode 7240] reward=-44067564.7 actor_loss=0.0227 critic_loss=91190693205.3333 entropy=7.8739 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Eval 7240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465054.1 mean_steps=14.0
|
|
[Episode 7250] reward=-36956579.5 actor_loss=0.0157 critic_loss=84791832380.9524 entropy=7.8912 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Episode 7260] reward=-37743618.3 actor_loss=0.0399 critic_loss=88285828892.4444 entropy=7.8997 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0508 front_blocked=0
|
|
[Eval 7260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440572.5 mean_steps=13.9
|
|
[Episode 7270] reward=-47043608.5 actor_loss=0.0432 critic_loss=100680653619.2000 entropy=7.9053 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 7280] reward=-29953851.3 actor_loss=0.0144 critic_loss=72763897344.0000 entropy=7.9020 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0449 front_blocked=0
|
|
[Eval 7280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-448029.0 mean_steps=12.3
|
|
[Episode 7290] reward=-38683843.7 actor_loss=0.0176 critic_loss=83468118395.2593 entropy=7.9026 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Episode 7300] reward=-19835703.5 actor_loss=-0.0136 critic_loss=65346449080.3200 entropy=7.9105 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0339 front_blocked=0
|
|
[Eval 7300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-454893.9 mean_steps=12.8
|
|
[Episode 7310] reward=-26462469.1 actor_loss=-0.0003 critic_loss=74093649547.6364 entropy=7.9357 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0417 front_blocked=0
|
|
[Episode 7320] reward=-39876317.4 actor_loss=0.0490 critic_loss=86747374861.4737 entropy=7.9507 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 7320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505470.1 mean_steps=13.2
|
|
[Episode 7330] reward=-35012463.5 actor_loss=0.0153 critic_loss=81839628769.8824 entropy=7.9527 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0534 front_blocked=0
|
|
[Episode 7340] reward=-33918205.6 actor_loss=0.0168 critic_loss=81550231738.1818 entropy=7.9547 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 7340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-642262.5 mean_steps=12.4
|
|
[Episode 7350] reward=-28717230.2 actor_loss=-0.0162 critic_loss=73171586885.8182 entropy=7.9599 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0397 front_blocked=0
|
|
[Episode 7360] reward=-32083744.6 actor_loss=0.0187 critic_loss=84348309012.4800 entropy=7.9686 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Eval 7360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-343862.7 mean_steps=13.5
|
|
[Episode 7370] reward=-28046336.6 actor_loss=0.0222 critic_loss=75782035712.0000 entropy=7.9670 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Episode 7380] reward=-29909341.1 actor_loss=0.0086 critic_loss=78374969002.6667 entropy=7.9695 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0423 front_blocked=0
|
|
[Eval 7380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498937.2 mean_steps=12.8
|
|
[Episode 7390] reward=-32421860.8 actor_loss=0.0063 critic_loss=78671541114.4348 entropy=7.9662 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 7400] reward=-25560439.3 actor_loss=0.0102 critic_loss=72178852608.0000 entropy=7.9573 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0404 front_blocked=0
|
|
[Eval 7400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-387895.7 mean_steps=13.7
|
|
[Episode 7410] reward=-28309100.6 actor_loss=0.0167 critic_loss=72611507833.9048 entropy=7.9530 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0482 front_blocked=0
|
|
[Episode 7420] reward=-25046132.2 actor_loss=0.0129 critic_loss=69913501013.3333 entropy=7.9540 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0449 front_blocked=0
|
|
[Eval 7420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468373.8 mean_steps=13.6
|
|
[Episode 7430] reward=-26104329.3 actor_loss=0.0178 critic_loss=79890094762.6667 entropy=7.9496 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 7440] reward=-33844953.1 actor_loss=0.0230 critic_loss=83546844293.5652 entropy=7.9480 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0566 front_blocked=0
|
|
[Eval 7440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-508146.7 mean_steps=12.3
|
|
[Episode 7450] reward=-25031720.7 actor_loss=-0.0132 critic_loss=71955815201.3913 entropy=7.9593 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0365 front_blocked=0
|
|
[Episode 7460] reward=-27150630.6 actor_loss=-0.0189 critic_loss=65730727936.0000 entropy=7.9615 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0391 front_blocked=0
|
|
[Eval 7460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575938.0 mean_steps=12.2
|
|
[Episode 7470] reward=-40261537.6 actor_loss=0.0467 critic_loss=88170492723.2000 entropy=7.9793 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0618 front_blocked=0
|
|
[Episode 7480] reward=-27060374.0 actor_loss=0.0052 critic_loss=76146750557.0909 entropy=7.9896 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0417 front_blocked=0
|
|
[Eval 7480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525444.6 mean_steps=12.6
|
|
[Episode 7490] reward=-27567698.6 actor_loss=0.0088 critic_loss=72090302374.9565 entropy=7.9852 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0404 front_blocked=0
|
|
[Episode 7500] reward=-22545330.7 actor_loss=-0.0190 critic_loss=66091407415.3513 entropy=7.9987 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0326 front_blocked=0
|
|
[Eval 7500] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-295258.5 mean_steps=16.1
|
|
[Episode 7510] reward=-41634400.3 actor_loss=0.0382 critic_loss=90521949915.4286 entropy=8.0136 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0553 front_blocked=0
|
|
[Episode 7520] reward=-29510532.8 actor_loss=0.0014 critic_loss=81991168178.0870 entropy=8.0213 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0404 front_blocked=0
|
|
[Eval 7520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592432.4 mean_steps=12.5
|
|
[Episode 7530] reward=-26347656.1 actor_loss=-0.0000 critic_loss=67573191580.9032 entropy=8.0217 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0443 front_blocked=0
|
|
[Episode 7540] reward=-19122036.3 actor_loss=-0.0289 critic_loss=59744544768.0000 entropy=8.0297 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0345 front_blocked=0
|
|
[Eval 7540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524288.9 mean_steps=13.2
|
|
[Episode 7550] reward=-24113566.9 actor_loss=-0.0205 critic_loss=71961015773.8667 entropy=8.0417 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0358 front_blocked=0
|
|
[Episode 7560] reward=-22283175.1 actor_loss=-0.0109 critic_loss=68584081162.2400 entropy=8.0618 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0352 front_blocked=0
|
|
[Eval 7560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511301.4 mean_steps=13.3
|
|
[Episode 7570] reward=-22800790.5 actor_loss=-0.0275 critic_loss=67526619487.0857 entropy=8.0784 approx_kl=0.0099 kl_stop=1 intervention_rate=0.0339 front_blocked=0
|
|
[Episode 7580] reward=-18624578.3 actor_loss=-0.0140 critic_loss=60587091037.0909 entropy=8.0707 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0286 front_blocked=0
|
|
[Eval 7580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623222.8 mean_steps=11.5
|
|
[Episode 7590] reward=-30398635.7 actor_loss=0.0022 critic_loss=78923676876.8000 entropy=8.0605 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0436 front_blocked=0
|
|
[Episode 7600] reward=-34950426.6 actor_loss=0.0361 critic_loss=85991258574.4516 entropy=8.0656 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 7600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574884.5 mean_steps=12.8
|
|
[Episode 7610] reward=-29536317.0 actor_loss=-0.0010 critic_loss=77492425950.6087 entropy=8.0769 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0417 front_blocked=0
|
|
[Episode 7620] reward=-37529258.2 actor_loss=0.0162 critic_loss=86075223900.1600 entropy=8.0792 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Eval 7620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442138.6 mean_steps=13.4
|
|
[Episode 7630] reward=-21303922.4 actor_loss=-0.0105 critic_loss=60732761216.0000 entropy=8.0835 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0397 front_blocked=0
|
|
[Episode 7640] reward=-17626763.4 actor_loss=-0.0183 critic_loss=53997550884.5714 entropy=8.0896 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0280 front_blocked=0
|
|
[Eval 7640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464109.5 mean_steps=13.4
|
|
[Episode 7650] reward=-33646433.6 actor_loss=0.0495 critic_loss=79238730020.5714 entropy=8.0952 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0560 front_blocked=0
|
|
[Episode 7660] reward=-33943887.0 actor_loss=0.0418 critic_loss=82928769536.0000 entropy=8.0883 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Eval 7660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500222.1 mean_steps=12.9
|
|
[Episode 7670] reward=-22583286.6 actor_loss=-0.0131 critic_loss=69673700966.4000 entropy=8.0871 approx_kl=0.0103 kl_stop=1 intervention_rate=0.0345 front_blocked=0
|
|
[Episode 7680] reward=-53802968.9 actor_loss=0.0532 critic_loss=103285227847.6800 entropy=8.0979 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 7680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562413.6 mean_steps=12.8
|
|
[Episode 7690] reward=-23081250.8 actor_loss=-0.0056 critic_loss=65962829960.5333 entropy=8.1057 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0391 front_blocked=0
|
|
[Episode 7700] reward=-29244044.0 actor_loss=0.0141 critic_loss=80772514107.0769 entropy=8.1189 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Eval 7700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585207.1 mean_steps=12.2
|
|
[Episode 7710] reward=-39917080.2 actor_loss=0.0110 critic_loss=81995062905.9048 entropy=8.1232 approx_kl=0.0090 kl_stop=1 intervention_rate=0.0579 front_blocked=0
|
|
[Episode 7720] reward=-38594248.7 actor_loss=-0.0075 critic_loss=88281879605.8947 entropy=8.1339 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Eval 7720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534554.6 mean_steps=13.2
|
|
[Episode 7730] reward=-32961712.7 actor_loss=0.0311 critic_loss=85284874825.1429 entropy=8.1397 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0540 front_blocked=0
|
|
[Episode 7740] reward=-33718691.6 actor_loss=0.0080 critic_loss=76255669381.5652 entropy=8.1564 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Eval 7740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-389970.6 mean_steps=14.1
|
|
[Episode 7750] reward=-26665162.3 actor_loss=0.0111 critic_loss=71132286976.0000 entropy=8.1624 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0443 front_blocked=0
|
|
[Episode 7760] reward=-19953789.3 actor_loss=-0.0497 critic_loss=44866333062.0952 entropy=8.1818 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0247 front_blocked=0
|
|
[Eval 7760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-479770.3 mean_steps=11.2
|
|
[Episode 7770] reward=-23023341.9 actor_loss=-0.0291 critic_loss=67598920735.0303 entropy=8.1954 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0339 front_blocked=0
|
|
[Episode 7780] reward=-25239755.8 actor_loss=-0.0332 critic_loss=64302215331.8400 entropy=8.2079 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0345 front_blocked=0
|
|
[Eval 7780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482090.4 mean_steps=13.8
|
|
[Episode 7790] reward=-27880961.3 actor_loss=0.0149 critic_loss=73964929024.0000 entropy=8.2248 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0436 front_blocked=0
|
|
[Episode 7800] reward=-32466538.0 actor_loss=0.0295 critic_loss=78094627761.2308 entropy=8.2330 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0534 front_blocked=0
|
|
[Eval 7800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454650.6 mean_steps=13.9
|
|
[Episode 7810] reward=-29583733.1 actor_loss=0.0112 critic_loss=67715700628.2105 entropy=8.2475 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0462 front_blocked=0
|
|
[Episode 7820] reward=-41469700.1 actor_loss=0.0295 critic_loss=85712469032.9600 entropy=8.2413 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0547 front_blocked=0
|
|
[Eval 7820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423996.6 mean_steps=13.9
|
|
[Episode 7830] reward=-26043723.7 actor_loss=0.0029 critic_loss=64650591717.0526 entropy=8.2434 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0391 front_blocked=0
|
|
[Episode 7840] reward=-18664254.3 actor_loss=-0.0291 critic_loss=55241740288.0000 entropy=8.2408 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0312 front_blocked=0
|
|
[Eval 7840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-392560.7 mean_steps=13.8
|
|
[Episode 7850] reward=-25978932.0 actor_loss=0.0139 critic_loss=65860530043.8710 entropy=8.2416 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Episode 7860] reward=-29046121.0 actor_loss=0.0348 critic_loss=71651179297.3913 entropy=8.2629 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0482 front_blocked=0
|
|
[Eval 7860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561554.2 mean_steps=12.6
|
|
[Episode 7870] reward=-27826612.3 actor_loss=0.0235 critic_loss=65945781248.0000 entropy=8.2735 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0456 front_blocked=0
|
|
[Episode 7880] reward=-21536375.1 actor_loss=-0.0080 critic_loss=53047293269.3333 entropy=8.2822 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0365 front_blocked=0
|
|
[Eval 7880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567903.8 mean_steps=11.9
|
|
[Episode 7890] reward=-29419736.1 actor_loss=0.0318 critic_loss=73572013093.9259 entropy=8.3041 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Episode 7900] reward=-31056284.2 actor_loss=0.0164 critic_loss=78791384268.8000 entropy=8.3146 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0475 front_blocked=0
|
|
[Eval 7900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-471932.7 mean_steps=14.5
|
|
[Episode 7910] reward=-26302997.0 actor_loss=0.0137 critic_loss=65013919744.0000 entropy=8.3124 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0391 front_blocked=0
|
|
[Episode 7920] reward=-14480361.1 actor_loss=-0.0555 critic_loss=36171513675.2941 entropy=8.3229 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0202 front_blocked=0
|
|
[Eval 7920] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-342221.9 mean_steps=15.7
|
|
[Episode 7930] reward=-31180898.8 actor_loss=0.0265 critic_loss=69631028155.7333 entropy=8.3316 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Episode 7940] reward=-44793807.2 actor_loss=0.0241 critic_loss=88034101930.6667 entropy=8.3401 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0605 front_blocked=0
|
|
[Eval 7940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482165.2 mean_steps=13.1
|
|
[Episode 7950] reward=-29165012.3 actor_loss=-0.0030 critic_loss=65390284957.5385 entropy=8.3500 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0417 front_blocked=0
|
|
[Episode 7960] reward=-39219194.5 actor_loss=0.0231 critic_loss=77452496896.0000 entropy=8.3432 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0501 front_blocked=0
|
|
[Eval 7960] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-593985.4 mean_steps=11.4
|
|
[Episode 7970] reward=-16424661.7 actor_loss=-0.0371 critic_loss=58485897216.0000 entropy=8.3433 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0260 front_blocked=0
|
|
[Episode 7980] reward=-16309258.6 actor_loss=0.0197 critic_loss=62335942656.0000 entropy=8.3500 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0332 front_blocked=0
|
|
[Eval 7980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505899.2 mean_steps=12.7
|
|
[Episode 7990] reward=-20185445.3 actor_loss=-0.0317 critic_loss=58218552173.7143 entropy=8.3372 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0273 front_blocked=0
|
|
[Episode 8000] reward=-16921059.3 actor_loss=-0.0430 critic_loss=42536458093.7143 entropy=8.3399 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0267 front_blocked=0
|
|
[Eval 8000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450909.0 mean_steps=14.1
|
|
[Episode 8010] reward=-18205794.9 actor_loss=-0.0317 critic_loss=52087806244.5714 entropy=8.3616 approx_kl=0.0078 kl_stop=1 intervention_rate=0.0273 front_blocked=0
|
|
[Episode 8020] reward=-25655271.1 actor_loss=-0.0050 critic_loss=63238401365.3333 entropy=8.3635 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0378 front_blocked=0
|
|
[Eval 8020] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-274924.0 mean_steps=16.3
|
|
[Episode 8030] reward=-25981034.7 actor_loss=-0.0228 critic_loss=63359873272.2424 entropy=8.3782 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0365 front_blocked=0
|
|
[Episode 8040] reward=-18563212.0 actor_loss=-0.0329 critic_loss=58495291245.7143 entropy=8.3727 approx_kl=0.0085 kl_stop=1 intervention_rate=0.0306 front_blocked=0
|
|
[Eval 8040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535442.2 mean_steps=12.7
|
|
[Episode 8050] reward=-18267928.5 actor_loss=-0.0468 critic_loss=50708337095.1111 entropy=8.3927 approx_kl=0.0104 kl_stop=1 intervention_rate=0.0267 front_blocked=0
|
|
[Episode 8060] reward=-34122205.1 actor_loss=0.0115 critic_loss=74310698507.3778 entropy=8.3996 approx_kl=0.0080 kl_stop=0 intervention_rate=0.0482 front_blocked=0
|
|
[Eval 8060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-581912.3 mean_steps=11.3
|
|
[Episode 8070] reward=-8322793.1 actor_loss=-0.0632 critic_loss=30975172969.4118 entropy=8.4030 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0143 front_blocked=0
|
|
[Episode 8080] reward=-35719643.7 actor_loss=-0.0017 critic_loss=80926218649.6000 entropy=8.4189 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Eval 8080] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-336419.5 mean_steps=16.4
|
|
[Episode 8090] reward=-28594598.6 actor_loss=-0.0187 critic_loss=67785433460.3636 entropy=8.4223 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0404 front_blocked=0
|
|
[Episode 8100] reward=-34773255.6 actor_loss=-0.0121 critic_loss=71260649851.2593 entropy=8.4149 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0482 front_blocked=0
|
|
[Eval 8100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-420619.0 mean_steps=13.7
|
|
[Episode 8110] reward=-22022915.0 actor_loss=-0.0211 critic_loss=62997026570.2400 entropy=8.4264 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0365 front_blocked=0
|
|
[Episode 8120] reward=-27888935.2 actor_loss=0.0094 critic_loss=64403449540.9231 entropy=8.4314 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0443 front_blocked=0
|
|
[Eval 8120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-635283.9 mean_steps=12.8
|
|
[Episode 8130] reward=-19538824.9 actor_loss=-0.0045 critic_loss=61933969221.8182 entropy=8.4314 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0339 front_blocked=0
|
|
[Episode 8140] reward=-17342524.6 actor_loss=-0.0543 critic_loss=44963881216.0000 entropy=8.4348 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0215 front_blocked=0
|
|
[Eval 8140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429090.5 mean_steps=15.0
|
|
[Episode 8150] reward=-31237419.7 actor_loss=0.0261 critic_loss=73458438144.0000 entropy=8.4325 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0443 front_blocked=0
|
|
[Episode 8160] reward=-27939327.1 actor_loss=-0.0058 critic_loss=63420657859.0476 entropy=8.4483 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0397 front_blocked=0
|
|
[Eval 8160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553749.6 mean_steps=14.2
|
|
[Episode 8170] reward=-14960170.2 actor_loss=-0.0088 critic_loss=46374673302.0690 entropy=8.4547 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0286 front_blocked=0
|
|
[Episode 8180] reward=-20986423.2 actor_loss=-0.0111 critic_loss=55230341283.8400 entropy=8.4501 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0352 front_blocked=0
|
|
[Eval 8180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534700.7 mean_steps=12.6
|
|
[Episode 8190] reward=-26440111.5 actor_loss=-0.0155 critic_loss=60424018033.7778 entropy=8.4518 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0417 front_blocked=0
|
|
[Episode 8200] reward=-21722363.9 actor_loss=-0.0012 critic_loss=59233341591.7037 entropy=8.4644 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0352 front_blocked=0
|
|
[Eval 8200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435323.1 mean_steps=13.3
|
|
[Episode 8210] reward=-29184775.3 actor_loss=0.0216 critic_loss=70030824130.2069 entropy=8.4636 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0436 front_blocked=0
|
|
[Episode 8220] reward=-28533637.0 actor_loss=-0.0054 critic_loss=69324010782.7200 entropy=8.4636 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0443 front_blocked=0
|
|
[Eval 8220] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-417823.7 mean_steps=16.4
|
|
[Episode 8230] reward=-41258439.4 actor_loss=0.0131 critic_loss=80448637466.9474 entropy=8.4713 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0618 front_blocked=0
|
|
[Episode 8240] reward=-31175078.2 actor_loss=-0.0026 critic_loss=73977853952.0000 entropy=8.4905 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0404 front_blocked=0
|
|
[Eval 8240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534028.5 mean_steps=13.4
|
|
[Episode 8250] reward=-40495730.4 actor_loss=0.0340 critic_loss=83510653486.5455 entropy=8.4998 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 8260] reward=-20760714.1 actor_loss=-0.0178 critic_loss=55077426135.0400 entropy=8.5079 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0312 front_blocked=0
|
|
[Eval 8260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463094.6 mean_steps=13.9
|
|
[Episode 8270] reward=-29410082.0 actor_loss=0.0067 critic_loss=64075541248.0000 entropy=8.5228 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0404 front_blocked=0
|
|
[Episode 8280] reward=-25578914.0 actor_loss=-0.0103 critic_loss=66306817267.8095 entropy=8.5331 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0371 front_blocked=0
|
|
[Eval 8280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-360783.3 mean_steps=15.5
|
|
[Episode 8290] reward=-23499586.9 actor_loss=-0.0137 critic_loss=56929071816.3478 entropy=8.5486 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0345 front_blocked=0
|
|
[Episode 8300] reward=-25926711.3 actor_loss=-0.0117 critic_loss=63475360699.7333 entropy=8.5878 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0358 front_blocked=0
|
|
[Eval 8300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579496.7 mean_steps=12.3
|
|
[Episode 8310] reward=-19320631.4 actor_loss=-0.0284 critic_loss=52519879262.8148 entropy=8.5960 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0345 front_blocked=0
|
|
[Episode 8320] reward=-21523214.7 actor_loss=-0.0286 critic_loss=64643083410.2857 entropy=8.6073 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0306 front_blocked=0
|
|
[Eval 8320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548379.4 mean_steps=13.2
|
|
[Episode 8330] reward=-23834223.0 actor_loss=0.0098 critic_loss=56965057050.9474 entropy=8.6192 approx_kl=0.0093 kl_stop=1 intervention_rate=0.0358 front_blocked=0
|
|
[Episode 8340] reward=-23542083.1 actor_loss=-0.0073 critic_loss=62788907648.0000 entropy=8.6240 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0384 front_blocked=0
|
|
[Eval 8340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-506798.8 mean_steps=12.6
|
|
[Episode 8350] reward=-22071095.9 actor_loss=-0.0070 critic_loss=55712935454.1176 entropy=8.6397 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0345 front_blocked=0
|
|
[Episode 8360] reward=-25874592.8 actor_loss=0.0371 critic_loss=67797997410.4615 entropy=8.6377 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0495 front_blocked=0
|
|
[Eval 8360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535068.0 mean_steps=12.8
|
|
[Episode 8370] reward=-12599252.0 actor_loss=-0.0491 critic_loss=44719575176.5333 entropy=8.6504 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0221 front_blocked=0
|
|
[Episode 8380] reward=-26250946.9 actor_loss=0.0059 critic_loss=55911788228.9231 entropy=8.6628 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0391 front_blocked=0
|
|
[Eval 8380] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-278071.0 mean_steps=16.5
|
|
[Episode 8390] reward=-21760907.0 actor_loss=-0.0260 critic_loss=50577546333.0909 entropy=8.6787 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0280 front_blocked=0
|
|
[Episode 8400] reward=-9957410.5 actor_loss=-0.0408 critic_loss=37559105588.5128 entropy=8.6836 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0228 front_blocked=0
|
|
[Eval 8400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-457385.5 mean_steps=13.1
|
|
[Episode 8410] reward=-24864099.6 actor_loss=-0.0162 critic_loss=58983809536.0000 entropy=8.6896 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0378 front_blocked=0
|
|
[Episode 8420] reward=-26173783.4 actor_loss=0.0055 critic_loss=68154297250.9091 entropy=8.7035 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0469 front_blocked=0
|
|
[Eval 8420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504548.3 mean_steps=13.7
|
|
[Episode 8430] reward=-20181998.7 actor_loss=-0.0125 critic_loss=51481653604.1739 entropy=8.7196 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0345 front_blocked=0
|
|
[Episode 8440] reward=-12386749.7 actor_loss=-0.0394 critic_loss=44471333914.2564 entropy=8.7331 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0247 front_blocked=0
|
|
[Eval 8440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-598256.8 mean_steps=12.2
|
|
[Episode 8450] reward=-22090853.4 actor_loss=-0.0135 critic_loss=57380124779.7895 entropy=8.7506 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0312 front_blocked=0
|
|
[Episode 8460] reward=-21409841.7 actor_loss=-0.0373 critic_loss=54124379008.0000 entropy=8.7636 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0286 front_blocked=0
|
|
[Eval 8460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430811.7 mean_steps=14.8
|
|
[Episode 8470] reward=-21461208.8 actor_loss=0.0046 critic_loss=65589036646.4000 entropy=8.7921 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0391 front_blocked=0
|
|
[Episode 8480] reward=-18034110.2 actor_loss=-0.0303 critic_loss=39851097115.6757 entropy=8.8067 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0260 front_blocked=0
|
|
[Eval 8480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510743.7 mean_steps=13.6
|
|
[Episode 8490] reward=-23247141.6 actor_loss=-0.0343 critic_loss=51358159394.1333 entropy=8.8107 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0339 front_blocked=0
|
|
[Episode 8500] reward=-14260034.7 actor_loss=-0.0457 critic_loss=34882207467.2432 entropy=8.8218 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0241 front_blocked=0
|
|
[Eval 8500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559660.5 mean_steps=12.2
|
|
[Episode 8510] reward=-29223672.7 actor_loss=-0.0135 critic_loss=61328727276.3077 entropy=8.8368 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0384 front_blocked=0
|
|
[Episode 8520] reward=-9394460.9 actor_loss=-0.0536 critic_loss=34146214348.8000 entropy=8.8420 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0202 front_blocked=0
|
|
[Eval 8520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-491902.4 mean_steps=12.7
|
|
[Episode 8530] reward=-26185256.4 actor_loss=0.0064 critic_loss=62632846525.6296 entropy=8.8503 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0410 front_blocked=0
|
|
[Episode 8540] reward=-14255619.3 actor_loss=-0.0566 critic_loss=40321226524.4444 entropy=8.8600 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0189 front_blocked=0
|
|
[Eval 8540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-413990.9 mean_steps=13.8
|
|
[Episode 8550] reward=-26369421.2 actor_loss=-0.0204 critic_loss=63893770098.7586 entropy=8.8637 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0365 front_blocked=0
|
|
[Episode 8560] reward=-27839248.3 actor_loss=0.0310 critic_loss=58218341242.4348 entropy=8.8776 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0417 front_blocked=0
|
|
[Eval 8560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-502767.6 mean_steps=12.8
|
|
[Episode 8570] reward=-14617984.0 actor_loss=-0.0114 critic_loss=42352775450.4828 entropy=8.8907 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0332 front_blocked=0
|
|
[Episode 8580] reward=-15016245.4 actor_loss=-0.0305 critic_loss=41652868313.2121 entropy=8.8937 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0247 front_blocked=0
|
|
[Eval 8580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452749.8 mean_steps=14.2
|
|
[Episode 8590] reward=-22512699.8 actor_loss=-0.0154 critic_loss=52731660363.8519 entropy=8.9150 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0312 front_blocked=0
|
|
[Episode 8600] reward=-15131396.5 actor_loss=-0.0507 critic_loss=43879938366.5778 entropy=8.9104 approx_kl=0.0089 kl_stop=0 intervention_rate=0.0247 front_blocked=0
|
|
[Eval 8600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567516.4 mean_steps=13.2
|
|
[Episode 8610] reward=-12118990.1 actor_loss=-0.0408 critic_loss=47189263883.3778 entropy=8.9276 approx_kl=0.0095 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Episode 8620] reward=-11032194.9 actor_loss=-0.0515 critic_loss=37929962968.6154 entropy=8.9501 approx_kl=0.0089 kl_stop=1 intervention_rate=0.0208 front_blocked=0
|
|
[Eval 8620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493966.2 mean_steps=13.4
|
|
[Episode 8630] reward=-14879650.7 actor_loss=-0.0272 critic_loss=42602328064.0000 entropy=8.9679 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0247 front_blocked=0
|
|
[Episode 8640] reward=-11577973.7 actor_loss=-0.0556 critic_loss=32047282113.9394 entropy=8.9734 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0182 front_blocked=0
|
|
[Eval 8640] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402560.7 mean_steps=15.7
|
|
[Episode 8650] reward=-19620241.5 actor_loss=-0.0154 critic_loss=59616276206.9333 entropy=8.9950 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0358 front_blocked=0
|
|
[Episode 8660] reward=-21438609.7 actor_loss=-0.0068 critic_loss=52490819447.4667 entropy=9.0212 approx_kl=0.0085 kl_stop=0 intervention_rate=0.0352 front_blocked=0
|
|
[Eval 8660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-448110.0 mean_steps=13.2
|
|
[Episode 8670] reward=-11524398.0 actor_loss=-0.0569 critic_loss=34261749942.0444 entropy=9.0413 approx_kl=0.0072 kl_stop=0 intervention_rate=0.0182 front_blocked=0
|
|
[Episode 8680] reward=-11969960.4 actor_loss=-0.0320 critic_loss=38155318303.0303 entropy=9.0619 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0241 front_blocked=0
|
|
[Eval 8680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502015.7 mean_steps=13.4
|
|
[Episode 8690] reward=-12625665.0 actor_loss=-0.0375 critic_loss=33854818021.5172 entropy=9.0814 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0202 front_blocked=0
|
|
[Episode 8700] reward=-23301402.2 actor_loss=-0.0174 critic_loss=57275302066.0870 entropy=9.1063 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0384 front_blocked=0
|
|
[Eval 8700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-693262.2 mean_steps=12.1
|
|
[Episode 8710] reward=-18482143.1 actor_loss=-0.0423 critic_loss=45094650148.5714 entropy=9.1127 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0273 front_blocked=0
|
|
[Episode 8720] reward=-11824038.0 actor_loss=-0.0410 critic_loss=34462866184.8276 entropy=9.1188 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0208 front_blocked=0
|
|
[Eval 8720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-490699.4 mean_steps=15.2
|
|
[Episode 8730] reward=-15606291.9 actor_loss=-0.0480 critic_loss=43571983701.3333 entropy=9.1321 approx_kl=0.0072 kl_stop=0 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 8740] reward=-6954807.2 actor_loss=-0.0756 critic_loss=23815827774.5778 entropy=9.1426 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0150 front_blocked=0
|
|
[Eval 8740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-556096.7 mean_steps=14.1
|
|
[Episode 8750] reward=-10612683.4 actor_loss=-0.0499 critic_loss=28977631232.0000 entropy=9.1379 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0202 front_blocked=0
|
|
[Episode 8760] reward=-19411229.9 actor_loss=-0.0329 critic_loss=35604831963.4286 entropy=9.1427 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0254 front_blocked=0
|
|
[Eval 8760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459630.2 mean_steps=14.2
|
|
[Episode 8770] reward=-26847001.1 actor_loss=0.0016 critic_loss=53098443016.2581 entropy=9.1647 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0365 front_blocked=0
|
|
[Episode 8780] reward=-14843984.4 actor_loss=-0.0259 critic_loss=45930515456.0000 entropy=9.1920 approx_kl=0.0091 kl_stop=1 intervention_rate=0.0306 front_blocked=0
|
|
[Eval 8780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-593857.1 mean_steps=12.7
|
|
[Episode 8790] reward=-14251603.0 actor_loss=-0.0371 critic_loss=28777267264.0000 entropy=9.2094 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 8800] reward=-7310834.1 actor_loss=-0.0758 critic_loss=27188171277.1282 entropy=9.2397 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0130 front_blocked=0
|
|
[Eval 8800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540745.3 mean_steps=13.9
|
|
[Episode 8810] reward=-18027693.8 actor_loss=-0.0400 critic_loss=55951645857.6842 entropy=9.2503 approx_kl=0.0086 kl_stop=1 intervention_rate=0.0267 front_blocked=0
|
|
[Episode 8820] reward=-17478768.5 actor_loss=-0.0406 critic_loss=38335532646.4000 entropy=9.2611 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0228 front_blocked=0
|
|
[Eval 8820] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-338159.0 mean_steps=17.4
|
|
[Episode 8830] reward=-11729754.9 actor_loss=-0.0630 critic_loss=26686399647.2889 entropy=9.2857 approx_kl=0.0056 kl_stop=0 intervention_rate=0.0137 front_blocked=0
|
|
[Episode 8840] reward=-13456656.0 actor_loss=-0.0700 critic_loss=34926255217.7778 entropy=9.3095 approx_kl=0.0085 kl_stop=0 intervention_rate=0.0189 front_blocked=0
|
|
[Eval 8840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-365063.5 mean_steps=14.7
|
|
[Episode 8850] reward=-12567271.4 actor_loss=-0.0665 critic_loss=29652870576.3556 entropy=9.3201 approx_kl=0.0080 kl_stop=0 intervention_rate=0.0169 front_blocked=0
|
|
[Episode 8860] reward=-18373778.2 actor_loss=-0.0331 critic_loss=49841886759.3846 entropy=9.3453 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0280 front_blocked=0
|
|
[Eval 8860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554373.5 mean_steps=12.1
|
|
[Episode 8870] reward=-20874940.7 actor_loss=-0.0268 critic_loss=41867639661.7143 entropy=9.3821 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0280 front_blocked=0
|
|
[Episode 8880] reward=-12720524.6 actor_loss=-0.0691 critic_loss=27823035547.1515 entropy=9.3900 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0163 front_blocked=0
|
|
[Eval 8880] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-655994.0 mean_steps=11.0
|
|
[Episode 8890] reward=-4257519.7 actor_loss=-0.0898 critic_loss=21328898867.2000 entropy=9.4049 approx_kl=0.0056 kl_stop=0 intervention_rate=0.0104 front_blocked=0
|
|
[Episode 8900] reward=-18597932.0 actor_loss=-0.0513 critic_loss=48726358639.3043 entropy=9.4323 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0273 front_blocked=0
|
|
[Eval 8900] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-269411.2 mean_steps=17.9
|
|
[Episode 8910] reward=-12270353.1 actor_loss=-0.0533 critic_loss=26752574272.0000 entropy=9.4539 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0176 front_blocked=0
|
|
[Episode 8920] reward=-21055690.1 actor_loss=-0.0355 critic_loss=47414385732.2667 entropy=9.4626 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0299 front_blocked=0
|
|
[Eval 8920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-369511.9 mean_steps=15.9
|
|
[Episode 8930] reward=-12243808.1 actor_loss=-0.0386 critic_loss=24366876535.4667 entropy=9.4899 approx_kl=0.0082 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Episode 8940] reward=-18804359.2 actor_loss=-0.0407 critic_loss=42669841448.9600 entropy=9.5109 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0221 front_blocked=0
|
|
[Eval 8940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-368910.4 mean_steps=14.4
|
|
[Episode 8950] reward=-25344684.9 actor_loss=0.0126 critic_loss=52692128399.3600 entropy=9.5213 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0378 front_blocked=0
|
|
[Episode 8960] reward=-6208843.2 actor_loss=-0.0769 critic_loss=16903710219.3778 entropy=9.5319 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0117 front_blocked=0
|
|
[Eval 8960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-511677.1 mean_steps=13.7
|
|
[Episode 8970] reward=-13988433.1 actor_loss=-0.0306 critic_loss=32767520819.2000 entropy=9.5521 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 8980] reward=-11219497.8 actor_loss=-0.0662 critic_loss=28066671820.8000 entropy=9.5747 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0163 front_blocked=0
|
|
[Eval 8980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442605.3 mean_steps=14.2
|
|
[Episode 8990] reward=-3220304.3 actor_loss=-0.0988 critic_loss=11058927126.7556 entropy=9.6019 approx_kl=0.0057 kl_stop=0 intervention_rate=0.0065 front_blocked=0
|
|
[Episode 9000] reward=-9202161.2 actor_loss=-0.0731 critic_loss=23011955598.2222 entropy=9.6228 approx_kl=0.0066 kl_stop=0 intervention_rate=0.0143 front_blocked=0
|
|
[Eval 9000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-627115.3 mean_steps=11.6
|
|
[Episode 9010] reward=-13834439.5 actor_loss=-0.0559 critic_loss=34136759955.9111 entropy=9.6427 approx_kl=0.0080 kl_stop=0 intervention_rate=0.0221 front_blocked=0
|
|
[Episode 9020] reward=-5101875.8 actor_loss=-0.0945 critic_loss=10405705375.2889 entropy=9.6645 approx_kl=0.0054 kl_stop=0 intervention_rate=0.0072 front_blocked=0
|
|
[Eval 9020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489516.0 mean_steps=17.5
|
|
[Episode 9030] reward=-17030768.6 actor_loss=-0.0385 critic_loss=37514124083.2000 entropy=9.6893 approx_kl=0.0069 kl_stop=0 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 9040] reward=-21451102.6 actor_loss=-0.0284 critic_loss=41732206405.8182 entropy=9.7093 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0326 front_blocked=0
|
|
[Eval 9040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549353.5 mean_steps=13.3
|
|
[Episode 9050] reward=-12913082.2 actor_loss=-0.0472 critic_loss=35044054396.3429 entropy=9.7226 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0202 front_blocked=0
|
|
[Episode 9060] reward=-17216130.9 actor_loss=-0.0386 critic_loss=39195011657.1429 entropy=9.7376 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0326 front_blocked=0
|
|
[Eval 9060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480371.3 mean_steps=15.4
|
|
[Episode 9070] reward=-8764676.7 actor_loss=-0.0737 critic_loss=20577113472.0000 entropy=9.7628 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0130 front_blocked=0
|
|
[Episode 9080] reward=-18041743.2 actor_loss=0.0063 critic_loss=38241170537.9310 entropy=9.7756 approx_kl=0.0073 kl_stop=1 intervention_rate=0.0306 front_blocked=0
|
|
[Eval 9080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555020.7 mean_steps=14.1
|
|
[Episode 9090] reward=-12294126.8 actor_loss=-0.0369 critic_loss=18923963707.0769 entropy=9.7890 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0163 front_blocked=0
|
|
[Episode 9100] reward=-9116760.7 actor_loss=-0.0613 critic_loss=26010355939.5556 entropy=9.7907 approx_kl=0.0050 kl_stop=0 intervention_rate=0.0176 front_blocked=0
|
|
[Eval 9100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552794.8 mean_steps=13.2
|
|
[Episode 9110] reward=-8860324.2 actor_loss=-0.0710 critic_loss=17200675066.3111 entropy=9.8259 approx_kl=0.0079 kl_stop=0 intervention_rate=0.0124 front_blocked=0
|
|
[Episode 9120] reward=-16868459.7 actor_loss=-0.0633 critic_loss=33744714043.0769 entropy=9.8631 approx_kl=0.0082 kl_stop=1 intervention_rate=0.0228 front_blocked=0
|
|
[Eval 9120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430449.8 mean_steps=16.1
|
|
[Episode 9130] reward=-25229244.5 actor_loss=-0.0253 critic_loss=44180564278.3030 entropy=9.8919 approx_kl=0.0084 kl_stop=1 intervention_rate=0.0319 front_blocked=0
|
|
[Episode 9140] reward=-13092042.7 actor_loss=-0.0415 critic_loss=27227214912.0000 entropy=9.9086 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0221 front_blocked=0
|
|
[Eval 9140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433912.7 mean_steps=14.3
|
|
[Episode 9150] reward=-19515144.0 actor_loss=-0.0357 critic_loss=40040000034.1333 entropy=9.9344 approx_kl=0.0077 kl_stop=0 intervention_rate=0.0254 front_blocked=0
|
|
[Episode 9160] reward=-17075668.0 actor_loss=-0.0152 critic_loss=34863312031.2889 entropy=9.9408 approx_kl=0.0062 kl_stop=0 intervention_rate=0.0326 front_blocked=0
|
|
[Eval 9160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-371753.7 mean_steps=16.1
|
|
[Episode 9170] reward=-9420028.9 actor_loss=-0.0585 critic_loss=25294158540.8000 entropy=9.9518 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0169 front_blocked=0
|
|
[Episode 9180] reward=-6525570.4 actor_loss=-0.0710 critic_loss=18783851588.2667 entropy=9.9680 approx_kl=0.0052 kl_stop=0 intervention_rate=0.0137 front_blocked=0
|
|
[Eval 9180] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-349268.4 mean_steps=20.6
|
|
[Episode 9190] reward=-20145685.5 actor_loss=-0.0327 critic_loss=40528429524.1143 entropy=9.9982 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0293 front_blocked=0
|
|
[Episode 9200] reward=-7218078.8 actor_loss=-0.0830 critic_loss=19209191378.4889 entropy=10.0253 approx_kl=0.0060 kl_stop=0 intervention_rate=0.0111 front_blocked=0
|
|
[Eval 9200] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-647434.0 mean_steps=13.5
|
|
[Episode 9210] reward=-7952088.9 actor_loss=-0.0763 critic_loss=14270663680.0000 entropy=10.0430 approx_kl=0.0059 kl_stop=0 intervention_rate=0.0130 front_blocked=0
|
|
[Episode 9220] reward=-24809995.6 actor_loss=-0.0246 critic_loss=49785401958.4000 entropy=10.0883 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0332 front_blocked=0
|
|
[Eval 9220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526118.9 mean_steps=13.8
|
|
[Episode 9230] reward=-11166820.2 actor_loss=-0.0792 critic_loss=30521606052.9778 entropy=10.1154 approx_kl=0.0076 kl_stop=0 intervention_rate=0.0195 front_blocked=0
|
|
[Episode 9240] reward=-21259980.4 actor_loss=0.0143 critic_loss=39753994240.0000 entropy=10.1338 approx_kl=0.0092 kl_stop=1 intervention_rate=0.0326 front_blocked=0
|
|
[Eval 9240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500843.2 mean_steps=14.8
|
|
[Episode 9250] reward=-11662398.9 actor_loss=-0.0496 critic_loss=27620340667.7333 entropy=10.1355 approx_kl=0.0047 kl_stop=0 intervention_rate=0.0208 front_blocked=0
|
|
[Episode 9260] reward=-13855024.9 actor_loss=-0.0280 critic_loss=27583258487.4667 entropy=10.1702 approx_kl=0.0063 kl_stop=0 intervention_rate=0.0215 front_blocked=0
|
|
[Eval 9260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459436.3 mean_steps=22.1
|
|
[Episode 9270] reward=-16748117.5 actor_loss=-0.0573 critic_loss=36121866649.6000 entropy=10.1998 approx_kl=0.0076 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Episode 9280] reward=-4862311.0 actor_loss=-0.1046 critic_loss=9681954281.2444 entropy=10.2283 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0052 front_blocked=0
|
|
[Eval 9280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-375761.4 mean_steps=29.5
|
|
[Episode 9290] reward=-2947860.5 actor_loss=-0.0903 critic_loss=6588617147.7333 entropy=10.2600 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0052 front_blocked=0
|
|
[Episode 9300] reward=-15214907.8 actor_loss=-0.0535 critic_loss=31625892886.7556 entropy=10.2992 approx_kl=0.0058 kl_stop=0 intervention_rate=0.0254 front_blocked=0
|
|
[Eval 9300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-414823.8 mean_steps=49.7
|
|
[Episode 9310] reward=-14090712.2 actor_loss=-0.0453 critic_loss=23971216270.2222 entropy=10.3177 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0208 front_blocked=0
|
|
[Episode 9320] reward=-8516324.4 actor_loss=-0.0856 critic_loss=14152738377.1429 entropy=10.3543 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0104 front_blocked=0
|
|
[Eval 9320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-404553.8 mean_steps=118.2
|
|
[Episode 9330] reward=-4124219.9 actor_loss=-0.0940 critic_loss=12571637623.4667 entropy=10.3704 approx_kl=0.0033 kl_stop=0 intervention_rate=0.0078 front_blocked=0
|
|
[Episode 9340] reward=-5994446.2 actor_loss=-0.0751 critic_loss=14993278088.5333 entropy=10.3964 approx_kl=0.0056 kl_stop=0 intervention_rate=0.0130 front_blocked=0
|
|
[Eval 9340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447202.7 mean_steps=40.5
|
|
[Episode 9350] reward=-9114591.2 actor_loss=-0.0516 critic_loss=24180552362.6667 entropy=10.4226 approx_kl=0.0065 kl_stop=0 intervention_rate=0.0169 front_blocked=0
|
|
[Episode 9360] reward=-17200579.7 actor_loss=-0.0400 critic_loss=31842428017.7778 entropy=10.4540 approx_kl=0.0066 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Eval 9360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512064.4 mean_steps=90.2
|
|
[Episode 9370] reward=-9424737.4 actor_loss=-0.0668 critic_loss=19813134882.1333 entropy=10.4893 approx_kl=0.0061 kl_stop=0 intervention_rate=0.0156 front_blocked=0
|
|
[Episode 9380] reward=-10868818.0 actor_loss=-0.0412 critic_loss=20251600418.1333 entropy=10.5213 approx_kl=0.0074 kl_stop=0 intervention_rate=0.0182 front_blocked=0
|
|
[Eval 9380] success_rate=0.000 qp_infeasible_rate=0.650 mean_return=-708058.5 mean_steps=1126.2
|
|
[Episode 9390] reward=-14070247.7 actor_loss=-0.0581 critic_loss=28538053745.7778 entropy=10.5350 approx_kl=0.0065 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Episode 9400] reward=-4558196.2 actor_loss=-0.0752 critic_loss=8917567772.4444 entropy=10.5608 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0098 front_blocked=0
|
|
[Eval 9400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426116.5 mean_steps=157.3
|
|
[Episode 9410] reward=-3057403.9 actor_loss=-0.0940 critic_loss=10882036997.6889 entropy=10.5801 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0065 front_blocked=0
|
|
[Episode 9420] reward=-13532925.0 actor_loss=-0.0548 critic_loss=23268035925.3333 entropy=10.6016 approx_kl=0.0066 kl_stop=0 intervention_rate=0.0176 front_blocked=0
|
|
[Eval 9420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-625429.3 mean_steps=243.1
|
|
[Episode 9430] reward=-14641966.8 actor_loss=-0.0611 critic_loss=23674228794.5143 entropy=10.6203 approx_kl=0.0096 kl_stop=1 intervention_rate=0.0195 front_blocked=0
|
|
[Episode 9440] reward=-14631008.6 actor_loss=-0.0407 critic_loss=23664119716.9778 entropy=10.6396 approx_kl=0.0062 kl_stop=0 intervention_rate=0.0195 front_blocked=0
|
|
[Eval 9440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470214.6 mean_steps=193.1
|
|
[Episode 9450] reward=-10123254.6 actor_loss=-0.0620 critic_loss=17279657096.5333 entropy=10.6493 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0150 front_blocked=0
|
|
[Episode 9460] reward=-10225590.5 actor_loss=-0.0568 critic_loss=14035101832.5333 entropy=10.6925 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0130 front_blocked=0
|
|
[Eval 9460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518713.2 mean_steps=352.9
|
|
[Episode 9470] reward=-7201681.0 actor_loss=-0.0834 critic_loss=9773371505.7778 entropy=10.7220 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0117 front_blocked=0
|
|
[Episode 9480] reward=-8618974.8 actor_loss=-0.0776 critic_loss=16807405954.8444 entropy=10.7259 approx_kl=0.0039 kl_stop=0 intervention_rate=0.0130 front_blocked=0
|
|
[Eval 9480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-555315.4 mean_steps=25.9
|
|
[Episode 9490] reward=-10203041.3 actor_loss=-0.0750 critic_loss=19287177124.9778 entropy=10.7430 approx_kl=0.0052 kl_stop=0 intervention_rate=0.0163 front_blocked=0
|
|
[Episode 9500] reward=-9403997.2 actor_loss=-0.0615 critic_loss=16045164726.0444 entropy=10.7461 approx_kl=0.0043 kl_stop=0 intervention_rate=0.0182 front_blocked=0
|
|
[Eval 9500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552593.9 mean_steps=32.6
|
|
[Episode 9510] reward=-3065352.5 actor_loss=-0.0936 critic_loss=5400488009.9556 entropy=10.7636 approx_kl=0.0040 kl_stop=0 intervention_rate=0.0065 front_blocked=0
|
|
[Episode 9520] reward=-10384743.4 actor_loss=0.0011 critic_loss=17018642727.8222 entropy=10.7892 approx_kl=0.0057 kl_stop=0 intervention_rate=0.0208 front_blocked=0
|
|
[Eval 9520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448782.2 mean_steps=62.8
|
|
[Episode 9530] reward=-12167632.0 actor_loss=-0.0824 critic_loss=20407998577.7778 entropy=10.7948 approx_kl=0.0035 kl_stop=0 intervention_rate=0.0137 front_blocked=0
|
|
[Episode 9540] reward=-5052597.5 actor_loss=-0.0909 critic_loss=7678188123.0222 entropy=10.8255 approx_kl=0.0042 kl_stop=0 intervention_rate=0.0091 front_blocked=0
|
|
[Eval 9540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446088.7 mean_steps=84.7
|
|
[Episode 9550] reward=-7990569.0 actor_loss=-0.0765 critic_loss=11528731295.2889 entropy=10.8545 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0111 front_blocked=0
|
|
[Episode 9560] reward=-9259972.9 actor_loss=-0.0141 critic_loss=14070046424.1778 entropy=10.8752 approx_kl=0.0047 kl_stop=0 intervention_rate=0.0208 front_blocked=0
|
|
[Eval 9560] success_rate=0.250 qp_infeasible_rate=0.700 mean_return=-571651.9 mean_steps=404.4
|
|
[Episode 9570] reward=-21396328.5 actor_loss=0.0171 critic_loss=36048388369.0667 entropy=10.8866 approx_kl=0.0067 kl_stop=0 intervention_rate=0.0345 front_blocked=0
|
|
[Episode 9580] reward=-16083641.9 actor_loss=-0.0318 critic_loss=34384477115.7333 entropy=10.9125 approx_kl=0.0066 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Eval 9580] success_rate=0.000 qp_infeasible_rate=0.550 mean_return=-644006.6 mean_steps=1444.9
|
|
[Episode 9590] reward=-8576655.5 actor_loss=-0.0634 critic_loss=16133964800.0000 entropy=10.9318 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0130 front_blocked=0
|
|
[Episode 9600] reward=-6458933.4 actor_loss=-0.0525 critic_loss=12477533980.4444 entropy=10.9618 approx_kl=0.0036 kl_stop=0 intervention_rate=0.0130 front_blocked=0
|
|
[Eval 9600] success_rate=0.200 qp_infeasible_rate=0.700 mean_return=-580161.8 mean_steps=330.6
|
|
[Episode 9610] reward=-3027189.1 actor_loss=-0.0845 critic_loss=3040180588.0889 entropy=10.9802 approx_kl=0.0008 kl_stop=0 intervention_rate=0.0059 front_blocked=0
|
|
[Episode 9620] reward=-3807207.6 actor_loss=-0.0833 critic_loss=9781683086.2222 entropy=11.0167 approx_kl=0.0035 kl_stop=0 intervention_rate=0.0091 front_blocked=0
|
|
[Eval 9620] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-654269.1 mean_steps=11.2
|
|
[Episode 9630] reward=-5713267.2 actor_loss=-0.0512 critic_loss=10029797649.0667 entropy=11.0336 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0137 front_blocked=0
|
|
[Episode 9640] reward=-12735108.9 actor_loss=-0.0708 critic_loss=23216791392.7111 entropy=11.0627 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0163 front_blocked=0
|
|
[Eval 9640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-516191.0 mean_steps=16.1
|
|
[Episode 9650] reward=-11081373.2 actor_loss=-0.0553 critic_loss=18549743069.8667 entropy=11.0704 approx_kl=0.0061 kl_stop=0 intervention_rate=0.0202 front_blocked=0
|
|
[Episode 9660] reward=-4483900.3 actor_loss=-0.0727 critic_loss=6222012404.6222 entropy=11.0781 approx_kl=0.0019 kl_stop=0 intervention_rate=0.0078 front_blocked=0
|
|
[Eval 9660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-656389.0 mean_steps=12.4
|
|
[Episode 9670] reward=-5949991.3 actor_loss=-0.0899 critic_loss=8892813391.6444 entropy=11.1002 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0085 front_blocked=0
|
|
[Episode 9680] reward=-7029322.5 actor_loss=-0.0695 critic_loss=16941720962.8444 entropy=11.1204 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0143 front_blocked=0
|
|
[Eval 9680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428277.3 mean_steps=18.1
|
|
[Episode 9690] reward=-13965862.3 actor_loss=-0.0111 critic_loss=24764818682.3111 entropy=11.1595 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 9700] reward=-6560737.9 actor_loss=-0.0664 critic_loss=12339190647.4667 entropy=11.1687 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0117 front_blocked=0
|
|
[Eval 9700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-452139.5 mean_steps=67.5
|
|
[Episode 9710] reward=-12262746.8 actor_loss=-0.0518 critic_loss=20907951195.0222 entropy=11.1798 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0169 front_blocked=0
|
|
[Episode 9720] reward=-7046621.6 actor_loss=-0.0467 critic_loss=7776711441.0667 entropy=11.1809 approx_kl=0.0018 kl_stop=0 intervention_rate=0.0163 front_blocked=0
|
|
[Eval 9720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-622421.9 mean_steps=16.8
|
|
[Episode 9730] reward=-8826777.9 actor_loss=-0.0626 critic_loss=11324309321.9556 entropy=11.1759 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0124 front_blocked=0
|
|
[Episode 9740] reward=-7799513.3 actor_loss=-0.0762 critic_loss=11389222525.1556 entropy=11.2221 approx_kl=0.0031 kl_stop=0 intervention_rate=0.0130 front_blocked=0
|
|
[Eval 9740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515030.6 mean_steps=13.4
|
|
[Episode 9750] reward=-10255734.6 actor_loss=-0.0615 critic_loss=14687999590.4000 entropy=11.2407 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0163 front_blocked=0
|
|
[Episode 9760] reward=-2450005.3 actor_loss=-0.0999 critic_loss=2280660216.8889 entropy=11.2639 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0065 front_blocked=0
|
|
[Eval 9760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-593309.3 mean_steps=14.1
|
|
[Episode 9770] reward=-6449185.2 actor_loss=-0.0696 critic_loss=11053556462.9333 entropy=11.2847 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0117 front_blocked=0
|
|
[Episode 9780] reward=-14516772.4 actor_loss=-0.0238 critic_loss=20748653385.9556 entropy=11.2993 approx_kl=0.0033 kl_stop=0 intervention_rate=0.0260 front_blocked=0
|
|
[Eval 9780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-515456.5 mean_steps=15.3
|
|
[Episode 9790] reward=-6232775.9 actor_loss=-0.0672 critic_loss=7160044572.4444 entropy=11.2891 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0104 front_blocked=0
|
|
[Episode 9800] reward=-7642475.7 actor_loss=-0.0638 critic_loss=11487444115.9111 entropy=11.2985 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0150 front_blocked=0
|
|
[Eval 9800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466854.0 mean_steps=16.2
|
|
[Episode 9810] reward=-9943955.0 actor_loss=-0.0510 critic_loss=12918935916.0889 entropy=11.3218 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0189 front_blocked=0
|
|
[Episode 9820] reward=-11169303.4 actor_loss=-0.0509 critic_loss=13050735092.6222 entropy=11.3513 approx_kl=0.0019 kl_stop=0 intervention_rate=0.0182 front_blocked=0
|
|
[Eval 9820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-545847.3 mean_steps=13.3
|
|
[Episode 9830] reward=-18185694.1 actor_loss=-0.0372 critic_loss=29291620260.9778 entropy=11.3542 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 9840] reward=-5638817.3 actor_loss=-0.0965 critic_loss=8055379103.2889 entropy=11.3745 approx_kl=0.0042 kl_stop=0 intervention_rate=0.0085 front_blocked=0
|
|
[Eval 9840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-421670.7 mean_steps=14.8
|
|
[Episode 9850] reward=-10006042.7 actor_loss=-0.0788 critic_loss=12366566331.7333 entropy=11.3954 approx_kl=0.0008 kl_stop=0 intervention_rate=0.0137 front_blocked=0
|
|
[Episode 9860] reward=-6936083.1 actor_loss=-0.0366 critic_loss=6832546451.9111 entropy=11.4313 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0137 front_blocked=0
|
|
[Eval 9860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462971.2 mean_steps=21.9
|
|
[Episode 9870] reward=-3623479.7 actor_loss=-0.0751 critic_loss=3643757240.8889 entropy=11.4505 approx_kl=0.0028 kl_stop=0 intervention_rate=0.0111 front_blocked=0
|
|
[Episode 9880] reward=-9056105.8 actor_loss=-0.0603 critic_loss=13271539325.1556 entropy=11.4557 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0143 front_blocked=0
|
|
[Eval 9880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507260.4 mean_steps=38.2
|
|
[Episode 9890] reward=-14394009.0 actor_loss=-0.0476 critic_loss=19628237255.1111 entropy=11.4764 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0189 front_blocked=0
|
|
[Episode 9900] reward=-10807519.7 actor_loss=-0.0369 critic_loss=13051558866.4889 entropy=11.5010 approx_kl=0.0033 kl_stop=0 intervention_rate=0.0195 front_blocked=0
|
|
[Eval 9900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603324.2 mean_steps=42.9
|
|
[Episode 9910] reward=-3743114.9 actor_loss=-0.0795 critic_loss=3750280072.5333 entropy=11.5310 approx_kl=0.0017 kl_stop=0 intervention_rate=0.0104 front_blocked=0
|
|
[Episode 9920] reward=-15035487.8 actor_loss=-0.0551 critic_loss=19759064723.9111 entropy=11.5543 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0189 front_blocked=0
|
|
[Eval 9920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-624779.7 mean_steps=14.7
|
|
[Episode 9930] reward=-7480354.6 actor_loss=-0.0572 critic_loss=9340095283.2000 entropy=11.5727 approx_kl=0.0019 kl_stop=0 intervention_rate=0.0143 front_blocked=0
|
|
[Episode 9940] reward=-5058430.6 actor_loss=-0.0583 critic_loss=6654403959.4667 entropy=11.6034 approx_kl=0.0012 kl_stop=0 intervention_rate=0.0124 front_blocked=0
|
|
[Eval 9940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-364167.0 mean_steps=17.2
|
|
[Episode 9950] reward=-6460847.3 actor_loss=-0.0876 critic_loss=6735664998.4000 entropy=11.6222 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0091 front_blocked=0
|
|
[Episode 9960] reward=-9405917.1 actor_loss=-0.0528 critic_loss=10792844367.6444 entropy=11.6546 approx_kl=0.0014 kl_stop=0 intervention_rate=0.0189 front_blocked=0
|
|
[Eval 9960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430750.9 mean_steps=16.2
|
|
[Episode 9970] reward=-7483212.7 actor_loss=-0.0496 critic_loss=12080957508.2667 entropy=11.6782 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0163 front_blocked=0
|
|
[Episode 9980] reward=-4865494.1 actor_loss=-0.0868 critic_loss=4162202174.5778 entropy=11.6889 approx_kl=0.0010 kl_stop=0 intervention_rate=0.0085 front_blocked=0
|
|
[Eval 9980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-493597.1 mean_steps=19.0
|
|
[Episode 9990] reward=-16278226.5 actor_loss=-0.0521 critic_loss=20361809783.4667 entropy=11.6957 approx_kl=0.0050 kl_stop=0 intervention_rate=0.0215 front_blocked=0
|
|
[Episode 10000] reward=-12705915.9 actor_loss=-0.0326 critic_loss=16800844049.0667 entropy=11.7149 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0228 front_blocked=0
|
|
[Eval 10000] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-652970.3 mean_steps=11.7
|
|
[Episode 10010] reward=-2740566.7 actor_loss=-0.0836 critic_loss=2487279948.8000 entropy=11.7319 approx_kl=0.0010 kl_stop=0 intervention_rate=0.0078 front_blocked=0
|
|
[Episode 10020] reward=-10026994.6 actor_loss=-0.0741 critic_loss=11989841032.5333 entropy=11.7499 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0137 front_blocked=0
|
|
[Eval 10020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592516.1 mean_steps=13.8
|
|
[Episode 10030] reward=-12244190.1 actor_loss=-0.0794 critic_loss=17867325485.5111 entropy=11.7518 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0150 front_blocked=0
|
|
[Episode 10040] reward=-14560159.0 actor_loss=-0.0830 critic_loss=20052901546.6667 entropy=11.7676 approx_kl=0.0036 kl_stop=0 intervention_rate=0.0176 front_blocked=0
|
|
[Eval 10040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559552.1 mean_steps=12.6
|
|
[Episode 10050] reward=-12394864.2 actor_loss=-0.0077 critic_loss=13774158279.1111 entropy=11.7865 approx_kl=0.0041 kl_stop=0 intervention_rate=0.0247 front_blocked=0
|
|
[Episode 10060] reward=-32385203.9 actor_loss=0.0239 critic_loss=40639391197.8667 entropy=11.8135 approx_kl=0.0052 kl_stop=0 intervention_rate=0.0410 front_blocked=0
|
|
[Eval 10060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479724.2 mean_steps=37.5
|
|
[Episode 10070] reward=-21871405.1 actor_loss=-0.0297 critic_loss=25893899468.8000 entropy=11.8382 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0267 front_blocked=0
|
|
[Episode 10080] reward=-6030711.9 actor_loss=-0.0558 critic_loss=6643466467.5556 entropy=11.8541 approx_kl=0.0016 kl_stop=0 intervention_rate=0.0156 front_blocked=0
|
|
[Eval 10080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-465095.2 mean_steps=13.8
|
|
[Episode 10090] reward=-11515011.9 actor_loss=-0.0360 critic_loss=15471367532.0889 entropy=11.8987 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0169 front_blocked=0
|
|
[Episode 10100] reward=-10631088.7 actor_loss=-0.0134 critic_loss=14036777688.1778 entropy=11.9144 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Eval 10100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563337.3 mean_steps=12.2
|
|
[Episode 10110] reward=-12700415.6 actor_loss=-0.0246 critic_loss=15613321921.4222 entropy=11.9384 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0247 front_blocked=0
|
|
[Episode 10120] reward=-3759325.9 actor_loss=-0.0865 critic_loss=3787425371.0222 entropy=11.9412 approx_kl=0.0010 kl_stop=0 intervention_rate=0.0072 front_blocked=0
|
|
[Eval 10120] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-720098.0 mean_steps=9.9
|
|
[Episode 10130] reward=-24622461.3 actor_loss=0.0387 critic_loss=29787055035.7333 entropy=11.9685 approx_kl=0.0035 kl_stop=0 intervention_rate=0.0410 front_blocked=0
|
|
[Episode 10140] reward=-2248124.9 actor_loss=-0.0931 critic_loss=1875161347.5556 entropy=11.9892 approx_kl=0.0005 kl_stop=0 intervention_rate=0.0085 front_blocked=0
|
|
[Eval 10140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402311.9 mean_steps=16.6
|
|
[Episode 10150] reward=-4499198.6 actor_loss=-0.0551 critic_loss=6233913543.1111 entropy=12.0267 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0117 front_blocked=0
|
|
[Episode 10160] reward=-5422561.7 actor_loss=-0.0646 critic_loss=5481725866.6667 entropy=12.0543 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0143 front_blocked=0
|
|
[Eval 10160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-519252.2 mean_steps=15.4
|
|
[Episode 10170] reward=-10478896.2 actor_loss=0.0279 critic_loss=11678490988.0889 entropy=12.0781 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0273 front_blocked=0
|
|
[Episode 10180] reward=-17862713.4 actor_loss=0.0448 critic_loss=19925555336.5333 entropy=12.0822 approx_kl=0.0044 kl_stop=0 intervention_rate=0.0293 front_blocked=0
|
|
[Eval 10180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461744.7 mean_steps=13.8
|
|
[Episode 10190] reward=-6117465.1 actor_loss=-0.0709 critic_loss=6222370821.6889 entropy=12.0783 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0124 front_blocked=0
|
|
[Episode 10200] reward=-9724898.8 actor_loss=-0.0425 critic_loss=10795798107.0222 entropy=12.0894 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0176 front_blocked=0
|
|
[Eval 10200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473615.6 mean_steps=14.9
|
|
[Episode 10210] reward=-23964619.2 actor_loss=0.0313 critic_loss=34230248288.7111 entropy=12.1178 approx_kl=0.0044 kl_stop=0 intervention_rate=0.0365 front_blocked=0
|
|
[Episode 10220] reward=-29376207.6 actor_loss=0.0340 critic_loss=38375180014.9333 entropy=12.1499 approx_kl=0.0053 kl_stop=0 intervention_rate=0.0391 front_blocked=0
|
|
[Eval 10220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532973.5 mean_steps=14.8
|
|
[Episode 10230] reward=-12968879.9 actor_loss=-0.0210 critic_loss=14127733782.7556 entropy=12.1633 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 10240] reward=-19933348.0 actor_loss=-0.0200 critic_loss=23295330030.9333 entropy=12.1705 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0260 front_blocked=0
|
|
[Eval 10240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-583385.8 mean_steps=14.2
|
|
[Episode 10250] reward=-17513224.0 actor_loss=0.0140 critic_loss=20872193888.7111 entropy=12.1869 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0312 front_blocked=0
|
|
[Episode 10260] reward=-19407390.1 actor_loss=-0.0115 critic_loss=23567793675.3778 entropy=12.2015 approx_kl=0.0044 kl_stop=0 intervention_rate=0.0280 front_blocked=0
|
|
[Eval 10260] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-632256.0 mean_steps=11.3
|
|
[Episode 10270] reward=-17048922.5 actor_loss=-0.0002 critic_loss=20487100893.8667 entropy=12.1965 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0286 front_blocked=0
|
|
[Episode 10280] reward=-21048809.6 actor_loss=0.0561 critic_loss=30012458780.4444 entropy=12.2134 approx_kl=0.0039 kl_stop=0 intervention_rate=0.0365 front_blocked=0
|
|
[Eval 10280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-578091.1 mean_steps=12.1
|
|
[Episode 10290] reward=-4128855.4 actor_loss=-0.0830 critic_loss=3988595828.6222 entropy=12.2476 approx_kl=0.0007 kl_stop=0 intervention_rate=0.0117 front_blocked=0
|
|
[Episode 10300] reward=-10443359.0 actor_loss=-0.0375 critic_loss=11035567763.9111 entropy=12.2749 approx_kl=0.0015 kl_stop=0 intervention_rate=0.0189 front_blocked=0
|
|
[Eval 10300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588914.4 mean_steps=13.6
|
|
[Episode 10310] reward=-12043318.3 actor_loss=-0.0319 critic_loss=14201692979.2000 entropy=12.2866 approx_kl=0.0018 kl_stop=0 intervention_rate=0.0202 front_blocked=0
|
|
[Episode 10320] reward=-11244842.8 actor_loss=-0.0366 critic_loss=13112955483.0222 entropy=12.2944 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0208 front_blocked=0
|
|
[Eval 10320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-437227.6 mean_steps=13.5
|
|
[Episode 10330] reward=-36840562.1 actor_loss=0.0778 critic_loss=46182670427.0222 entropy=12.3118 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 10340] reward=-10084293.9 actor_loss=-0.0090 critic_loss=11652830845.1556 entropy=12.3269 approx_kl=0.0028 kl_stop=0 intervention_rate=0.0202 front_blocked=0
|
|
[Eval 10340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-452811.6 mean_steps=13.9
|
|
[Episode 10350] reward=-17833381.0 actor_loss=0.0337 critic_loss=20142782646.0444 entropy=12.3346 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0326 front_blocked=0
|
|
[Episode 10360] reward=-28397599.4 actor_loss=0.0523 critic_loss=34516406044.4444 entropy=12.3650 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0449 front_blocked=0
|
|
[Eval 10360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551334.9 mean_steps=12.4
|
|
[Episode 10370] reward=-20428126.2 actor_loss=0.0007 critic_loss=24060212383.2889 entropy=12.3959 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0326 front_blocked=0
|
|
[Episode 10380] reward=-23395575.5 actor_loss=0.1015 critic_loss=28281383412.6222 entropy=12.4171 approx_kl=0.0019 kl_stop=0 intervention_rate=0.0436 front_blocked=0
|
|
[Eval 10380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-437212.7 mean_steps=13.7
|
|
[Episode 10390] reward=-11133758.8 actor_loss=-0.0589 critic_loss=12398859036.4444 entropy=12.4174 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0169 front_blocked=0
|
|
[Episode 10400] reward=-15830540.8 actor_loss=-0.0264 critic_loss=17936894270.5778 entropy=12.4182 approx_kl=0.0043 kl_stop=0 intervention_rate=0.0241 front_blocked=0
|
|
[Eval 10400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477987.4 mean_steps=14.8
|
|
[Episode 10410] reward=-14958032.8 actor_loss=-0.0401 critic_loss=17092854624.7111 entropy=12.4325 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0241 front_blocked=0
|
|
[Episode 10420] reward=-13903150.8 actor_loss=0.0142 critic_loss=15816999367.1111 entropy=12.4376 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0299 front_blocked=0
|
|
[Eval 10420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518932.0 mean_steps=13.8
|
|
[Episode 10430] reward=-11820151.2 actor_loss=-0.0509 critic_loss=13941241969.7778 entropy=12.4397 approx_kl=0.0023 kl_stop=0 intervention_rate=0.0189 front_blocked=0
|
|
[Episode 10440] reward=-28469288.9 actor_loss=0.0606 critic_loss=34335300767.2889 entropy=12.4511 approx_kl=0.0049 kl_stop=0 intervention_rate=0.0430 front_blocked=0
|
|
[Eval 10440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488123.5 mean_steps=14.4
|
|
[Episode 10450] reward=-6606798.8 actor_loss=-0.0201 critic_loss=6822875784.5333 entropy=12.4679 approx_kl=0.0009 kl_stop=0 intervention_rate=0.0195 front_blocked=0
|
|
[Episode 10460] reward=-18495219.4 actor_loss=0.0189 critic_loss=23407323477.3333 entropy=12.4603 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0352 front_blocked=0
|
|
[Eval 10460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462445.5 mean_steps=14.6
|
|
[Episode 10470] reward=-31617634.0 actor_loss=0.0658 critic_loss=38419574692.9778 entropy=12.4813 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0482 front_blocked=0
|
|
[Episode 10480] reward=-22398000.2 actor_loss=0.0745 critic_loss=25732927032.8889 entropy=12.4846 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0436 front_blocked=0
|
|
[Eval 10480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-423536.0 mean_steps=16.5
|
|
[Episode 10490] reward=-18516257.2 actor_loss=0.0072 critic_loss=20582939374.9333 entropy=12.5004 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0326 front_blocked=0
|
|
[Episode 10500] reward=-26699397.4 actor_loss=0.0366 critic_loss=31934520797.8667 entropy=12.5146 approx_kl=0.0049 kl_stop=0 intervention_rate=0.0417 front_blocked=0
|
|
[Eval 10500] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-584773.9 mean_steps=10.8
|
|
[Episode 10510] reward=-15604787.5 actor_loss=-0.0107 critic_loss=17178262414.2222 entropy=12.5443 approx_kl=0.0023 kl_stop=0 intervention_rate=0.0319 front_blocked=0
|
|
[Episode 10520] reward=-17725330.5 actor_loss=0.0342 critic_loss=20537742995.9111 entropy=12.5806 approx_kl=0.0012 kl_stop=0 intervention_rate=0.0312 front_blocked=0
|
|
[Eval 10520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520389.8 mean_steps=14.6
|
|
[Episode 10530] reward=-25903162.2 actor_loss=0.0239 critic_loss=31888258980.9778 entropy=12.5854 approx_kl=0.0028 kl_stop=0 intervention_rate=0.0371 front_blocked=0
|
|
[Episode 10540] reward=-10969202.9 actor_loss=-0.0153 critic_loss=11534863974.4000 entropy=12.5953 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0234 front_blocked=0
|
|
[Eval 10540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-614574.2 mean_steps=11.8
|
|
[Episode 10550] reward=-43034868.7 actor_loss=0.1208 critic_loss=52111952827.7333 entropy=12.6185 approx_kl=0.0062 kl_stop=0 intervention_rate=0.0618 front_blocked=0
|
|
[Episode 10560] reward=-22180131.3 actor_loss=0.0363 critic_loss=25222014793.9556 entropy=12.6075 approx_kl=0.0050 kl_stop=0 intervention_rate=0.0391 front_blocked=0
|
|
[Eval 10560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566509.6 mean_steps=12.6
|
|
[Episode 10570] reward=-18888756.5 actor_loss=-0.0102 critic_loss=21705800863.2889 entropy=12.6426 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0293 front_blocked=0
|
|
[Episode 10580] reward=-31843610.9 actor_loss=0.0725 critic_loss=37947417759.2889 entropy=12.6480 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0469 front_blocked=0
|
|
[Eval 10580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533752.5 mean_steps=13.8
|
|
[Episode 10590] reward=-25013710.2 actor_loss=0.0350 critic_loss=29663643283.9111 entropy=12.6555 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0404 front_blocked=0
|
|
[Episode 10600] reward=-24838757.6 actor_loss=0.0669 critic_loss=28121504745.2444 entropy=12.6891 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0430 front_blocked=0
|
|
[Eval 10600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534860.5 mean_steps=13.9
|
|
[Episode 10610] reward=-28307095.2 actor_loss=0.0996 critic_loss=31230635576.8889 entropy=12.7131 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 10620] reward=-27717554.9 actor_loss=0.0467 critic_loss=34382708872.5333 entropy=12.7049 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0456 front_blocked=0
|
|
[Eval 10620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-522269.6 mean_steps=12.9
|
|
[Episode 10630] reward=-39073291.6 actor_loss=0.1732 critic_loss=48085296560.3556 entropy=12.7271 approx_kl=0.0035 kl_stop=0 intervention_rate=0.0651 front_blocked=0
|
|
[Episode 10640] reward=-27306118.0 actor_loss=0.1008 critic_loss=31458785280.0000 entropy=12.7644 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 10640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619819.7 mean_steps=12.5
|
|
[Episode 10650] reward=-46543245.3 actor_loss=0.1859 critic_loss=58429959463.8222 entropy=12.7799 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 10660] reward=-22203513.3 actor_loss=0.0518 critic_loss=24778574961.7778 entropy=12.7992 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0397 front_blocked=0
|
|
[Eval 10660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-410114.0 mean_steps=16.6
|
|
[Episode 10670] reward=-27911568.3 actor_loss=0.1061 critic_loss=31730460034.8444 entropy=12.7987 approx_kl=0.0011 kl_stop=0 intervention_rate=0.0488 front_blocked=0
|
|
[Episode 10680] reward=-23551252.6 actor_loss=0.0708 critic_loss=28976239957.3333 entropy=12.8007 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0430 front_blocked=0
|
|
[Eval 10680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547787.4 mean_steps=13.8
|
|
[Episode 10690] reward=-23219016.2 actor_loss=0.0726 critic_loss=27962995689.2444 entropy=12.8096 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0417 front_blocked=0
|
|
[Episode 10700] reward=-44830107.5 actor_loss=0.1407 critic_loss=56649894843.7333 entropy=12.8367 approx_kl=0.0036 kl_stop=0 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 10700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435497.4 mean_steps=15.1
|
|
[Episode 10710] reward=-28494619.0 actor_loss=0.0627 critic_loss=32929686368.7111 entropy=12.8500 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0456 front_blocked=0
|
|
[Episode 10720] reward=-32168309.1 actor_loss=0.1049 critic_loss=39495118392.8889 entropy=12.8316 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0540 front_blocked=0
|
|
[Eval 10720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590972.8 mean_steps=12.6
|
|
[Episode 10730] reward=-35199510.7 actor_loss=0.1213 critic_loss=41795254681.6000 entropy=12.8519 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0579 front_blocked=0
|
|
[Episode 10740] reward=-32788499.9 actor_loss=0.1232 critic_loss=38423379740.4444 entropy=12.8421 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0547 front_blocked=0
|
|
[Eval 10740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-550240.6 mean_steps=12.4
|
|
[Episode 10750] reward=-27549322.2 actor_loss=0.0729 critic_loss=32263193349.6889 entropy=12.8587 approx_kl=0.0013 kl_stop=0 intervention_rate=0.0469 front_blocked=0
|
|
[Episode 10760] reward=-39681726.7 actor_loss=0.1595 critic_loss=49877282907.0222 entropy=12.8733 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 10760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-654929.7 mean_steps=12.3
|
|
[Episode 10770] reward=-35446139.6 actor_loss=0.1775 critic_loss=43420585210.3111 entropy=12.8683 approx_kl=0.0015 kl_stop=0 intervention_rate=0.0599 front_blocked=0
|
|
[Episode 10780] reward=-35303391.3 actor_loss=0.1307 critic_loss=44100536456.5333 entropy=12.8556 approx_kl=0.0011 kl_stop=0 intervention_rate=0.0612 front_blocked=0
|
|
[Eval 10780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569139.5 mean_steps=13.2
|
|
[Episode 10790] reward=-24840113.5 actor_loss=0.1100 critic_loss=28195491248.3556 entropy=12.8505 approx_kl=0.0015 kl_stop=0 intervention_rate=0.0501 front_blocked=0
|
|
[Episode 10800] reward=-25691690.7 actor_loss=0.0731 critic_loss=31119315490.1333 entropy=12.8545 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0462 front_blocked=0
|
|
[Eval 10800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-382755.9 mean_steps=15.6
|
|
[Episode 10810] reward=-23249538.7 actor_loss=0.0835 critic_loss=27643992246.0444 entropy=12.8484 approx_kl=0.0014 kl_stop=0 intervention_rate=0.0430 front_blocked=0
|
|
[Episode 10820] reward=-29857269.0 actor_loss=0.1288 critic_loss=35889409410.8444 entropy=12.8555 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0586 front_blocked=0
|
|
[Eval 10820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-658655.8 mean_steps=12.4
|
|
[Episode 10830] reward=-31193826.2 actor_loss=0.0993 critic_loss=36122352116.6222 entropy=12.8697 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0508 front_blocked=0
|
|
[Episode 10840] reward=-30355746.4 actor_loss=0.0696 critic_loss=37444509513.9556 entropy=12.8688 approx_kl=0.0017 kl_stop=0 intervention_rate=0.0462 front_blocked=0
|
|
[Eval 10840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479477.2 mean_steps=14.1
|
|
[Episode 10850] reward=-51054910.8 actor_loss=0.2413 critic_loss=64768796717.5111 entropy=12.8977 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 10860] reward=-35200658.3 actor_loss=0.1216 critic_loss=40399574539.3778 entropy=12.9102 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 10860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464110.9 mean_steps=14.8
|
|
[Episode 10870] reward=-27392116.1 actor_loss=0.1200 critic_loss=31240643834.3111 entropy=12.9141 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0547 front_blocked=0
|
|
[Episode 10880] reward=-20575027.7 actor_loss=0.0662 critic_loss=24283358367.2889 entropy=12.9042 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0404 front_blocked=0
|
|
[Eval 10880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-456148.3 mean_steps=13.6
|
|
[Episode 10890] reward=-14696978.5 actor_loss=0.0775 critic_loss=17023893458.4889 entropy=12.8992 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0365 front_blocked=0
|
|
[Episode 10900] reward=-33869188.0 actor_loss=0.0810 critic_loss=41269595886.9333 entropy=12.9065 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0521 front_blocked=0
|
|
[Eval 10900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-496291.0 mean_steps=14.8
|
|
[Episode 10910] reward=-16041355.6 actor_loss=0.1733 critic_loss=17750515029.3333 entropy=12.9147 approx_kl=0.0014 kl_stop=0 intervention_rate=0.0475 front_blocked=0
|
|
[Episode 10920] reward=-30038164.3 actor_loss=0.0977 critic_loss=35917448032.7111 entropy=12.9214 approx_kl=0.0017 kl_stop=0 intervention_rate=0.0508 front_blocked=0
|
|
[Eval 10920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-589718.9 mean_steps=13.7
|
|
[Episode 10930] reward=-44948772.4 actor_loss=0.1513 critic_loss=53874062677.3333 entropy=12.9045 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 10940] reward=-20571164.9 actor_loss=0.0651 critic_loss=24086974372.9778 entropy=12.8922 approx_kl=0.0014 kl_stop=0 intervention_rate=0.0417 front_blocked=0
|
|
[Eval 10940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446846.5 mean_steps=14.4
|
|
[Episode 10950] reward=-47588126.1 actor_loss=0.1785 critic_loss=57115691235.5556 entropy=12.9084 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 10960] reward=-25501366.3 actor_loss=0.1200 critic_loss=31558801908.6222 entropy=12.9175 approx_kl=0.0038 kl_stop=0 intervention_rate=0.0521 front_blocked=0
|
|
[Eval 10960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-594680.2 mean_steps=12.7
|
|
[Episode 10970] reward=-38392635.9 actor_loss=0.1128 critic_loss=47215243264.0000 entropy=12.9190 approx_kl=0.0035 kl_stop=0 intervention_rate=0.0592 front_blocked=0
|
|
[Episode 10980] reward=-39667463.7 actor_loss=0.1675 critic_loss=47544914557.1556 entropy=12.9387 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0645 front_blocked=0
|
|
[Eval 10980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493251.8 mean_steps=13.7
|
|
[Episode 10990] reward=-39381742.0 actor_loss=0.1455 critic_loss=47951089299.9111 entropy=12.9387 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0625 front_blocked=0
|
|
[Episode 11000] reward=-44839691.7 actor_loss=0.1777 critic_loss=54860111325.8667 entropy=12.9142 approx_kl=0.0033 kl_stop=0 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 11000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-617945.0 mean_steps=13.1
|
|
[Episode 11010] reward=-43071491.1 actor_loss=0.1930 critic_loss=54113964123.0222 entropy=12.9276 approx_kl=0.0011 kl_stop=0 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 11020] reward=-25993299.5 actor_loss=0.0725 critic_loss=30215061549.5111 entropy=12.9617 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0443 front_blocked=0
|
|
[Eval 11020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440896.6 mean_steps=15.4
|
|
[Episode 11030] reward=-29878791.5 actor_loss=0.0858 critic_loss=35075091478.7556 entropy=12.9944 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0469 front_blocked=0
|
|
[Episode 11040] reward=-27283410.8 actor_loss=0.1263 critic_loss=32971684477.1556 entropy=12.9808 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0508 front_blocked=0
|
|
[Eval 11040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541202.4 mean_steps=12.9
|
|
[Episode 11050] reward=-53361968.3 actor_loss=0.1325 critic_loss=66197457578.6667 entropy=12.9745 approx_kl=0.0050 kl_stop=0 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 11060] reward=-30999965.6 actor_loss=0.0759 critic_loss=37460002042.3111 entropy=12.9817 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0475 front_blocked=0
|
|
[Eval 11060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401546.7 mean_steps=15.6
|
|
[Episode 11070] reward=-44672210.4 actor_loss=0.2506 critic_loss=52350882065.0667 entropy=12.9951 approx_kl=0.0006 kl_stop=0 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 11080] reward=-33393411.6 actor_loss=0.1702 critic_loss=38703569851.7333 entropy=13.0014 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 11080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-420494.5 mean_steps=15.8
|
|
[Episode 11090] reward=-35033180.5 actor_loss=0.1640 critic_loss=39745028733.1556 entropy=13.0248 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0612 front_blocked=0
|
|
[Episode 11100] reward=-36742964.5 actor_loss=0.1476 critic_loss=43296902530.8444 entropy=13.0337 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0632 front_blocked=0
|
|
[Eval 11100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530907.2 mean_steps=14.2
|
|
[Episode 11110] reward=-26179297.1 actor_loss=0.1257 critic_loss=29993233430.7556 entropy=13.0354 approx_kl=0.0039 kl_stop=0 intervention_rate=0.0534 front_blocked=0
|
|
[Episode 11120] reward=-30289808.5 actor_loss=0.0918 critic_loss=36191405260.8000 entropy=13.0375 approx_kl=0.0041 kl_stop=0 intervention_rate=0.0521 front_blocked=0
|
|
[Eval 11120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614150.1 mean_steps=13.1
|
|
[Episode 11130] reward=-58645425.0 actor_loss=0.1901 critic_loss=73558812808.5333 entropy=13.0506 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 11140] reward=-52822428.2 actor_loss=0.2605 critic_loss=64518213267.9111 entropy=13.0521 approx_kl=0.0039 kl_stop=0 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 11140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-381734.3 mean_steps=15.8
|
|
[Episode 11150] reward=-39849754.9 actor_loss=0.1733 critic_loss=47122858348.0889 entropy=13.0614 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 11160] reward=-53895374.1 actor_loss=0.2023 critic_loss=65001134489.6000 entropy=13.0826 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 11160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440653.6 mean_steps=14.8
|
|
[Episode 11170] reward=-46777970.4 actor_loss=0.1934 critic_loss=56952240264.5333 entropy=13.0669 approx_kl=0.0053 kl_stop=0 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 11180] reward=-53748581.8 actor_loss=0.1643 critic_loss=64516469373.1556 entropy=13.0855 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 11180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-429693.7 mean_steps=13.7
|
|
[Episode 11190] reward=-40408345.4 actor_loss=0.2525 critic_loss=46925352504.8889 entropy=13.0794 approx_kl=0.0020 kl_stop=0 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 11200] reward=-62019131.3 actor_loss=0.2263 critic_loss=77809589907.9111 entropy=13.0911 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 11200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580275.4 mean_steps=13.0
|
|
[Episode 11210] reward=-52077420.2 actor_loss=0.2175 critic_loss=63100104612.9778 entropy=13.0970 approx_kl=0.0027 kl_stop=0 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 11220] reward=-42139564.0 actor_loss=0.2250 critic_loss=51221467044.9778 entropy=13.0938 approx_kl=0.0042 kl_stop=0 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 11220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503221.2 mean_steps=14.5
|
|
[Episode 11230] reward=-48221471.5 actor_loss=0.1328 critic_loss=60123086392.8889 entropy=13.0999 approx_kl=0.0030 kl_stop=0 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 11240] reward=-35296971.2 actor_loss=0.0685 critic_loss=41731443370.6667 entropy=13.1012 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0527 front_blocked=0
|
|
[Eval 11240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418349.9 mean_steps=15.9
|
|
[Episode 11250] reward=-54677825.8 actor_loss=0.1659 critic_loss=66216399576.1778 entropy=13.1158 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 11260] reward=-35515236.1 actor_loss=0.1379 critic_loss=41854235898.3111 entropy=13.1448 approx_kl=0.0043 kl_stop=0 intervention_rate=0.0592 front_blocked=0
|
|
[Eval 11260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-558900.3 mean_steps=14.9
|
|
[Episode 11270] reward=-28239547.7 actor_loss=0.0873 critic_loss=31461045179.7333 entropy=13.1584 approx_kl=0.0050 kl_stop=0 intervention_rate=0.0534 front_blocked=0
|
|
[Episode 11280] reward=-43605944.4 actor_loss=0.1322 critic_loss=53454746874.3111 entropy=13.1704 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0651 front_blocked=0
|
|
[Eval 11280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-379048.6 mean_steps=16.1
|
|
[Episode 11290] reward=-49415713.4 actor_loss=0.2603 critic_loss=60301070609.0667 entropy=13.1720 approx_kl=0.0031 kl_stop=0 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 11300] reward=-41754673.9 actor_loss=0.1865 critic_loss=50448774849.4222 entropy=13.1769 approx_kl=0.0022 kl_stop=0 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 11300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555119.9 mean_steps=13.2
|
|
[Episode 11310] reward=-43809064.4 actor_loss=0.1452 critic_loss=52450805987.5556 entropy=13.1681 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 11320] reward=-29489508.3 actor_loss=0.1154 critic_loss=35036794971.0222 entropy=13.1787 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0534 front_blocked=0
|
|
[Eval 11320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410124.3 mean_steps=15.7
|
|
[Episode 11330] reward=-54107997.4 actor_loss=0.2771 critic_loss=64998782202.3111 entropy=13.1837 approx_kl=0.0021 kl_stop=0 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 11340] reward=-58225886.4 actor_loss=0.3018 critic_loss=69828118209.4222 entropy=13.2090 approx_kl=0.0035 kl_stop=0 intervention_rate=0.0970 front_blocked=0
|
|
[Eval 11340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-358168.3 mean_steps=15.4
|
|
[Episode 11350] reward=-48960303.6 actor_loss=0.2307 critic_loss=59495070196.6222 entropy=13.2220 approx_kl=0.0041 kl_stop=0 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 11360] reward=-49891584.2 actor_loss=0.2514 critic_loss=58945626476.0889 entropy=13.2061 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 11360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563403.5 mean_steps=13.3
|
|
[Episode 11370] reward=-54117116.3 actor_loss=0.2598 critic_loss=66150018798.9333 entropy=13.2030 approx_kl=0.0028 kl_stop=0 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 11380] reward=-57748903.3 actor_loss=0.2166 critic_loss=71658355552.7111 entropy=13.2009 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 11380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-597226.6 mean_steps=13.8
|
|
[Episode 11390] reward=-37315797.8 actor_loss=0.2087 critic_loss=44940951916.0889 entropy=13.2070 approx_kl=0.0028 kl_stop=0 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 11400] reward=-64677515.5 actor_loss=0.1778 critic_loss=79319477998.9333 entropy=13.1979 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 11400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-400581.5 mean_steps=14.6
|
|
[Episode 11410] reward=-62523776.8 actor_loss=0.1626 critic_loss=79111332568.1778 entropy=13.2029 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 11420] reward=-41651028.7 actor_loss=0.2368 critic_loss=48713200799.2889 entropy=13.1985 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 11420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522618.0 mean_steps=14.0
|
|
[Episode 11430] reward=-47833106.2 actor_loss=0.2562 critic_loss=55838016944.3556 entropy=13.2032 approx_kl=0.0042 kl_stop=0 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 11440] reward=-50160496.6 actor_loss=0.1856 critic_loss=62851599200.7111 entropy=13.2006 approx_kl=0.0039 kl_stop=0 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 11440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536267.9 mean_steps=13.2
|
|
[Episode 11450] reward=-69502736.9 actor_loss=0.2772 critic_loss=88633734303.2889 entropy=13.2019 approx_kl=0.0039 kl_stop=0 intervention_rate=0.0983 front_blocked=0
|
|
[Episode 11460] reward=-49978352.7 actor_loss=0.1555 critic_loss=60797677932.0889 entropy=13.2321 approx_kl=0.0031 kl_stop=0 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 11460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508876.5 mean_steps=14.8
|
|
[Episode 11470] reward=-44835482.1 actor_loss=0.1797 critic_loss=54398083345.0667 entropy=13.2068 approx_kl=0.0023 kl_stop=0 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 11480] reward=-30707047.5 actor_loss=0.1495 critic_loss=37226436653.5111 entropy=13.2231 approx_kl=0.0018 kl_stop=0 intervention_rate=0.0579 front_blocked=0
|
|
[Eval 11480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566014.4 mean_steps=13.0
|
|
[Episode 11490] reward=-50735264.7 actor_loss=0.2218 critic_loss=60795185470.5778 entropy=13.2201 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 11500] reward=-48424085.4 actor_loss=0.2574 critic_loss=59929841755.0222 entropy=13.2462 approx_kl=0.0033 kl_stop=0 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 11500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421061.2 mean_steps=15.4
|
|
[Episode 11510] reward=-52733466.3 actor_loss=0.1734 critic_loss=62919542101.3333 entropy=13.2372 approx_kl=0.0033 kl_stop=0 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 11520] reward=-54159365.9 actor_loss=0.2320 critic_loss=66130308027.7333 entropy=13.2236 approx_kl=0.0049 kl_stop=0 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 11520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434325.9 mean_steps=15.9
|
|
[Episode 11530] reward=-50970692.4 actor_loss=0.2112 critic_loss=61011652608.0000 entropy=13.2567 approx_kl=0.0024 kl_stop=0 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 11540] reward=-67279005.1 actor_loss=0.2620 critic_loss=82167685484.0889 entropy=13.2769 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1055 front_blocked=0
|
|
[Eval 11540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506513.5 mean_steps=14.1
|
|
[Episode 11550] reward=-55627756.0 actor_loss=0.2202 critic_loss=67522012501.3333 entropy=13.2794 approx_kl=0.0025 kl_stop=0 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 11560] reward=-67415520.6 actor_loss=0.2168 critic_loss=83009469189.6889 entropy=13.2820 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 11560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467972.7 mean_steps=15.3
|
|
[Episode 11570] reward=-59568498.2 actor_loss=0.2156 critic_loss=73232592531.9111 entropy=13.2983 approx_kl=0.0048 kl_stop=0 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 11580] reward=-59447997.1 actor_loss=0.2831 critic_loss=70696282612.6222 entropy=13.3094 approx_kl=0.0054 kl_stop=0 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 11580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-647138.8 mean_steps=12.7
|
|
[Episode 11590] reward=-45053482.9 actor_loss=0.1914 critic_loss=51590944267.3778 entropy=13.2990 approx_kl=0.0041 kl_stop=0 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 11600] reward=-57889358.4 actor_loss=0.2965 critic_loss=70570918252.0889 entropy=13.3071 approx_kl=0.0052 kl_stop=0 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 11600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383657.6 mean_steps=16.7
|
|
[Episode 11610] reward=-39649194.1 actor_loss=0.2414 critic_loss=46934551665.7778 entropy=13.3198 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 11620] reward=-44465558.9 actor_loss=0.2126 critic_loss=54767289685.3333 entropy=13.3050 approx_kl=0.0040 kl_stop=0 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 11620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474613.2 mean_steps=15.7
|
|
[Episode 11630] reward=-54135038.8 actor_loss=0.2252 critic_loss=69204190913.4222 entropy=13.2935 approx_kl=0.0029 kl_stop=0 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 11640] reward=-71415408.9 actor_loss=0.2965 critic_loss=86473045697.4222 entropy=13.3163 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 11640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530996.5 mean_steps=14.8
|
|
[Episode 11650] reward=-57022444.7 actor_loss=0.2726 critic_loss=70155979889.7778 entropy=13.3115 approx_kl=0.0032 kl_stop=0 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 11660] reward=-43181429.5 actor_loss=0.2400 critic_loss=51946742670.2222 entropy=13.2999 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 11660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400432.2 mean_steps=17.1
|
|
[Episode 11670] reward=-34806649.6 actor_loss=0.2075 critic_loss=40220441304.1778 entropy=13.3041 approx_kl=0.0037 kl_stop=0 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 11680] reward=-39123634.0 actor_loss=0.2642 critic_loss=44419977944.1778 entropy=13.3085 approx_kl=0.0049 kl_stop=0 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 11680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-612003.7 mean_steps=13.2
|
|
[Episode 11690] reward=-48086150.3 actor_loss=0.2038 critic_loss=57670499168.7111 entropy=13.3200 approx_kl=0.0034 kl_stop=0 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 11700] reward=-74220794.4 actor_loss=0.3412 critic_loss=90454708497.0667 entropy=13.3389 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 11700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547205.2 mean_steps=13.9
|
|
[Episode 11710] reward=-52841438.3 actor_loss=0.1446 critic_loss=64641944234.6667 entropy=13.3593 approx_kl=0.0066 kl_stop=0 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 11720] reward=-58101430.4 actor_loss=0.2887 critic_loss=70563321446.4000 entropy=13.3858 approx_kl=0.0045 kl_stop=0 intervention_rate=0.0983 front_blocked=0
|
|
[Eval 11720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510280.1 mean_steps=13.6
|
|
[Episode 11730] reward=-59323382.8 actor_loss=0.2760 critic_loss=73138701107.2000 entropy=13.3857 approx_kl=0.0051 kl_stop=0 intervention_rate=0.0996 front_blocked=0
|
|
[Episode 11740] reward=-70177032.9 actor_loss=0.2987 critic_loss=87547790222.2222 entropy=13.4102 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1068 front_blocked=0
|
|
[Eval 11740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579044.9 mean_steps=13.4
|
|
[Episode 11750] reward=-54775789.7 actor_loss=0.3456 critic_loss=65437422478.2222 entropy=13.4116 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1003 front_blocked=0
|
|
[Episode 11760] reward=-65801138.8 actor_loss=0.3541 critic_loss=82414797528.1778 entropy=13.4245 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1087 front_blocked=0
|
|
[Eval 11760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-503797.7 mean_steps=13.6
|
|
[Episode 11770] reward=-66477215.5 actor_loss=0.2819 critic_loss=82022158791.1111 entropy=13.4496 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1003 front_blocked=0
|
|
[Episode 11780] reward=-58386219.0 actor_loss=0.2115 critic_loss=71266534013.1555 entropy=13.4468 approx_kl=0.0065 kl_stop=0 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 11780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452408.4 mean_steps=15.8
|
|
[Episode 11790] reward=-63258637.3 actor_loss=0.2722 critic_loss=75769820319.2889 entropy=13.4394 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1016 front_blocked=0
|
|
[Episode 11800] reward=-81307047.7 actor_loss=0.2528 critic_loss=101900033683.9111 entropy=13.4754 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 11800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563621.6 mean_steps=14.1
|
|
[Episode 11810] reward=-59085266.3 actor_loss=0.2561 critic_loss=71990128184.8889 entropy=13.5158 approx_kl=0.0047 kl_stop=0 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 11820] reward=-68240885.9 actor_loss=0.2704 critic_loss=84500153594.3111 entropy=13.5227 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1016 front_blocked=0
|
|
[Eval 11820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-369749.5 mean_steps=17.5
|
|
[Episode 11830] reward=-64990345.0 actor_loss=0.2908 critic_loss=80272383453.8667 entropy=13.5203 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1048 front_blocked=0
|
|
[Episode 11840] reward=-58409248.9 actor_loss=0.2907 critic_loss=71227950694.4000 entropy=13.5432 approx_kl=0.0031 kl_stop=0 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 11840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506726.8 mean_steps=14.4
|
|
[Episode 11850] reward=-67929575.8 actor_loss=0.3119 critic_loss=81310334065.7778 entropy=13.5244 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Episode 11860] reward=-57043750.0 actor_loss=0.2794 critic_loss=68636607465.2444 entropy=13.5248 approx_kl=0.0051 kl_stop=0 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 11860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-416640.1 mean_steps=15.0
|
|
[Episode 11870] reward=-71223452.6 actor_loss=0.3048 critic_loss=86392137136.3556 entropy=13.5352 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Episode 11880] reward=-53014824.1 actor_loss=0.2276 critic_loss=66548975206.4000 entropy=13.5546 approx_kl=0.0047 kl_stop=0 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 11880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532247.2 mean_steps=14.8
|
|
[Episode 11890] reward=-73750157.3 actor_loss=0.2944 critic_loss=89073129335.4667 entropy=13.5635 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Episode 11900] reward=-57205055.2 actor_loss=0.2374 critic_loss=69867294173.8667 entropy=13.5881 approx_kl=0.0040 kl_stop=0 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 11900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413622.7 mean_steps=15.8
|
|
[Episode 11910] reward=-63611559.3 actor_loss=0.3765 critic_loss=77056150641.7778 entropy=13.6100 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1146 front_blocked=0
|
|
[Episode 11920] reward=-66234122.6 actor_loss=0.2447 critic_loss=80604409036.8000 entropy=13.6326 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1022 front_blocked=0
|
|
[Eval 11920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-410642.6 mean_steps=16.5
|
|
[Episode 11930] reward=-75233540.2 actor_loss=0.3256 critic_loss=92157542035.9111 entropy=13.6401 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Episode 11940] reward=-62225685.5 actor_loss=0.2295 critic_loss=78589057251.5556 entropy=13.6637 approx_kl=0.0041 kl_stop=0 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 11940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521293.8 mean_steps=14.3
|
|
[Episode 11950] reward=-60447496.2 actor_loss=0.1985 critic_loss=74265932868.2667 entropy=13.7113 approx_kl=0.0064 kl_stop=0 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 11960] reward=-67280279.9 actor_loss=0.3538 critic_loss=85781351628.8000 entropy=13.7270 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Eval 11960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-474487.1 mean_steps=13.3
|
|
[Episode 11970] reward=-67533635.4 actor_loss=0.3628 critic_loss=81836960426.6667 entropy=13.7573 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 11980] reward=-63929211.5 actor_loss=0.3145 critic_loss=77939104608.7111 entropy=13.7593 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 11980] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-289919.2 mean_steps=18.6
|
|
[Episode 11990] reward=-64925674.5 actor_loss=0.2777 critic_loss=77671705440.7111 entropy=13.7811 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1022 front_blocked=0
|
|
[Episode 12000] reward=-76944912.0 actor_loss=0.2713 critic_loss=97418758644.6222 entropy=13.7958 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 12000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445327.2 mean_steps=14.7
|
|
[Episode 12010] reward=-87043124.3 actor_loss=0.3329 critic_loss=107587174035.9111 entropy=13.8271 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 12020] reward=-82513496.6 actor_loss=0.3721 critic_loss=102448498460.4444 entropy=13.8278 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 12020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-594732.6 mean_steps=13.9
|
|
[Episode 12030] reward=-76725963.9 actor_loss=0.2790 critic_loss=95819218215.8222 entropy=13.8451 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 12040] reward=-59162739.6 actor_loss=0.2946 critic_loss=73943596418.8445 entropy=13.8420 approx_kl=0.0036 kl_stop=0 intervention_rate=0.0964 front_blocked=0
|
|
[Eval 12040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540258.3 mean_steps=13.4
|
|
[Episode 12050] reward=-75157373.0 actor_loss=0.3132 critic_loss=94310187372.0889 entropy=13.8717 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 12060] reward=-66246585.2 actor_loss=0.3596 critic_loss=83616349661.8667 entropy=13.8680 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 12060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-514117.8 mean_steps=14.1
|
|
[Episode 12070] reward=-66503217.0 actor_loss=0.3360 critic_loss=83271357053.1555 entropy=13.8912 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1061 front_blocked=0
|
|
[Episode 12080] reward=-50233109.4 actor_loss=0.2509 critic_loss=61517016450.8444 entropy=13.9188 approx_kl=0.0047 kl_stop=0 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 12080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-539928.0 mean_steps=12.6
|
|
[Episode 12090] reward=-62271602.6 actor_loss=0.3196 critic_loss=76678211174.4000 entropy=13.9116 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1074 front_blocked=0
|
|
[Episode 12100] reward=-59336712.4 actor_loss=0.2314 critic_loss=72625773499.7333 entropy=13.9208 approx_kl=0.0026 kl_stop=0 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 12100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-403529.0 mean_steps=15.3
|
|
[Episode 12110] reward=-73399393.5 actor_loss=0.2890 critic_loss=92070532073.2444 entropy=13.9640 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1087 front_blocked=0
|
|
[Episode 12120] reward=-79046688.5 actor_loss=0.3183 critic_loss=98725880172.0889 entropy=13.9518 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 12120] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-328462.9 mean_steps=17.6
|
|
[Episode 12130] reward=-75411634.0 actor_loss=0.3092 critic_loss=91966871415.4667 entropy=13.9817 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Episode 12140] reward=-77552494.0 actor_loss=0.2426 critic_loss=96202292701.8667 entropy=14.0171 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Eval 12140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511361.9 mean_steps=14.1
|
|
[Episode 12150] reward=-87606200.6 actor_loss=0.2896 critic_loss=107866735684.2667 entropy=14.0358 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 12160] reward=-75587461.5 actor_loss=0.3199 critic_loss=91667551027.2000 entropy=14.0485 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 12160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474364.8 mean_steps=13.7
|
|
[Episode 12170] reward=-72705808.2 actor_loss=0.2997 critic_loss=88575569100.8000 entropy=14.0656 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1094 front_blocked=0
|
|
[Episode 12180] reward=-78872975.3 actor_loss=0.2487 critic_loss=96054617702.4000 entropy=14.0624 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Eval 12180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526979.9 mean_steps=13.2
|
|
[Episode 12190] reward=-64848902.4 actor_loss=0.3102 critic_loss=79663691821.5111 entropy=14.0689 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1042 front_blocked=0
|
|
[Episode 12200] reward=-71521500.2 actor_loss=0.3116 critic_loss=86973853422.9333 entropy=14.0833 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Eval 12200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445744.2 mean_steps=14.3
|
|
[Episode 12210] reward=-72104622.0 actor_loss=0.2765 critic_loss=87678582237.8667 entropy=14.0759 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1094 front_blocked=0
|
|
[Episode 12220] reward=-79636712.0 actor_loss=0.3380 critic_loss=97960679469.5111 entropy=14.0927 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 12220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-461652.3 mean_steps=15.2
|
|
[Episode 12230] reward=-78584466.9 actor_loss=0.3419 critic_loss=95661287378.4889 entropy=14.0999 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 12240] reward=-77330144.3 actor_loss=0.3497 critic_loss=98847813176.8889 entropy=14.1299 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1139 front_blocked=0
|
|
[Eval 12240] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-398468.1 mean_steps=16.8
|
|
[Episode 12250] reward=-70673099.2 actor_loss=0.3223 critic_loss=86289723300.9778 entropy=14.1411 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Episode 12260] reward=-81925640.7 actor_loss=0.2523 critic_loss=106913038336.0000 entropy=14.1484 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 12260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-441741.5 mean_steps=13.6
|
|
[Episode 12270] reward=-76608918.7 actor_loss=0.3288 critic_loss=96488790789.6889 entropy=14.1739 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1146 front_blocked=0
|
|
[Episode 12280] reward=-68916732.2 actor_loss=0.3382 critic_loss=84947082717.8667 entropy=14.1839 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1081 front_blocked=0
|
|
[Eval 12280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407923.9 mean_steps=15.8
|
|
[Episode 12290] reward=-66027148.4 actor_loss=0.3055 critic_loss=85126903307.3778 entropy=14.2163 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1055 front_blocked=0
|
|
[Episode 12300] reward=-78601141.1 actor_loss=0.3216 critic_loss=101047126880.7111 entropy=14.2196 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Eval 12300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-386944.0 mean_steps=13.8
|
|
[Episode 12310] reward=-83336937.6 actor_loss=0.2944 critic_loss=104989524332.0889 entropy=14.2278 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 12320] reward=-77248256.8 actor_loss=0.2707 critic_loss=97828343899.0222 entropy=14.2412 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1081 front_blocked=0
|
|
[Eval 12320] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-322534.3 mean_steps=15.9
|
|
[Episode 12330] reward=-79719202.0 actor_loss=0.2697 critic_loss=101988736659.9111 entropy=14.2534 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 12340] reward=-81246320.6 actor_loss=0.3612 critic_loss=104483420387.5556 entropy=14.2887 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 12340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-662944.6 mean_steps=12.4
|
|
[Episode 12350] reward=-72591562.1 actor_loss=0.3035 critic_loss=91894395153.0667 entropy=14.2782 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1081 front_blocked=0
|
|
[Episode 12360] reward=-91533960.6 actor_loss=0.3541 critic_loss=114191250773.3333 entropy=14.2901 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 12360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441840.9 mean_steps=14.3
|
|
[Episode 12370] reward=-70160105.8 actor_loss=0.3496 critic_loss=85120445826.8445 entropy=14.2912 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1081 front_blocked=0
|
|
[Episode 12380] reward=-78253644.1 actor_loss=0.3267 critic_loss=96516470465.4222 entropy=14.2951 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 12380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575582.2 mean_steps=13.3
|
|
[Episode 12390] reward=-89828443.5 actor_loss=0.2873 critic_loss=113358545078.0444 entropy=14.3148 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Episode 12400] reward=-79679918.7 actor_loss=0.3144 critic_loss=99800065274.3111 entropy=14.3244 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Eval 12400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496044.8 mean_steps=13.8
|
|
[Episode 12410] reward=-59029555.7 actor_loss=0.3008 critic_loss=72691463873.4222 entropy=14.3452 approx_kl=0.0019 kl_stop=0 intervention_rate=0.0983 front_blocked=0
|
|
[Episode 12420] reward=-78053361.1 actor_loss=0.4315 critic_loss=96981931167.2889 entropy=14.3484 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 12420] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-720581.7 mean_steps=10.8
|
|
[Episode 12430] reward=-79295224.1 actor_loss=0.4066 critic_loss=100570692903.8222 entropy=14.3743 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 12440] reward=-81641400.8 actor_loss=0.3240 critic_loss=103253241947.0222 entropy=14.3778 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Eval 12440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-341130.5 mean_steps=15.2
|
|
[Episode 12450] reward=-76221187.1 actor_loss=0.4196 critic_loss=95989949144.1778 entropy=14.3897 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 12460] reward=-83618184.2 actor_loss=0.2785 critic_loss=108548949606.4000 entropy=14.4081 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 12460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401944.6 mean_steps=16.1
|
|
[Episode 12470] reward=-70323151.2 actor_loss=0.2947 critic_loss=87585469599.2889 entropy=14.4176 approx_kl=0.0019 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 12480] reward=-81074961.1 actor_loss=0.3317 critic_loss=104357709596.4444 entropy=14.4245 approx_kl=0.0009 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 12480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540381.1 mean_steps=14.4
|
|
[Episode 12490] reward=-73245637.9 actor_loss=0.2774 critic_loss=92919571342.2222 entropy=14.4191 approx_kl=0.0019 kl_stop=0 intervention_rate=0.1068 front_blocked=0
|
|
[Episode 12500] reward=-76933402.5 actor_loss=0.3195 critic_loss=96797177901.5111 entropy=14.4261 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 12500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542601.3 mean_steps=14.2
|
|
[Episode 12510] reward=-64139445.2 actor_loss=0.3336 critic_loss=79550896173.5111 entropy=14.4390 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1022 front_blocked=0
|
|
[Episode 12520] reward=-80760137.0 actor_loss=0.4062 critic_loss=102188808965.6889 entropy=14.4639 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 12520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516556.8 mean_steps=13.8
|
|
[Episode 12530] reward=-84809072.5 actor_loss=0.3727 critic_loss=108474371094.7556 entropy=14.4621 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 12540] reward=-83625597.8 actor_loss=0.2768 critic_loss=105911374825.2444 entropy=14.4703 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 12540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502095.6 mean_steps=14.1
|
|
[Episode 12550] reward=-84590309.0 actor_loss=0.3437 critic_loss=108033719409.7778 entropy=14.4914 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 12560] reward=-92026862.9 actor_loss=0.3550 critic_loss=117774176347.0222 entropy=14.5075 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 12560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432135.8 mean_steps=14.8
|
|
[Episode 12570] reward=-78286616.6 actor_loss=0.3213 critic_loss=99345365765.6889 entropy=14.5027 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1139 front_blocked=0
|
|
[Episode 12580] reward=-82691263.1 actor_loss=0.2404 critic_loss=104750963097.6000 entropy=14.5307 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Eval 12580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-598598.2 mean_steps=11.8
|
|
[Episode 12590] reward=-92474352.8 actor_loss=0.2875 critic_loss=120751027541.3333 entropy=14.5521 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 12600] reward=-72027903.1 actor_loss=0.3443 critic_loss=91270724175.6444 entropy=14.5471 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 12600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-535137.8 mean_steps=12.7
|
|
[Episode 12610] reward=-85850264.7 actor_loss=0.3846 critic_loss=109682425856.0000 entropy=14.5622 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 12620] reward=-80724765.9 actor_loss=0.2873 critic_loss=101105460656.3556 entropy=14.5856 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 12620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-357765.3 mean_steps=15.1
|
|
[Episode 12630] reward=-80735810.4 actor_loss=0.3699 critic_loss=103343220326.4000 entropy=14.5960 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 12640] reward=-79321518.2 actor_loss=0.2886 critic_loss=102033154412.0889 entropy=14.6023 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Eval 12640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-511796.0 mean_steps=12.3
|
|
[Episode 12650] reward=-77877571.3 actor_loss=0.3553 critic_loss=100077278367.2889 entropy=14.6225 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 12660] reward=-72120722.9 actor_loss=0.2976 critic_loss=92939431662.9333 entropy=14.6138 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1068 front_blocked=0
|
|
[Eval 12660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600267.4 mean_steps=13.1
|
|
[Episode 12670] reward=-88002025.6 actor_loss=0.3006 critic_loss=111559387272.5333 entropy=14.6404 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 12680] reward=-86302028.8 actor_loss=0.3313 critic_loss=110748310095.6444 entropy=14.6463 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 12680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516320.8 mean_steps=14.4
|
|
[Episode 12690] reward=-86760524.4 actor_loss=0.2569 critic_loss=111786356644.9778 entropy=14.6658 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 12700] reward=-74283138.6 actor_loss=0.3038 critic_loss=94385584355.5556 entropy=14.6919 approx_kl=0.0016 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Eval 12700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-300743.1 mean_steps=16.4
|
|
[Episode 12710] reward=-86195122.3 actor_loss=0.3348 critic_loss=110883056298.6667 entropy=14.6986 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 12720] reward=-79386139.3 actor_loss=0.3553 critic_loss=101987578311.1111 entropy=14.7170 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 12720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-619775.7 mean_steps=12.2
|
|
[Episode 12730] reward=-82427405.8 actor_loss=0.3890 critic_loss=104536820576.7111 entropy=14.7312 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 12740] reward=-91978050.0 actor_loss=0.2468 critic_loss=123830400887.4667 entropy=14.7520 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Eval 12740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-308033.4 mean_steps=16.4
|
|
[Episode 12750] reward=-84073865.4 actor_loss=0.3744 critic_loss=107338486215.1111 entropy=14.7790 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 12760] reward=-92512854.3 actor_loss=0.3919 critic_loss=119174891656.5333 entropy=14.7940 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 12760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464288.5 mean_steps=14.7
|
|
[Episode 12770] reward=-74912064.8 actor_loss=0.3651 critic_loss=96538602882.8445 entropy=14.8059 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 12780] reward=-88190069.9 actor_loss=0.2290 critic_loss=113941242220.0889 entropy=14.8060 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Eval 12780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527951.2 mean_steps=14.3
|
|
[Episode 12790] reward=-80688759.2 actor_loss=0.3385 critic_loss=103920380950.7556 entropy=14.8174 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 12800] reward=-80871319.6 actor_loss=0.3741 critic_loss=104749033608.5333 entropy=14.8226 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 12800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481315.8 mean_steps=14.9
|
|
[Episode 12810] reward=-86271383.9 actor_loss=0.3484 critic_loss=111663951234.8445 entropy=14.8237 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 12820] reward=-95996377.3 actor_loss=0.3042 critic_loss=127223392028.4444 entropy=14.8193 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 12820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-513790.9 mean_steps=15.1
|
|
[Episode 12830] reward=-72203639.2 actor_loss=0.3429 critic_loss=92044833587.2000 entropy=14.8129 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1126 front_blocked=0
|
|
[Episode 12840] reward=-81999814.1 actor_loss=0.3610 critic_loss=106456702065.7778 entropy=14.8179 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 12840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533816.5 mean_steps=14.0
|
|
[Episode 12850] reward=-76453531.8 actor_loss=0.3555 critic_loss=99021803428.9778 entropy=14.8430 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Episode 12860] reward=-89885871.6 actor_loss=0.2709 critic_loss=115175635717.6889 entropy=14.8521 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 12860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368946.0 mean_steps=15.9
|
|
[Episode 12870] reward=-81636507.6 actor_loss=0.4321 critic_loss=110427438011.7333 entropy=14.8479 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 12880] reward=-81453859.4 actor_loss=0.3426 critic_loss=105842708935.1111 entropy=14.8619 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Eval 12880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437702.8 mean_steps=14.3
|
|
[Episode 12890] reward=-89246033.3 actor_loss=0.2741 critic_loss=119240205744.3556 entropy=14.8763 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 12900] reward=-84208235.3 actor_loss=0.3235 critic_loss=108601400797.8667 entropy=14.8718 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Eval 12900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517553.9 mean_steps=15.0
|
|
[Episode 12910] reward=-88227298.1 actor_loss=0.3726 critic_loss=114458957505.4222 entropy=14.8652 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 12920] reward=-82897628.5 actor_loss=0.2320 critic_loss=108066511439.6444 entropy=14.8490 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1100 front_blocked=0
|
|
[Eval 12920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474696.5 mean_steps=13.8
|
|
[Episode 12930] reward=-78014643.8 actor_loss=0.3458 critic_loss=98115807550.5778 entropy=14.8587 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 12940] reward=-86021284.6 actor_loss=0.3667 critic_loss=111913365959.1111 entropy=14.8585 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 12940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467320.7 mean_steps=14.1
|
|
[Episode 12950] reward=-81277929.7 actor_loss=0.2483 critic_loss=105335873900.0889 entropy=14.8613 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1094 front_blocked=0
|
|
[Episode 12960] reward=-81574318.2 actor_loss=0.3042 critic_loss=105596499103.2889 entropy=14.8793 approx_kl=0.0012 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 12960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-363600.6 mean_steps=15.6
|
|
[Episode 12970] reward=-80120013.8 actor_loss=0.3362 critic_loss=100573036726.0444 entropy=14.8780 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 12980] reward=-86340184.4 actor_loss=0.2560 critic_loss=110127003147.3778 entropy=14.8537 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1133 front_blocked=0
|
|
[Eval 12980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-635486.7 mean_steps=11.8
|
|
[Episode 12990] reward=-93683999.8 actor_loss=0.3468 critic_loss=121437863571.9111 entropy=14.8580 approx_kl=0.0014 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 13000] reward=-69419330.2 actor_loss=0.4004 critic_loss=86870483308.0889 entropy=14.8627 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 13000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449634.8 mean_steps=14.4
|
|
[Episode 13010] reward=-82996032.1 actor_loss=0.3305 critic_loss=105651975691.3778 entropy=14.8806 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 13020] reward=-94560640.3 actor_loss=0.3640 critic_loss=120661416891.7333 entropy=14.8838 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 13020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439457.0 mean_steps=15.6
|
|
[Episode 13030] reward=-87491185.6 actor_loss=0.3039 critic_loss=113226284873.9556 entropy=14.8851 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 13040] reward=-94582853.9 actor_loss=0.3632 critic_loss=124082637846.7556 entropy=14.9153 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 13040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486152.1 mean_steps=13.8
|
|
[Episode 13050] reward=-81124404.3 actor_loss=0.1931 critic_loss=103909503431.1111 entropy=14.9258 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1035 front_blocked=0
|
|
[Episode 13060] reward=-83047247.3 actor_loss=0.4168 critic_loss=105297627272.5333 entropy=14.9256 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 13060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-362418.9 mean_steps=15.7
|
|
[Episode 13070] reward=-85584014.3 actor_loss=0.3570 critic_loss=111087848652.8000 entropy=14.9174 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 13080] reward=-84521430.5 actor_loss=0.3281 critic_loss=111498438064.3556 entropy=14.9376 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 13080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-666416.5 mean_steps=12.0
|
|
[Episode 13090] reward=-74998647.6 actor_loss=0.4680 critic_loss=95013035485.8667 entropy=14.9638 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 13100] reward=-84126801.7 actor_loss=0.2596 critic_loss=109844272651.3778 entropy=14.9708 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1094 front_blocked=0
|
|
[Eval 13100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564274.4 mean_steps=13.5
|
|
[Episode 13110] reward=-85907927.9 actor_loss=0.3349 critic_loss=110464811736.1778 entropy=14.9716 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 13120] reward=-72213020.6 actor_loss=0.4076 critic_loss=91396487941.6889 entropy=14.9693 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 13120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568962.4 mean_steps=13.4
|
|
[Episode 13130] reward=-78328389.1 actor_loss=0.3201 critic_loss=98848193467.7333 entropy=14.9773 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Episode 13140] reward=-84862631.2 actor_loss=0.3308 critic_loss=109104868010.6667 entropy=15.0007 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Eval 13140] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-642823.4 mean_steps=11.8
|
|
[Episode 13150] reward=-88116744.9 actor_loss=0.3256 critic_loss=115640660969.2444 entropy=15.0141 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 13160] reward=-88962413.8 actor_loss=0.4367 critic_loss=113869219066.3111 entropy=15.0062 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 13160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451577.2 mean_steps=15.7
|
|
[Episode 13170] reward=-84710198.7 actor_loss=0.2496 critic_loss=111815333387.3778 entropy=15.0303 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1107 front_blocked=0
|
|
[Episode 13180] reward=-95364196.7 actor_loss=0.3019 critic_loss=124296569924.2667 entropy=15.0103 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 13180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465731.0 mean_steps=14.7
|
|
[Episode 13190] reward=-96123435.5 actor_loss=0.3044 critic_loss=117418585838.9333 entropy=14.9937 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 13200] reward=-83622139.7 actor_loss=0.4636 critic_loss=110855484211.2000 entropy=14.9865 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 13200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483628.5 mean_steps=14.8
|
|
[Episode 13210] reward=-91585869.0 actor_loss=0.3784 critic_loss=120511270638.9333 entropy=15.0087 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 13220] reward=-92457123.8 actor_loss=0.3585 critic_loss=120909177924.2667 entropy=15.0185 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 13220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453328.1 mean_steps=14.6
|
|
[Episode 13230] reward=-92700024.4 actor_loss=0.3105 critic_loss=120585477233.7778 entropy=15.0348 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 13240] reward=-90824433.2 actor_loss=0.4387 critic_loss=118221533457.0667 entropy=15.0501 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 13240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521897.0 mean_steps=14.2
|
|
[Episode 13250] reward=-72320939.3 actor_loss=0.3524 critic_loss=93166251030.7556 entropy=15.0597 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1113 front_blocked=0
|
|
[Episode 13260] reward=-94139835.0 actor_loss=0.3891 critic_loss=121512692212.6222 entropy=15.0640 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 13260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485450.6 mean_steps=14.3
|
|
[Episode 13270] reward=-98894601.9 actor_loss=0.4256 critic_loss=130672457227.3778 entropy=15.0802 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 13280] reward=-84803481.9 actor_loss=0.3822 critic_loss=109328937961.2444 entropy=15.0983 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 13280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-604258.1 mean_steps=13.0
|
|
[Episode 13290] reward=-86932173.9 actor_loss=0.3049 critic_loss=112224262553.6000 entropy=15.1087 approx_kl=0.0019 kl_stop=0 intervention_rate=0.1159 front_blocked=0
|
|
[Episode 13300] reward=-95275052.4 actor_loss=0.2988 critic_loss=125391005832.5333 entropy=15.1167 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 13300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400240.9 mean_steps=15.2
|
|
[Episode 13310] reward=-87677581.2 actor_loss=0.4167 critic_loss=116182433063.8222 entropy=15.1173 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 13320] reward=-89899978.6 actor_loss=0.3498 critic_loss=118809775308.8000 entropy=15.1309 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 13320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371822.0 mean_steps=16.2
|
|
[Episode 13330] reward=-87990854.7 actor_loss=0.2672 critic_loss=116413228100.2667 entropy=15.1404 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Episode 13340] reward=-79185024.7 actor_loss=0.3610 critic_loss=100374644326.4000 entropy=15.1645 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 13340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-353311.4 mean_steps=16.8
|
|
[Episode 13350] reward=-85680662.8 actor_loss=0.3546 critic_loss=113848939861.3333 entropy=15.1666 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 13360] reward=-86972408.1 actor_loss=0.4065 critic_loss=112293948984.8889 entropy=15.1738 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 13360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437203.7 mean_steps=14.9
|
|
[Episode 13370] reward=-88747130.7 actor_loss=0.4648 critic_loss=116243072068.2667 entropy=15.1892 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 13380] reward=-84536496.0 actor_loss=0.3929 critic_loss=106744185924.2667 entropy=15.2167 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 13380] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-620113.3 mean_steps=11.8
|
|
[Episode 13390] reward=-90411566.3 actor_loss=0.2605 critic_loss=117925646244.9778 entropy=15.2109 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1120 front_blocked=0
|
|
[Episode 13400] reward=-90830013.1 actor_loss=0.3308 critic_loss=118013754299.7333 entropy=15.1955 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 13400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525595.5 mean_steps=13.6
|
|
[Episode 13410] reward=-87595493.7 actor_loss=0.3603 critic_loss=111185844451.5556 entropy=15.2007 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 13420] reward=-80132663.7 actor_loss=0.5092 critic_loss=103977012974.9333 entropy=15.2044 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 13420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-516552.2 mean_steps=13.2
|
|
[Episode 13430] reward=-84569540.3 actor_loss=0.4505 critic_loss=110117955902.5778 entropy=15.2152 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 13440] reward=-80106201.4 actor_loss=0.3753 critic_loss=102752660684.8000 entropy=15.2055 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 13440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553670.3 mean_steps=13.8
|
|
[Episode 13450] reward=-89444000.7 actor_loss=0.2486 critic_loss=116611829304.8889 entropy=15.2156 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 13460] reward=-97747397.8 actor_loss=0.2497 critic_loss=125289899986.4889 entropy=15.2272 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 13460] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-355381.2 mean_steps=17.1
|
|
[Episode 13470] reward=-93611198.0 actor_loss=0.2702 critic_loss=124539081523.2000 entropy=15.2381 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 13480] reward=-93610441.3 actor_loss=0.3163 critic_loss=121399288718.2222 entropy=15.2486 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 13480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534256.5 mean_steps=13.6
|
|
[Episode 13490] reward=-95800283.4 actor_loss=0.2829 critic_loss=127286318239.2889 entropy=15.2620 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 13500] reward=-88050131.9 actor_loss=0.4310 critic_loss=117188138689.4222 entropy=15.2805 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 13500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551719.1 mean_steps=12.6
|
|
[Episode 13510] reward=-82465105.2 actor_loss=0.3843 critic_loss=108291978581.3333 entropy=15.3056 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 13520] reward=-94736836.5 actor_loss=0.3766 critic_loss=123999234184.5333 entropy=15.2855 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 13520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478442.7 mean_steps=15.2
|
|
[Episode 13530] reward=-93605273.2 actor_loss=0.2861 critic_loss=122114989169.7778 entropy=15.2987 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 13540] reward=-98852738.8 actor_loss=0.3893 critic_loss=131015455721.2444 entropy=15.3144 approx_kl=0.0019 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 13540] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-583098.5 mean_steps=10.8
|
|
[Episode 13550] reward=-97273938.8 actor_loss=0.2434 critic_loss=130962594383.6444 entropy=15.3121 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 13560] reward=-78611923.2 actor_loss=0.4806 critic_loss=105181267740.4444 entropy=15.3326 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 13560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541770.0 mean_steps=13.6
|
|
[Episode 13570] reward=-101373420.7 actor_loss=0.3750 critic_loss=136344269801.2444 entropy=15.3303 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 13580] reward=-85463490.7 actor_loss=0.3530 critic_loss=110064928722.4889 entropy=15.3444 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 13580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502859.2 mean_steps=14.4
|
|
[Episode 13590] reward=-97852114.3 actor_loss=0.3114 critic_loss=131508905756.4444 entropy=15.3551 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 13600] reward=-87990413.4 actor_loss=0.3905 critic_loss=114938188959.2889 entropy=15.3713 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 13600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509320.2 mean_steps=13.9
|
|
[Episode 13610] reward=-100865427.8 actor_loss=0.3327 critic_loss=129356809102.2222 entropy=15.3590 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 13620] reward=-90764127.5 actor_loss=0.3128 critic_loss=117597854651.7333 entropy=15.3548 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 13620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463256.7 mean_steps=14.8
|
|
[Episode 13630] reward=-90113151.7 actor_loss=0.3577 critic_loss=116618887350.0444 entropy=15.3934 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 13640] reward=-92986290.9 actor_loss=0.2961 critic_loss=122318694354.4889 entropy=15.4150 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 13640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578881.2 mean_steps=12.9
|
|
[Episode 13650] reward=-89679872.6 actor_loss=0.3179 critic_loss=120008666134.7556 entropy=15.4253 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1152 front_blocked=0
|
|
[Episode 13660] reward=-95418203.6 actor_loss=0.4039 critic_loss=124292574230.7556 entropy=15.4534 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 13660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417353.2 mean_steps=15.7
|
|
[Episode 13670] reward=-89030302.1 actor_loss=0.3358 critic_loss=115939013791.2889 entropy=15.4444 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 13680] reward=-91002273.3 actor_loss=0.3848 critic_loss=118510871256.1778 entropy=15.4462 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 13680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442565.6 mean_steps=14.6
|
|
[Episode 13690] reward=-90418551.7 actor_loss=0.3499 critic_loss=119290470035.9111 entropy=15.4521 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 13700] reward=-104374137.6 actor_loss=0.3231 critic_loss=139617212097.4222 entropy=15.4678 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 13700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451528.1 mean_steps=14.3
|
|
[Episode 13710] reward=-100704749.9 actor_loss=0.2615 critic_loss=132621353142.0444 entropy=15.4945 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 13720] reward=-102226778.5 actor_loss=0.3572 critic_loss=134076204646.4000 entropy=15.5028 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 13720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517517.8 mean_steps=13.2
|
|
[Episode 13730] reward=-100661408.8 actor_loss=0.3075 critic_loss=133938034369.4222 entropy=15.5161 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 13740] reward=-100636155.9 actor_loss=0.3184 critic_loss=130935372003.5556 entropy=15.5295 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 13740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495208.5 mean_steps=14.6
|
|
[Episode 13750] reward=-98169074.3 actor_loss=0.3177 critic_loss=131508902297.6000 entropy=15.5211 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 13760] reward=-86185295.9 actor_loss=0.3213 critic_loss=113397616731.0222 entropy=15.5266 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Eval 13760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409455.7 mean_steps=14.9
|
|
[Episode 13770] reward=-101569645.8 actor_loss=0.3407 critic_loss=132484288785.0667 entropy=15.4923 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 13780] reward=-104493684.2 actor_loss=0.3085 critic_loss=138740874717.8667 entropy=15.4978 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 13780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490067.1 mean_steps=13.8
|
|
[Episode 13790] reward=-99602378.5 actor_loss=0.3496 critic_loss=131285507185.7778 entropy=15.5025 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 13800] reward=-101835516.3 actor_loss=0.2755 critic_loss=129382769550.2222 entropy=15.5004 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 13800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487162.1 mean_steps=13.9
|
|
[Episode 13810] reward=-98539186.9 actor_loss=0.4422 critic_loss=131774088260.2667 entropy=15.5188 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 13820] reward=-96574967.9 actor_loss=0.4039 critic_loss=128131743561.9556 entropy=15.5116 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 13820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-483865.5 mean_steps=12.1
|
|
[Episode 13830] reward=-91734479.1 actor_loss=0.3986 critic_loss=117059158016.0000 entropy=15.4984 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 13840] reward=-86486779.3 actor_loss=0.4166 critic_loss=113449264560.3556 entropy=15.5180 approx_kl=0.0010 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 13840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547193.3 mean_steps=13.1
|
|
[Episode 13850] reward=-94871105.6 actor_loss=0.3136 critic_loss=125178974481.0667 entropy=15.5307 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 13860] reward=-94262472.3 actor_loss=0.3837 critic_loss=122210189858.1333 entropy=15.5445 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 13860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509086.4 mean_steps=13.9
|
|
[Episode 13870] reward=-104304642.0 actor_loss=0.4439 critic_loss=138331521570.1333 entropy=15.5602 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 13880] reward=-98700971.6 actor_loss=0.3258 critic_loss=130127212999.1111 entropy=15.5721 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 13880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449299.5 mean_steps=15.6
|
|
[Episode 13890] reward=-89016467.3 actor_loss=0.3500 critic_loss=117450670262.0444 entropy=15.5686 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 13900] reward=-93457581.0 actor_loss=0.3750 critic_loss=124847032820.6222 entropy=15.5694 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 13900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-603600.4 mean_steps=12.2
|
|
[Episode 13910] reward=-98099957.4 actor_loss=0.3059 critic_loss=128595331936.7111 entropy=15.5773 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 13920] reward=-94227390.9 actor_loss=0.3059 critic_loss=125037819949.5111 entropy=15.5932 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 13920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-417085.8 mean_steps=14.7
|
|
[Episode 13930] reward=-95683160.2 actor_loss=0.3526 critic_loss=128075026340.9778 entropy=15.5948 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 13940] reward=-102613740.4 actor_loss=0.2273 critic_loss=141445618619.7333 entropy=15.5909 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Eval 13940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-536103.3 mean_steps=12.4
|
|
[Episode 13950] reward=-98952398.0 actor_loss=0.4051 critic_loss=131612907747.5556 entropy=15.5867 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 13960] reward=-96841267.8 actor_loss=0.3344 critic_loss=132228686916.2667 entropy=15.5809 approx_kl=0.0015 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 13960] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-734237.3 mean_steps=11.8
|
|
[Episode 13970] reward=-95271379.7 actor_loss=0.4004 critic_loss=124151672740.9778 entropy=15.5470 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 13980] reward=-97599753.7 actor_loss=0.3532 critic_loss=128923456489.2444 entropy=15.5285 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 13980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-372711.7 mean_steps=16.1
|
|
[Episode 13990] reward=-95843438.1 actor_loss=0.3365 critic_loss=129252604131.5556 entropy=15.5478 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 14000] reward=-94118116.0 actor_loss=0.3404 critic_loss=122904119432.5333 entropy=15.5423 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 14000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563656.3 mean_steps=13.6
|
|
[Episode 14010] reward=-105573275.6 actor_loss=0.3053 critic_loss=138160658477.5111 entropy=15.5480 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 14020] reward=-95674000.6 actor_loss=0.3696 critic_loss=125770792777.9556 entropy=15.5632 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 14020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-545579.2 mean_steps=12.8
|
|
[Episode 14030] reward=-92035770.5 actor_loss=0.3828 critic_loss=118182182001.7778 entropy=15.5511 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 14040] reward=-98759547.8 actor_loss=0.3362 critic_loss=128403887809.4222 entropy=15.5540 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 14040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530118.0 mean_steps=13.3
|
|
[Episode 14050] reward=-96885175.1 actor_loss=0.2694 critic_loss=127358732970.6667 entropy=15.5524 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 14060] reward=-93515919.4 actor_loss=0.4623 critic_loss=126427290009.6000 entropy=15.5726 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 14060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591033.1 mean_steps=13.8
|
|
[Episode 14070] reward=-96561578.3 actor_loss=0.3075 critic_loss=127051035261.1555 entropy=15.5785 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 14080] reward=-96185360.5 actor_loss=0.3645 critic_loss=122457858776.1778 entropy=15.5958 approx_kl=0.0016 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 14080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578830.8 mean_steps=13.8
|
|
[Episode 14090] reward=-91655545.6 actor_loss=0.4825 critic_loss=120896658181.6889 entropy=15.6093 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 14100] reward=-89549539.0 actor_loss=0.3190 critic_loss=116453429156.9778 entropy=15.6388 approx_kl=0.0011 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 14100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-553926.9 mean_steps=13.0
|
|
[Episode 14110] reward=-97973169.9 actor_loss=0.3541 critic_loss=132028332623.6444 entropy=15.6479 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 14120] reward=-101849298.1 actor_loss=0.2862 critic_loss=133208022857.9556 entropy=15.6563 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 14120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438952.1 mean_steps=14.8
|
|
[Episode 14130] reward=-98193397.8 actor_loss=0.2992 critic_loss=128509938346.6667 entropy=15.6574 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 14140] reward=-96107627.6 actor_loss=0.3848 critic_loss=124493354507.3778 entropy=15.6731 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 14140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-483995.7 mean_steps=16.1
|
|
[Episode 14150] reward=-86083627.8 actor_loss=0.3452 critic_loss=114407122898.4889 entropy=15.6663 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1178 front_blocked=0
|
|
[Episode 14160] reward=-88726166.0 actor_loss=0.3318 critic_loss=115681663021.5111 entropy=15.6522 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 14160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-447012.7 mean_steps=16.4
|
|
[Episode 14170] reward=-100959246.0 actor_loss=0.2609 critic_loss=132510486619.0222 entropy=15.6585 approx_kl=0.0019 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 14180] reward=-91701795.4 actor_loss=0.4053 critic_loss=121126515507.2000 entropy=15.6748 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 14180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-648677.9 mean_steps=13.2
|
|
[Episode 14190] reward=-96711830.4 actor_loss=0.3283 critic_loss=130959960746.6667 entropy=15.6779 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 14200] reward=-97805121.0 actor_loss=0.3982 critic_loss=126544550661.6889 entropy=15.6859 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 14200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467572.7 mean_steps=15.1
|
|
[Episode 14210] reward=-94454846.0 actor_loss=0.3109 critic_loss=122596158486.7556 entropy=15.6974 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 14220] reward=-102577501.1 actor_loss=0.3656 critic_loss=134856974153.9556 entropy=15.6898 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 14220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431953.7 mean_steps=14.9
|
|
[Episode 14230] reward=-106693887.6 actor_loss=0.3946 critic_loss=141388643259.7333 entropy=15.6896 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 14240] reward=-100568036.6 actor_loss=0.4466 critic_loss=137249303756.8000 entropy=15.6989 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 14240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490276.9 mean_steps=14.3
|
|
[Episode 14250] reward=-101853142.8 actor_loss=0.4064 critic_loss=131479998737.0667 entropy=15.6941 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 14260] reward=-88967355.0 actor_loss=0.3137 critic_loss=115149377262.9333 entropy=15.6888 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1165 front_blocked=0
|
|
[Eval 14260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614039.2 mean_steps=13.3
|
|
[Episode 14270] reward=-99810109.2 actor_loss=0.3501 critic_loss=132308813960.5333 entropy=15.7054 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 14280] reward=-100037750.3 actor_loss=0.2971 critic_loss=134757867155.9111 entropy=15.6887 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 14280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531801.9 mean_steps=14.3
|
|
[Episode 14290] reward=-106961122.1 actor_loss=0.3778 critic_loss=143953482365.1555 entropy=15.7070 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 14300] reward=-91124631.5 actor_loss=0.4961 critic_loss=117855009541.6889 entropy=15.7177 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 14300] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-278540.7 mean_steps=17.6
|
|
[Episode 14310] reward=-96339091.0 actor_loss=0.3550 critic_loss=123918011232.7111 entropy=15.7071 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 14320] reward=-104822323.8 actor_loss=0.3084 critic_loss=140127500879.6444 entropy=15.6916 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 14320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553354.5 mean_steps=13.6
|
|
[Episode 14330] reward=-97738593.4 actor_loss=0.3801 critic_loss=124736314299.7333 entropy=15.7140 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 14340] reward=-99365366.5 actor_loss=0.3332 critic_loss=132034199369.9556 entropy=15.7029 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 14340] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-240696.0 mean_steps=18.6
|
|
[Episode 14350] reward=-104356363.8 actor_loss=0.3719 critic_loss=137521981758.5778 entropy=15.6948 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 14360] reward=-95141968.7 actor_loss=0.3889 critic_loss=126323095415.4667 entropy=15.7208 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 14360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504787.1 mean_steps=14.2
|
|
[Episode 14370] reward=-100465835.6 actor_loss=0.3363 critic_loss=132125010238.5778 entropy=15.7135 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 14380] reward=-98919482.0 actor_loss=0.4056 critic_loss=132182857773.5111 entropy=15.7268 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 14380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-538014.9 mean_steps=12.7
|
|
[Episode 14390] reward=-98811968.7 actor_loss=0.4396 critic_loss=131011089385.2444 entropy=15.7134 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 14400] reward=-106732431.9 actor_loss=0.2351 critic_loss=142446750378.6667 entropy=15.7199 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 14400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559098.9 mean_steps=12.7
|
|
[Episode 14410] reward=-99437140.4 actor_loss=0.4552 critic_loss=130427298156.0889 entropy=15.7313 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 14420] reward=-100454359.1 actor_loss=0.3217 critic_loss=131870729466.3111 entropy=15.7432 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 14420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579351.4 mean_steps=12.8
|
|
[Episode 14430] reward=-100189999.9 actor_loss=0.3835 critic_loss=134339069178.3111 entropy=15.7467 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 14440] reward=-102951728.2 actor_loss=0.3598 critic_loss=140701625184.7111 entropy=15.7508 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 14440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408053.4 mean_steps=15.5
|
|
[Episode 14450] reward=-89188276.9 actor_loss=0.2954 critic_loss=117832191180.8000 entropy=15.7764 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 14460] reward=-99795577.6 actor_loss=0.4178 critic_loss=135119040785.0667 entropy=15.7788 approx_kl=0.0011 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 14460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499402.4 mean_steps=14.2
|
|
[Episode 14470] reward=-103981502.3 actor_loss=0.2686 critic_loss=139626840974.2222 entropy=15.7758 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 14480] reward=-103954631.7 actor_loss=0.3830 critic_loss=142529097636.9778 entropy=15.7794 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 14480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495308.2 mean_steps=14.1
|
|
[Episode 14490] reward=-100218887.1 actor_loss=0.4050 critic_loss=133192999458.1333 entropy=15.7646 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 14500] reward=-95441564.3 actor_loss=0.4186 critic_loss=124291932160.0000 entropy=15.7460 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 14500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486908.0 mean_steps=14.3
|
|
[Episode 14510] reward=-107786827.3 actor_loss=0.3145 critic_loss=145292291458.8445 entropy=15.7422 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 14520] reward=-103024154.5 actor_loss=0.3404 critic_loss=136673243591.1111 entropy=15.7378 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 14520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431059.1 mean_steps=14.4
|
|
[Episode 14530] reward=-97293478.5 actor_loss=0.4017 critic_loss=128379759456.7111 entropy=15.7360 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 14540] reward=-94491667.1 actor_loss=0.3323 critic_loss=122281858389.3333 entropy=15.7247 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 14540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452491.7 mean_steps=15.9
|
|
[Episode 14550] reward=-101601044.9 actor_loss=0.2789 critic_loss=134949210976.7111 entropy=15.7290 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 14560] reward=-99078630.7 actor_loss=0.4209 critic_loss=132525143017.2444 entropy=15.7247 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 14560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513791.7 mean_steps=14.4
|
|
[Episode 14570] reward=-96944564.2 actor_loss=0.3842 critic_loss=126619911964.4444 entropy=15.7250 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 14580] reward=-97115868.0 actor_loss=0.2986 critic_loss=129174843281.2973 entropy=15.7251 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 14580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-419583.5 mean_steps=14.7
|
|
[Episode 14590] reward=-93802643.1 actor_loss=0.3560 critic_loss=121994783766.7556 entropy=15.7277 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 14600] reward=-92782745.1 actor_loss=0.3151 critic_loss=127192673302.7556 entropy=15.7117 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 14600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438711.0 mean_steps=14.8
|
|
[Episode 14610] reward=-101962997.5 actor_loss=0.3056 critic_loss=135067727371.3778 entropy=15.6990 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 14620] reward=-100057203.6 actor_loss=0.3715 critic_loss=134316808419.5556 entropy=15.7081 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 14620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488670.8 mean_steps=14.3
|
|
[Episode 14630] reward=-95940043.2 actor_loss=0.3845 critic_loss=125946926239.2889 entropy=15.7037 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 14640] reward=-93682319.1 actor_loss=0.2732 critic_loss=122441661371.7333 entropy=15.7045 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 14640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-469630.7 mean_steps=13.2
|
|
[Episode 14650] reward=-92169366.6 actor_loss=0.4308 critic_loss=117735882934.0444 entropy=15.7260 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 14660] reward=-94811412.5 actor_loss=0.3183 critic_loss=127968195925.3333 entropy=15.7349 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 14660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424851.6 mean_steps=15.9
|
|
[Episode 14670] reward=-97594150.9 actor_loss=0.4559 critic_loss=127835357366.0444 entropy=15.7254 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 14680] reward=-102055713.9 actor_loss=0.4521 critic_loss=132426362060.8000 entropy=15.7048 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 14680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-569617.4 mean_steps=12.8
|
|
[Episode 14690] reward=-105747532.3 actor_loss=0.3193 critic_loss=138098401462.0444 entropy=15.7110 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 14700] reward=-101369577.9 actor_loss=0.3428 critic_loss=129209288476.4444 entropy=15.7074 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 14700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592147.5 mean_steps=12.8
|
|
[Episode 14710] reward=-96175009.6 actor_loss=0.3085 critic_loss=124626322318.2222 entropy=15.6863 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 14720] reward=-103880217.4 actor_loss=0.3288 critic_loss=136094328422.4000 entropy=15.7020 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 14720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-430406.6 mean_steps=17.1
|
|
[Episode 14730] reward=-100344325.4 actor_loss=0.3594 critic_loss=134424815934.5778 entropy=15.7040 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 14740] reward=-96110510.0 actor_loss=0.4521 critic_loss=129239481093.6889 entropy=15.6917 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 14740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500873.1 mean_steps=14.4
|
|
[Episode 14750] reward=-91985358.7 actor_loss=0.3922 critic_loss=122603611204.2667 entropy=15.7064 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 14760] reward=-98746315.9 actor_loss=0.3913 critic_loss=136871930174.5778 entropy=15.6992 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 14760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-573306.7 mean_steps=13.0
|
|
[Episode 14770] reward=-100590212.6 actor_loss=0.2819 critic_loss=128942218171.7333 entropy=15.7058 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 14780] reward=-102323703.1 actor_loss=0.2894 critic_loss=130374199796.6222 entropy=15.7131 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 14780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554717.8 mean_steps=12.8
|
|
[Episode 14790] reward=-94903478.6 actor_loss=0.2913 critic_loss=125707714924.0889 entropy=15.7061 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 14800] reward=-104697962.5 actor_loss=0.4338 critic_loss=137590998994.4889 entropy=15.7093 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 14800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559605.6 mean_steps=13.7
|
|
[Episode 14810] reward=-105285269.6 actor_loss=0.2871 critic_loss=136435362656.7111 entropy=15.7119 approx_kl=0.0022 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 14820] reward=-97448178.4 actor_loss=0.3860 critic_loss=131554706136.1778 entropy=15.7248 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 14820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-413026.9 mean_steps=14.8
|
|
[Episode 14830] reward=-102507232.1 actor_loss=0.3229 critic_loss=135914664027.0222 entropy=15.7136 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 14840] reward=-104168292.7 actor_loss=0.3671 critic_loss=138882878850.8445 entropy=15.7024 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 14840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453134.8 mean_steps=16.4
|
|
[Episode 14850] reward=-96038274.8 actor_loss=0.4143 critic_loss=125710458880.0000 entropy=15.7178 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 14860] reward=-102141804.9 actor_loss=0.3717 critic_loss=134540955921.0667 entropy=15.7415 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 14860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547486.8 mean_steps=14.7
|
|
[Episode 14870] reward=-103066828.0 actor_loss=0.3526 critic_loss=138565671867.7333 entropy=15.7491 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 14880] reward=-94303931.8 actor_loss=0.3743 critic_loss=122907148652.0889 entropy=15.7441 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 14880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402718.8 mean_steps=16.2
|
|
[Episode 14890] reward=-98213548.7 actor_loss=0.3132 critic_loss=127973817275.7333 entropy=15.7627 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 14900] reward=-87911170.3 actor_loss=0.3853 critic_loss=112568568672.7111 entropy=15.7512 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 14900] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-351438.8 mean_steps=17.1
|
|
[Episode 14910] reward=-91385970.7 actor_loss=0.3829 critic_loss=120148641200.3556 entropy=15.7656 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 14920] reward=-102101227.7 actor_loss=0.3592 critic_loss=130461473723.7333 entropy=15.7520 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 14920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577181.9 mean_steps=12.9
|
|
[Episode 14930] reward=-103140259.5 actor_loss=0.3951 critic_loss=136069739861.3333 entropy=15.7894 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 14940] reward=-104619961.5 actor_loss=0.3297 critic_loss=142778280072.5333 entropy=15.8191 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 14940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477029.3 mean_steps=15.0
|
|
[Episode 14950] reward=-97570922.7 actor_loss=0.3584 critic_loss=126996683707.7333 entropy=15.8203 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 14960] reward=-98175958.4 actor_loss=0.3107 critic_loss=130697795447.4667 entropy=15.8231 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 14960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430580.6 mean_steps=15.0
|
|
[Episode 14970] reward=-111621435.7 actor_loss=0.3036 critic_loss=149692724383.2889 entropy=15.8311 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 14980] reward=-89578430.6 actor_loss=0.4404 critic_loss=118847384052.6222 entropy=15.8429 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 14980] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-645565.4 mean_steps=11.3
|
|
[Episode 14990] reward=-103249940.2 actor_loss=0.2667 critic_loss=138168125030.4000 entropy=15.8369 approx_kl=0.0025 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 15000] reward=-96238702.6 actor_loss=0.3771 critic_loss=126427887843.5556 entropy=15.8447 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 15000] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-375532.7 mean_steps=17.2
|
|
[Episode 15010] reward=-99643252.0 actor_loss=0.3662 critic_loss=134503050262.7556 entropy=15.8399 approx_kl=0.0018 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 15020] reward=-97806208.0 actor_loss=0.3331 critic_loss=124899794761.9556 entropy=15.8511 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 15020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-348447.6 mean_steps=16.2
|
|
[Episode 15030] reward=-97792727.6 actor_loss=0.4026 critic_loss=131931838145.4222 entropy=15.8769 approx_kl=0.0031 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 15040] reward=-104141551.9 actor_loss=0.3333 critic_loss=138811134771.2000 entropy=15.8889 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 15040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424455.5 mean_steps=15.7
|
|
[Episode 15050] reward=-98840325.2 actor_loss=0.4300 critic_loss=133178213990.4000 entropy=15.9173 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 15060] reward=-96167916.8 actor_loss=0.3152 critic_loss=123246243748.9778 entropy=15.9249 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 15060] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-383435.5 mean_steps=18.2
|
|
[Episode 15070] reward=-100868618.7 actor_loss=0.3224 critic_loss=136663903072.7111 entropy=15.9143 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 15080] reward=-105591262.8 actor_loss=0.1977 critic_loss=139266452502.7556 entropy=15.9131 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Eval 15080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-557687.8 mean_steps=12.4
|
|
[Episode 15090] reward=-102022421.7 actor_loss=0.2487 critic_loss=139240146716.4445 entropy=15.9053 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1204 front_blocked=0
|
|
[Episode 15100] reward=-101038255.9 actor_loss=0.3809 critic_loss=131002015561.9556 entropy=15.9039 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 15100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-397365.3 mean_steps=16.6
|
|
[Episode 15110] reward=-100123129.6 actor_loss=0.3472 critic_loss=132383195500.0889 entropy=15.9181 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 15120] reward=-97911237.0 actor_loss=0.3234 critic_loss=129472343608.8889 entropy=15.9420 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 15120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-557264.3 mean_steps=13.3
|
|
[Episode 15130] reward=-95497308.5 actor_loss=0.3101 critic_loss=127429332172.8000 entropy=15.9416 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 15140] reward=-99732205.0 actor_loss=0.3906 critic_loss=130665653498.3111 entropy=15.9688 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 15140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-594496.6 mean_steps=14.9
|
|
[Episode 15150] reward=-92217681.0 actor_loss=0.4052 critic_loss=119215694552.1778 entropy=15.9568 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 15160] reward=-100439123.8 actor_loss=0.3345 critic_loss=131007512758.0444 entropy=15.9372 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 15160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534432.5 mean_steps=14.8
|
|
[Episode 15170] reward=-104271820.0 actor_loss=0.3807 critic_loss=139463661977.6000 entropy=15.9387 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 15180] reward=-98107086.7 actor_loss=0.3007 critic_loss=128167359101.1555 entropy=15.9307 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 15180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431992.3 mean_steps=16.1
|
|
[Episode 15190] reward=-99546346.1 actor_loss=0.3078 critic_loss=130508808009.9556 entropy=15.9329 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 15200] reward=-96047437.6 actor_loss=0.3519 critic_loss=129249752951.4667 entropy=15.9230 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 15200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541483.5 mean_steps=13.7
|
|
[Episode 15210] reward=-102403019.3 actor_loss=0.4074 critic_loss=138795595457.4222 entropy=15.9235 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 15220] reward=-100161679.9 actor_loss=0.3719 critic_loss=134965072509.1555 entropy=15.9329 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 15220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-367865.7 mean_steps=16.4
|
|
[Episode 15230] reward=-99910685.9 actor_loss=0.2372 critic_loss=129416172703.2889 entropy=15.9217 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1198 front_blocked=0
|
|
[Episode 15240] reward=-105736719.3 actor_loss=0.3249 critic_loss=139982875124.6222 entropy=15.9114 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 15240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442644.1 mean_steps=15.9
|
|
[Episode 15250] reward=-106998514.4 actor_loss=0.3621 critic_loss=137824420932.2667 entropy=15.8793 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 15260] reward=-102178004.7 actor_loss=0.3601 critic_loss=137627613957.6889 entropy=15.8916 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 15260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-498219.2 mean_steps=13.3
|
|
[Episode 15270] reward=-106434662.3 actor_loss=0.3668 critic_loss=141740530619.7333 entropy=15.9044 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 15280] reward=-102373585.1 actor_loss=0.3479 critic_loss=131594072155.0222 entropy=15.9191 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 15280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-673935.2 mean_steps=12.4
|
|
[Episode 15290] reward=-98767624.5 actor_loss=0.4605 critic_loss=130590348629.3333 entropy=15.9287 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 15300] reward=-97540895.5 actor_loss=0.2958 critic_loss=127697983533.5111 entropy=15.9322 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 15300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-556264.1 mean_steps=13.1
|
|
[Episode 15310] reward=-97029936.3 actor_loss=0.4643 critic_loss=128071820174.2222 entropy=15.9325 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 15320] reward=-100997540.9 actor_loss=0.2835 critic_loss=135377196555.3778 entropy=15.9100 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 15320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517240.9 mean_steps=13.6
|
|
[Episode 15330] reward=-105359118.0 actor_loss=0.3828 critic_loss=136984810837.3333 entropy=15.9018 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 15340] reward=-88792149.4 actor_loss=0.3305 critic_loss=116061084421.6889 entropy=15.8861 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 15340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-379023.3 mean_steps=14.9
|
|
[Episode 15350] reward=-100591219.8 actor_loss=0.3672 critic_loss=134489355969.4222 entropy=15.9063 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 15360] reward=-106156171.6 actor_loss=0.2973 critic_loss=140991476531.2000 entropy=15.9074 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 15360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553331.1 mean_steps=14.6
|
|
[Episode 15370] reward=-93889039.2 actor_loss=0.3594 critic_loss=123429660444.4444 entropy=15.9066 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 15380] reward=-98811942.9 actor_loss=0.3061 critic_loss=133559505351.1111 entropy=15.9111 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 15380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476302.5 mean_steps=15.3
|
|
[Episode 15390] reward=-103739164.2 actor_loss=0.3455 critic_loss=139995352268.8000 entropy=15.9136 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 15400] reward=-109829634.0 actor_loss=0.3801 critic_loss=145848657510.4000 entropy=15.9314 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 15400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-405060.5 mean_steps=16.5
|
|
[Episode 15410] reward=-102720561.8 actor_loss=0.4152 critic_loss=136068787768.8889 entropy=15.9399 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 15420] reward=-96236050.6 actor_loss=0.3130 critic_loss=130870814674.4889 entropy=15.9512 approx_kl=0.0021 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 15420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-464900.1 mean_steps=13.9
|
|
[Episode 15430] reward=-96667974.8 actor_loss=0.3512 critic_loss=126248869705.9556 entropy=15.9594 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 15440] reward=-107232642.2 actor_loss=0.3086 critic_loss=139419882473.2444 entropy=15.9825 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 15440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616861.7 mean_steps=13.4
|
|
[Episode 15450] reward=-106021917.4 actor_loss=0.2521 critic_loss=140696505184.7111 entropy=15.9773 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 15460] reward=-103855535.4 actor_loss=0.3713 critic_loss=139265666252.8000 entropy=15.9989 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 15460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448876.8 mean_steps=15.8
|
|
[Episode 15470] reward=-104126790.1 actor_loss=0.3033 critic_loss=138431058011.0222 entropy=15.9903 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 15480] reward=-95889193.6 actor_loss=0.4014 critic_loss=128788575027.2000 entropy=15.9699 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 15480] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-314604.9 mean_steps=16.9
|
|
[Episode 15490] reward=-101096291.5 actor_loss=0.3201 critic_loss=132937878732.8000 entropy=15.9939 approx_kl=0.0017 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 15500] reward=-103731685.2 actor_loss=0.3211 critic_loss=136831671409.7778 entropy=15.9706 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 15500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-506735.5 mean_steps=13.2
|
|
[Episode 15510] reward=-109399091.3 actor_loss=0.3611 critic_loss=147480620418.8445 entropy=15.9591 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 15520] reward=-103141859.6 actor_loss=0.3742 critic_loss=135392176992.7111 entropy=15.9736 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 15520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509978.5 mean_steps=14.9
|
|
[Episode 15530] reward=-104988214.7 actor_loss=0.3751 critic_loss=139187678776.8889 entropy=15.9952 approx_kl=0.0016 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 15540] reward=-102627958.7 actor_loss=0.3187 critic_loss=141853923555.5555 entropy=15.9950 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 15540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-411080.8 mean_steps=15.6
|
|
[Episode 15550] reward=-95796165.2 actor_loss=0.4447 critic_loss=128894993476.2667 entropy=15.9923 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 15560] reward=-95570861.6 actor_loss=0.4863 critic_loss=139939903533.5111 entropy=15.9921 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 15560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540709.8 mean_steps=13.4
|
|
[Episode 15570] reward=-93926642.8 actor_loss=0.2950 critic_loss=125401108388.9778 entropy=16.0031 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 15580] reward=-97562738.3 actor_loss=0.4877 critic_loss=126033299592.5333 entropy=15.9934 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 15580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-545106.2 mean_steps=14.8
|
|
[Episode 15590] reward=-98558267.7 actor_loss=0.4244 critic_loss=130547603137.4222 entropy=16.0036 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 15600] reward=-101754339.6 actor_loss=0.3265 critic_loss=133220370750.5778 entropy=16.0030 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 15600] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-268641.1 mean_steps=17.8
|
|
[Episode 15610] reward=-100954776.7 actor_loss=0.3622 critic_loss=134762648371.2000 entropy=16.0291 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 15620] reward=-103196868.5 actor_loss=0.3910 critic_loss=138293306072.1778 entropy=16.0179 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 15620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423481.1 mean_steps=15.6
|
|
[Episode 15630] reward=-101575584.6 actor_loss=0.3763 critic_loss=134926445590.7556 entropy=16.0334 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 15640] reward=-95725510.5 actor_loss=0.4196 critic_loss=132065626248.5333 entropy=16.0404 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 15640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574836.4 mean_steps=14.1
|
|
[Episode 15650] reward=-106345689.2 actor_loss=0.3018 critic_loss=137572456493.5111 entropy=16.0425 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 15660] reward=-106605934.2 actor_loss=0.4244 critic_loss=139048508529.7778 entropy=16.0457 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 15660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470392.2 mean_steps=14.2
|
|
[Episode 15670] reward=-101143291.8 actor_loss=0.3487 critic_loss=130645247590.4000 entropy=16.0316 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 15680] reward=-99201541.8 actor_loss=0.2658 critic_loss=132898532192.7111 entropy=16.0179 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1217 front_blocked=0
|
|
[Eval 15680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443626.6 mean_steps=15.7
|
|
[Episode 15690] reward=-103015869.0 actor_loss=0.3320 critic_loss=137823415500.8000 entropy=16.0317 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 15700] reward=-97776098.2 actor_loss=0.3404 critic_loss=126372794276.9778 entropy=16.0502 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 15700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368643.8 mean_steps=16.6
|
|
[Episode 15710] reward=-99307943.2 actor_loss=0.3248 critic_loss=130569580452.9778 entropy=16.0501 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 15720] reward=-105327002.4 actor_loss=0.3576 critic_loss=140902227603.9111 entropy=16.0727 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 15720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455396.7 mean_steps=15.9
|
|
[Episode 15730] reward=-97132260.1 actor_loss=0.4257 critic_loss=128523535064.1778 entropy=16.0813 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 15740] reward=-97791740.9 actor_loss=0.3793 critic_loss=130475835574.0444 entropy=16.1068 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 15740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575405.2 mean_steps=13.8
|
|
[Episode 15750] reward=-105750108.1 actor_loss=0.3121 critic_loss=144586151981.5111 entropy=16.0976 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 15760] reward=-101099020.1 actor_loss=0.3130 critic_loss=133753932640.7111 entropy=16.0830 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 15760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400853.0 mean_steps=15.5
|
|
[Episode 15770] reward=-101522855.9 actor_loss=0.3346 critic_loss=132790222483.9111 entropy=16.0748 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 15780] reward=-102201897.5 actor_loss=0.2998 critic_loss=132562946912.7111 entropy=16.0670 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 15780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537031.5 mean_steps=13.7
|
|
[Episode 15790] reward=-104670034.2 actor_loss=0.3441 critic_loss=137380162400.7111 entropy=16.0575 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 15800] reward=-97782764.9 actor_loss=0.4459 critic_loss=134853157228.0889 entropy=16.0483 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 15800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413943.9 mean_steps=16.1
|
|
[Episode 15810] reward=-100406187.2 actor_loss=0.3324 critic_loss=133687425979.7333 entropy=16.0384 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 15820] reward=-105438212.8 actor_loss=0.3549 critic_loss=143009907962.3111 entropy=16.0480 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 15820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-569917.9 mean_steps=12.9
|
|
[Episode 15830] reward=-100077067.8 actor_loss=0.3588 critic_loss=137114167068.4444 entropy=16.0449 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 15840] reward=-99100298.6 actor_loss=0.4485 critic_loss=132605539123.2000 entropy=16.0497 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 15840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519665.5 mean_steps=14.4
|
|
[Episode 15850] reward=-97098028.5 actor_loss=0.4478 critic_loss=128082807830.7556 entropy=16.0667 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 15860] reward=-102108483.0 actor_loss=0.4088 critic_loss=133667095074.1333 entropy=16.0827 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 15860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518304.4 mean_steps=14.1
|
|
[Episode 15870] reward=-107790071.6 actor_loss=0.3165 critic_loss=143892985719.4667 entropy=16.0910 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 15880] reward=-107287524.8 actor_loss=0.2588 critic_loss=141668660383.2889 entropy=16.1029 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 15880] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-325690.1 mean_steps=18.6
|
|
[Episode 15890] reward=-100765889.7 actor_loss=0.3617 critic_loss=136843924122.7907 entropy=16.1097 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 15900] reward=-106370588.3 actor_loss=0.2041 critic_loss=144053255281.7778 entropy=16.1269 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Eval 15900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522778.6 mean_steps=14.2
|
|
[Episode 15910] reward=-106774672.3 actor_loss=0.2740 critic_loss=140272357284.9778 entropy=16.1349 approx_kl=0.0030 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 15920] reward=-108554725.0 actor_loss=0.2826 critic_loss=145206655385.6000 entropy=16.1332 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 15920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561322.7 mean_steps=14.1
|
|
[Episode 15930] reward=-106807204.6 actor_loss=0.3455 critic_loss=142028265153.4222 entropy=16.1360 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 15940] reward=-105882467.4 actor_loss=0.3297 critic_loss=144253582995.9111 entropy=16.1130 approx_kl=0.0023 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 15940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564509.8 mean_steps=13.8
|
|
[Episode 15950] reward=-103244108.4 actor_loss=0.2351 critic_loss=138259917118.5778 entropy=16.1043 approx_kl=0.0020 kl_stop=0 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 15960] reward=-102329973.0 actor_loss=0.3229 critic_loss=134878582465.4222 entropy=16.1310 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 15960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552035.8 mean_steps=13.7
|
|
[Episode 15970] reward=-111099068.7 actor_loss=0.3927 critic_loss=152444586120.5333 entropy=16.1308 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 15980] reward=-101049512.8 actor_loss=0.3116 critic_loss=132574349084.4444 entropy=16.1291 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 15980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-396334.7 mean_steps=16.6
|
|
[Episode 15990] reward=-107525763.9 actor_loss=0.3425 critic_loss=140074896588.8000 entropy=16.1378 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 16000] reward=-108720015.4 actor_loss=0.3867 critic_loss=145150142782.5778 entropy=16.1676 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 16000] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-667997.4 mean_steps=11.6
|
|
[Episode 16010] reward=-102187228.8 actor_loss=0.3683 critic_loss=136423576917.3333 entropy=16.1761 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 16020] reward=-109460599.4 actor_loss=0.3020 critic_loss=146458762535.8222 entropy=16.1757 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 16020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-328092.6 mean_steps=17.1
|
|
[Episode 16030] reward=-110397662.2 actor_loss=0.2581 critic_loss=147897247698.4889 entropy=16.1929 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 16040] reward=-95206583.6 actor_loss=0.5018 critic_loss=130365716343.4667 entropy=16.2013 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 16040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490510.8 mean_steps=14.5
|
|
[Episode 16050] reward=-103797973.2 actor_loss=0.2870 critic_loss=136011194185.9556 entropy=16.2065 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 16060] reward=-108550544.5 actor_loss=0.2495 critic_loss=148401041863.1111 entropy=16.2215 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 16060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-377981.1 mean_steps=16.8
|
|
[Episode 16070] reward=-104479601.9 actor_loss=0.3033 critic_loss=137681423200.7111 entropy=16.2156 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 16080] reward=-104602535.0 actor_loss=0.4231 critic_loss=141183436208.3556 entropy=16.2334 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 16080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528506.5 mean_steps=14.4
|
|
[Episode 16090] reward=-106733296.2 actor_loss=0.3236 critic_loss=143062717235.2000 entropy=16.2480 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 16100] reward=-106008622.7 actor_loss=0.3497 critic_loss=146195204778.6667 entropy=16.2592 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 16100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430750.4 mean_steps=15.0
|
|
[Episode 16110] reward=-103078391.7 actor_loss=0.3444 critic_loss=141047007732.6222 entropy=16.2577 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 16120] reward=-106198703.3 actor_loss=0.3829 critic_loss=142679926192.3556 entropy=16.2606 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 16120] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-705270.4 mean_steps=12.2
|
|
[Episode 16130] reward=-100442195.3 actor_loss=0.3081 critic_loss=132338670341.6889 entropy=16.2408 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 16140] reward=-111256509.8 actor_loss=0.4184 critic_loss=150108013636.2667 entropy=16.2408 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 16140] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-348924.5 mean_steps=17.4
|
|
[Episode 16150] reward=-117568647.4 actor_loss=0.3173 critic_loss=154471399059.9111 entropy=16.2498 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 16160] reward=-118045879.0 actor_loss=0.2295 critic_loss=160670317499.7333 entropy=16.2474 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 16160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-366061.3 mean_steps=17.1
|
|
[Episode 16170] reward=-112511841.0 actor_loss=0.3450 critic_loss=155962143539.2000 entropy=16.2467 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 16180] reward=-103425610.3 actor_loss=0.3321 critic_loss=140010683505.7778 entropy=16.2301 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 16180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511274.9 mean_steps=14.0
|
|
[Episode 16190] reward=-104993314.4 actor_loss=0.4007 critic_loss=155287421656.1778 entropy=16.2358 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 16200] reward=-106571483.4 actor_loss=0.4112 critic_loss=145767887303.1111 entropy=16.2392 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 16200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485370.2 mean_steps=15.1
|
|
[Episode 16210] reward=-107439442.3 actor_loss=0.3441 critic_loss=145814773760.0000 entropy=16.2274 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 16220] reward=-103880179.2 actor_loss=0.3870 critic_loss=136063233956.9778 entropy=16.2003 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 16220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603609.1 mean_steps=12.8
|
|
[Episode 16230] reward=-118379943.5 actor_loss=0.2338 critic_loss=162650826706.4889 entropy=16.1966 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 16240] reward=-112285168.9 actor_loss=0.2778 critic_loss=150221876246.7556 entropy=16.1832 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 16240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550796.8 mean_steps=13.8
|
|
[Episode 16250] reward=-103591917.8 actor_loss=0.4308 critic_loss=138138734136.8889 entropy=16.2253 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 16260] reward=-111588033.4 actor_loss=0.3483 critic_loss=152829979306.6667 entropy=16.2180 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 16260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529800.6 mean_steps=13.4
|
|
[Episode 16270] reward=-116091928.3 actor_loss=0.3385 critic_loss=155227366286.2222 entropy=16.2312 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 16280] reward=-111253930.5 actor_loss=0.3208 critic_loss=150216364305.0667 entropy=16.2319 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 16280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-340782.7 mean_steps=16.1
|
|
[Episode 16290] reward=-100183927.9 actor_loss=0.3890 critic_loss=132608560696.8889 entropy=16.2138 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 16300] reward=-109737875.6 actor_loss=0.3993 critic_loss=148086959490.8445 entropy=16.2220 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 16300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-387039.3 mean_steps=16.2
|
|
[Episode 16310] reward=-100744616.9 actor_loss=0.3014 critic_loss=139700175576.1778 entropy=16.2325 approx_kl=0.0028 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 16320] reward=-118995479.5 actor_loss=0.4268 critic_loss=161789603566.9333 entropy=16.2474 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 16320] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-379957.9 mean_steps=17.4
|
|
[Episode 16330] reward=-102730616.1 actor_loss=0.3189 critic_loss=143311660100.2667 entropy=16.2440 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 16340] reward=-99131449.8 actor_loss=0.3628 critic_loss=135823562433.4222 entropy=16.2433 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 16340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515658.9 mean_steps=13.5
|
|
[Episode 16350] reward=-107937722.5 actor_loss=0.3973 critic_loss=144766047391.2889 entropy=16.2469 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 16360] reward=-111805057.8 actor_loss=0.3449 critic_loss=145685138636.8000 entropy=16.2584 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 16360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473401.9 mean_steps=15.1
|
|
[Episode 16370] reward=-105031374.1 actor_loss=0.3737 critic_loss=143595459197.1555 entropy=16.2475 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 16380] reward=-105135231.5 actor_loss=0.3479 critic_loss=139014633335.4667 entropy=16.2378 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 16380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409531.0 mean_steps=15.7
|
|
[Episode 16390] reward=-110518203.8 actor_loss=0.3263 critic_loss=145730989442.8445 entropy=16.2368 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 16400] reward=-107390072.5 actor_loss=0.3575 critic_loss=140360680334.2222 entropy=16.2210 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 16400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379982.8 mean_steps=16.7
|
|
[Episode 16410] reward=-105098120.6 actor_loss=0.3696 critic_loss=141243698016.7111 entropy=16.2293 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 16420] reward=-109279037.0 actor_loss=0.3175 critic_loss=149156047348.6222 entropy=16.2400 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 16420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499327.3 mean_steps=14.2
|
|
[Episode 16430] reward=-101177360.4 actor_loss=0.3667 critic_loss=136552918948.9778 entropy=16.2707 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 16440] reward=-102288217.2 actor_loss=0.4093 critic_loss=141142229174.0444 entropy=16.2869 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 16440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438007.6 mean_steps=15.8
|
|
[Episode 16450] reward=-114959247.0 actor_loss=0.3540 critic_loss=153524503256.1778 entropy=16.2821 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 16460] reward=-118657086.7 actor_loss=0.3414 critic_loss=164931653905.0667 entropy=16.2965 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 16460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526690.5 mean_steps=14.6
|
|
[Episode 16470] reward=-107797186.6 actor_loss=0.2364 critic_loss=146848402272.7111 entropy=16.3083 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 16480] reward=-104084074.6 actor_loss=0.3417 critic_loss=139781595318.0444 entropy=16.3046 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 16480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400513.6 mean_steps=16.4
|
|
[Episode 16490] reward=-106618492.6 actor_loss=0.3891 critic_loss=147662707097.6000 entropy=16.2990 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 16500] reward=-105451625.3 actor_loss=0.3816 critic_loss=143578682163.2000 entropy=16.2918 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 16500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-452800.3 mean_steps=17.1
|
|
[Episode 16510] reward=-108053571.1 actor_loss=0.4021 critic_loss=145204827659.3778 entropy=16.2721 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 16520] reward=-104485776.7 actor_loss=0.2519 critic_loss=139654887287.4667 entropy=16.2921 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 16520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450917.7 mean_steps=16.2
|
|
[Episode 16530] reward=-107164329.1 actor_loss=0.3798 critic_loss=144047258191.6444 entropy=16.2860 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 16540] reward=-107084198.4 actor_loss=0.3384 critic_loss=141587313823.2889 entropy=16.3004 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 16540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417802.4 mean_steps=15.7
|
|
[Episode 16550] reward=-106364382.2 actor_loss=0.2980 critic_loss=143909889092.2667 entropy=16.2849 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 16560] reward=-110404897.1 actor_loss=0.3358 critic_loss=149533438407.1111 entropy=16.2947 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 16560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431972.5 mean_steps=15.6
|
|
[Episode 16570] reward=-115627477.3 actor_loss=0.3931 critic_loss=154695163357.8667 entropy=16.2975 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 16580] reward=-107907527.2 actor_loss=0.3466 critic_loss=142719592948.6222 entropy=16.2798 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 16580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469134.7 mean_steps=14.3
|
|
[Episode 16590] reward=-112286015.3 actor_loss=0.3564 critic_loss=151716320233.2444 entropy=16.2712 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 16600] reward=-98578480.6 actor_loss=0.3882 critic_loss=133491110889.2444 entropy=16.2820 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 16600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495177.6 mean_steps=14.9
|
|
[Episode 16610] reward=-104138248.8 actor_loss=0.3457 critic_loss=136669566475.3778 entropy=16.2848 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 16620] reward=-110177621.9 actor_loss=0.3548 critic_loss=148676108105.9556 entropy=16.3042 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 16620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493527.5 mean_steps=14.8
|
|
[Episode 16630] reward=-107023499.1 actor_loss=0.2971 critic_loss=142181983300.2667 entropy=16.3285 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 16640] reward=-108636634.4 actor_loss=0.3473 critic_loss=142159327323.0222 entropy=16.3179 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 16640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509052.2 mean_steps=14.4
|
|
[Episode 16650] reward=-110004284.3 actor_loss=0.3222 critic_loss=142989436882.4889 entropy=16.3297 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 16660] reward=-112314414.8 actor_loss=0.2415 critic_loss=150192442936.8889 entropy=16.3383 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 16660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465975.6 mean_steps=15.2
|
|
[Episode 16670] reward=-99487854.5 actor_loss=0.2870 critic_loss=127149401338.3111 entropy=16.3631 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 16680] reward=-105290636.0 actor_loss=0.4104 critic_loss=146846818668.0889 entropy=16.3590 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 16680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501770.9 mean_steps=14.2
|
|
[Episode 16690] reward=-111831200.3 actor_loss=0.3638 critic_loss=150967314568.5333 entropy=16.3898 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 16700] reward=-103301450.4 actor_loss=0.3270 critic_loss=138504015003.1515 entropy=16.3923 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 16700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418473.5 mean_steps=15.7
|
|
[Episode 16710] reward=-109542009.4 actor_loss=0.2328 critic_loss=148700447994.3111 entropy=16.4070 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 16720] reward=-114054450.6 actor_loss=0.3358 critic_loss=153907415540.6222 entropy=16.4129 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 16720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-479609.1 mean_steps=15.6
|
|
[Episode 16730] reward=-113546040.3 actor_loss=0.3276 critic_loss=152834390607.6444 entropy=16.4194 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 16740] reward=-101481154.2 actor_loss=0.3120 critic_loss=133957599232.0000 entropy=16.4433 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 16740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471328.0 mean_steps=15.2
|
|
[Episode 16750] reward=-112381651.6 actor_loss=0.3566 critic_loss=146484570066.4889 entropy=16.4350 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 16760] reward=-109468617.6 actor_loss=0.3140 critic_loss=145702465991.1111 entropy=16.4495 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 16760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-627800.9 mean_steps=12.2
|
|
[Episode 16770] reward=-107961851.5 actor_loss=0.3068 critic_loss=140310066153.2444 entropy=16.4529 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 16780] reward=-108804598.5 actor_loss=0.4202 critic_loss=142693968918.7556 entropy=16.4625 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 16780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507666.7 mean_steps=14.2
|
|
[Episode 16790] reward=-114005971.5 actor_loss=0.3188 critic_loss=155507322242.8445 entropy=16.4686 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 16800] reward=-109484859.5 actor_loss=0.3800 critic_loss=145625171376.3556 entropy=16.4663 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 16800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-646678.0 mean_steps=12.4
|
|
[Episode 16810] reward=-107668604.0 actor_loss=0.2788 critic_loss=144457585095.1111 entropy=16.4663 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 16820] reward=-104043198.1 actor_loss=0.4634 critic_loss=138607413930.6667 entropy=16.4817 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 16820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559475.1 mean_steps=13.6
|
|
[Episode 16830] reward=-108786984.3 actor_loss=0.4066 critic_loss=141781795726.2222 entropy=16.4816 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 16840] reward=-113240561.6 actor_loss=0.2984 critic_loss=155688884269.5111 entropy=16.4863 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 16840] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-304958.3 mean_steps=16.7
|
|
[Episode 16850] reward=-105284495.4 actor_loss=0.2881 critic_loss=138556897689.6000 entropy=16.5144 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 16860] reward=-108416716.7 actor_loss=0.2906 critic_loss=146791944283.0222 entropy=16.5297 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 16860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409004.4 mean_steps=15.3
|
|
[Episode 16870] reward=-113881753.4 actor_loss=0.2836 critic_loss=152167386316.8000 entropy=16.5098 approx_kl=0.0029 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 16880] reward=-114875665.9 actor_loss=0.3113 critic_loss=154398855805.1555 entropy=16.5127 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 16880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429026.8 mean_steps=15.7
|
|
[Episode 16890] reward=-110652894.6 actor_loss=0.3977 critic_loss=153455561204.6222 entropy=16.5220 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 16900] reward=-112809287.6 actor_loss=0.3313 critic_loss=156590152817.7778 entropy=16.5190 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 16900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603066.3 mean_steps=12.7
|
|
[Episode 16910] reward=-110973963.6 actor_loss=0.2004 critic_loss=151081386348.0889 entropy=16.5150 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 16920] reward=-108919802.1 actor_loss=0.2997 critic_loss=147073540278.0444 entropy=16.5226 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 16920] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-643285.6 mean_steps=11.3
|
|
[Episode 16930] reward=-108325711.2 actor_loss=0.4458 critic_loss=143869665644.0889 entropy=16.5129 approx_kl=0.0024 kl_stop=0 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 16940] reward=-105988197.9 actor_loss=0.3703 critic_loss=142239350784.0000 entropy=16.5188 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 16940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513200.4 mean_steps=13.6
|
|
[Episode 16950] reward=-101239845.2 actor_loss=0.4814 critic_loss=132235663223.4667 entropy=16.5049 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 16960] reward=-104794269.2 actor_loss=0.4486 critic_loss=138320848486.4000 entropy=16.5011 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 16960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592908.4 mean_steps=13.1
|
|
[Episode 16970] reward=-108779957.9 actor_loss=0.3081 critic_loss=145527335958.7556 entropy=16.5049 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 16980] reward=-109315560.0 actor_loss=0.3419 critic_loss=148591646401.4222 entropy=16.5127 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 16980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559467.6 mean_steps=13.7
|
|
[Episode 16990] reward=-111492661.9 actor_loss=0.4287 critic_loss=149169915312.3556 entropy=16.5294 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 17000] reward=-107830324.7 actor_loss=0.4383 critic_loss=151177994057.9556 entropy=16.5396 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 17000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453925.7 mean_steps=14.8
|
|
[Episode 17010] reward=-116937322.7 actor_loss=0.2146 critic_loss=163570340841.2444 entropy=16.5227 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 17020] reward=-110383595.4 actor_loss=0.3997 critic_loss=147382905878.7556 entropy=16.5292 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 17020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-634153.1 mean_steps=12.6
|
|
[Episode 17030] reward=-103926941.7 actor_loss=0.3984 critic_loss=135458863877.6889 entropy=16.5248 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 17040] reward=-103870763.6 actor_loss=0.3639 critic_loss=139197849600.0000 entropy=16.5335 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 17040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540087.3 mean_steps=13.7
|
|
[Episode 17050] reward=-107627150.1 actor_loss=0.2640 critic_loss=143439215729.7778 entropy=16.5425 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 17060] reward=-106502606.0 actor_loss=0.2916 critic_loss=140471699046.4000 entropy=16.5555 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 17060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-366324.4 mean_steps=16.5
|
|
[Episode 17070] reward=-116267810.2 actor_loss=0.3394 critic_loss=157636987380.6222 entropy=16.5614 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 17080] reward=-111974652.5 actor_loss=0.3289 critic_loss=148462186587.0222 entropy=16.5432 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 17080] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-655510.5 mean_steps=10.2
|
|
[Episode 17090] reward=-109951523.9 actor_loss=0.3093 critic_loss=148805181804.0889 entropy=16.5325 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 17100] reward=-106132651.1 actor_loss=0.3057 critic_loss=137209013680.3556 entropy=16.5181 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 17100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544119.0 mean_steps=14.6
|
|
[Episode 17110] reward=-114747840.7 actor_loss=0.3293 critic_loss=154784755803.0222 entropy=16.5304 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 17120] reward=-107202073.8 actor_loss=0.2878 critic_loss=140158501774.2222 entropy=16.5398 approx_kl=0.0037 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 17120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-419942.7 mean_steps=13.5
|
|
[Episode 17130] reward=-114844427.5 actor_loss=0.2664 critic_loss=153105258541.5111 entropy=16.5415 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 17140] reward=-111573472.4 actor_loss=0.3238 critic_loss=150098860805.6889 entropy=16.5307 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 17140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-571343.6 mean_steps=14.8
|
|
[Episode 17150] reward=-107743937.1 actor_loss=0.2967 critic_loss=145573551763.9111 entropy=16.5380 approx_kl=0.0033 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 17160] reward=-111874620.7 actor_loss=0.4285 critic_loss=155569381922.1333 entropy=16.5422 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 17160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-509500.3 mean_steps=13.0
|
|
[Episode 17170] reward=-109484151.5 actor_loss=0.2839 critic_loss=148964152843.3778 entropy=16.5361 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 17180] reward=-105912046.6 actor_loss=0.2428 critic_loss=138905084450.1333 entropy=16.5180 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 17180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523354.7 mean_steps=14.4
|
|
[Episode 17190] reward=-105607361.1 actor_loss=0.3447 critic_loss=142702553875.6923 entropy=16.5014 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 17200] reward=-114301764.8 actor_loss=0.3787 critic_loss=148261650067.9111 entropy=16.5092 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 17200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495790.5 mean_steps=14.2
|
|
[Episode 17210] reward=-109186892.5 actor_loss=0.3720 critic_loss=147535251228.4445 entropy=16.5099 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 17220] reward=-105404692.1 actor_loss=0.3325 critic_loss=143452378089.2444 entropy=16.4896 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 17220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-384940.3 mean_steps=16.3
|
|
[Episode 17230] reward=-104941721.9 actor_loss=0.3232 critic_loss=136630221937.7778 entropy=16.5065 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 17240] reward=-103985855.7 actor_loss=0.3792 critic_loss=140379625517.5111 entropy=16.5055 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 17240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451834.8 mean_steps=15.9
|
|
[Episode 17250] reward=-109172259.9 actor_loss=0.2797 critic_loss=143838060726.0444 entropy=16.4992 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 17260] reward=-113037242.4 actor_loss=0.3649 critic_loss=149800405128.5333 entropy=16.4929 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 17260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-406663.2 mean_steps=14.5
|
|
[Episode 17270] reward=-109888594.4 actor_loss=0.4240 critic_loss=142479026858.6667 entropy=16.5234 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 17280] reward=-110144921.4 actor_loss=0.2097 critic_loss=148068949833.9556 entropy=16.5277 approx_kl=0.0027 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 17280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-365975.5 mean_steps=17.2
|
|
[Episode 17290] reward=-115689212.6 actor_loss=0.3632 critic_loss=157317387969.4222 entropy=16.5246 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 17300] reward=-106875886.8 actor_loss=0.2595 critic_loss=142336137443.5555 entropy=16.5198 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 17300] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-613269.2 mean_steps=12.2
|
|
[Episode 17310] reward=-110658295.7 actor_loss=0.3437 critic_loss=152631345880.1778 entropy=16.5083 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 17320] reward=-109342223.3 actor_loss=0.3198 critic_loss=143008065126.4000 entropy=16.5516 approx_kl=0.0036 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 17320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463721.6 mean_steps=14.2
|
|
[Episode 17330] reward=-113899943.7 actor_loss=0.2668 critic_loss=154840643811.5555 entropy=16.5427 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 17340] reward=-113062364.6 actor_loss=0.3930 critic_loss=148210097993.9556 entropy=16.5786 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 17340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532097.1 mean_steps=13.3
|
|
[Episode 17350] reward=-109510080.9 actor_loss=0.2833 critic_loss=145448337408.0000 entropy=16.5799 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 17360] reward=-107831265.1 actor_loss=0.4732 critic_loss=143678613640.5333 entropy=16.5755 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 17360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402906.6 mean_steps=15.8
|
|
[Episode 17370] reward=-113512548.2 actor_loss=0.2299 critic_loss=148175336607.2889 entropy=16.5888 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 17380] reward=-110733225.8 actor_loss=0.2968 critic_loss=147523728849.4546 entropy=16.6042 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 17380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578370.2 mean_steps=13.4
|
|
[Episode 17390] reward=-113668035.6 actor_loss=0.2989 critic_loss=149322588394.0571 entropy=16.5986 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 17400] reward=-111415476.6 actor_loss=0.3303 critic_loss=151245294796.8000 entropy=16.6044 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 17400] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-339914.7 mean_steps=18.2
|
|
[Episode 17410] reward=-118363123.0 actor_loss=0.2561 critic_loss=159050646764.3077 entropy=16.6045 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 17420] reward=-114296453.6 actor_loss=0.3718 critic_loss=152640759398.4000 entropy=16.6057 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 17420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-410952.9 mean_steps=17.4
|
|
[Episode 17430] reward=-115546047.7 actor_loss=0.3257 critic_loss=158153614950.4000 entropy=16.6161 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 17440] reward=-107831903.0 actor_loss=0.2914 critic_loss=154162322909.8667 entropy=16.6003 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 17440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-508676.6 mean_steps=15.2
|
|
[Episode 17450] reward=-116787956.8 actor_loss=0.2662 critic_loss=156014927127.2727 entropy=16.6218 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 17460] reward=-112048630.1 actor_loss=0.3443 critic_loss=147439968477.4054 entropy=16.6171 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 17460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427502.2 mean_steps=15.4
|
|
[Episode 17470] reward=-111586944.3 actor_loss=0.3042 critic_loss=153040483487.2889 entropy=16.6235 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 17480] reward=-107498234.0 actor_loss=0.2763 critic_loss=144091190067.2000 entropy=16.6174 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 17480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402068.2 mean_steps=15.6
|
|
[Episode 17490] reward=-110648967.9 actor_loss=0.2708 critic_loss=145242703462.4000 entropy=16.6165 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 17500] reward=-110901796.7 actor_loss=0.3370 critic_loss=145752615230.5778 entropy=16.6034 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 17500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-507145.3 mean_steps=12.0
|
|
[Episode 17510] reward=-113546109.3 actor_loss=0.2792 critic_loss=150857360998.4000 entropy=16.6121 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 17520] reward=-108125502.9 actor_loss=0.4005 critic_loss=142338524457.2903 entropy=16.6318 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 17520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-542649.9 mean_steps=12.8
|
|
[Episode 17530] reward=-115859667.8 actor_loss=0.3030 critic_loss=157650335243.3778 entropy=16.6108 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 17540] reward=-106898951.7 actor_loss=0.3367 critic_loss=145681963235.5555 entropy=16.6058 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 17540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-461931.3 mean_steps=14.2
|
|
[Episode 17550] reward=-106177988.8 actor_loss=0.3867 critic_loss=145674798148.2667 entropy=16.6025 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 17560] reward=-103847102.5 actor_loss=0.3480 critic_loss=136501454074.3111 entropy=16.5822 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 17560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-604030.2 mean_steps=12.8
|
|
[Episode 17570] reward=-110858815.3 actor_loss=0.3186 critic_loss=151501343948.8000 entropy=16.5996 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 17580] reward=-112159431.3 actor_loss=0.3149 critic_loss=148901535379.9111 entropy=16.5706 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 17580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-527571.7 mean_steps=12.8
|
|
[Episode 17590] reward=-113015467.6 actor_loss=0.3326 critic_loss=152403976972.1905 entropy=16.5552 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 17600] reward=-111172882.5 actor_loss=0.2747 critic_loss=152534133418.6667 entropy=16.5497 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 17600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562852.0 mean_steps=12.8
|
|
[Episode 17610] reward=-107792479.6 actor_loss=0.2385 critic_loss=142617239552.0000 entropy=16.5372 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 17620] reward=-112888188.2 actor_loss=0.3067 critic_loss=151186574358.7556 entropy=16.5455 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 17620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573676.4 mean_steps=13.7
|
|
[Episode 17630] reward=-113531436.5 actor_loss=0.3288 critic_loss=149570426925.5111 entropy=16.5406 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 17640] reward=-102249461.2 actor_loss=0.3933 critic_loss=129364538709.3333 entropy=16.5287 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 17640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521381.0 mean_steps=14.1
|
|
[Episode 17650] reward=-108921353.1 actor_loss=0.3903 critic_loss=143260294257.7778 entropy=16.5530 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 17660] reward=-109569277.8 actor_loss=0.4810 critic_loss=146351393450.6667 entropy=16.5739 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 17660] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-693582.1 mean_steps=10.4
|
|
[Episode 17670] reward=-104277852.3 actor_loss=0.4647 critic_loss=136787349595.0222 entropy=16.5612 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 17680] reward=-106223239.6 actor_loss=0.4141 critic_loss=140799346460.4445 entropy=16.5820 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 17680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572631.6 mean_steps=13.0
|
|
[Episode 17690] reward=-105627526.3 actor_loss=0.1769 critic_loss=137527818831.6444 entropy=16.5809 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 17700] reward=-111601413.8 actor_loss=0.3794 critic_loss=148703197775.6444 entropy=16.5928 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 17700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-630889.5 mean_steps=12.7
|
|
[Episode 17710] reward=-104149720.6 actor_loss=0.3932 critic_loss=144380889411.3684 entropy=16.6039 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 17720] reward=-106566802.3 actor_loss=0.3078 critic_loss=139873021314.8445 entropy=16.5964 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 17720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458846.3 mean_steps=14.8
|
|
[Episode 17730] reward=-107020052.5 actor_loss=0.3365 critic_loss=142880861817.9048 entropy=16.5970 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 17740] reward=-112253941.3 actor_loss=0.3330 critic_loss=151007559680.0000 entropy=16.5960 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 17740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-466032.4 mean_steps=15.8
|
|
[Episode 17750] reward=-113804054.7 actor_loss=0.2869 critic_loss=150253117622.0444 entropy=16.5984 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 17760] reward=-118890129.5 actor_loss=0.3037 critic_loss=157521383697.0667 entropy=16.6104 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 17760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-460616.7 mean_steps=16.2
|
|
[Episode 17770] reward=-115472911.9 actor_loss=0.3181 critic_loss=155112524334.5454 entropy=16.6440 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 17780] reward=-107089676.0 actor_loss=0.3707 critic_loss=142467341243.7333 entropy=16.6425 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 17780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539673.8 mean_steps=13.4
|
|
[Episode 17790] reward=-108919578.9 actor_loss=0.3354 critic_loss=152571500071.3846 entropy=16.6343 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 17800] reward=-106847572.1 actor_loss=0.3853 critic_loss=142409514643.9111 entropy=16.6367 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 17800] success_rate=0.300 qp_infeasible_rate=0.650 mean_return=-527627.8 mean_steps=173.4
|
|
[Episode 17810] reward=-104881707.4 actor_loss=0.2840 critic_loss=138686452829.0909 entropy=16.6533 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 17820] reward=-118912125.2 actor_loss=0.3285 critic_loss=161381389289.2444 entropy=16.6409 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 17820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424038.1 mean_steps=14.6
|
|
[Episode 17830] reward=-116964367.6 actor_loss=0.2775 critic_loss=153999286454.0444 entropy=16.6445 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 17840] reward=-115867450.7 actor_loss=0.2380 critic_loss=152944248149.3333 entropy=16.6429 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 17840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-570284.1 mean_steps=12.8
|
|
[Episode 17850] reward=-115075941.6 actor_loss=0.2895 critic_loss=158270131299.0968 entropy=16.6598 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 17860] reward=-113548551.9 actor_loss=0.2947 critic_loss=151410205218.1333 entropy=16.6590 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 17860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510067.9 mean_steps=13.2
|
|
[Episode 17870] reward=-107969586.0 actor_loss=0.3934 critic_loss=143842560500.6222 entropy=16.6576 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 17880] reward=-108740219.1 actor_loss=0.3215 critic_loss=142463181346.1333 entropy=16.6573 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 17880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471786.9 mean_steps=13.8
|
|
[Episode 17890] reward=-111244627.2 actor_loss=0.2963 critic_loss=149017273344.0000 entropy=16.6547 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 17900] reward=-107299210.2 actor_loss=0.4158 critic_loss=138934687425.4222 entropy=16.6448 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 17900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515728.3 mean_steps=14.2
|
|
[Episode 17910] reward=-111825800.9 actor_loss=0.2669 critic_loss=151574069760.0000 entropy=16.6314 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 17920] reward=-111072938.7 actor_loss=0.4139 critic_loss=149193184597.3333 entropy=16.6270 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 17920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-495002.5 mean_steps=13.4
|
|
[Episode 17930] reward=-111990991.2 actor_loss=0.2604 critic_loss=150006355649.4222 entropy=16.6351 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 17940] reward=-115388939.7 actor_loss=0.2772 critic_loss=151932335991.4667 entropy=16.6409 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 17940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431187.6 mean_steps=15.7
|
|
[Episode 17950] reward=-107388833.6 actor_loss=0.4325 critic_loss=139762995655.1111 entropy=16.6360 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 17960] reward=-110658143.8 actor_loss=0.3974 critic_loss=152874050446.2222 entropy=16.6343 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 17960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489361.2 mean_steps=14.1
|
|
[Episode 17970] reward=-113114637.6 actor_loss=0.2472 critic_loss=150802986507.3778 entropy=16.6360 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 17980] reward=-107637429.0 actor_loss=0.3461 critic_loss=142269676657.7778 entropy=16.6548 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 17980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501967.3 mean_steps=13.3
|
|
[Episode 17990] reward=-111666608.9 actor_loss=0.3700 critic_loss=152254924208.3556 entropy=16.6636 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 18000] reward=-106205243.8 actor_loss=0.4056 critic_loss=140593898382.2222 entropy=16.6862 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 18000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562475.1 mean_steps=12.9
|
|
[Episode 18010] reward=-116845689.4 actor_loss=0.2940 critic_loss=151661475521.4222 entropy=16.6984 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 18020] reward=-111718220.1 actor_loss=0.2826 critic_loss=145366856863.2889 entropy=16.7095 approx_kl=0.0039 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 18020] success_rate=0.200 qp_infeasible_rate=0.750 mean_return=-585152.1 mean_steps=171.6
|
|
[Episode 18030] reward=-111110543.5 actor_loss=0.3311 critic_loss=146142021586.4889 entropy=16.7155 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 18040] reward=-112080413.0 actor_loss=0.2793 critic_loss=147831867528.5333 entropy=16.7318 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 18040] success_rate=0.250 qp_infeasible_rate=0.700 mean_return=-533432.0 mean_steps=172.2
|
|
[Episode 18050] reward=-110505459.7 actor_loss=0.2450 critic_loss=149384668228.2667 entropy=16.7293 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 18060] reward=-113005098.4 actor_loss=0.2610 critic_loss=152048327856.5517 entropy=16.7162 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 18060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463923.7 mean_steps=15.3
|
|
[Episode 18070] reward=-110699715.7 actor_loss=0.4170 critic_loss=147347741857.6842 entropy=16.7016 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 18080] reward=-103609988.9 actor_loss=0.3387 critic_loss=139817788302.2222 entropy=16.6931 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 18080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535849.1 mean_steps=13.9
|
|
[Episode 18090] reward=-103514519.2 actor_loss=0.3129 critic_loss=146477911972.9778 entropy=16.6775 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 18100] reward=-112775194.4 actor_loss=0.3639 critic_loss=154187487277.5111 entropy=16.6796 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 18100] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-651005.0 mean_steps=10.3
|
|
[Episode 18110] reward=-104581861.9 actor_loss=0.4125 critic_loss=136818051208.5333 entropy=16.6748 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 18120] reward=-110104444.4 actor_loss=0.3409 critic_loss=144889611825.5484 entropy=16.6797 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 18120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-543305.0 mean_steps=12.6
|
|
[Episode 18130] reward=-106781667.8 actor_loss=0.3354 critic_loss=142650150638.9333 entropy=16.6806 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 18140] reward=-113406731.7 actor_loss=0.3094 critic_loss=151670902620.1600 entropy=16.6838 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 18140] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-709464.9 mean_steps=10.8
|
|
[Episode 18150] reward=-108247836.5 actor_loss=0.3201 critic_loss=142305569996.8000 entropy=16.6894 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 18160] reward=-112774079.2 actor_loss=0.2798 critic_loss=152727343377.0667 entropy=16.6921 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 18160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-542421.4 mean_steps=12.2
|
|
[Episode 18170] reward=-108862690.1 actor_loss=0.3489 critic_loss=142049513745.0667 entropy=16.7080 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 18180] reward=-117861575.3 actor_loss=0.2822 critic_loss=159774057722.3111 entropy=16.7129 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 18180] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-285245.7 mean_steps=18.0
|
|
[Episode 18190] reward=-113186693.7 actor_loss=0.3834 critic_loss=156012997290.6667 entropy=16.7211 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 18200] reward=-112000955.4 actor_loss=0.3057 critic_loss=153302998493.8667 entropy=16.7315 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 18200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-414298.5 mean_steps=14.7
|
|
[Episode 18210] reward=-111412182.7 actor_loss=0.3086 critic_loss=146666467419.0222 entropy=16.7370 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 18220] reward=-108136861.4 actor_loss=0.2916 critic_loss=145967376702.5778 entropy=16.7339 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 18220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405602.8 mean_steps=15.3
|
|
[Episode 18230] reward=-110513367.8 actor_loss=0.3632 critic_loss=149364397033.2444 entropy=16.7306 approx_kl=0.0044 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 18240] reward=-111094926.2 actor_loss=0.3610 critic_loss=153549540631.2727 entropy=16.7163 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 18240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540357.4 mean_steps=13.3
|
|
[Episode 18250] reward=-104942204.3 actor_loss=0.3623 critic_loss=141487530530.1333 entropy=16.7061 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 18260] reward=-106668350.3 actor_loss=0.3073 critic_loss=142881876587.1628 entropy=16.7092 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 18260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524542.0 mean_steps=14.1
|
|
[Episode 18270] reward=-108688303.2 actor_loss=0.3652 critic_loss=142847260717.5111 entropy=16.7231 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 18280] reward=-116834231.3 actor_loss=0.3160 critic_loss=157714670478.2222 entropy=16.7292 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 18280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563420.7 mean_steps=14.0
|
|
[Episode 18290] reward=-113769472.9 actor_loss=0.3654 critic_loss=151499887957.3333 entropy=16.7379 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 18300] reward=-108468398.1 actor_loss=0.3691 critic_loss=145868505816.1778 entropy=16.7696 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 18300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481546.3 mean_steps=14.3
|
|
[Episode 18310] reward=-104661745.8 actor_loss=0.3670 critic_loss=141348583424.0000 entropy=16.7694 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 18320] reward=-113695201.7 actor_loss=0.3146 critic_loss=150376193024.0000 entropy=16.7658 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 18320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-516613.4 mean_steps=15.6
|
|
[Episode 18330] reward=-115860695.8 actor_loss=0.3040 critic_loss=160421419417.6000 entropy=16.7687 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 18340] reward=-116598798.3 actor_loss=0.2990 critic_loss=156390055073.6842 entropy=16.7580 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 18340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536672.1 mean_steps=14.1
|
|
[Episode 18350] reward=-111471678.6 actor_loss=0.4088 critic_loss=153282865470.5778 entropy=16.7539 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 18360] reward=-112867396.6 actor_loss=0.2946 critic_loss=150868210119.1111 entropy=16.7590 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 18360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402632.6 mean_steps=16.4
|
|
[Episode 18370] reward=-115247312.8 actor_loss=0.3887 critic_loss=151066419681.8824 entropy=16.7758 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 18380] reward=-107540002.2 actor_loss=0.3043 critic_loss=139365726344.5333 entropy=16.7755 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 18380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-495804.1 mean_steps=13.4
|
|
[Episode 18390] reward=-103875561.9 actor_loss=0.4115 critic_loss=145482031650.1333 entropy=16.7845 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 18400] reward=-108488930.9 actor_loss=0.3091 critic_loss=144640995783.1111 entropy=16.7855 approx_kl=0.0035 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 18400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436248.2 mean_steps=16.4
|
|
[Episode 18410] reward=-109935139.0 actor_loss=0.4450 critic_loss=147241428036.2667 entropy=16.8278 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 18420] reward=-105934246.4 actor_loss=0.3207 critic_loss=147134063372.1905 entropy=16.8252 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 18420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-440791.7 mean_steps=17.1
|
|
[Episode 18430] reward=-115634199.1 actor_loss=0.2521 critic_loss=160302488234.6667 entropy=16.8356 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 18440] reward=-114911620.2 actor_loss=0.2650 critic_loss=156233011837.1555 entropy=16.8330 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 18440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-599274.0 mean_steps=13.8
|
|
[Episode 18450] reward=-116521263.5 actor_loss=0.3031 critic_loss=155136166496.8649 entropy=16.8339 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 18460] reward=-109749435.4 actor_loss=0.3193 critic_loss=142921811922.4889 entropy=16.8210 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 18460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462369.6 mean_steps=14.2
|
|
[Episode 18470] reward=-114697055.7 actor_loss=0.3137 critic_loss=158968148878.2222 entropy=16.8111 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 18480] reward=-113126245.3 actor_loss=0.2914 critic_loss=149227197053.1555 entropy=16.8095 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 18480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434074.9 mean_steps=14.8
|
|
[Episode 18490] reward=-115195297.6 actor_loss=0.3188 critic_loss=156432237636.2667 entropy=16.8121 approx_kl=0.0038 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 18500] reward=-106817334.4 actor_loss=0.3052 critic_loss=142968191021.5111 entropy=16.7881 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 18500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463199.2 mean_steps=16.2
|
|
[Episode 18510] reward=-113350674.1 actor_loss=0.3407 critic_loss=152398692898.1333 entropy=16.7982 approx_kl=0.0032 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 18520] reward=-111293938.0 actor_loss=0.3209 critic_loss=147417685168.5517 entropy=16.8060 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 18520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433447.8 mean_steps=16.1
|
|
[Episode 18530] reward=-118734859.5 actor_loss=0.3355 critic_loss=161605847904.7111 entropy=16.7944 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 18540] reward=-116793772.6 actor_loss=0.2797 critic_loss=158348256506.3111 entropy=16.7874 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 18540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481441.7 mean_steps=14.1
|
|
[Episode 18550] reward=-117042306.5 actor_loss=0.2868 critic_loss=153395868467.2000 entropy=16.7957 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 18560] reward=-109783412.2 actor_loss=0.3048 critic_loss=150295689443.5555 entropy=16.8086 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 18560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-510290.4 mean_steps=15.4
|
|
[Episode 18570] reward=-102061694.1 actor_loss=0.3339 critic_loss=138712900364.1905 entropy=16.8285 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 18580] reward=-114398518.7 actor_loss=0.2805 critic_loss=152862845246.5778 entropy=16.8314 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 18580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424937.4 mean_steps=14.4
|
|
[Episode 18590] reward=-114206855.3 actor_loss=0.3693 critic_loss=154151887394.1333 entropy=16.8429 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 18600] reward=-109234448.4 actor_loss=0.3301 critic_loss=148478572407.4667 entropy=16.8477 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 18600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-426277.2 mean_steps=15.2
|
|
[Episode 18610] reward=-113531339.8 actor_loss=0.3545 critic_loss=200326496984.1778 entropy=16.8566 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 18620] reward=-115434192.9 actor_loss=0.3591 critic_loss=170802590641.2308 entropy=16.8738 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 18620] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-387431.0 mean_steps=17.2
|
|
[Episode 18630] reward=-114682859.3 actor_loss=0.2981 critic_loss=161781912274.8235 entropy=16.8677 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 18640] reward=-111400368.2 actor_loss=0.3769 critic_loss=149883167630.2222 entropy=16.8559 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 18640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-443116.2 mean_steps=14.7
|
|
[Episode 18650] reward=-111793381.1 actor_loss=0.3351 critic_loss=153651215291.7333 entropy=16.8471 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 18660] reward=-113627788.6 actor_loss=0.2727 critic_loss=150801819602.4889 entropy=16.8597 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 18660] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-258482.8 mean_steps=18.0
|
|
[Episode 18670] reward=-112976160.6 actor_loss=0.3082 critic_loss=155530943237.6889 entropy=16.8618 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 18680] reward=-113020345.4 actor_loss=0.3036 critic_loss=155205278105.6000 entropy=16.8703 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 18680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528077.6 mean_steps=14.5
|
|
[Episode 18690] reward=-115776241.0 actor_loss=0.2562 critic_loss=155879532134.4000 entropy=16.8607 approx_kl=0.0042 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 18700] reward=-115296131.2 actor_loss=0.3454 critic_loss=154422786275.5555 entropy=16.8516 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 18700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445290.7 mean_steps=15.8
|
|
[Episode 18710] reward=-109888179.4 actor_loss=0.4826 critic_loss=145580872317.1555 entropy=16.8290 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1510 front_blocked=0
|
|
[Episode 18720] reward=-117672343.6 actor_loss=0.2447 critic_loss=155757233617.4546 entropy=16.8008 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 18720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-339785.2 mean_steps=16.1
|
|
[Episode 18730] reward=-104729363.7 actor_loss=0.4162 critic_loss=141395880800.7111 entropy=16.7806 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 18740] reward=-106113191.6 actor_loss=0.2830 critic_loss=143692835986.2857 entropy=16.7997 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 18740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409442.0 mean_steps=16.5
|
|
[Episode 18750] reward=-109481339.8 actor_loss=0.2911 critic_loss=140362963535.6444 entropy=16.7729 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 18760] reward=-113750378.6 actor_loss=0.5332 critic_loss=155358628704.7111 entropy=16.7991 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1536 front_blocked=0
|
|
[Eval 18760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584266.0 mean_steps=13.1
|
|
[Episode 18770] reward=-111046930.4 actor_loss=0.3536 critic_loss=146401290740.6222 entropy=16.8052 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 18780] reward=-114273982.5 actor_loss=0.2223 critic_loss=156783491299.5555 entropy=16.8304 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 18780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-629741.2 mean_steps=13.4
|
|
[Episode 18790] reward=-114423959.8 actor_loss=0.2981 critic_loss=152301536560.4324 entropy=16.8269 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 18800] reward=-110825287.4 actor_loss=0.3468 critic_loss=145654472400.5926 entropy=16.8343 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 18800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563286.6 mean_steps=13.4
|
|
[Episode 18810] reward=-117345312.9 actor_loss=0.2693 critic_loss=162705581670.4000 entropy=16.8541 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 18820] reward=-113036433.8 actor_loss=0.2775 critic_loss=149634672230.4000 entropy=16.8360 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 18820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573677.5 mean_steps=13.8
|
|
[Episode 18830] reward=-109756601.7 actor_loss=0.2878 critic_loss=147818137372.4445 entropy=16.8410 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 18840] reward=-111327481.5 actor_loss=0.3593 critic_loss=142899461597.8667 entropy=16.8257 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 18840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-591190.3 mean_steps=13.2
|
|
[Episode 18850] reward=-110472464.2 actor_loss=0.3280 critic_loss=147153481090.8445 entropy=16.8213 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 18860] reward=-112889425.5 actor_loss=0.2497 critic_loss=147848078222.2222 entropy=16.8230 approx_kl=0.0026 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 18860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553850.7 mean_steps=13.6
|
|
[Episode 18870] reward=-117246323.2 actor_loss=0.2613 critic_loss=155615503974.4000 entropy=16.8226 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 18880] reward=-114179906.7 actor_loss=0.3058 critic_loss=146583985629.8667 entropy=16.8295 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 18880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-625692.9 mean_steps=12.8
|
|
[Episode 18890] reward=-110939314.0 actor_loss=0.2609 critic_loss=142340651053.5111 entropy=16.8485 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 18900] reward=-116489096.7 actor_loss=0.2762 critic_loss=158008015803.7333 entropy=16.8440 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 18900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535765.2 mean_steps=13.8
|
|
[Episode 18910] reward=-111313703.9 actor_loss=0.3309 critic_loss=143161456321.4222 entropy=16.8554 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 18920] reward=-113489418.5 actor_loss=0.3784 critic_loss=149939626894.2222 entropy=16.8464 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 18920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564000.7 mean_steps=13.9
|
|
[Episode 18930] reward=-111130180.5 actor_loss=0.3283 critic_loss=139828154459.0222 entropy=16.8202 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 18940] reward=-117697266.3 actor_loss=0.2729 critic_loss=153423118336.0000 entropy=16.8045 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 18940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505090.0 mean_steps=14.3
|
|
[Episode 18950] reward=-108971101.1 actor_loss=0.2652 critic_loss=144093308154.3111 entropy=16.8232 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 18960] reward=-114526276.3 actor_loss=0.3432 critic_loss=149023923833.9048 entropy=16.8235 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 18960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-424464.1 mean_steps=13.8
|
|
[Episode 18970] reward=-115396148.2 actor_loss=0.2791 critic_loss=153217090992.3556 entropy=16.8292 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 18980] reward=-109455272.5 actor_loss=0.3105 critic_loss=140674674688.0000 entropy=16.8237 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 18980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528841.4 mean_steps=13.2
|
|
[Episode 18990] reward=-115041769.8 actor_loss=0.2628 critic_loss=153458656324.2667 entropy=16.8319 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 19000] reward=-118382105.3 actor_loss=0.3639 critic_loss=159320789937.2308 entropy=16.8335 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 19000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543747.7 mean_steps=13.6
|
|
[Episode 19010] reward=-113845959.3 actor_loss=0.2879 critic_loss=148296765952.0000 entropy=16.8425 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 19020] reward=-118788512.5 actor_loss=0.3059 critic_loss=157951262720.0000 entropy=16.8487 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 19020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-475711.7 mean_steps=15.6
|
|
[Episode 19030] reward=-108812721.9 actor_loss=0.3737 critic_loss=142346187651.1219 entropy=16.8347 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 19040] reward=-108336302.2 actor_loss=0.3648 critic_loss=140287497739.3778 entropy=16.8393 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 19040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540657.2 mean_steps=13.3
|
|
[Episode 19050] reward=-113394115.1 actor_loss=0.2931 critic_loss=147175653558.0444 entropy=16.8433 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 19060] reward=-116874706.9 actor_loss=0.2516 critic_loss=159404838412.4878 entropy=16.8450 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 19060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507194.3 mean_steps=13.2
|
|
[Episode 19070] reward=-112795314.4 actor_loss=0.2411 critic_loss=147453815552.0000 entropy=16.8715 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 19080] reward=-115054010.8 actor_loss=0.2384 critic_loss=151675357803.1628 entropy=16.8417 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 19080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-583321.2 mean_steps=12.6
|
|
[Episode 19090] reward=-117354808.4 actor_loss=0.3619 critic_loss=157765440853.3333 entropy=16.8449 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 19100] reward=-113620928.8 actor_loss=0.3020 critic_loss=151983187558.4000 entropy=16.8612 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 19100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538468.5 mean_steps=13.9
|
|
[Episode 19110] reward=-111342442.9 actor_loss=0.2730 critic_loss=141705221643.3778 entropy=16.8559 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 19120] reward=-113405464.1 actor_loss=0.3128 critic_loss=151986079880.5333 entropy=16.8736 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 19120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475597.9 mean_steps=14.7
|
|
[Episode 19130] reward=-110324733.5 actor_loss=0.3156 critic_loss=141796676221.1555 entropy=16.8785 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 19140] reward=-117919392.0 actor_loss=0.2541 critic_loss=152481654374.4000 entropy=16.8955 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 19140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553707.3 mean_steps=13.2
|
|
[Episode 19150] reward=-114437853.1 actor_loss=0.3501 critic_loss=157643189270.7556 entropy=16.9063 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 19160] reward=-120317505.6 actor_loss=0.2457 critic_loss=169606759310.2222 entropy=16.9001 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 19160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488894.8 mean_steps=14.6
|
|
[Episode 19170] reward=-116564646.6 actor_loss=0.3860 critic_loss=149738971682.1333 entropy=16.9257 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 19180] reward=-117771672.5 actor_loss=0.2784 critic_loss=160790125317.6889 entropy=16.9312 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 19180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430796.0 mean_steps=14.6
|
|
[Episode 19190] reward=-114490128.9 actor_loss=0.2742 critic_loss=154282108928.0000 entropy=16.9161 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 19200] reward=-114008535.3 actor_loss=0.3446 critic_loss=162680174425.9460 entropy=16.9525 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 19200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-361584.9 mean_steps=15.9
|
|
[Episode 19210] reward=-112041113.6 actor_loss=0.2083 critic_loss=146444223427.7647 entropy=16.9563 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 19220] reward=-112617998.0 actor_loss=0.3503 critic_loss=159421872061.9355 entropy=16.9453 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 19220] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-381153.4 mean_steps=17.1
|
|
[Episode 19230] reward=-113647760.6 actor_loss=0.2702 critic_loss=147171114279.8222 entropy=16.9753 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 19240] reward=-116328352.5 actor_loss=0.3079 critic_loss=154355248911.0588 entropy=17.0069 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 19240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-494027.6 mean_steps=12.9
|
|
[Episode 19250] reward=-114986366.5 actor_loss=0.2656 critic_loss=154177971086.2222 entropy=17.0087 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 19260] reward=-114587693.7 actor_loss=0.2682 critic_loss=156075912457.4815 entropy=17.0036 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 19260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434664.2 mean_steps=14.2
|
|
[Episode 19270] reward=-116050884.6 actor_loss=0.2519 critic_loss=155009630736.5161 entropy=17.0049 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 19280] reward=-116441307.6 actor_loss=0.4436 critic_loss=155733273395.2000 entropy=17.0363 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 19280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-645701.8 mean_steps=11.9
|
|
[Episode 19290] reward=-114009432.1 actor_loss=0.3694 critic_loss=149192500163.7647 entropy=17.0448 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 19300] reward=-120106890.7 actor_loss=0.2045 critic_loss=158849176917.3333 entropy=17.0341 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 19300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585254.4 mean_steps=13.2
|
|
[Episode 19310] reward=-114311448.5 actor_loss=0.3843 critic_loss=151108820081.7778 entropy=17.0395 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 19320] reward=-106234681.0 actor_loss=0.3792 critic_loss=140584324066.7429 entropy=17.0520 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 19320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440682.2 mean_steps=15.8
|
|
[Episode 19330] reward=-117877548.8 actor_loss=0.2991 critic_loss=155457866020.5714 entropy=17.0624 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 19340] reward=-106505772.6 actor_loss=0.2746 critic_loss=150023120850.4889 entropy=17.0641 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 19340] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-665713.5 mean_steps=11.3
|
|
[Episode 19350] reward=-113189591.8 actor_loss=0.2441 critic_loss=152144613284.9778 entropy=17.0846 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 19360] reward=-115696202.1 actor_loss=0.2864 critic_loss=174505409299.6923 entropy=17.0801 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 19360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597601.8 mean_steps=12.6
|
|
[Episode 19370] reward=-114088083.6 actor_loss=0.2167 critic_loss=152556809602.8445 entropy=17.0815 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 19380] reward=-113928647.3 actor_loss=0.2951 critic_loss=156648780396.6060 entropy=17.0834 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 19380] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-313812.6 mean_steps=18.5
|
|
[Episode 19390] reward=-113787109.4 actor_loss=0.3227 critic_loss=158005427010.3704 entropy=17.0690 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 19400] reward=-117280592.1 actor_loss=0.3030 critic_loss=158944653721.6000 entropy=17.0811 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 19400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-460721.1 mean_steps=15.6
|
|
[Episode 19410] reward=-108806724.5 actor_loss=0.3533 critic_loss=150229333515.3778 entropy=17.0677 approx_kl=0.0045 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 19420] reward=-119230517.9 actor_loss=0.1661 critic_loss=158137258257.0667 entropy=17.0637 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 19420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-509183.5 mean_steps=14.8
|
|
[Episode 19430] reward=-115356180.6 actor_loss=0.4012 critic_loss=156819811714.8445 entropy=17.0560 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 19440] reward=-117752059.4 actor_loss=0.3162 critic_loss=162116479502.6286 entropy=17.0732 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 19440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541187.0 mean_steps=14.1
|
|
[Episode 19450] reward=-109454072.4 actor_loss=0.4358 critic_loss=143873014351.6444 entropy=17.0749 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 19460] reward=-116557963.9 actor_loss=0.3403 critic_loss=156017521152.0000 entropy=17.0474 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 19460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553311.6 mean_steps=14.4
|
|
[Episode 19470] reward=-114521947.7 actor_loss=0.3241 critic_loss=158995523538.4889 entropy=17.0361 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 19480] reward=-111821625.4 actor_loss=0.3871 critic_loss=148120633799.1111 entropy=17.0379 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 19480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505938.1 mean_steps=14.2
|
|
[Episode 19490] reward=-114847788.8 actor_loss=0.2794 critic_loss=157436783638.7556 entropy=17.0369 approx_kl=0.0043 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 19500] reward=-124046985.1 actor_loss=0.1953 critic_loss=179588537093.6889 entropy=17.0244 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 19500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429414.0 mean_steps=15.0
|
|
[Episode 19510] reward=-115643111.1 actor_loss=0.3497 critic_loss=152402023037.1555 entropy=17.0309 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 19520] reward=-113826381.8 actor_loss=0.3970 critic_loss=155407081472.0000 entropy=17.0530 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 19520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476745.8 mean_steps=13.6
|
|
[Episode 19530] reward=-119067346.8 actor_loss=0.3302 critic_loss=161867733765.6889 entropy=17.0385 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 19540] reward=-110146740.4 actor_loss=0.3392 critic_loss=149599985078.8571 entropy=17.0511 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 19540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494059.0 mean_steps=14.2
|
|
[Episode 19550] reward=-113772510.4 actor_loss=0.2713 critic_loss=163224527030.0444 entropy=17.0422 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 19560] reward=-110902597.2 actor_loss=0.2952 critic_loss=151632695933.1555 entropy=17.0508 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 19560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587175.7 mean_steps=13.9
|
|
[Episode 19570] reward=-114117640.5 actor_loss=0.2994 critic_loss=151512958020.2667 entropy=17.0276 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 19580] reward=-116090119.0 actor_loss=0.3005 critic_loss=154594791575.7037 entropy=17.0397 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 19580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-617859.8 mean_steps=12.2
|
|
[Episode 19590] reward=-120883827.9 actor_loss=0.2940 critic_loss=161082062848.0000 entropy=17.0411 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 19600] reward=-109619779.7 actor_loss=0.3213 critic_loss=147374925141.3333 entropy=17.0356 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 19600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466169.1 mean_steps=15.2
|
|
[Episode 19610] reward=-121198032.0 actor_loss=0.2139 critic_loss=165588464515.1219 entropy=17.0552 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 19620] reward=-112223874.3 actor_loss=0.2483 critic_loss=147737083483.8974 entropy=17.0504 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 19620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529068.3 mean_steps=14.7
|
|
[Episode 19630] reward=-119777738.3 actor_loss=0.2274 critic_loss=161994754184.5333 entropy=17.0425 approx_kl=0.0103 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 19640] reward=-112970213.1 actor_loss=0.2785 critic_loss=151296070997.3333 entropy=17.0499 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 19640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512483.3 mean_steps=14.3
|
|
[Episode 19650] reward=-112872017.1 actor_loss=0.3857 critic_loss=153886928404.4800 entropy=17.0605 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 19660] reward=-108896359.9 actor_loss=0.3415 critic_loss=144298622464.0000 entropy=17.0485 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 19660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-618024.6 mean_steps=13.2
|
|
[Episode 19670] reward=-109467964.1 actor_loss=0.3240 critic_loss=144228163948.0889 entropy=17.0544 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 19680] reward=-117507436.5 actor_loss=0.2739 critic_loss=155927824520.5333 entropy=17.0486 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 19680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-333550.5 mean_steps=15.9
|
|
[Episode 19690] reward=-116939187.1 actor_loss=0.2905 critic_loss=154745734576.3556 entropy=17.0399 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 19700] reward=-113966614.3 actor_loss=0.3461 critic_loss=149810482380.8000 entropy=17.0411 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 19700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476010.7 mean_steps=15.3
|
|
[Episode 19710] reward=-111515310.4 actor_loss=0.2923 critic_loss=155999881898.6667 entropy=17.0513 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 19720] reward=-112226479.6 actor_loss=0.3146 critic_loss=153266432445.2174 entropy=17.0610 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 19720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-633410.6 mean_steps=12.2
|
|
[Episode 19730] reward=-113454892.4 actor_loss=0.2844 critic_loss=149939604684.8000 entropy=17.0743 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 19740] reward=-110691267.5 actor_loss=0.2943 critic_loss=148233209992.5333 entropy=17.0992 approx_kl=0.0052 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 19740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-540198.5 mean_steps=12.6
|
|
[Episode 19750] reward=-112970743.7 actor_loss=0.3530 critic_loss=158182348845.5111 entropy=17.0959 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 19760] reward=-121666354.5 actor_loss=0.2744 critic_loss=163974872715.6364 entropy=17.1019 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 19760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576156.9 mean_steps=13.7
|
|
[Episode 19770] reward=-110653929.0 actor_loss=0.3229 critic_loss=185169442679.4667 entropy=17.0901 approx_kl=0.0040 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 19780] reward=-120831997.0 actor_loss=0.3372 critic_loss=164499362793.2444 entropy=17.0979 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 19780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459826.1 mean_steps=14.9
|
|
[Episode 19790] reward=-113141600.7 actor_loss=0.2296 critic_loss=154262246134.5185 entropy=17.0799 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 19800] reward=-138775995.4 actor_loss=0.2969 critic_loss=2088121994288.7620 entropy=17.0982 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 19800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537017.6 mean_steps=13.8
|
|
[Episode 19810] reward=-117944781.4 actor_loss=0.4045 critic_loss=157167665152.0000 entropy=17.0940 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 19820] reward=-114338277.9 actor_loss=0.3426 critic_loss=151020438232.1778 entropy=17.1028 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 19820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-436377.8 mean_steps=14.8
|
|
[Episode 19830] reward=-112869417.7 actor_loss=0.3416 critic_loss=149604231668.6222 entropy=17.0887 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 19840] reward=-116449352.3 actor_loss=0.3333 critic_loss=160639449245.5385 entropy=17.0893 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 19840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-374725.4 mean_steps=14.8
|
|
[Episode 19850] reward=-117807013.8 actor_loss=0.3490 critic_loss=159144528858.0741 entropy=17.0946 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 19860] reward=-116333798.4 actor_loss=0.3071 critic_loss=154186699753.2444 entropy=17.0893 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 19860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482459.1 mean_steps=14.8
|
|
[Episode 19870] reward=-111155136.8 actor_loss=0.2944 critic_loss=156318920004.6829 entropy=17.1032 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 19880] reward=-116020203.0 actor_loss=0.2859 critic_loss=153981244393.2444 entropy=17.0839 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 19880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-395537.9 mean_steps=15.7
|
|
[Episode 19890] reward=-115350908.4 actor_loss=0.3412 critic_loss=155543701640.5333 entropy=17.0799 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 19900] reward=-116916161.0 actor_loss=0.2160 critic_loss=171598995720.2581 entropy=17.0778 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 19900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-516823.1 mean_steps=15.4
|
|
[Episode 19910] reward=-111187095.4 actor_loss=0.3757 critic_loss=154969710309.5172 entropy=17.0667 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 19920] reward=-119361066.3 actor_loss=0.2877 critic_loss=165919544164.8485 entropy=17.0805 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 19920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-646665.2 mean_steps=12.6
|
|
[Episode 19930] reward=-119143415.4 actor_loss=0.3717 critic_loss=169576451780.9231 entropy=17.0825 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 19940] reward=-112842208.7 actor_loss=0.3712 critic_loss=146437269094.4000 entropy=17.0927 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 19940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469806.7 mean_steps=15.2
|
|
[Episode 19950] reward=-117723242.9 actor_loss=0.1846 critic_loss=151507245465.6000 entropy=17.0869 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 19960] reward=-113942404.9 actor_loss=0.2954 critic_loss=151648535620.2667 entropy=17.0787 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 19960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562759.7 mean_steps=13.3
|
|
[Episode 19970] reward=-115087415.8 actor_loss=0.2725 critic_loss=156145627451.0769 entropy=17.0820 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 19980] reward=-113332388.8 actor_loss=0.2979 critic_loss=152287022284.8000 entropy=17.0741 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 19980] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-713907.9 mean_steps=11.5
|
|
[Episode 19990] reward=-122216385.2 actor_loss=0.3001 critic_loss=162227017669.4857 entropy=17.0746 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 20000] reward=-115096811.6 actor_loss=0.3292 critic_loss=153180977754.3529 entropy=17.0709 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 20000] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-277978.7 mean_steps=16.4
|
|
[Episode 20010] reward=-117624464.2 actor_loss=0.3814 critic_loss=159411159040.0000 entropy=17.0694 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 20020] reward=-113943633.5 actor_loss=0.3282 critic_loss=146240765132.8000 entropy=17.0706 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 20020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-622928.4 mean_steps=11.9
|
|
[Episode 20030] reward=-121977139.4 actor_loss=0.3152 critic_loss=167552548208.6400 entropy=17.0888 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 20040] reward=-110377645.8 actor_loss=0.3234 critic_loss=142865102893.5111 entropy=17.0954 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 20040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-362373.4 mean_steps=16.2
|
|
[Episode 20050] reward=-123886731.5 actor_loss=0.2891 critic_loss=166457897415.1111 entropy=17.0986 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 20060] reward=-114590740.0 actor_loss=0.4231 critic_loss=152875242837.3333 entropy=17.0911 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 20060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-429948.2 mean_steps=17.8
|
|
[Episode 20070] reward=-112386172.3 actor_loss=0.3671 critic_loss=151338986154.6667 entropy=17.0803 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 20080] reward=-113077859.2 actor_loss=0.3227 critic_loss=146149058878.5778 entropy=17.0781 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 20080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-481998.5 mean_steps=13.2
|
|
[Episode 20090] reward=-114512627.7 actor_loss=0.3960 critic_loss=156394941518.7692 entropy=17.0888 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 20100] reward=-113759661.4 actor_loss=0.2703 critic_loss=147827382164.2105 entropy=17.1024 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 20100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398442.7 mean_steps=16.6
|
|
[Episode 20110] reward=-112660990.1 actor_loss=0.3720 critic_loss=151935544706.8445 entropy=17.1070 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 20120] reward=-116432647.4 actor_loss=0.2477 critic_loss=153817776492.0889 entropy=17.1330 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 20120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586051.9 mean_steps=12.9
|
|
[Episode 20130] reward=-111340827.9 actor_loss=0.3639 critic_loss=147050437745.7778 entropy=17.1267 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 20140] reward=-113988338.3 actor_loss=0.4193 critic_loss=150194125482.6667 entropy=17.1401 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 20140] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-696857.5 mean_steps=10.7
|
|
[Episode 20150] reward=-117373653.6 actor_loss=0.2830 critic_loss=159849970654.9677 entropy=17.1317 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 20160] reward=-114816792.1 actor_loss=0.2263 critic_loss=145396497885.8667 entropy=17.1395 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 20160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-469233.1 mean_steps=15.6
|
|
[Episode 20170] reward=-120365080.1 actor_loss=0.2899 critic_loss=158300720696.8889 entropy=17.1333 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 20180] reward=-113625973.2 actor_loss=0.3931 critic_loss=153741112479.2889 entropy=17.1445 approx_kl=0.0049 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 20180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-543961.8 mean_steps=14.8
|
|
[Episode 20190] reward=-116198453.7 actor_loss=0.2778 critic_loss=151014434328.3810 entropy=17.1508 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 20200] reward=-114016360.8 actor_loss=0.3987 critic_loss=156697822640.3556 entropy=17.1618 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 20200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465022.7 mean_steps=14.8
|
|
[Episode 20210] reward=-116417998.9 actor_loss=0.3764 critic_loss=156636942609.0667 entropy=17.1807 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 20220] reward=-118700867.2 actor_loss=0.3057 critic_loss=157787676240.8421 entropy=17.1652 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 20220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486317.9 mean_steps=15.1
|
|
[Episode 20230] reward=-115545094.1 actor_loss=0.3363 critic_loss=154856702714.0465 entropy=17.1991 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 20240] reward=-116399077.5 actor_loss=0.3014 critic_loss=156832264086.9744 entropy=17.2027 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 20240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507486.7 mean_steps=14.3
|
|
[Episode 20250] reward=-113406314.3 actor_loss=0.3510 critic_loss=156077967132.4445 entropy=17.2050 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 20260] reward=-121544085.2 actor_loss=0.2936 critic_loss=164171140995.8788 entropy=17.1988 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 20260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425302.6 mean_steps=16.1
|
|
[Episode 20270] reward=-119004922.4 actor_loss=0.2553 critic_loss=157653709085.7675 entropy=17.2137 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 20280] reward=-111180165.3 actor_loss=0.3910 critic_loss=146196377144.8889 entropy=17.2159 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 20280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-336427.3 mean_steps=17.3
|
|
[Episode 20290] reward=-116315005.4 actor_loss=0.3335 critic_loss=156306698649.6000 entropy=17.2261 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 20300] reward=-115439470.4 actor_loss=0.2350 critic_loss=151491298540.3077 entropy=17.2169 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 20300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-491516.7 mean_steps=13.5
|
|
[Episode 20310] reward=-120931177.0 actor_loss=0.2214 critic_loss=165903315035.0222 entropy=17.2196 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 20320] reward=-120567378.9 actor_loss=0.2752 critic_loss=157109115037.5385 entropy=17.2200 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 20320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574899.2 mean_steps=14.1
|
|
[Episode 20330] reward=-121148817.6 actor_loss=0.3033 critic_loss=164834046634.6667 entropy=17.2268 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 20340] reward=-117965695.2 actor_loss=0.2242 critic_loss=155429919129.6000 entropy=17.2404 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 20340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585238.9 mean_steps=12.8
|
|
[Episode 20350] reward=-113085547.2 actor_loss=0.2827 critic_loss=148635698426.3111 entropy=17.2466 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 20360] reward=-113053135.8 actor_loss=0.2922 critic_loss=150961453974.0690 entropy=17.2571 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 20360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-447850.5 mean_steps=13.6
|
|
[Episode 20370] reward=-118284186.6 actor_loss=0.2840 critic_loss=159341890218.6667 entropy=17.2591 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 20380] reward=-118229641.0 actor_loss=0.2479 critic_loss=153664117873.7778 entropy=17.2518 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 20380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518793.1 mean_steps=14.3
|
|
[Episode 20390] reward=-124362786.1 actor_loss=0.2762 critic_loss=173298160981.3333 entropy=17.2417 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 20400] reward=-116251875.6 actor_loss=0.2716 critic_loss=158283477133.2414 entropy=17.2667 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 20400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-529846.2 mean_steps=12.4
|
|
[Episode 20410] reward=-109112807.6 actor_loss=0.3772 critic_loss=143753388032.0000 entropy=17.2671 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 20420] reward=-119327562.6 actor_loss=0.3277 critic_loss=163292862873.6000 entropy=17.2669 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 20420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513518.5 mean_steps=14.6
|
|
[Episode 20430] reward=-123543499.3 actor_loss=0.2543 critic_loss=161598800236.0889 entropy=17.2674 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 20440] reward=-118001306.2 actor_loss=0.2482 critic_loss=162261429452.8000 entropy=17.2791 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 20440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-406835.8 mean_steps=14.2
|
|
[Episode 20450] reward=-113469006.4 actor_loss=0.3850 critic_loss=147950814640.3556 entropy=17.2834 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 20460] reward=-114295953.2 actor_loss=0.2465 critic_loss=154325239974.0540 entropy=17.3004 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 20460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505816.7 mean_steps=13.4
|
|
[Episode 20470] reward=-118218424.4 actor_loss=0.2763 critic_loss=178563207805.1555 entropy=17.2955 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 20480] reward=-114009888.9 actor_loss=0.3615 critic_loss=152123750462.0606 entropy=17.2882 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 20480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619959.5 mean_steps=12.8
|
|
[Episode 20490] reward=-118162569.6 actor_loss=0.2011 critic_loss=157419807788.5217 entropy=17.2921 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 20500] reward=-111190705.0 actor_loss=0.2210 critic_loss=142763677013.3333 entropy=17.2836 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 20500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592535.7 mean_steps=14.3
|
|
[Episode 20510] reward=-118027299.9 actor_loss=0.3450 critic_loss=153394356770.1333 entropy=17.2943 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 20520] reward=-120246448.3 actor_loss=0.2948 critic_loss=163492292926.5778 entropy=17.2803 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 20520] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-692206.6 mean_steps=12.7
|
|
[Episode 20530] reward=-118359185.1 actor_loss=0.3346 critic_loss=162196007594.6667 entropy=17.2794 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 20540] reward=-115443951.2 actor_loss=0.3526 critic_loss=155817054966.5185 entropy=17.2849 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 20540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-621304.9 mean_steps=13.2
|
|
[Episode 20550] reward=-115370634.4 actor_loss=0.3328 critic_loss=165406680795.4286 entropy=17.2796 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 20560] reward=-115736477.0 actor_loss=0.3086 critic_loss=152059060224.0000 entropy=17.2884 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 20560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-363828.6 mean_steps=16.3
|
|
[Episode 20570] reward=-119957029.2 actor_loss=0.3347 critic_loss=161638287132.4445 entropy=17.2940 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 20580] reward=-113932125.3 actor_loss=0.2654 critic_loss=148871186204.4445 entropy=17.3270 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 20580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521670.0 mean_steps=15.2
|
|
[Episode 20590] reward=-111385613.3 actor_loss=0.3237 critic_loss=154024902656.0000 entropy=17.3446 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 20600] reward=-117220031.4 actor_loss=0.2323 critic_loss=170952248888.8889 entropy=17.3311 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 20600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531618.5 mean_steps=14.4
|
|
[Episode 20610] reward=-123541353.0 actor_loss=0.3388 critic_loss=170318698177.4222 entropy=17.3139 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 20620] reward=-118755382.1 actor_loss=0.2352 critic_loss=156739363726.2222 entropy=17.3011 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 20620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-373374.2 mean_steps=16.3
|
|
[Episode 20630] reward=-112812462.7 actor_loss=0.2877 critic_loss=153182692966.4000 entropy=17.3025 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 20640] reward=-113369416.7 actor_loss=0.2999 critic_loss=158903105763.5555 entropy=17.3015 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 20640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440851.5 mean_steps=15.8
|
|
[Episode 20650] reward=-113855010.8 actor_loss=0.3134 critic_loss=153076411830.8571 entropy=17.2906 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 20660] reward=-114873521.5 actor_loss=0.3179 critic_loss=156341717978.0741 entropy=17.2779 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 20660] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-682303.1 mean_steps=11.7
|
|
[Episode 20670] reward=-113014871.4 actor_loss=0.3862 critic_loss=150137829309.9355 entropy=17.2667 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 20680] reward=-114594941.0 actor_loss=0.4258 critic_loss=159273616452.2667 entropy=17.2583 approx_kl=0.0048 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 20680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619084.2 mean_steps=13.0
|
|
[Episode 20690] reward=-109938674.9 actor_loss=0.3142 critic_loss=173631232773.6889 entropy=17.2558 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 20700] reward=-113693313.8 actor_loss=0.3373 critic_loss=156483839226.3111 entropy=17.2556 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 20700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-617268.4 mean_steps=11.9
|
|
[Episode 20710] reward=-116483769.8 actor_loss=0.3069 critic_loss=154588278307.7209 entropy=17.2914 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 20720] reward=-113796671.5 actor_loss=0.2819 critic_loss=150745144797.8667 entropy=17.2819 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 20720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-582470.0 mean_steps=11.8
|
|
[Episode 20730] reward=-113682515.2 actor_loss=0.3494 critic_loss=150938433399.4667 entropy=17.2839 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 20740] reward=-121105951.8 actor_loss=0.2298 critic_loss=174300163822.9333 entropy=17.2991 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 20740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414066.6 mean_steps=15.6
|
|
[Episode 20750] reward=-121684651.6 actor_loss=0.2456 critic_loss=168477720120.8889 entropy=17.2841 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 20760] reward=-117349279.6 actor_loss=0.3645 critic_loss=160426505011.2000 entropy=17.2821 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 20760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442421.0 mean_steps=14.9
|
|
[Episode 20770] reward=-115509433.1 actor_loss=0.1867 critic_loss=148779957840.8421 entropy=17.3104 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 20780] reward=-115645896.1 actor_loss=0.2012 critic_loss=146437607332.9778 entropy=17.3076 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 20780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458429.7 mean_steps=15.1
|
|
[Episode 20790] reward=-121335558.0 actor_loss=0.2851 critic_loss=159413484748.8000 entropy=17.3057 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 20800] reward=-114241741.6 actor_loss=0.3493 critic_loss=153556854473.6970 entropy=17.3014 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 20800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-471943.2 mean_steps=12.9
|
|
[Episode 20810] reward=-119757406.5 actor_loss=0.2355 critic_loss=159399136162.9091 entropy=17.3192 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 20820] reward=-116699801.0 actor_loss=0.2241 critic_loss=154519036450.1333 entropy=17.2997 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 20820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420840.7 mean_steps=15.7
|
|
[Episode 20830] reward=-119140882.2 actor_loss=0.3190 critic_loss=245062808234.6667 entropy=17.2917 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 20840] reward=-118842324.8 actor_loss=0.3379 critic_loss=175640439974.6977 entropy=17.2856 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 20840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491901.8 mean_steps=15.2
|
|
[Episode 20850] reward=-115563261.3 actor_loss=0.2119 critic_loss=159160116955.4286 entropy=17.2686 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 20860] reward=-116022428.0 actor_loss=0.2539 critic_loss=159623636218.3111 entropy=17.2799 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 20860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-631416.6 mean_steps=12.0
|
|
[Episode 20870] reward=-120144114.8 actor_loss=0.2889 critic_loss=168153224078.2222 entropy=17.2809 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 20880] reward=-112852783.5 actor_loss=0.3852 critic_loss=156117016185.9048 entropy=17.2897 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 20880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-362580.0 mean_steps=15.4
|
|
[Episode 20890] reward=-119713008.8 actor_loss=0.3266 critic_loss=159853647917.5111 entropy=17.2704 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 20900] reward=-115957571.1 actor_loss=0.2847 critic_loss=155900912786.2857 entropy=17.2622 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 20900] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-299069.4 mean_steps=16.9
|
|
[Episode 20910] reward=-118811393.3 actor_loss=0.2466 critic_loss=154412464420.5714 entropy=17.2701 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 20920] reward=-112407383.1 actor_loss=0.3802 critic_loss=152298414080.0000 entropy=17.2665 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 20920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434209.9 mean_steps=14.5
|
|
[Episode 20930] reward=-115312128.2 actor_loss=0.3285 critic_loss=152205076230.2439 entropy=17.2855 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 20940] reward=-120410279.2 actor_loss=0.2009 critic_loss=162384097735.1111 entropy=17.2796 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 20940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-606906.1 mean_steps=11.9
|
|
[Episode 20950] reward=-119721460.4 actor_loss=0.3688 critic_loss=157156929357.9131 entropy=17.2903 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 20960] reward=-117817895.0 actor_loss=0.3032 critic_loss=155872166687.2195 entropy=17.2900 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 20960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563559.1 mean_steps=12.9
|
|
[Episode 20970] reward=-115379357.6 actor_loss=0.3440 critic_loss=157530606369.3913 entropy=17.2851 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 20980] reward=-119255227.9 actor_loss=0.3219 critic_loss=158449453899.2941 entropy=17.3039 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 20980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379415.7 mean_steps=16.1
|
|
[Episode 20990] reward=-120395366.1 actor_loss=0.2509 critic_loss=244503901059.1219 entropy=17.2950 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 21000] reward=-118124882.1 actor_loss=0.3246 critic_loss=155178581651.9111 entropy=17.2935 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 21000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452601.8 mean_steps=16.1
|
|
[Episode 21010] reward=-118008701.9 actor_loss=0.2850 critic_loss=156665312530.7317 entropy=17.2903 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 21020] reward=-113103998.9 actor_loss=0.3726 critic_loss=154938790161.0667 entropy=17.2845 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 21020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-423566.3 mean_steps=16.8
|
|
[Episode 21030] reward=-116717242.0 actor_loss=0.3205 critic_loss=155800988330.6667 entropy=17.2854 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 21040] reward=-122498541.0 actor_loss=0.2066 critic_loss=164542588660.8696 entropy=17.2738 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 21040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-615835.3 mean_steps=13.1
|
|
[Episode 21050] reward=-115709937.4 actor_loss=0.3050 critic_loss=152682755557.0526 entropy=17.2975 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 21060] reward=-116477563.0 actor_loss=0.3422 critic_loss=153871369485.4737 entropy=17.2931 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 21060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493927.9 mean_steps=14.2
|
|
[Episode 21070] reward=-114041804.3 actor_loss=0.2784 critic_loss=152461024369.7778 entropy=17.2857 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 21080] reward=-118647196.4 actor_loss=0.2970 critic_loss=156850360980.6452 entropy=17.2782 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 21080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525672.8 mean_steps=13.1
|
|
[Episode 21090] reward=-118115440.9 actor_loss=0.2813 critic_loss=160843097063.0244 entropy=17.2763 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 21100] reward=-116479653.1 actor_loss=0.2785 critic_loss=159697613917.0909 entropy=17.2703 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 21100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-544974.4 mean_steps=12.5
|
|
[Episode 21110] reward=-116860853.9 actor_loss=0.4079 critic_loss=162455563468.8000 entropy=17.2857 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 21120] reward=-115520793.4 actor_loss=0.3783 critic_loss=156566668709.6471 entropy=17.2964 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 21120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475920.0 mean_steps=14.0
|
|
[Episode 21130] reward=-123090092.3 actor_loss=0.2829 critic_loss=165494470519.4667 entropy=17.3073 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 21140] reward=-117600776.0 actor_loss=0.3453 critic_loss=154425840981.3333 entropy=17.3064 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 21140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483598.2 mean_steps=14.0
|
|
[Episode 21150] reward=-116991795.2 actor_loss=0.3496 critic_loss=154709799367.1111 entropy=17.3033 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 21160] reward=-119432931.6 actor_loss=0.2298 critic_loss=157719942485.3333 entropy=17.3066 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 21160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539794.3 mean_steps=14.8
|
|
[Episode 21170] reward=-117313963.7 actor_loss=0.2889 critic_loss=155691544849.0667 entropy=17.2971 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 21180] reward=-116480974.4 actor_loss=0.3086 critic_loss=150676678168.3810 entropy=17.3110 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 21180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-512380.2 mean_steps=12.3
|
|
[Episode 21190] reward=-116886362.7 actor_loss=0.3944 critic_loss=151325892790.0444 entropy=17.3239 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 21200] reward=-124148650.4 actor_loss=0.3550 critic_loss=169827288600.3810 entropy=17.3170 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 21200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-665255.5 mean_steps=11.9
|
|
[Episode 21210] reward=-116213854.0 actor_loss=0.2789 critic_loss=156074363562.6667 entropy=17.3004 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 21220] reward=-113064518.4 actor_loss=0.3139 critic_loss=153683364704.7111 entropy=17.2944 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 21220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-579165.6 mean_steps=13.9
|
|
[Episode 21230] reward=-117686357.5 actor_loss=0.2795 critic_loss=154922995396.9231 entropy=17.2981 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 21240] reward=-116424208.3 actor_loss=0.2675 critic_loss=151031888463.6444 entropy=17.3216 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 21240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-658771.3 mean_steps=12.3
|
|
[Episode 21250] reward=-112534884.9 actor_loss=0.2626 critic_loss=148001447025.7778 entropy=17.3270 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 21260] reward=-113618524.0 actor_loss=0.2756 critic_loss=150984646473.9556 entropy=17.3136 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 21260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537262.2 mean_steps=14.2
|
|
[Episode 21270] reward=-117845419.6 actor_loss=0.3444 critic_loss=166350141889.5610 entropy=17.2942 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 21280] reward=-118377098.2 actor_loss=0.3209 critic_loss=170803938736.3556 entropy=17.2940 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 21280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-505904.8 mean_steps=14.9
|
|
[Episode 21290] reward=-117140983.7 actor_loss=0.2510 critic_loss=153868777110.5882 entropy=17.3012 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 21300] reward=-113889812.8 actor_loss=0.3177 critic_loss=158121220141.5111 entropy=17.2993 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 21300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-614002.5 mean_steps=14.1
|
|
[Episode 21310] reward=-121305627.0 actor_loss=0.2406 critic_loss=158470418747.0769 entropy=17.2908 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 21320] reward=-118852217.7 actor_loss=0.3332 critic_loss=156851215837.8667 entropy=17.3006 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 21320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-452108.4 mean_steps=16.4
|
|
[Episode 21330] reward=-123331420.9 actor_loss=0.2135 critic_loss=165654907744.7111 entropy=17.3090 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 21340] reward=-118020342.9 actor_loss=0.4243 critic_loss=157614577527.4667 entropy=17.3085 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 21340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551991.7 mean_steps=13.0
|
|
[Episode 21350] reward=-121806120.0 actor_loss=0.2647 critic_loss=164553268701.8667 entropy=17.3231 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 21360] reward=-120295831.6 actor_loss=0.2425 critic_loss=159094741401.6000 entropy=17.3270 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 21360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454346.4 mean_steps=15.2
|
|
[Episode 21370] reward=-115426535.8 actor_loss=0.3423 critic_loss=153770368231.2258 entropy=17.3402 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 21380] reward=-119540027.9 actor_loss=0.3569 critic_loss=156433395126.8571 entropy=17.3321 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 21380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529247.2 mean_steps=162.1
|
|
[Episode 21390] reward=-115875035.5 actor_loss=0.3525 critic_loss=154746270967.1724 entropy=17.3432 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 21400] reward=-120918694.6 actor_loss=0.3186 critic_loss=163892237458.2857 entropy=17.3598 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 21400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423742.9 mean_steps=15.7
|
|
[Episode 21410] reward=-122767480.9 actor_loss=0.1716 critic_loss=166419417560.6154 entropy=17.3744 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 21420] reward=-114807592.7 actor_loss=0.2923 critic_loss=152542432460.8000 entropy=17.3857 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 21420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470296.0 mean_steps=15.1
|
|
[Episode 21430] reward=-114032417.7 actor_loss=0.3679 critic_loss=148736188962.1333 entropy=17.4021 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 21440] reward=-117538546.4 actor_loss=0.2540 critic_loss=158366413040.9412 entropy=17.4229 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 21440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488272.7 mean_steps=15.1
|
|
[Episode 21450] reward=-123384680.9 actor_loss=0.3211 critic_loss=165666401666.8445 entropy=17.4328 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 21460] reward=-118752162.0 actor_loss=0.3038 critic_loss=157612514417.7778 entropy=17.4221 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 21460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-512954.5 mean_steps=16.3
|
|
[Episode 21470] reward=-113977911.6 actor_loss=0.4197 critic_loss=151399574186.6667 entropy=17.4229 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 21480] reward=-117716082.6 actor_loss=0.2892 critic_loss=155130841088.0000 entropy=17.4191 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 21480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481182.2 mean_steps=15.0
|
|
[Episode 21490] reward=-122807230.9 actor_loss=0.2816 critic_loss=165481200103.6190 entropy=17.4132 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 21500] reward=-110116987.6 actor_loss=0.3530 critic_loss=150246564386.1333 entropy=17.4240 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 21500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544815.9 mean_steps=13.7
|
|
[Episode 21510] reward=-121864000.0 actor_loss=0.2206 critic_loss=162364690080.9143 entropy=17.4361 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 21520] reward=-121167524.9 actor_loss=0.2453 critic_loss=161384971195.7333 entropy=17.4304 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 21520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-356335.4 mean_steps=15.7
|
|
[Episode 21530] reward=-116492523.8 actor_loss=0.2161 critic_loss=158499116646.4000 entropy=17.4333 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 21540] reward=-115988961.6 actor_loss=0.3181 critic_loss=152414143192.1778 entropy=17.4265 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 21540] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-400302.4 mean_steps=17.4
|
|
[Episode 21550] reward=-119024686.4 actor_loss=0.3686 critic_loss=158736888093.7675 entropy=17.4128 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 21560] reward=-113957509.0 actor_loss=0.2697 critic_loss=150221694855.5294 entropy=17.4294 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 21560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-420555.3 mean_steps=14.6
|
|
[Episode 21570] reward=-114722284.1 actor_loss=0.4109 critic_loss=157122850907.0222 entropy=17.4332 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 21580] reward=-114571338.1 actor_loss=0.2111 critic_loss=155812679094.8571 entropy=17.4276 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 21580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441523.3 mean_steps=15.3
|
|
[Episode 21590] reward=-112436433.6 actor_loss=0.4814 critic_loss=149621443309.2683 entropy=17.4252 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 21600] reward=-116399409.8 actor_loss=0.2871 critic_loss=162609861778.2857 entropy=17.3895 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 21600] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-305059.4 mean_steps=17.3
|
|
[Episode 21610] reward=-114128193.0 actor_loss=0.2760 critic_loss=152714867858.2857 entropy=17.3916 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 21620] reward=-117206768.7 actor_loss=0.3265 critic_loss=154147914508.1905 entropy=17.4012 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 21620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551167.6 mean_steps=12.4
|
|
[Episode 21630] reward=-118454822.2 actor_loss=0.2452 critic_loss=160349004686.2222 entropy=17.4281 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 21640] reward=-119035253.5 actor_loss=0.2694 critic_loss=157917239237.4857 entropy=17.4244 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 21640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589283.0 mean_steps=12.6
|
|
[Episode 21650] reward=-121558100.3 actor_loss=0.2922 critic_loss=168026378148.9778 entropy=17.4296 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 21660] reward=-112975377.7 actor_loss=0.2374 critic_loss=146020242773.3333 entropy=17.4130 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 21660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-630877.6 mean_steps=12.8
|
|
[Episode 21670] reward=-120508179.9 actor_loss=0.2296 critic_loss=160306629290.6667 entropy=17.4358 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 21680] reward=-109911789.1 actor_loss=0.2333 critic_loss=142281563648.0000 entropy=17.4533 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 21680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-479758.5 mean_steps=16.0
|
|
[Episode 21690] reward=-124632572.9 actor_loss=0.1811 critic_loss=166164644386.1333 entropy=17.4494 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 21700] reward=-116395475.1 actor_loss=0.2728 critic_loss=160289065171.8621 entropy=17.4427 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 21700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576575.7 mean_steps=13.4
|
|
[Episode 21710] reward=-113416009.8 actor_loss=0.2033 critic_loss=148658389504.0000 entropy=17.4417 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 21720] reward=-123813684.1 actor_loss=0.2094 critic_loss=174709199667.2000 entropy=17.4476 approx_kl=0.0046 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 21720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-579716.3 mean_steps=11.8
|
|
[Episode 21730] reward=-121304190.3 actor_loss=0.2923 critic_loss=163161604096.0000 entropy=17.4368 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 21740] reward=-120474557.5 actor_loss=0.2299 critic_loss=166548445125.4857 entropy=17.4373 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 21740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537539.0 mean_steps=14.6
|
|
[Episode 21750] reward=-117391657.4 actor_loss=0.2237 critic_loss=154533398078.4390 entropy=17.4238 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 21760] reward=-114868120.9 actor_loss=0.3029 critic_loss=151120919171.6571 entropy=17.4415 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 21760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-556348.4 mean_steps=14.2
|
|
[Episode 21770] reward=-117761748.3 actor_loss=0.3058 critic_loss=160080872880.3556 entropy=17.4398 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 21780] reward=-112321942.3 actor_loss=0.3488 critic_loss=153208448614.4000 entropy=17.4495 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 21780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496688.1 mean_steps=14.0
|
|
[Episode 21790] reward=-121094960.0 actor_loss=0.3031 critic_loss=160766227251.2000 entropy=17.4427 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 21800] reward=-121963617.8 actor_loss=0.3050 critic_loss=164475250688.0000 entropy=17.4287 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 21800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548351.3 mean_steps=14.2
|
|
[Episode 21810] reward=-116155696.4 actor_loss=0.3501 critic_loss=159011276889.0435 entropy=17.4175 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 21820] reward=-120796694.3 actor_loss=0.2497 critic_loss=161153373525.3333 entropy=17.4193 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 21820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426456.7 mean_steps=15.4
|
|
[Episode 21830] reward=-121566279.2 actor_loss=0.2799 critic_loss=162550053819.7333 entropy=17.4143 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 21840] reward=-118471790.9 actor_loss=0.2686 critic_loss=154624619315.2000 entropy=17.4189 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 21840] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-348142.8 mean_steps=16.6
|
|
[Episode 21850] reward=-118880527.6 actor_loss=0.2995 critic_loss=163396171275.3778 entropy=17.4277 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 21860] reward=-119356004.6 actor_loss=0.3250 critic_loss=160072327714.1333 entropy=17.4213 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 21860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492558.8 mean_steps=15.1
|
|
[Episode 21870] reward=-114088480.1 actor_loss=0.2873 critic_loss=148187526667.3778 entropy=17.4303 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 21880] reward=-122857689.2 actor_loss=0.3182 critic_loss=164217184438.0444 entropy=17.4262 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 21880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-598455.6 mean_steps=12.9
|
|
[Episode 21890] reward=-123301740.7 actor_loss=0.2084 critic_loss=161901125222.4000 entropy=17.4127 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 21900] reward=-115402198.2 actor_loss=0.3764 critic_loss=155486925544.7273 entropy=17.3932 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 21900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513957.9 mean_steps=14.1
|
|
[Episode 21910] reward=-118186234.5 actor_loss=0.2786 critic_loss=161801292068.5714 entropy=17.3942 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 21920] reward=-122946982.1 actor_loss=0.2022 critic_loss=169514340937.1429 entropy=17.3861 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 21920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575695.0 mean_steps=13.7
|
|
[Episode 21930] reward=-113068260.3 actor_loss=0.3847 critic_loss=151823609483.6364 entropy=17.3881 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 21940] reward=-121677894.3 actor_loss=0.3611 critic_loss=180619645574.7368 entropy=17.3849 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 21940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-635154.1 mean_steps=12.1
|
|
[Episode 21950] reward=-120379376.8 actor_loss=0.3328 critic_loss=161766991098.3111 entropy=17.3744 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 21960] reward=-116385666.8 actor_loss=0.3178 critic_loss=155218318313.2444 entropy=17.3696 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 21960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-319466.3 mean_steps=16.6
|
|
[Episode 21970] reward=-122128580.9 actor_loss=0.2624 critic_loss=167486060953.6000 entropy=17.3732 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 21980] reward=-115821093.4 actor_loss=0.2726 critic_loss=154862945348.2667 entropy=17.3666 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 21980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559471.1 mean_steps=12.6
|
|
[Episode 21990] reward=-115217875.5 actor_loss=0.2454 critic_loss=146130666746.3111 entropy=17.3712 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 22000] reward=-121131347.8 actor_loss=0.3119 critic_loss=165510896025.6000 entropy=17.3801 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 22000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491335.0 mean_steps=14.3
|
|
[Episode 22010] reward=-119106673.6 actor_loss=0.3029 critic_loss=159708003696.6400 entropy=17.3983 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 22020] reward=-125491090.1 actor_loss=0.2648 critic_loss=167477558567.8222 entropy=17.4090 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 22020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-366230.3 mean_steps=16.1
|
|
[Episode 22030] reward=-118175642.0 actor_loss=0.3030 critic_loss=159720367354.3111 entropy=17.4076 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 22040] reward=-116812219.4 actor_loss=0.3364 critic_loss=149169139438.9333 entropy=17.4087 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 22040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-627144.3 mean_steps=12.6
|
|
[Episode 22050] reward=-119287492.4 actor_loss=0.2932 critic_loss=161032085738.0571 entropy=17.4094 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 22060] reward=-116653348.7 actor_loss=0.3220 critic_loss=159236855853.5111 entropy=17.4090 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 22060] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-690967.5 mean_steps=11.6
|
|
[Episode 22070] reward=-123816130.5 actor_loss=0.3506 critic_loss=168304921258.6667 entropy=17.4154 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 22080] reward=-120130380.1 actor_loss=0.2779 critic_loss=160038359412.3636 entropy=17.4170 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 22080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-584572.3 mean_steps=11.9
|
|
[Episode 22090] reward=-114546726.2 actor_loss=0.4323 critic_loss=148726679688.5333 entropy=17.4264 approx_kl=0.0103 kl_stop=0 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 22100] reward=-121965855.6 actor_loss=0.2483 critic_loss=160701763049.7391 entropy=17.4307 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 22100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447490.2 mean_steps=15.7
|
|
[Episode 22110] reward=-110912714.7 actor_loss=0.3494 critic_loss=146450038363.8974 entropy=17.4362 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 22120] reward=-115745107.3 actor_loss=0.2487 critic_loss=148624969272.8889 entropy=17.4426 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 22120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540824.4 mean_steps=13.6
|
|
[Episode 22130] reward=-120645572.6 actor_loss=0.3111 critic_loss=161757883050.6667 entropy=17.4448 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 22140] reward=-119973243.0 actor_loss=0.2928 critic_loss=157521298063.3600 entropy=17.4429 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 22140] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-274590.5 mean_steps=17.5
|
|
[Episode 22150] reward=-116594675.9 actor_loss=0.2822 critic_loss=157542298596.3243 entropy=17.4451 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 22160] reward=-117103717.1 actor_loss=0.3975 critic_loss=158830596587.5200 entropy=17.4444 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 22160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-513477.1 mean_steps=14.9
|
|
[Episode 22170] reward=-112438234.9 actor_loss=0.3599 critic_loss=149638645532.4445 entropy=17.4507 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 22180] reward=-121722014.2 actor_loss=0.2786 critic_loss=166837222955.8857 entropy=17.4756 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 22180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448994.0 mean_steps=16.1
|
|
[Episode 22190] reward=-120863058.6 actor_loss=0.2986 critic_loss=161491376810.6667 entropy=17.4641 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 22200] reward=-118098675.1 actor_loss=0.2357 critic_loss=156615637583.6444 entropy=17.4650 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 22200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477118.4 mean_steps=15.1
|
|
[Episode 22210] reward=-118296455.0 actor_loss=0.2385 critic_loss=155762433137.7778 entropy=17.4719 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 22220] reward=-117234564.7 actor_loss=0.3227 critic_loss=159037553595.7333 entropy=17.4695 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 22220] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-672523.9 mean_steps=11.4
|
|
[Episode 22230] reward=-115677570.1 actor_loss=0.3126 critic_loss=148224167116.8000 entropy=17.4534 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 22240] reward=-113656052.0 actor_loss=0.2725 critic_loss=149456470447.1579 entropy=17.4587 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 22240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516388.1 mean_steps=13.9
|
|
[Episode 22250] reward=-122096800.6 actor_loss=0.2605 critic_loss=159858983276.0889 entropy=17.4585 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 22260] reward=-120480370.3 actor_loss=0.3960 critic_loss=162909955072.0000 entropy=17.4452 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 22260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520676.6 mean_steps=13.8
|
|
[Episode 22270] reward=-125521041.1 actor_loss=0.2701 critic_loss=171543295426.5600 entropy=17.4585 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 22280] reward=-113355048.7 actor_loss=0.2628 critic_loss=147860345105.0667 entropy=17.4644 approx_kl=0.0112 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 22280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510018.8 mean_steps=14.1
|
|
[Episode 22290] reward=-118824444.7 actor_loss=0.3386 critic_loss=162424853117.1555 entropy=17.4649 approx_kl=0.0050 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 22300] reward=-115505385.8 actor_loss=0.4798 critic_loss=156876480760.2424 entropy=17.4756 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 22300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455148.8 mean_steps=15.2
|
|
[Episode 22310] reward=-122663280.4 actor_loss=0.3097 critic_loss=160640987648.0000 entropy=17.4814 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 22320] reward=-117688823.7 actor_loss=0.2626 critic_loss=156719342738.2857 entropy=17.5058 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 22320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-371761.9 mean_steps=15.0
|
|
[Episode 22330] reward=-120204323.4 actor_loss=0.3443 critic_loss=160371462144.0000 entropy=17.4972 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 22340] reward=-118952034.6 actor_loss=0.3325 critic_loss=164504718540.8000 entropy=17.4891 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 22340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536373.3 mean_steps=14.2
|
|
[Episode 22350] reward=-120573327.2 actor_loss=0.2563 critic_loss=175041762645.3333 entropy=17.4848 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 22360] reward=-118207424.1 actor_loss=0.3653 critic_loss=151383277203.9111 entropy=17.4901 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 22360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416198.0 mean_steps=15.2
|
|
[Episode 22370] reward=-119008815.0 actor_loss=0.3244 critic_loss=158285332480.0000 entropy=17.5028 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 22380] reward=-118762848.5 actor_loss=0.2903 critic_loss=155601994941.6296 entropy=17.5054 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 22380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475159.3 mean_steps=14.8
|
|
[Episode 22390] reward=-120042062.9 actor_loss=0.3699 critic_loss=161847087377.0667 entropy=17.4975 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 22400] reward=-122609253.9 actor_loss=0.3670 critic_loss=170874037071.4483 entropy=17.4868 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 22400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-467990.4 mean_steps=15.9
|
|
[Episode 22410] reward=-117999240.2 actor_loss=0.2750 critic_loss=163471733555.2000 entropy=17.4782 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 22420] reward=-121616077.1 actor_loss=0.2951 critic_loss=165608512512.0000 entropy=17.4866 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 22420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510320.8 mean_steps=14.1
|
|
[Episode 22430] reward=-117916945.2 actor_loss=0.3361 critic_loss=160822457794.5600 entropy=17.4661 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 22440] reward=-117926713.0 actor_loss=0.3035 critic_loss=165135999522.1333 entropy=17.4957 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 22440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383658.9 mean_steps=15.8
|
|
[Episode 22450] reward=-121548979.4 actor_loss=0.2899 critic_loss=162436863122.2857 entropy=17.4931 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 22460] reward=-123072012.2 actor_loss=0.2830 critic_loss=167165282222.0800 entropy=17.5033 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 22460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465486.3 mean_steps=14.7
|
|
[Episode 22470] reward=-116625204.4 actor_loss=0.2990 critic_loss=158066551974.6977 entropy=17.5187 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 22480] reward=-115795329.2 actor_loss=0.2861 critic_loss=154751566188.0889 entropy=17.5313 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 22480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451074.3 mean_steps=15.2
|
|
[Episode 22490] reward=-115173643.8 actor_loss=0.3206 critic_loss=157481987185.7778 entropy=17.5216 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 22500] reward=-120619624.2 actor_loss=0.3200 critic_loss=154755947269.6889 entropy=17.5166 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 22500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476387.2 mean_steps=15.2
|
|
[Episode 22510] reward=-114955719.1 actor_loss=0.2977 critic_loss=154121224192.0000 entropy=17.5129 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 22520] reward=-120819665.5 actor_loss=0.3694 critic_loss=162839073041.0667 entropy=17.5270 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 22520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-499626.5 mean_steps=15.1
|
|
[Episode 22530] reward=-110299639.3 actor_loss=0.4658 critic_loss=148799627556.5714 entropy=17.5236 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 22540] reward=-122392380.6 actor_loss=0.2044 critic_loss=165310498905.0435 entropy=17.5216 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 22540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479930.2 mean_steps=14.1
|
|
[Episode 22550] reward=-116947708.7 actor_loss=0.3210 critic_loss=161244645052.6316 entropy=17.5046 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 22560] reward=-119133000.7 actor_loss=0.1952 critic_loss=156164474288.3556 entropy=17.4890 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 22560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447821.6 mean_steps=15.6
|
|
[Episode 22570] reward=-115500704.5 actor_loss=0.3780 critic_loss=147979305402.8108 entropy=17.4836 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 22580] reward=-116325789.7 actor_loss=0.3144 critic_loss=152484362103.4667 entropy=17.4879 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 22580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-442779.1 mean_steps=13.8
|
|
[Episode 22590] reward=-123857263.0 actor_loss=0.4186 critic_loss=168299115395.1219 entropy=17.4656 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 22600] reward=-121317296.0 actor_loss=0.2906 critic_loss=153129213952.0000 entropy=17.4530 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 22600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458434.2 mean_steps=15.1
|
|
[Episode 22610] reward=-121870372.4 actor_loss=0.3144 critic_loss=167298566436.5714 entropy=17.4538 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 22620] reward=-119109222.0 actor_loss=0.2910 critic_loss=156758512071.1111 entropy=17.4645 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 22620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464085.2 mean_steps=14.8
|
|
[Episode 22630] reward=-116153501.8 actor_loss=0.3350 critic_loss=156483903297.4884 entropy=17.4704 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 22640] reward=-123205816.8 actor_loss=0.2955 critic_loss=158578259618.3415 entropy=17.4757 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 22640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592291.3 mean_steps=13.8
|
|
[Episode 22650] reward=-125230662.8 actor_loss=0.3271 critic_loss=211266548916.7059 entropy=17.4831 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 22660] reward=-122181226.7 actor_loss=0.2523 critic_loss=164457074315.6364 entropy=17.4797 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 22660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-359949.0 mean_steps=16.1
|
|
[Episode 22670] reward=-115751849.4 actor_loss=0.3611 critic_loss=155567866246.0952 entropy=17.4931 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 22680] reward=-119428925.0 actor_loss=0.2522 critic_loss=153865745997.5757 entropy=17.4885 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 22680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-675986.0 mean_steps=12.1
|
|
[Episode 22690] reward=-116854765.2 actor_loss=0.3610 critic_loss=151925701485.7143 entropy=17.5043 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 22700] reward=-118927453.8 actor_loss=0.3554 critic_loss=159669223033.9048 entropy=17.5030 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 22700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584777.8 mean_steps=13.0
|
|
[Episode 22710] reward=-112387515.9 actor_loss=0.3866 critic_loss=148087234349.9487 entropy=17.4951 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 22720] reward=-115998887.7 actor_loss=0.3606 critic_loss=153666259416.6154 entropy=17.5137 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 22720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446217.5 mean_steps=15.7
|
|
[Episode 22730] reward=-120856945.9 actor_loss=0.3142 critic_loss=155793247744.0000 entropy=17.5167 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 22740] reward=-119470774.4 actor_loss=0.2129 critic_loss=153327808967.1111 entropy=17.5288 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 22740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-362889.4 mean_steps=17.4
|
|
[Episode 22750] reward=-120869057.9 actor_loss=0.3118 critic_loss=155248421794.9091 entropy=17.5291 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 22760] reward=-118602824.4 actor_loss=0.3208 critic_loss=156314585682.5807 entropy=17.5138 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 22760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482154.0 mean_steps=14.9
|
|
[Episode 22770] reward=-118434734.2 actor_loss=0.3455 critic_loss=159224523434.6667 entropy=17.5187 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 22780] reward=-117069027.6 actor_loss=0.3869 critic_loss=154353684480.0000 entropy=17.5238 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 22780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527474.9 mean_steps=14.4
|
|
[Episode 22790] reward=-117376258.5 actor_loss=0.3038 critic_loss=156165458056.5333 entropy=17.5265 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 22800] reward=-120303199.1 actor_loss=0.1663 critic_loss=153172829070.2222 entropy=17.5309 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 22800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-625755.6 mean_steps=13.0
|
|
[Episode 22810] reward=-113875371.5 actor_loss=0.3620 critic_loss=153603579576.3200 entropy=17.5341 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 22820] reward=-121405567.1 actor_loss=0.2540 critic_loss=161384169472.0000 entropy=17.5340 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 22820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-403383.6 mean_steps=16.4
|
|
[Episode 22830] reward=-116617146.8 actor_loss=0.4117 critic_loss=158659473993.1429 entropy=17.5423 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 22840] reward=-119268872.6 actor_loss=0.3498 critic_loss=158803179019.3778 entropy=17.5580 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 22840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-605952.6 mean_steps=13.1
|
|
[Episode 22850] reward=-117016147.2 actor_loss=0.2295 critic_loss=154380162522.5366 entropy=17.5587 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 22860] reward=-119300024.5 actor_loss=0.3482 critic_loss=157571930885.6889 entropy=17.5483 approx_kl=0.0106 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 22860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395440.1 mean_steps=16.4
|
|
[Episode 22870] reward=-118372483.9 actor_loss=0.2946 critic_loss=151044392125.6296 entropy=17.5277 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 22880] reward=-116786316.2 actor_loss=0.2798 critic_loss=155773453016.1778 entropy=17.5290 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 22880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428385.1 mean_steps=15.8
|
|
[Episode 22890] reward=-120094584.8 actor_loss=0.3908 critic_loss=159694173070.2222 entropy=17.5277 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 22900] reward=-116725557.6 actor_loss=0.2596 critic_loss=151794881565.2571 entropy=17.5143 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 22900] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-316938.1 mean_steps=17.8
|
|
[Episode 22910] reward=-117877820.0 actor_loss=0.3098 critic_loss=155510829371.0769 entropy=17.5180 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 22920] reward=-120517865.7 actor_loss=0.2157 critic_loss=164109671876.4651 entropy=17.5121 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 22920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463555.1 mean_steps=15.7
|
|
[Episode 22930] reward=-122926787.6 actor_loss=0.3622 critic_loss=167009657939.0270 entropy=17.5022 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 22940] reward=-119765649.9 actor_loss=0.3413 critic_loss=157512711134.9677 entropy=17.4973 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 22940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456711.1 mean_steps=14.8
|
|
[Episode 22950] reward=-118946226.6 actor_loss=0.3417 critic_loss=155128117036.1379 entropy=17.5027 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 22960] reward=-118307901.1 actor_loss=0.2870 critic_loss=155995420171.3778 entropy=17.5010 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 22960] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-238718.3 mean_steps=17.3
|
|
[Episode 22970] reward=-118192432.8 actor_loss=0.2867 critic_loss=151326838784.0000 entropy=17.5055 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 22980] reward=-114920339.0 actor_loss=0.2555 critic_loss=162960742649.7561 entropy=17.4975 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 22980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444952.1 mean_steps=15.7
|
|
[Episode 22990] reward=-118277390.7 actor_loss=0.2503 critic_loss=152586031104.0000 entropy=17.4967 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 23000] reward=-119567990.9 actor_loss=0.1943 critic_loss=152016061629.6296 entropy=17.4940 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 23000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586916.7 mean_steps=12.4
|
|
[Episode 23010] reward=-118881068.6 actor_loss=0.3507 critic_loss=153629621101.7143 entropy=17.4969 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 23020] reward=-122047179.1 actor_loss=0.3368 critic_loss=160481974539.1304 entropy=17.4963 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 23020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-574970.6 mean_steps=12.8
|
|
[Episode 23030] reward=-122370286.4 actor_loss=0.2326 critic_loss=161643468273.3714 entropy=17.5057 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 23040] reward=-113900772.4 actor_loss=0.2943 critic_loss=149699444371.9111 entropy=17.5027 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 23040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517106.4 mean_steps=14.5
|
|
[Episode 23050] reward=-122721059.0 actor_loss=0.1824 critic_loss=163534947793.4546 entropy=17.5177 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 23060] reward=-119941204.3 actor_loss=0.1431 critic_loss=162667312128.0000 entropy=17.5087 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 23060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-506297.2 mean_steps=13.6
|
|
[Episode 23070] reward=-121416915.8 actor_loss=0.2079 critic_loss=160727836113.4546 entropy=17.5105 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 23080] reward=-122211678.9 actor_loss=0.2119 critic_loss=163000592856.6154 entropy=17.5045 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 23080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562884.0 mean_steps=12.8
|
|
[Episode 23090] reward=-113871970.9 actor_loss=0.3759 critic_loss=148738544981.3333 entropy=17.4869 approx_kl=0.0034 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 23100] reward=-116166731.7 actor_loss=0.2800 critic_loss=154368262144.0000 entropy=17.4925 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 23100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-481352.1 mean_steps=13.1
|
|
[Episode 23110] reward=-112240646.2 actor_loss=0.2643 critic_loss=144717497463.0698 entropy=17.4897 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 23120] reward=-112735230.5 actor_loss=0.3374 critic_loss=147643717677.5111 entropy=17.5024 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 23120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-410350.5 mean_steps=16.9
|
|
[Episode 23130] reward=-118019572.4 actor_loss=0.2772 critic_loss=156642669727.2889 entropy=17.5092 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 23140] reward=-116550402.8 actor_loss=0.2550 critic_loss=153857934622.7200 entropy=17.5137 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 23140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526208.0 mean_steps=13.5
|
|
[Episode 23150] reward=-118343297.5 actor_loss=0.3158 critic_loss=157469890048.0000 entropy=17.5176 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 23160] reward=-119647009.7 actor_loss=0.3130 critic_loss=161386113706.6667 entropy=17.5017 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 23160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-521512.6 mean_steps=13.6
|
|
[Episode 23170] reward=-109432292.8 actor_loss=0.3878 critic_loss=142512161314.1333 entropy=17.4840 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 23180] reward=-117200135.5 actor_loss=0.2649 critic_loss=154329704220.4445 entropy=17.4754 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 23180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-358128.9 mean_steps=15.8
|
|
[Episode 23190] reward=-119129275.0 actor_loss=0.2845 critic_loss=156130222080.0000 entropy=17.4625 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 23200] reward=-111787911.0 actor_loss=0.3026 critic_loss=145016803643.0769 entropy=17.4475 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 23200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487037.7 mean_steps=13.8
|
|
[Episode 23210] reward=-121195379.1 actor_loss=0.2741 critic_loss=157876272311.7949 entropy=17.4476 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 23220] reward=-114743440.4 actor_loss=0.3529 critic_loss=155869330242.3704 entropy=17.4389 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 23220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-574646.8 mean_steps=12.7
|
|
[Episode 23230] reward=-121360300.8 actor_loss=0.2794 critic_loss=157284357334.3256 entropy=17.4310 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 23240] reward=-113698196.4 actor_loss=0.3040 critic_loss=146636449450.6667 entropy=17.4190 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 23240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492571.4 mean_steps=14.9
|
|
[Episode 23250] reward=-117934755.1 actor_loss=0.2724 critic_loss=152665464285.8667 entropy=17.4176 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 23260] reward=-119521151.8 actor_loss=0.3306 critic_loss=158746074453.3333 entropy=17.4384 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 23260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475597.2 mean_steps=14.2
|
|
[Episode 23270] reward=-116530239.6 actor_loss=0.3385 critic_loss=155634210861.5111 entropy=17.4318 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 23280] reward=-117844150.7 actor_loss=0.1801 critic_loss=156750982758.4000 entropy=17.4478 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 23280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599253.2 mean_steps=12.8
|
|
[Episode 23290] reward=-118956777.5 actor_loss=0.3721 critic_loss=157111858517.3333 entropy=17.4454 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 23300] reward=-119003829.6 actor_loss=0.2152 critic_loss=156473738308.2667 entropy=17.4488 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 23300] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-608367.1 mean_steps=12.2
|
|
[Episode 23310] reward=-114743875.5 actor_loss=0.3814 critic_loss=153742881923.2820 entropy=17.4659 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 23320] reward=-118170010.1 actor_loss=0.3297 critic_loss=158923557213.6585 entropy=17.4790 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 23320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-370290.0 mean_steps=15.6
|
|
[Episode 23330] reward=-115890562.8 actor_loss=0.3597 critic_loss=149970760704.0000 entropy=17.4816 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 23340] reward=-121151625.3 actor_loss=0.3765 critic_loss=163895569612.8000 entropy=17.5022 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 23340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478215.6 mean_steps=14.8
|
|
[Episode 23350] reward=-119604688.3 actor_loss=0.3590 critic_loss=161356853096.2963 entropy=17.5023 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 23360] reward=-118047184.1 actor_loss=0.3164 critic_loss=156859430502.4000 entropy=17.4947 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 23360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483576.2 mean_steps=14.9
|
|
[Episode 23370] reward=-118781993.3 actor_loss=0.2658 critic_loss=153972569819.4286 entropy=17.4860 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 23380] reward=-120021958.0 actor_loss=0.2304 critic_loss=159843671799.7419 entropy=17.4889 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 23380] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-662529.9 mean_steps=12.0
|
|
[Episode 23390] reward=-120291906.6 actor_loss=0.2784 critic_loss=154832763835.7333 entropy=17.4939 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 23400] reward=-125970284.2 actor_loss=0.2093 critic_loss=170060320061.7931 entropy=17.5189 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 23400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468539.4 mean_steps=14.7
|
|
[Episode 23410] reward=-114922441.7 actor_loss=0.2836 critic_loss=151349539635.2000 entropy=17.5417 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 23420] reward=-117662028.2 actor_loss=0.3005 critic_loss=159439148646.4000 entropy=17.5423 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 23420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-425968.5 mean_steps=14.4
|
|
[Episode 23430] reward=-122272803.9 actor_loss=0.2602 critic_loss=168146213741.7143 entropy=17.5358 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 23440] reward=-118813117.8 actor_loss=0.2742 critic_loss=162695508650.6667 entropy=17.5138 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 23440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485825.7 mean_steps=14.0
|
|
[Episode 23450] reward=-119686512.9 actor_loss=0.1893 critic_loss=155757384681.2444 entropy=17.5083 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 23460] reward=-117390233.4 actor_loss=0.2754 critic_loss=153978776598.7556 entropy=17.5237 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 23460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-509733.3 mean_steps=12.9
|
|
[Episode 23470] reward=-119824167.8 actor_loss=0.3385 critic_loss=160081365835.2941 entropy=17.5218 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 23480] reward=-117979620.7 actor_loss=0.3235 critic_loss=156977780417.4222 entropy=17.5347 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 23480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547742.4 mean_steps=14.4
|
|
[Episode 23490] reward=-124188334.6 actor_loss=0.2664 critic_loss=160555942980.2667 entropy=17.5364 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 23500] reward=-119703559.3 actor_loss=0.2778 critic_loss=154404167680.0000 entropy=17.5352 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 23500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469588.5 mean_steps=15.1
|
|
[Episode 23510] reward=-121635840.8 actor_loss=0.3524 critic_loss=168237611235.5555 entropy=17.5290 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 23520] reward=-119970647.5 actor_loss=0.2988 critic_loss=159797110010.3111 entropy=17.5148 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 23520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543769.6 mean_steps=13.3
|
|
[Episode 23530] reward=-116726924.9 actor_loss=0.2812 critic_loss=157257646080.0000 entropy=17.5083 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 23540] reward=-117134580.8 actor_loss=0.2826 critic_loss=159325741627.5349 entropy=17.5023 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 23540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-605143.0 mean_steps=12.9
|
|
[Episode 23550] reward=-119225387.9 actor_loss=0.2536 critic_loss=160824524071.8222 entropy=17.5042 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 23560] reward=-112409411.4 actor_loss=0.4018 critic_loss=148690843511.4667 entropy=17.4882 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 23560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473298.1 mean_steps=13.8
|
|
[Episode 23570] reward=-117749495.6 actor_loss=0.3347 critic_loss=156248238266.1818 entropy=17.4996 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 23580] reward=-116109511.9 actor_loss=0.3683 critic_loss=157046319349.7600 entropy=17.4935 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 23580] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-341100.7 mean_steps=17.5
|
|
[Episode 23590] reward=-121273412.4 actor_loss=0.3111 critic_loss=177468999403.2433 entropy=17.4726 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 23600] reward=-119095411.9 actor_loss=0.2855 critic_loss=173557683293.0909 entropy=17.4584 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 23600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433930.5 mean_steps=14.9
|
|
[Episode 23610] reward=-120324087.1 actor_loss=0.2800 critic_loss=160746870708.1482 entropy=17.4589 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 23620] reward=-120186994.5 actor_loss=0.3605 critic_loss=196047278080.0000 entropy=17.4662 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 23620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547988.4 mean_steps=14.7
|
|
[Episode 23630] reward=-125270169.4 actor_loss=0.3640 critic_loss=179936358563.8400 entropy=17.4548 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 23640] reward=-115283810.5 actor_loss=0.4450 critic_loss=163475200682.6667 entropy=17.4660 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 23640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-527698.1 mean_steps=15.8
|
|
[Episode 23650] reward=-118109635.6 actor_loss=0.3351 critic_loss=151870746038.8571 entropy=17.4577 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 23660] reward=-116720142.8 actor_loss=0.3029 critic_loss=156685866037.8947 entropy=17.4413 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 23660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551053.7 mean_steps=13.3
|
|
[Episode 23670] reward=-118425907.4 actor_loss=0.3505 critic_loss=153302237964.1905 entropy=17.4534 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 23680] reward=-121246044.3 actor_loss=0.3842 critic_loss=164800604842.6667 entropy=17.4555 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 23680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462887.5 mean_steps=13.7
|
|
[Episode 23690] reward=-118274606.8 actor_loss=0.2327 critic_loss=155955501862.7879 entropy=17.4619 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 23700] reward=-121709953.4 actor_loss=0.2431 critic_loss=169914931758.5454 entropy=17.4716 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 23700] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-263425.6 mean_steps=17.7
|
|
[Episode 23710] reward=-120348310.2 actor_loss=0.2492 critic_loss=160713734467.3684 entropy=17.4665 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 23720] reward=-117784150.2 actor_loss=0.3145 critic_loss=154481652895.2889 entropy=17.4717 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 23720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550744.9 mean_steps=13.2
|
|
[Episode 23730] reward=-122004198.7 actor_loss=0.2484 critic_loss=162502786291.8095 entropy=17.4853 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 23740] reward=-121250385.0 actor_loss=0.2305 critic_loss=159326301835.6364 entropy=17.4974 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 23740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574704.3 mean_steps=13.6
|
|
[Episode 23750] reward=-114070639.0 actor_loss=0.3810 critic_loss=151310429835.6364 entropy=17.5034 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 23760] reward=-120206031.4 actor_loss=0.3266 critic_loss=161675639011.5555 entropy=17.4985 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 23760] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-309940.0 mean_steps=18.2
|
|
[Episode 23770] reward=-114686529.0 actor_loss=0.3144 critic_loss=149127722077.0909 entropy=17.4985 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 23780] reward=-117585363.5 actor_loss=0.4119 critic_loss=160531564134.4000 entropy=17.5156 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 23780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397185.1 mean_steps=15.4
|
|
[Episode 23790] reward=-124796452.3 actor_loss=0.2557 critic_loss=169359392256.0000 entropy=17.5003 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 23800] reward=-119036596.9 actor_loss=0.2889 critic_loss=159872404257.3913 entropy=17.4917 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 23800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518329.0 mean_steps=15.1
|
|
[Episode 23810] reward=-124830610.9 actor_loss=0.2564 critic_loss=164443842150.4000 entropy=17.4975 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 23820] reward=-117048865.9 actor_loss=0.2782 critic_loss=150113787576.3200 entropy=17.5003 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 23820] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-666042.6 mean_steps=11.5
|
|
[Episode 23830] reward=-124478724.6 actor_loss=0.1859 critic_loss=168221166933.3333 entropy=17.4994 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 23840] reward=-122290756.4 actor_loss=0.2638 critic_loss=159208423424.0000 entropy=17.4994 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 23840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469722.5 mean_steps=14.1
|
|
[Episode 23850] reward=-116606401.1 actor_loss=0.2553 critic_loss=158983048819.6129 entropy=17.5062 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 23860] reward=-119819385.7 actor_loss=0.3287 critic_loss=160338355497.2903 entropy=17.5022 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 23860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-617216.3 mean_steps=13.9
|
|
[Episode 23870] reward=-114802867.4 actor_loss=0.3593 critic_loss=156000232745.2903 entropy=17.5151 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 23880] reward=-122205086.0 actor_loss=0.3436 critic_loss=164186360490.6667 entropy=17.5229 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 23880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496657.4 mean_steps=14.1
|
|
[Episode 23890] reward=-121086439.2 actor_loss=0.2714 critic_loss=177039182506.6667 entropy=17.5262 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 23900] reward=-122810038.3 actor_loss=0.2422 critic_loss=166581559777.8824 entropy=17.5061 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 23900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429071.6 mean_steps=15.6
|
|
[Episode 23910] reward=-111950776.9 actor_loss=0.2862 critic_loss=150285330139.4286 entropy=17.5133 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 23920] reward=-109514442.4 actor_loss=0.4325 critic_loss=141544382281.9556 entropy=17.5154 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 23920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548698.2 mean_steps=14.2
|
|
[Episode 23930] reward=-123511023.6 actor_loss=0.2223 critic_loss=162725675372.0889 entropy=17.5216 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 23940] reward=-119868650.7 actor_loss=0.2136 critic_loss=166242979748.9778 entropy=17.5079 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 23940] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-384205.5 mean_steps=17.2
|
|
[Episode 23950] reward=-118506335.0 actor_loss=0.2956 critic_loss=153965409426.2857 entropy=17.5247 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 23960] reward=-117896978.0 actor_loss=0.2248 critic_loss=155446319261.5385 entropy=17.5018 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 23960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-545363.4 mean_steps=12.7
|
|
[Episode 23970] reward=-111113199.3 actor_loss=0.3711 critic_loss=147444019456.0000 entropy=17.5164 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 23980] reward=-121767579.7 actor_loss=0.2752 critic_loss=171557929672.3478 entropy=17.5275 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 23980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-610101.6 mean_steps=12.1
|
|
[Episode 23990] reward=-118273825.6 actor_loss=0.2245 critic_loss=158324340443.4286 entropy=17.5309 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 24000] reward=-120137643.1 actor_loss=0.3555 critic_loss=192190061992.5854 entropy=17.5367 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 24000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-577984.2 mean_steps=14.7
|
|
[Episode 24010] reward=-120462692.9 actor_loss=0.3252 critic_loss=182135820503.5789 entropy=17.5362 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 24020] reward=-116641966.8 actor_loss=0.3364 critic_loss=155790749286.4000 entropy=17.5352 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 24020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468150.3 mean_steps=14.9
|
|
[Episode 24030] reward=-119441810.6 actor_loss=0.3194 critic_loss=158239872705.4222 entropy=17.5348 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 24040] reward=-117224877.6 actor_loss=0.2635 critic_loss=153364089514.6667 entropy=17.5276 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 24040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-626203.2 mean_steps=13.7
|
|
[Episode 24050] reward=-116396294.5 actor_loss=0.3775 critic_loss=155219188314.3529 entropy=17.5345 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 24060] reward=-119594895.2 actor_loss=0.3080 critic_loss=159996334592.0000 entropy=17.5399 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 24060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553078.1 mean_steps=13.3
|
|
[Episode 24070] reward=-119941977.8 actor_loss=0.3265 critic_loss=155116647947.3778 entropy=17.5261 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 24080] reward=-119362990.3 actor_loss=0.2573 critic_loss=158512433652.6222 entropy=17.5213 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 24080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415608.5 mean_steps=16.8
|
|
[Episode 24090] reward=-117869051.3 actor_loss=0.4161 critic_loss=151154666882.8445 entropy=17.5290 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 24100] reward=-119554107.6 actor_loss=0.2546 critic_loss=156669974937.6000 entropy=17.5490 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 24100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517555.4 mean_steps=15.7
|
|
[Episode 24110] reward=-113606594.1 actor_loss=0.3292 critic_loss=152036226463.1351 entropy=17.5516 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 24120] reward=-123084069.0 actor_loss=0.1987 critic_loss=161234675513.8065 entropy=17.5617 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 24120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508551.9 mean_steps=14.1
|
|
[Episode 24130] reward=-114960600.8 actor_loss=0.3109 critic_loss=145195460096.0000 entropy=17.5588 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 24140] reward=-119584899.1 actor_loss=0.2809 critic_loss=155969352386.2069 entropy=17.5736 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 24140] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-386075.1 mean_steps=17.2
|
|
[Episode 24150] reward=-115510457.8 actor_loss=0.3486 critic_loss=149866212693.3333 entropy=17.5645 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 24160] reward=-116701957.8 actor_loss=0.3447 critic_loss=157252796416.0000 entropy=17.5444 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 24160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-476127.4 mean_steps=15.9
|
|
[Episode 24170] reward=-124918073.9 actor_loss=0.3605 critic_loss=170204864229.5172 entropy=17.5624 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 24180] reward=-117162746.3 actor_loss=0.3605 critic_loss=156828802486.8571 entropy=17.5612 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 24180] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-262491.8 mean_steps=17.1
|
|
[Episode 24190] reward=-120394474.4 actor_loss=0.3030 critic_loss=161832481414.7368 entropy=17.5804 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 24200] reward=-116631761.8 actor_loss=0.3202 critic_loss=154265727067.0222 entropy=17.5698 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 24200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515644.8 mean_steps=14.2
|
|
[Episode 24210] reward=-119236035.6 actor_loss=0.3634 critic_loss=160114308437.3333 entropy=17.5709 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 24220] reward=-123098966.9 actor_loss=0.2375 critic_loss=165176025998.2222 entropy=17.5775 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 24220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-529727.8 mean_steps=15.2
|
|
[Episode 24230] reward=-112906167.3 actor_loss=0.4273 critic_loss=149533926636.3077 entropy=17.5683 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 24240] reward=-120367011.3 actor_loss=0.3327 critic_loss=158710022144.0000 entropy=17.5740 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 24240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-502837.2 mean_steps=13.5
|
|
[Episode 24250] reward=-118954787.2 actor_loss=0.2272 critic_loss=153938395136.0000 entropy=17.5771 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 24260] reward=-118672949.1 actor_loss=0.3883 critic_loss=157756889497.6000 entropy=17.5529 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 24260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497250.8 mean_steps=14.5
|
|
[Episode 24270] reward=-120430594.5 actor_loss=0.2871 critic_loss=155319005424.9412 entropy=17.5593 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 24280] reward=-117765251.7 actor_loss=0.2913 critic_loss=154111971601.0667 entropy=17.5746 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 24280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542973.4 mean_steps=13.6
|
|
[Episode 24290] reward=-121308379.1 actor_loss=0.2758 critic_loss=166731645690.0465 entropy=17.5642 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 24300] reward=-120563128.2 actor_loss=0.1684 critic_loss=164063908598.5185 entropy=17.5610 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 24300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-546915.1 mean_steps=14.3
|
|
[Episode 24310] reward=-121338898.2 actor_loss=0.3882 critic_loss=164523535892.4800 entropy=17.5693 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 24320] reward=-121022803.5 actor_loss=0.3204 critic_loss=161376775606.8571 entropy=17.5889 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 24320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453903.1 mean_steps=15.5
|
|
[Episode 24330] reward=-114406007.9 actor_loss=0.3910 critic_loss=155556435285.3333 entropy=17.5843 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 24340] reward=-112686488.3 actor_loss=0.4218 critic_loss=148327018222.9333 entropy=17.5886 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 24340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485176.6 mean_steps=14.1
|
|
[Episode 24350] reward=-118936513.5 actor_loss=0.1686 critic_loss=154800929359.6444 entropy=17.5721 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 24360] reward=-116721289.5 actor_loss=0.3047 critic_loss=150513579101.0909 entropy=17.5680 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 24360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447210.9 mean_steps=15.1
|
|
[Episode 24370] reward=-120120231.5 actor_loss=0.3113 critic_loss=161020377575.6190 entropy=17.5816 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 24380] reward=-114406276.6 actor_loss=0.3450 critic_loss=150119550884.9778 entropy=17.5922 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 24380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529964.1 mean_steps=13.6
|
|
[Episode 24390] reward=-117094445.5 actor_loss=0.3747 critic_loss=162691779546.0741 entropy=17.5803 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 24400] reward=-125740075.9 actor_loss=0.1194 critic_loss=165740536320.0000 entropy=17.5876 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 24400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-413848.6 mean_steps=16.6
|
|
[Episode 24410] reward=-120990684.2 actor_loss=0.2123 critic_loss=157191163904.0000 entropy=17.5726 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 24420] reward=-118930204.3 actor_loss=0.3570 critic_loss=158823704478.4762 entropy=17.5774 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 24420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-507327.3 mean_steps=15.8
|
|
[Episode 24430] reward=-118863794.0 actor_loss=0.1600 critic_loss=156507126637.7143 entropy=17.5746 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 24440] reward=-122689549.4 actor_loss=0.2597 critic_loss=160012794760.9302 entropy=17.5834 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 24440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427398.8 mean_steps=15.3
|
|
[Episode 24450] reward=-119644480.2 actor_loss=0.2194 critic_loss=159118364945.0667 entropy=17.5858 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 24460] reward=-116126681.0 actor_loss=0.2377 critic_loss=152607076807.1111 entropy=17.5751 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 24460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-457478.5 mean_steps=14.1
|
|
[Episode 24470] reward=-115822981.8 actor_loss=0.3096 critic_loss=157378605147.0222 entropy=17.5925 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 24480] reward=-114734596.0 actor_loss=0.2121 critic_loss=156910954682.1818 entropy=17.5919 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 24480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-428010.5 mean_steps=16.2
|
|
[Episode 24490] reward=-118176281.2 actor_loss=0.2747 critic_loss=154765632821.5814 entropy=17.6099 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 24500] reward=-113551190.2 actor_loss=0.3304 critic_loss=148380777130.6667 entropy=17.6022 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 24500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425271.0 mean_steps=15.8
|
|
[Episode 24510] reward=-118562369.8 actor_loss=0.2670 critic_loss=152285321216.0000 entropy=17.5950 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 24520] reward=-122554747.3 actor_loss=0.2924 critic_loss=159991633474.7826 entropy=17.6067 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 24520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500144.3 mean_steps=15.2
|
|
[Episode 24530] reward=-120775832.1 actor_loss=0.2932 critic_loss=162593137095.1111 entropy=17.6198 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 24540] reward=-121692668.1 actor_loss=0.2471 critic_loss=164820268694.5882 entropy=17.6244 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 24540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-658401.4 mean_steps=12.4
|
|
[Episode 24550] reward=-122162366.5 actor_loss=0.1893 critic_loss=159012083757.5111 entropy=17.6259 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 24560] reward=-120050808.7 actor_loss=0.2775 critic_loss=159911996451.3103 entropy=17.6330 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 24560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413258.6 mean_steps=15.2
|
|
[Episode 24570] reward=-118847009.0 actor_loss=0.3331 critic_loss=157699884100.2667 entropy=17.6156 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 24580] reward=-121665866.9 actor_loss=0.2672 critic_loss=159729653077.3333 entropy=17.6080 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 24580] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-343389.1 mean_steps=16.6
|
|
[Episode 24590] reward=-119934326.7 actor_loss=0.2270 critic_loss=160902981586.4889 entropy=17.5997 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 24600] reward=-120750341.0 actor_loss=0.2577 critic_loss=156823674631.7576 entropy=17.5939 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 24600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-438648.6 mean_steps=16.6
|
|
[Episode 24610] reward=-117321629.3 actor_loss=0.4398 critic_loss=159268524889.3023 entropy=17.5940 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 24620] reward=-120849932.0 actor_loss=0.2648 critic_loss=157950523255.4667 entropy=17.5911 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 24620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532180.6 mean_steps=13.3
|
|
[Episode 24630] reward=-115910795.4 actor_loss=0.2890 critic_loss=149512479703.0400 entropy=17.5987 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 24640] reward=-119861314.4 actor_loss=0.3164 critic_loss=163035508736.0000 entropy=17.6038 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 24640] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-675400.9 mean_steps=11.4
|
|
[Episode 24650] reward=-120895859.8 actor_loss=0.2988 critic_loss=155631202152.2963 entropy=17.5875 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 24660] reward=-116674140.8 actor_loss=0.3148 critic_loss=149287023616.0000 entropy=17.5871 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 24660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456579.3 mean_steps=14.9
|
|
[Episode 24670] reward=-119793366.1 actor_loss=0.2616 critic_loss=164150504834.8445 entropy=17.5883 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 24680] reward=-117579918.0 actor_loss=0.3386 critic_loss=151898190530.2069 entropy=17.5913 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 24680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474354.8 mean_steps=14.8
|
|
[Episode 24690] reward=-118644863.5 actor_loss=0.2968 critic_loss=160010993664.0000 entropy=17.5774 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 24700] reward=-118303070.5 actor_loss=0.2858 critic_loss=155145999337.2444 entropy=17.5683 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 24700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-447520.4 mean_steps=16.9
|
|
[Episode 24710] reward=-122211274.3 actor_loss=0.2388 critic_loss=162798682112.0000 entropy=17.5672 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 24720] reward=-121405677.7 actor_loss=0.3552 critic_loss=193151899693.5111 entropy=17.5630 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 24720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-375097.5 mean_steps=15.8
|
|
[Episode 24730] reward=-121028603.3 actor_loss=0.2507 critic_loss=158835140853.7600 entropy=17.5708 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 24740] reward=-116221104.6 actor_loss=0.4661 critic_loss=152224251221.3333 entropy=17.5586 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 24740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-360777.6 mean_steps=16.2
|
|
[Episode 24750] reward=-119632513.0 actor_loss=0.3225 critic_loss=156529265019.2592 entropy=17.5635 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 24760] reward=-120298540.4 actor_loss=0.1835 critic_loss=158493311606.1538 entropy=17.5608 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 24760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474810.8 mean_steps=14.8
|
|
[Episode 24770] reward=-119699650.0 actor_loss=0.3536 critic_loss=160808444152.2424 entropy=17.5483 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 24780] reward=-119695607.7 actor_loss=0.2474 critic_loss=156816503490.2069 entropy=17.5588 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 24780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565986.6 mean_steps=13.8
|
|
[Episode 24790] reward=-115293094.4 actor_loss=0.3391 critic_loss=151699483096.6154 entropy=17.5591 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 24800] reward=-121120545.7 actor_loss=0.3175 critic_loss=157175164436.4800 entropy=17.5655 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 24800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-600837.3 mean_steps=12.1
|
|
[Episode 24810] reward=-119346990.5 actor_loss=0.2476 critic_loss=150224235490.7429 entropy=17.5961 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 24820] reward=-122638098.0 actor_loss=0.2622 critic_loss=160331209339.5862 entropy=17.6022 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 24820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-410242.8 mean_steps=17.0
|
|
[Episode 24830] reward=-125272242.3 actor_loss=0.1965 critic_loss=165778336426.6667 entropy=17.5864 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 24840] reward=-119714814.8 actor_loss=0.3016 critic_loss=157659229317.5652 entropy=17.5920 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 24840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397831.2 mean_steps=15.4
|
|
[Episode 24850] reward=-120974402.8 actor_loss=0.3288 critic_loss=160938817859.3684 entropy=17.5835 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 24860] reward=-121662866.3 actor_loss=0.2489 critic_loss=164211016499.2000 entropy=17.5790 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 24860] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-361432.6 mean_steps=17.4
|
|
[Episode 24870] reward=-126748560.8 actor_loss=0.2195 critic_loss=163351021080.3810 entropy=17.5788 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 24880] reward=-121789297.1 actor_loss=0.2566 critic_loss=161020479624.5333 entropy=17.5866 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 24880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416619.0 mean_steps=15.8
|
|
[Episode 24890] reward=-120185929.8 actor_loss=0.2596 critic_loss=154784623274.6667 entropy=17.5851 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 24900] reward=-124052168.4 actor_loss=0.2725 critic_loss=163622728797.0909 entropy=17.5864 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 24900] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-758019.0 mean_steps=10.0
|
|
[Episode 24910] reward=-124675629.7 actor_loss=0.2548 critic_loss=165384522020.5714 entropy=17.5742 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 24920] reward=-119514412.5 actor_loss=0.2487 critic_loss=158962358998.7097 entropy=17.5805 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 24920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-424590.3 mean_steps=16.4
|
|
[Episode 24930] reward=-120281717.2 actor_loss=0.2933 critic_loss=157391825942.7556 entropy=17.5677 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 24940] reward=-119476636.9 actor_loss=0.2867 critic_loss=152918645800.9600 entropy=17.5657 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 24940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568099.9 mean_steps=13.7
|
|
[Episode 24950] reward=-121230230.9 actor_loss=0.3073 critic_loss=164646367547.0769 entropy=17.5636 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 24960] reward=-114166272.9 actor_loss=0.3679 critic_loss=150411751318.9744 entropy=17.5399 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 24960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481895.0 mean_steps=15.1
|
|
[Episode 24970] reward=-115983002.2 actor_loss=0.2916 critic_loss=152378856945.3714 entropy=17.5214 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 24980] reward=-115855586.1 actor_loss=0.3563 critic_loss=154379872665.6000 entropy=17.5187 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 24980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485920.6 mean_steps=14.2
|
|
[Episode 24990] reward=-120077248.4 actor_loss=0.2976 critic_loss=166262409323.7895 entropy=17.5232 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 25000] reward=-121636717.8 actor_loss=0.3804 critic_loss=158922832164.5714 entropy=17.5208 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 25000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493475.1 mean_steps=15.2
|
|
[Episode 25010] reward=-119133028.4 actor_loss=0.3520 critic_loss=154725080905.9556 entropy=17.5144 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 25020] reward=-119902346.6 actor_loss=0.2928 critic_loss=173372108526.9333 entropy=17.5056 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 25020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-306870.5 mean_steps=16.8
|
|
[Episode 25030] reward=-122757039.4 actor_loss=0.2681 critic_loss=158148748585.2903 entropy=17.5045 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 25040] reward=-113866918.9 actor_loss=0.2884 critic_loss=146737667739.8261 entropy=17.4964 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 25040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506459.6 mean_steps=14.4
|
|
[Episode 25050] reward=-118014720.3 actor_loss=0.2032 critic_loss=153046336000.0000 entropy=17.4969 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 25060] reward=-121107531.5 actor_loss=0.3067 critic_loss=158933046325.8947 entropy=17.4891 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 25060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443314.3 mean_steps=15.4
|
|
[Episode 25070] reward=-117440666.7 actor_loss=0.3337 critic_loss=153748309392.6956 entropy=17.4781 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 25080] reward=-118432994.0 actor_loss=0.3335 critic_loss=158827544576.0000 entropy=17.4866 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 25080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428602.5 mean_steps=15.0
|
|
[Episode 25090] reward=-121637447.3 actor_loss=0.2597 critic_loss=156238760025.0435 entropy=17.4934 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 25100] reward=-124299770.7 actor_loss=0.2393 critic_loss=166928595899.7333 entropy=17.4856 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 25100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417188.5 mean_steps=15.3
|
|
[Episode 25110] reward=-113751309.5 actor_loss=0.4024 critic_loss=151415659633.7778 entropy=17.4742 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 25120] reward=-121412322.1 actor_loss=0.3111 critic_loss=166423872658.2857 entropy=17.4808 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 25120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532429.0 mean_steps=14.4
|
|
[Episode 25130] reward=-112757763.3 actor_loss=0.2742 critic_loss=146744883791.6444 entropy=17.4799 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 25140] reward=-118425107.6 actor_loss=0.2727 critic_loss=154988800682.6667 entropy=17.4825 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 25140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-526163.1 mean_steps=14.3
|
|
[Episode 25150] reward=-117430349.1 actor_loss=0.3355 critic_loss=150076939013.6889 entropy=17.4993 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 25160] reward=-118696903.1 actor_loss=0.2648 critic_loss=155550061621.8947 entropy=17.4850 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 25160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-497567.0 mean_steps=13.3
|
|
[Episode 25170] reward=-116990326.6 actor_loss=0.3737 critic_loss=156530440005.8182 entropy=17.4992 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 25180] reward=-116985192.9 actor_loss=0.2417 critic_loss=148958733653.3333 entropy=17.5080 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 25180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-487760.9 mean_steps=15.6
|
|
[Episode 25190] reward=-121050307.7 actor_loss=0.3219 critic_loss=154695931828.1482 entropy=17.5221 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 25200] reward=-121164734.5 actor_loss=0.3829 critic_loss=162883133767.6800 entropy=17.5208 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 25200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539657.0 mean_steps=14.4
|
|
[Episode 25210] reward=-119745973.1 actor_loss=0.1826 critic_loss=151743735053.4737 entropy=17.5287 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 25220] reward=-124633490.1 actor_loss=0.2134 critic_loss=168321406645.6774 entropy=17.5250 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 25220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-393851.9 mean_steps=15.2
|
|
[Episode 25230] reward=-119888933.7 actor_loss=0.3372 critic_loss=156246723530.1053 entropy=17.5208 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 25240] reward=-120873934.5 actor_loss=0.3303 critic_loss=159612514838.2609 entropy=17.5140 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 25240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502729.2 mean_steps=15.2
|
|
[Episode 25250] reward=-123614207.8 actor_loss=0.3361 critic_loss=161240286276.2667 entropy=17.4994 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 25260] reward=-114711602.8 actor_loss=0.2874 critic_loss=146399674864.4849 entropy=17.5013 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 25260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409234.4 mean_steps=15.2
|
|
[Episode 25270] reward=-118380855.9 actor_loss=0.3273 critic_loss=174622989516.8000 entropy=17.5057 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 25280] reward=-122650624.8 actor_loss=0.2254 critic_loss=166651749096.7273 entropy=17.4983 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 25280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-342475.6 mean_steps=16.9
|
|
[Episode 25290] reward=-120574972.0 actor_loss=0.3692 critic_loss=167393442977.6842 entropy=17.5132 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 25300] reward=-118073882.5 actor_loss=0.3724 critic_loss=156936574530.7826 entropy=17.5131 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 25300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476283.5 mean_steps=15.2
|
|
[Episode 25310] reward=-119207890.5 actor_loss=0.4118 critic_loss=166764778460.6897 entropy=17.5090 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 25320] reward=-115752432.1 actor_loss=0.4122 critic_loss=151912255324.1600 entropy=17.5134 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 25320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-608505.5 mean_steps=13.7
|
|
[Episode 25330] reward=-118900201.9 actor_loss=0.2531 critic_loss=157028941824.0000 entropy=17.5121 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 25340] reward=-115527934.9 actor_loss=0.3915 critic_loss=152890800038.9565 entropy=17.5108 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 25340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-427788.0 mean_steps=15.4
|
|
[Episode 25350] reward=-120359898.5 actor_loss=0.1739 critic_loss=160316607083.1628 entropy=17.5147 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 25360] reward=-117365277.8 actor_loss=0.3216 critic_loss=149098823248.8421 entropy=17.5227 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 25360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-606211.7 mean_steps=14.0
|
|
[Episode 25370] reward=-122780672.0 actor_loss=0.1736 critic_loss=155514985006.5454 entropy=17.5274 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 25380] reward=-118180356.6 actor_loss=0.3232 critic_loss=152668837361.3714 entropy=17.5213 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 25380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444468.4 mean_steps=15.5
|
|
[Episode 25390] reward=-117439135.9 actor_loss=0.2510 critic_loss=153184345784.3200 entropy=17.5329 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 25400] reward=-118152687.0 actor_loss=0.3290 critic_loss=156886936234.6667 entropy=17.5439 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 25400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539629.3 mean_steps=13.3
|
|
[Episode 25410] reward=-117220187.0 actor_loss=0.1687 critic_loss=152972610638.7692 entropy=17.5393 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 25420] reward=-125811581.9 actor_loss=0.1986 critic_loss=163342324203.5200 entropy=17.5465 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 25420] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-348066.6 mean_steps=18.0
|
|
[Episode 25430] reward=-121069640.7 actor_loss=0.3202 critic_loss=158869476903.3846 entropy=17.5704 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 25440] reward=-119153578.2 actor_loss=0.3064 critic_loss=158085172302.7692 entropy=17.5863 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 25440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-411760.5 mean_steps=16.5
|
|
[Episode 25450] reward=-120347629.4 actor_loss=0.1542 critic_loss=164790777976.4706 entropy=17.5830 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 25460] reward=-120162817.0 actor_loss=0.2654 critic_loss=162527146299.0769 entropy=17.5765 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 25460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561039.5 mean_steps=13.7
|
|
[Episode 25470] reward=-124935603.1 actor_loss=0.2413 critic_loss=162068652942.2222 entropy=17.5986 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 25480] reward=-117670092.0 actor_loss=0.3702 critic_loss=163925568418.9091 entropy=17.5876 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 25480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-570624.8 mean_steps=12.7
|
|
[Episode 25490] reward=-122896314.2 actor_loss=0.2852 critic_loss=160386456616.9600 entropy=17.5845 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 25500] reward=-123838604.6 actor_loss=0.1856 critic_loss=162065824593.1707 entropy=17.5865 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 25500] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-370410.7 mean_steps=17.1
|
|
[Episode 25510] reward=-121082883.9 actor_loss=0.4157 critic_loss=158382291431.6190 entropy=17.5875 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 25520] reward=-124196524.4 actor_loss=0.3181 critic_loss=165573411840.0000 entropy=17.5874 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 25520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463835.5 mean_steps=15.7
|
|
[Episode 25530] reward=-123052647.9 actor_loss=0.2549 critic_loss=158514095718.4000 entropy=17.5940 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 25540] reward=-121633146.1 actor_loss=0.2401 critic_loss=157291884784.9412 entropy=17.5978 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 25540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-330265.7 mean_steps=15.8
|
|
[Episode 25550] reward=-120740314.6 actor_loss=0.2206 critic_loss=158762627072.0000 entropy=17.5996 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 25560] reward=-120569014.8 actor_loss=0.2541 critic_loss=157914056380.6316 entropy=17.5996 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 25560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-464829.5 mean_steps=15.5
|
|
[Episode 25570] reward=-119893057.8 actor_loss=0.2827 critic_loss=162753348190.8148 entropy=17.5945 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 25580] reward=-126570323.1 actor_loss=0.3076 critic_loss=169413158138.3111 entropy=17.6014 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 25580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504601.9 mean_steps=15.6
|
|
[Episode 25590] reward=-113647721.5 actor_loss=0.3562 critic_loss=146291370302.5778 entropy=17.5860 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 25600] reward=-118169452.0 actor_loss=0.2796 critic_loss=152306989332.7567 entropy=17.5818 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 25600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471997.5 mean_steps=14.1
|
|
[Episode 25610] reward=-117976927.7 actor_loss=0.2322 critic_loss=154762524052.8372 entropy=17.5829 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 25620] reward=-118861613.4 actor_loss=0.1420 critic_loss=154031238299.1515 entropy=17.5935 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Eval 25620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441587.6 mean_steps=14.6
|
|
[Episode 25630] reward=-124058567.2 actor_loss=0.1864 critic_loss=162364224759.1724 entropy=17.5942 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 25640] reward=-119073164.9 actor_loss=0.3710 critic_loss=160510248401.4546 entropy=17.5868 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 25640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457883.0 mean_steps=15.8
|
|
[Episode 25650] reward=-119490137.4 actor_loss=0.2909 critic_loss=157431527833.6000 entropy=17.5892 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 25660] reward=-121390360.1 actor_loss=0.2791 critic_loss=156809566406.1935 entropy=17.6032 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 25660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551161.6 mean_steps=12.5
|
|
[Episode 25670] reward=-117590829.3 actor_loss=0.2875 critic_loss=159033011222.7556 entropy=17.6028 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 25680] reward=-121036917.5 actor_loss=0.2510 critic_loss=158270812615.1111 entropy=17.5895 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 25680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-634126.3 mean_steps=12.1
|
|
[Episode 25690] reward=-119429213.2 actor_loss=0.2933 critic_loss=153808238819.5555 entropy=17.5937 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 25700] reward=-115885910.8 actor_loss=0.3685 critic_loss=155239219655.1111 entropy=17.5992 approx_kl=0.0115 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 25700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480846.5 mean_steps=15.0
|
|
[Episode 25710] reward=-120738729.1 actor_loss=0.2630 critic_loss=166427482004.2105 entropy=17.5937 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 25720] reward=-122364073.7 actor_loss=0.1947 critic_loss=158336857247.2889 entropy=17.5938 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 25720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-606365.1 mean_steps=12.8
|
|
[Episode 25730] reward=-121280819.8 actor_loss=0.3015 critic_loss=162989040857.2121 entropy=17.5891 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 25740] reward=-121083797.2 actor_loss=0.3018 critic_loss=154957423957.3333 entropy=17.5953 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 25740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505956.3 mean_steps=14.2
|
|
[Episode 25750] reward=-120846851.4 actor_loss=0.2468 critic_loss=160599326288.8421 entropy=17.5992 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 25760] reward=-119558757.3 actor_loss=0.3086 critic_loss=157186634183.1111 entropy=17.5848 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 25760] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-290318.3 mean_steps=17.4
|
|
[Episode 25770] reward=-115277544.3 actor_loss=0.2800 critic_loss=154881037890.7826 entropy=17.5808 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 25780] reward=-114460442.3 actor_loss=0.3212 critic_loss=149089262913.8286 entropy=17.5731 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 25780] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-249905.5 mean_steps=18.3
|
|
[Episode 25790] reward=-122027185.0 actor_loss=0.3099 critic_loss=161316614235.0222 entropy=17.5592 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 25800] reward=-121886151.8 actor_loss=0.3658 critic_loss=158050107938.1333 entropy=17.5606 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 25800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441894.4 mean_steps=14.4
|
|
[Episode 25810] reward=-115424228.4 actor_loss=0.3001 critic_loss=149045902367.0303 entropy=17.5526 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 25820] reward=-122649413.1 actor_loss=0.2895 critic_loss=159042905338.3111 entropy=17.5570 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 25820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533401.0 mean_steps=13.2
|
|
[Episode 25830] reward=-118763654.2 actor_loss=0.3188 critic_loss=155583554796.3077 entropy=17.5762 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 25840] reward=-118774969.9 actor_loss=0.1551 critic_loss=150741201351.1111 entropy=17.5689 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 25840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437187.9 mean_steps=14.3
|
|
[Episode 25850] reward=-119232783.8 actor_loss=0.3279 critic_loss=159369534857.8462 entropy=17.5733 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 25860] reward=-120723179.6 actor_loss=0.3304 critic_loss=156430943768.3810 entropy=17.5702 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 25860] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-381696.2 mean_steps=17.1
|
|
[Episode 25870] reward=-126538895.7 actor_loss=0.2664 critic_loss=175966986240.0000 entropy=17.5687 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 25880] reward=-118294772.3 actor_loss=0.2163 critic_loss=153489428658.0869 entropy=17.5744 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 25880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-605987.3 mean_steps=13.8
|
|
[Episode 25890] reward=-123423824.4 actor_loss=0.2426 critic_loss=166752600064.0000 entropy=17.5825 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 25900] reward=-118236084.4 actor_loss=0.2511 critic_loss=154459655036.7180 entropy=17.5800 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 25900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-544697.4 mean_steps=15.2
|
|
[Episode 25910] reward=-112817665.0 actor_loss=0.1899 critic_loss=149608688142.6286 entropy=17.5609 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 25920] reward=-115613455.2 actor_loss=0.3514 critic_loss=149887953797.1200 entropy=17.5537 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 25920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417626.5 mean_steps=15.7
|
|
[Episode 25930] reward=-120585517.0 actor_loss=0.3148 critic_loss=159039679926.8571 entropy=17.5474 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 25940] reward=-117545573.4 actor_loss=0.3497 critic_loss=154970406001.7778 entropy=17.5405 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 25940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390464.0 mean_steps=15.4
|
|
[Episode 25950] reward=-120139567.0 actor_loss=0.2518 critic_loss=156902008422.4000 entropy=17.5504 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 25960] reward=-114159885.2 actor_loss=0.3841 critic_loss=149863883571.2000 entropy=17.5607 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 25960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523549.5 mean_steps=13.2
|
|
[Episode 25970] reward=-122022321.7 actor_loss=0.2686 critic_loss=156035118211.2820 entropy=17.5741 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 25980] reward=-116633355.7 actor_loss=0.2826 critic_loss=154600609889.5238 entropy=17.5933 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 25980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-379097.6 mean_steps=14.9
|
|
[Episode 25990] reward=-125186598.3 actor_loss=0.2620 critic_loss=165990335757.4737 entropy=17.5939 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 26000] reward=-116743554.0 actor_loss=0.2340 critic_loss=154097521012.3636 entropy=17.5801 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 26000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535779.8 mean_steps=14.4
|
|
[Episode 26010] reward=-119317038.0 actor_loss=0.1845 critic_loss=156625927695.5151 entropy=17.5794 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 26020] reward=-115555050.6 actor_loss=0.3292 critic_loss=150299744376.4706 entropy=17.5738 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 26020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-436700.6 mean_steps=14.8
|
|
[Episode 26030] reward=-119119512.0 actor_loss=0.3029 critic_loss=161091936737.8824 entropy=17.5730 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 26040] reward=-126405913.0 actor_loss=0.1770 critic_loss=284730509312.0000 entropy=17.5732 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 26040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448808.3 mean_steps=14.9
|
|
[Episode 26050] reward=-124717526.0 actor_loss=0.3086 critic_loss=275193719193.6000 entropy=17.5769 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 26060] reward=-117719189.2 actor_loss=0.2504 critic_loss=150623383236.9231 entropy=17.5780 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 26060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-416437.6 mean_steps=14.4
|
|
[Episode 26070] reward=-122558582.5 actor_loss=0.1233 critic_loss=162591318343.6800 entropy=17.5799 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 26080] reward=-115888285.0 actor_loss=0.4093 critic_loss=152753263616.0000 entropy=17.5884 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 26080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453682.6 mean_steps=14.7
|
|
[Episode 26090] reward=-117825554.4 actor_loss=0.2032 critic_loss=154535873740.8000 entropy=17.5868 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 26100] reward=-114442589.3 actor_loss=0.3319 critic_loss=146799127853.1765 entropy=17.5849 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 26100] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-578178.3 mean_steps=11.9
|
|
[Episode 26110] reward=-110744730.8 actor_loss=0.2778 critic_loss=143083277312.0000 entropy=17.5958 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 26120] reward=-116003206.5 actor_loss=0.4162 critic_loss=149194921472.0000 entropy=17.6059 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 26120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489825.1 mean_steps=14.8
|
|
[Episode 26130] reward=-123062444.5 actor_loss=0.2999 critic_loss=160497060942.7692 entropy=17.6213 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 26140] reward=-120943239.0 actor_loss=0.1907 critic_loss=161156237498.1818 entropy=17.6241 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 26140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-453453.0 mean_steps=16.8
|
|
[Episode 26150] reward=-121147810.1 actor_loss=0.2895 critic_loss=160546161900.3077 entropy=17.6236 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 26160] reward=-121467459.3 actor_loss=0.2940 critic_loss=159225661319.5294 entropy=17.6382 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 26160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-412761.6 mean_steps=16.4
|
|
[Episode 26170] reward=-115132838.5 actor_loss=0.3028 critic_loss=185107452416.0000 entropy=17.6209 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 26180] reward=-110921393.1 actor_loss=0.3448 critic_loss=142653533561.2632 entropy=17.6079 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 26180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-560706.0 mean_steps=15.2
|
|
[Episode 26190] reward=-120122811.0 actor_loss=0.3809 critic_loss=154797480742.7879 entropy=17.6121 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 26200] reward=-117431268.8 actor_loss=0.3065 critic_loss=152776964006.9565 entropy=17.6130 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 26200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602418.3 mean_steps=12.8
|
|
[Episode 26210] reward=-114136700.6 actor_loss=0.2789 critic_loss=141999900262.4000 entropy=17.6099 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 26220] reward=-124762908.1 actor_loss=0.2229 critic_loss=160390449561.6000 entropy=17.6110 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 26220] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-678117.9 mean_steps=12.6
|
|
[Episode 26230] reward=-120027684.8 actor_loss=0.4283 critic_loss=161525338020.9778 entropy=17.6244 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 26240] reward=-116307210.0 actor_loss=0.3049 critic_loss=151696160194.5600 entropy=17.6311 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 26240] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-201028.7 mean_steps=18.9
|
|
[Episode 26250] reward=-124384290.7 actor_loss=0.3207 critic_loss=163445878605.9131 entropy=17.6268 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 26260] reward=-119523334.7 actor_loss=0.2643 critic_loss=159519570147.5555 entropy=17.6270 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 26260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445652.1 mean_steps=16.1
|
|
[Episode 26270] reward=-120692367.3 actor_loss=0.2776 critic_loss=160718686374.0540 entropy=17.6383 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 26280] reward=-119221981.0 actor_loss=0.2370 critic_loss=156983731814.4000 entropy=17.6400 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 26280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-377195.8 mean_steps=17.4
|
|
[Episode 26290] reward=-116944606.3 actor_loss=0.2786 critic_loss=156650616989.5385 entropy=17.6515 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 26300] reward=-114066473.1 actor_loss=0.3036 critic_loss=148250738688.0000 entropy=17.6682 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 26300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590270.5 mean_steps=14.1
|
|
[Episode 26310] reward=-119510309.9 actor_loss=0.2562 critic_loss=152839513338.3111 entropy=17.6839 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 26320] reward=-107965789.2 actor_loss=0.4922 critic_loss=142963247962.8387 entropy=17.6682 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 26320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624884.3 mean_steps=13.6
|
|
[Episode 26330] reward=-123107075.1 actor_loss=0.2608 critic_loss=160948663455.2889 entropy=17.6670 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 26340] reward=-117162481.1 actor_loss=0.2684 critic_loss=152088154290.0869 entropy=17.6900 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 26340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-600739.1 mean_steps=12.7
|
|
[Episode 26350] reward=-119819873.6 actor_loss=0.2223 critic_loss=159094371487.2889 entropy=17.6778 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 26360] reward=-114761974.2 actor_loss=0.3150 critic_loss=150360175775.2889 entropy=17.6815 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 26360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548675.1 mean_steps=13.6
|
|
[Episode 26370] reward=-124224947.3 actor_loss=0.3209 critic_loss=166751154080.7442 entropy=17.6749 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 26380] reward=-113185823.0 actor_loss=0.3051 critic_loss=145444724536.1951 entropy=17.6718 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 26380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-396335.3 mean_steps=16.4
|
|
[Episode 26390] reward=-111102064.6 actor_loss=0.2364 critic_loss=144026729235.6923 entropy=17.6746 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 26400] reward=-121642898.8 actor_loss=0.3722 critic_loss=160206979072.0000 entropy=17.6674 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 26400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-405006.8 mean_steps=16.1
|
|
[Episode 26410] reward=-120185349.3 actor_loss=0.4282 critic_loss=163359855957.3333 entropy=17.6688 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 26420] reward=-116489177.8 actor_loss=0.2954 critic_loss=155210541443.4595 entropy=17.6630 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 26420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475457.0 mean_steps=13.9
|
|
[Episode 26430] reward=-119622952.3 actor_loss=0.3663 critic_loss=155371046288.6956 entropy=17.6547 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 26440] reward=-121998111.5 actor_loss=0.2233 critic_loss=156073855162.1818 entropy=17.6553 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 26440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549101.9 mean_steps=14.2
|
|
[Episode 26450] reward=-116014540.7 actor_loss=0.2665 critic_loss=152918947157.3333 entropy=17.6766 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 26460] reward=-117279343.1 actor_loss=0.3012 critic_loss=154981140616.5333 entropy=17.6784 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 26460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446640.8 mean_steps=15.8
|
|
[Episode 26470] reward=-116283145.8 actor_loss=1.4696 critic_loss=160865672305.7778 entropy=17.6718 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 26480] reward=-120360891.8 actor_loss=0.2511 critic_loss=157783696998.4000 entropy=17.6660 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 26480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418833.3 mean_steps=15.5
|
|
[Episode 26490] reward=-123195751.0 actor_loss=0.3184 critic_loss=162478978311.3143 entropy=17.6695 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 26500] reward=-123086362.6 actor_loss=0.2475 critic_loss=163404573961.4815 entropy=17.6712 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 26500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487080.5 mean_steps=15.2
|
|
[Episode 26510] reward=-125405848.0 actor_loss=0.3331 critic_loss=169377305161.1429 entropy=17.6599 approx_kl=0.0116 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 26520] reward=-117724217.8 actor_loss=0.3365 critic_loss=159190431243.3778 entropy=17.6613 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 26520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504779.9 mean_steps=15.1
|
|
[Episode 26530] reward=-120554607.1 actor_loss=0.2232 critic_loss=153528827904.0000 entropy=17.6717 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 26540] reward=-118848236.1 actor_loss=0.3524 critic_loss=154119672698.4348 entropy=17.6667 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 26540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524934.2 mean_steps=14.4
|
|
[Episode 26550] reward=-118196664.0 actor_loss=0.3002 critic_loss=157163792226.4615 entropy=17.6610 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 26560] reward=-124016996.2 actor_loss=0.3144 critic_loss=166962566197.8947 entropy=17.6728 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 26560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-510109.4 mean_steps=15.3
|
|
[Episode 26570] reward=-121782061.9 actor_loss=0.2819 critic_loss=164372326845.2174 entropy=17.6759 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 26580] reward=-125407676.2 actor_loss=0.2502 critic_loss=168848017115.4286 entropy=17.6616 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 26580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602166.0 mean_steps=13.4
|
|
[Episode 26590] reward=-117127384.1 actor_loss=0.2880 critic_loss=146683880407.0400 entropy=17.6635 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 26600] reward=-115543550.3 actor_loss=0.3864 critic_loss=148279538910.6087 entropy=17.6627 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 26600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515262.3 mean_steps=14.5
|
|
[Episode 26610] reward=-122960391.5 actor_loss=0.2125 critic_loss=161132837091.5555 entropy=17.6591 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 26620] reward=-120292837.3 actor_loss=0.2851 critic_loss=158382631594.6667 entropy=17.6707 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 26620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-396643.3 mean_steps=16.6
|
|
[Episode 26630] reward=-123437857.6 actor_loss=0.2448 critic_loss=163990960537.6000 entropy=17.6905 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 26640] reward=-123211678.8 actor_loss=0.2806 critic_loss=164479986565.1200 entropy=17.7080 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 26640] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-388043.1 mean_steps=16.6
|
|
[Episode 26650] reward=-115402127.1 actor_loss=0.3175 critic_loss=154393165111.6522 entropy=17.7111 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 26660] reward=-119202793.2 actor_loss=0.3310 critic_loss=163053291362.4615 entropy=17.6884 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 26660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573655.7 mean_steps=13.7
|
|
[Episode 26670] reward=-123104267.7 actor_loss=0.3575 critic_loss=169568297807.4483 entropy=17.6875 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 26680] reward=-123078489.7 actor_loss=0.1911 critic_loss=173483762328.2162 entropy=17.6975 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 26680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-411169.3 mean_steps=15.4
|
|
[Episode 26690] reward=-118434283.1 actor_loss=0.3484 critic_loss=160821496490.6667 entropy=17.6939 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 26700] reward=-116449361.4 actor_loss=0.3441 critic_loss=161172527396.5714 entropy=17.6928 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 26700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-472084.8 mean_steps=13.9
|
|
[Episode 26710] reward=-119766815.7 actor_loss=0.2920 critic_loss=161221290861.7143 entropy=17.7054 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 26720] reward=-120481976.5 actor_loss=0.2208 critic_loss=156823047545.2632 entropy=17.7040 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 26720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-419233.9 mean_steps=16.8
|
|
[Episode 26730] reward=-118374797.8 actor_loss=0.2445 critic_loss=156605461065.1429 entropy=17.7164 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 26740] reward=-112156287.1 actor_loss=0.2409 critic_loss=147227191796.6222 entropy=17.7132 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 26740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-642288.9 mean_steps=12.2
|
|
[Episode 26750] reward=-123225681.4 actor_loss=0.2678 critic_loss=164961945600.0000 entropy=17.7069 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 26760] reward=-120303590.0 actor_loss=0.3053 critic_loss=160602228053.3333 entropy=17.7118 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 26760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394337.8 mean_steps=16.4
|
|
[Episode 26770] reward=-120028172.5 actor_loss=0.2268 critic_loss=158895293976.3810 entropy=17.7300 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 26780] reward=-120193528.8 actor_loss=0.2841 critic_loss=155846934528.0000 entropy=17.7266 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 26780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-468121.8 mean_steps=15.7
|
|
[Episode 26790] reward=-117736352.4 actor_loss=0.2826 critic_loss=161196744347.8261 entropy=17.7323 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 26800] reward=-115442418.0 actor_loss=0.3422 critic_loss=148914047658.6667 entropy=17.7365 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 26800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-530526.3 mean_steps=14.9
|
|
[Episode 26810] reward=-119486769.6 actor_loss=0.2715 critic_loss=156877800734.7200 entropy=17.7340 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 26820] reward=-122677876.8 actor_loss=0.2993 critic_loss=160823126846.2703 entropy=17.7271 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 26820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-368696.2 mean_steps=16.9
|
|
[Episode 26830] reward=-115343808.2 actor_loss=0.2671 critic_loss=151724756536.8889 entropy=17.7447 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 26840] reward=-122900281.9 actor_loss=0.2648 critic_loss=161113589005.4737 entropy=17.7392 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 26840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-443152.3 mean_steps=17.1
|
|
[Episode 26850] reward=-115780838.4 actor_loss=0.3821 critic_loss=158751978207.1795 entropy=17.7135 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 26860] reward=-124208509.1 actor_loss=0.2794 critic_loss=167904673185.1852 entropy=17.7292 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 26860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-551513.9 mean_steps=14.2
|
|
[Episode 26870] reward=-120762322.2 actor_loss=0.3716 critic_loss=160413116734.5778 entropy=17.7211 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 26880] reward=-117009956.3 actor_loss=0.2326 critic_loss=159344106536.9600 entropy=17.7178 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 26880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469840.2 mean_steps=13.8
|
|
[Episode 26890] reward=-121191525.5 actor_loss=0.2390 critic_loss=159093330235.0769 entropy=17.7263 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 26900] reward=-120241015.6 actor_loss=0.2626 critic_loss=152253605252.4138 entropy=17.7256 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 26900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-351768.4 mean_steps=15.9
|
|
[Episode 26910] reward=-127814248.1 actor_loss=0.1804 critic_loss=178137171324.3429 entropy=17.7295 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 26920] reward=-125117111.3 actor_loss=0.3098 critic_loss=172737531588.9231 entropy=17.7230 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 26920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-376716.5 mean_steps=16.6
|
|
[Episode 26930] reward=-117856206.1 actor_loss=0.2830 critic_loss=157155635785.1429 entropy=17.7289 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 26940] reward=-118378987.4 actor_loss=0.3353 critic_loss=155428154709.3333 entropy=17.7296 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 26940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-483661.1 mean_steps=15.9
|
|
[Episode 26950] reward=-121501197.4 actor_loss=0.3449 critic_loss=164835702647.4667 entropy=17.7372 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 26960] reward=-119042464.7 actor_loss=0.2491 critic_loss=152488131615.0303 entropy=17.7266 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 26960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556173.6 mean_steps=13.7
|
|
[Episode 26970] reward=-116595732.9 actor_loss=0.3599 critic_loss=158665487397.9259 entropy=17.7381 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 26980] reward=-117741328.1 actor_loss=0.2577 critic_loss=157283998896.5517 entropy=17.7420 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 26980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-404685.5 mean_steps=15.6
|
|
[Episode 26990] reward=-120823992.4 actor_loss=0.2972 critic_loss=160480744061.1555 entropy=17.7345 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 27000] reward=-123341022.7 actor_loss=0.1955 critic_loss=165045159568.4102 entropy=17.7391 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 27000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462711.9 mean_steps=16.4
|
|
[Episode 27010] reward=-117146273.3 actor_loss=0.3417 critic_loss=155302653291.3548 entropy=17.7516 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 27020] reward=-125580898.5 actor_loss=0.2060 critic_loss=170044377626.9474 entropy=17.7598 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 27020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-372900.6 mean_steps=15.4
|
|
[Episode 27030] reward=-118987228.1 actor_loss=0.3174 critic_loss=160152389278.8965 entropy=17.7665 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 27040] reward=-122862939.1 actor_loss=0.3241 critic_loss=159258497969.2308 entropy=17.7789 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 27040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525161.0 mean_steps=14.2
|
|
[Episode 27050] reward=-117936062.8 actor_loss=0.3568 critic_loss=156285221782.0690 entropy=17.7843 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 27060] reward=-121752571.6 actor_loss=0.1855 critic_loss=164052702003.2000 entropy=17.7830 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 27060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457228.4 mean_steps=15.0
|
|
[Episode 27070] reward=-123429459.1 actor_loss=0.2334 critic_loss=202547439518.4762 entropy=17.7801 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 27080] reward=-123492199.2 actor_loss=0.3241 critic_loss=381420988006.4000 entropy=17.7777 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 27080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542816.0 mean_steps=13.3
|
|
[Episode 27090] reward=-117909184.9 actor_loss=0.3261 critic_loss=168158636168.5333 entropy=17.7638 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 27100] reward=-120526194.7 actor_loss=0.2245 critic_loss=262274873967.3044 entropy=17.7756 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 27100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401697.7 mean_steps=16.3
|
|
[Episode 27110] reward=-117389443.6 actor_loss=0.2630 critic_loss=152349509404.4445 entropy=17.7612 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 27120] reward=-112944927.0 actor_loss=0.3812 critic_loss=148408351920.5517 entropy=17.7491 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 27120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-601414.1 mean_steps=12.2
|
|
[Episode 27130] reward=-120277569.2 actor_loss=0.2545 critic_loss=157146573630.2703 entropy=17.7286 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 27140] reward=-118183415.1 actor_loss=0.3845 critic_loss=153495154050.8445 entropy=17.7026 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 27140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-364812.1 mean_steps=16.1
|
|
[Episode 27150] reward=-118018804.2 actor_loss=0.3181 critic_loss=161415259022.2222 entropy=17.7002 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 27160] reward=-117172096.2 actor_loss=0.3630 critic_loss=152400709924.5714 entropy=17.7106 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 27160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467403.2 mean_steps=14.9
|
|
[Episode 27170] reward=-115595031.2 actor_loss=0.3937 critic_loss=153374740388.9778 entropy=17.7284 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 27180] reward=-118666810.3 actor_loss=0.2366 critic_loss=155835218273.1035 entropy=17.7414 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 27180] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-339012.6 mean_steps=16.8
|
|
[Episode 27190] reward=-122622213.7 actor_loss=0.2786 critic_loss=159679216065.5610 entropy=17.7303 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 27200] reward=-117805424.5 actor_loss=0.3323 critic_loss=155639788885.3333 entropy=17.7413 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 27200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463717.4 mean_steps=16.1
|
|
[Episode 27210] reward=-117147487.8 actor_loss=0.2861 critic_loss=175437111113.9556 entropy=17.7427 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 27220] reward=-122679942.3 actor_loss=0.2311 critic_loss=171537268736.0000 entropy=17.7350 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 27220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450835.9 mean_steps=15.1
|
|
[Episode 27230] reward=-115224308.5 actor_loss=0.3492 critic_loss=154002895793.2308 entropy=17.7307 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 27240] reward=-126327742.8 actor_loss=0.3375 critic_loss=170058389549.5111 entropy=17.7383 approx_kl=0.0115 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 27240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577206.5 mean_steps=13.7
|
|
[Episode 27250] reward=-117448170.2 actor_loss=0.4207 critic_loss=156830692050.8235 entropy=17.7263 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 27260] reward=-123157914.9 actor_loss=0.2807 critic_loss=159786518664.5333 entropy=17.7416 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 27260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-473709.9 mean_steps=15.9
|
|
[Episode 27270] reward=-122636648.4 actor_loss=0.2836 critic_loss=161804290548.6222 entropy=17.7484 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 27280] reward=-121442630.0 actor_loss=0.3588 critic_loss=160512207530.6667 entropy=17.7400 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 27280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438514.8 mean_steps=14.4
|
|
[Episode 27290] reward=-111551769.5 actor_loss=0.3653 critic_loss=145826174293.3333 entropy=17.7457 approx_kl=0.0051 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 27300] reward=-120058282.8 actor_loss=0.2440 critic_loss=159387902498.1333 entropy=17.7700 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 27300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494117.4 mean_steps=13.8
|
|
[Episode 27310] reward=-118135592.9 actor_loss=0.3303 critic_loss=156552936015.6444 entropy=17.7622 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 27320] reward=-114433848.6 actor_loss=0.2854 critic_loss=148446450483.2000 entropy=17.7456 approx_kl=0.0059 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 27320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551297.5 mean_steps=13.7
|
|
[Episode 27330] reward=-123218747.4 actor_loss=0.2302 critic_loss=161007360045.5111 entropy=17.7418 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 27340] reward=-116650169.7 actor_loss=0.3654 critic_loss=154247494041.6000 entropy=17.7334 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 27340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539451.2 mean_steps=14.2
|
|
[Episode 27350] reward=-119458907.1 actor_loss=0.2283 critic_loss=155902952152.1778 entropy=17.7443 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 27360] reward=-117271102.6 actor_loss=0.2822 critic_loss=157701702360.1778 entropy=17.7490 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 27360] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-366472.5 mean_steps=17.6
|
|
[Episode 27370] reward=-124172375.0 actor_loss=0.3086 critic_loss=164817855326.3158 entropy=17.7540 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 27380] reward=-121390130.7 actor_loss=0.2748 critic_loss=155638161590.0444 entropy=17.7524 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 27380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496260.5 mean_steps=14.2
|
|
[Episode 27390] reward=-124801790.9 actor_loss=0.2548 critic_loss=165824474498.8445 entropy=17.7612 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 27400] reward=-121444583.1 actor_loss=0.3182 critic_loss=159107804182.7556 entropy=17.7472 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 27400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561767.0 mean_steps=13.1
|
|
[Episode 27410] reward=-119317525.6 actor_loss=0.2880 critic_loss=156399763456.0000 entropy=17.7250 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 27420] reward=-122192699.1 actor_loss=0.2717 critic_loss=160618817378.4615 entropy=17.7234 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 27420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554950.5 mean_steps=13.4
|
|
[Episode 27430] reward=-126811038.6 actor_loss=0.2415 critic_loss=202068863238.5641 entropy=17.7137 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 27440] reward=-119049847.4 actor_loss=0.3824 critic_loss=178937985858.3704 entropy=17.7205 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 27440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417646.9 mean_steps=15.4
|
|
[Episode 27450] reward=-121980576.9 actor_loss=0.2987 critic_loss=159174897390.9333 entropy=17.7290 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 27460] reward=-125790009.8 actor_loss=0.1553 critic_loss=168109280033.3913 entropy=17.7291 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 27460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425661.5 mean_steps=15.8
|
|
[Episode 27470] reward=-120321028.9 actor_loss=0.2697 critic_loss=156931091114.6667 entropy=17.7292 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 27480] reward=-118952197.7 actor_loss=0.1786 critic_loss=153841270272.0000 entropy=17.7347 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 27480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-650319.9 mean_steps=13.2
|
|
[Episode 27490] reward=-118496142.6 actor_loss=0.2202 critic_loss=192504186470.4000 entropy=17.7215 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 27500] reward=-121561159.4 actor_loss=0.2382 critic_loss=162935914496.0000 entropy=17.7106 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 27500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549689.3 mean_steps=13.7
|
|
[Episode 27510] reward=-119240963.5 actor_loss=0.2039 critic_loss=158232837415.8222 entropy=17.7036 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 27520] reward=-121294823.4 actor_loss=0.2728 critic_loss=165132674048.0000 entropy=17.7035 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 27520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400791.3 mean_steps=16.6
|
|
[Episode 27530] reward=-116657056.8 actor_loss=0.2520 critic_loss=154201784832.0000 entropy=17.6952 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 27540] reward=-124742699.9 actor_loss=0.2352 critic_loss=166234882048.0000 entropy=17.6814 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 27540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-607137.9 mean_steps=11.8
|
|
[Episode 27550] reward=-118458235.2 actor_loss=0.2455 critic_loss=157391359908.9778 entropy=17.6918 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 27560] reward=-123388494.9 actor_loss=0.2258 critic_loss=166481129256.4211 entropy=17.7046 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 27560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-418587.5 mean_steps=15.2
|
|
[Episode 27570] reward=-114903799.6 actor_loss=0.3253 critic_loss=152136557714.2857 entropy=17.6983 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 27580] reward=-115291706.8 actor_loss=0.2669 critic_loss=153621670818.9091 entropy=17.7013 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 27580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-578444.0 mean_steps=14.3
|
|
[Episode 27590] reward=-117041654.1 actor_loss=0.2597 critic_loss=150447084339.2000 entropy=17.6855 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 27600] reward=-121670385.4 actor_loss=0.2370 critic_loss=154192074433.4222 entropy=17.6892 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 27600] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-316898.5 mean_steps=17.5
|
|
[Episode 27610] reward=-123524036.3 actor_loss=0.2837 critic_loss=168042233435.8974 entropy=17.6941 approx_kl=0.0114 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 27620] reward=-123075550.8 actor_loss=0.2557 critic_loss=165267341312.0000 entropy=17.7121 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 27620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521133.0 mean_steps=15.2
|
|
[Episode 27630] reward=-116490863.6 actor_loss=0.3381 critic_loss=160730980783.1579 entropy=17.7170 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 27640] reward=-121660425.0 actor_loss=0.3250 critic_loss=160790917575.1111 entropy=17.6971 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 27640] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-276524.9 mean_steps=17.4
|
|
[Episode 27650] reward=-122297294.6 actor_loss=0.3367 critic_loss=158632277442.5600 entropy=17.6891 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 27660] reward=-116906624.1 actor_loss=0.3206 critic_loss=151297606724.2667 entropy=17.6819 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 27660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529694.6 mean_steps=13.6
|
|
[Episode 27670] reward=-122409612.1 actor_loss=0.2639 critic_loss=155740169830.4000 entropy=17.6785 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 27680] reward=-115276505.3 actor_loss=0.2527 critic_loss=149774048038.7879 entropy=17.6642 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 27680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540231.5 mean_steps=14.6
|
|
[Episode 27690] reward=-121373627.9 actor_loss=0.2990 critic_loss=161834763500.3077 entropy=17.6777 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 27700] reward=-120680342.1 actor_loss=0.3491 critic_loss=153904181411.8400 entropy=17.6738 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 27700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-374374.6 mean_steps=16.0
|
|
[Episode 27710] reward=-116841683.9 actor_loss=0.3310 critic_loss=148219871663.1579 entropy=17.6792 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 27720] reward=-125338471.6 actor_loss=0.2594 critic_loss=171168144315.7333 entropy=17.6873 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 27720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-373864.1 mean_steps=16.3
|
|
[Episode 27730] reward=-114151608.0 actor_loss=0.3484 critic_loss=146771006586.8800 entropy=17.6772 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 27740] reward=-121407662.1 actor_loss=0.2786 critic_loss=159585950105.6000 entropy=17.6738 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 27740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422189.0 mean_steps=15.8
|
|
[Episode 27750] reward=-123172532.7 actor_loss=0.2551 critic_loss=161106083059.8095 entropy=17.6658 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 27760] reward=-125117096.7 actor_loss=0.2328 critic_loss=159582162833.2973 entropy=17.6655 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 27760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578230.5 mean_steps=12.8
|
|
[Episode 27770] reward=-118092414.5 actor_loss=0.1499 critic_loss=153566312497.9512 entropy=17.6773 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 27780] reward=-118430696.8 actor_loss=0.2829 critic_loss=156933925194.3226 entropy=17.6777 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 27780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493193.2 mean_steps=14.7
|
|
[Episode 27790] reward=-118999365.5 actor_loss=0.3231 critic_loss=156174980073.2444 entropy=17.6507 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 27800] reward=-119602534.1 actor_loss=0.3212 critic_loss=154054821595.4286 entropy=17.6598 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 27800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392227.0 mean_steps=16.6
|
|
[Episode 27810] reward=-118584703.4 actor_loss=0.2235 critic_loss=154855868006.4000 entropy=17.6694 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 27820] reward=-124810714.9 actor_loss=0.2643 critic_loss=161470706574.2222 entropy=17.6725 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 27820] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-283968.1 mean_steps=17.2
|
|
[Episode 27830] reward=-119751265.4 actor_loss=0.3111 critic_loss=155210027463.1111 entropy=17.6772 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 27840] reward=-120441063.0 actor_loss=0.2270 critic_loss=151970809036.8000 entropy=17.6777 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 27840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-610672.0 mean_steps=12.2
|
|
[Episode 27850] reward=-118099781.7 actor_loss=0.3381 critic_loss=157203943282.7586 entropy=17.6984 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 27860] reward=-118250103.7 actor_loss=0.3075 critic_loss=157296522854.4000 entropy=17.6996 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 27860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-433793.1 mean_steps=14.0
|
|
[Episode 27870] reward=-119478748.3 actor_loss=0.3439 critic_loss=153668498525.0909 entropy=17.6992 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 27880] reward=-125120670.5 actor_loss=0.1851 critic_loss=162181800218.4828 entropy=17.6901 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 27880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480435.1 mean_steps=15.1
|
|
[Episode 27890] reward=-121545876.4 actor_loss=0.3244 critic_loss=163154412830.7200 entropy=17.6819 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 27900] reward=-117576988.9 actor_loss=0.2784 critic_loss=160289144832.0000 entropy=17.6666 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 27900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552801.9 mean_steps=14.2
|
|
[Episode 27910] reward=-120779405.9 actor_loss=0.2929 critic_loss=158594042450.5807 entropy=17.6585 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 27920] reward=-122537422.8 actor_loss=0.2881 critic_loss=157115159347.2000 entropy=17.6689 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 27920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567809.3 mean_steps=12.9
|
|
[Episode 27930] reward=-121485412.8 actor_loss=0.3809 critic_loss=157253627904.0000 entropy=17.6644 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 27940] reward=-118093551.4 actor_loss=0.3029 critic_loss=165768272749.7143 entropy=17.6701 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 27940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581453.0 mean_steps=12.7
|
|
[Episode 27950] reward=-120983453.8 actor_loss=0.3430 critic_loss=161140111223.4667 entropy=17.6652 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 27960] reward=-117834262.0 actor_loss=0.2858 critic_loss=158905546251.3778 entropy=17.6578 approx_kl=0.0101 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 27960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-318540.8 mean_steps=16.8
|
|
[Episode 27970] reward=-121138339.9 actor_loss=0.2461 critic_loss=158778501074.4889 entropy=17.6458 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 27980] reward=-125176541.1 actor_loss=0.3549 critic_loss=166806012534.1538 entropy=17.6520 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 27980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485278.5 mean_steps=14.7
|
|
[Episode 27990] reward=-116990041.8 actor_loss=0.2404 critic_loss=154855868006.4000 entropy=17.6427 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 28000] reward=-112918158.9 actor_loss=0.2918 critic_loss=149288599187.9111 entropy=17.6187 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 28000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532696.3 mean_steps=14.4
|
|
[Episode 28010] reward=-121319975.5 actor_loss=0.2535 critic_loss=161189456164.5714 entropy=17.6236 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 28020] reward=-122254552.9 actor_loss=0.2712 critic_loss=207914108518.4000 entropy=17.6280 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 28020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-447531.2 mean_steps=16.9
|
|
[Episode 28030] reward=-138678497.0 actor_loss=0.4028 critic_loss=1137393206846.4390 entropy=17.6366 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 28040] reward=-122017438.6 actor_loss=0.2422 critic_loss=159491744256.0000 entropy=17.6345 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 28040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424284.3 mean_steps=14.7
|
|
[Episode 28050] reward=-119704053.9 actor_loss=0.2864 critic_loss=155698768190.5778 entropy=17.6267 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 28060] reward=-121506373.5 actor_loss=0.3357 critic_loss=181581054862.2222 entropy=17.6059 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 28060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-367489.9 mean_steps=16.8
|
|
[Episode 28070] reward=-115808481.5 actor_loss=0.3125 critic_loss=162281500146.8718 entropy=17.5976 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 28080] reward=-121618708.3 actor_loss=0.2736 critic_loss=155673916098.2069 entropy=17.5968 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 28080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534347.1 mean_steps=13.5
|
|
[Episode 28090] reward=-120949417.5 actor_loss=0.3996 critic_loss=161144244360.5333 entropy=17.6111 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 28100] reward=-119941956.9 actor_loss=0.2207 critic_loss=155362447732.3636 entropy=17.5968 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 28100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409632.8 mean_steps=15.8
|
|
[Episode 28110] reward=-121297011.4 actor_loss=0.3271 critic_loss=156985966592.0000 entropy=17.6001 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 28120] reward=-120974612.3 actor_loss=0.2998 critic_loss=157344442660.5714 entropy=17.6221 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 28120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-593876.1 mean_steps=13.8
|
|
[Episode 28130] reward=-122878771.1 actor_loss=0.3132 critic_loss=160610853494.1538 entropy=17.6191 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 28140] reward=-117645135.3 actor_loss=0.3000 critic_loss=150896934547.9111 entropy=17.6031 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 28140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-423176.3 mean_steps=14.8
|
|
[Episode 28150] reward=-118827986.3 actor_loss=0.3005 critic_loss=155039730098.4243 entropy=17.5959 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 28160] reward=-122579085.7 actor_loss=0.3110 critic_loss=156858931467.1304 entropy=17.5930 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 28160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-425082.6 mean_steps=14.6
|
|
[Episode 28170] reward=-118500240.3 actor_loss=0.3268 critic_loss=161720864312.8889 entropy=17.5908 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 28180] reward=-122839568.8 actor_loss=0.2600 critic_loss=158049645468.9032 entropy=17.5861 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 28180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-393770.1 mean_steps=16.6
|
|
[Episode 28190] reward=-125023401.1 actor_loss=0.2833 critic_loss=167538254370.1333 entropy=17.5856 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 28200] reward=-117819721.9 actor_loss=0.3162 critic_loss=151869381911.2727 entropy=17.5881 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 28200] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-577605.1 mean_steps=10.9
|
|
[Episode 28210] reward=-122438012.3 actor_loss=0.2059 critic_loss=153218502656.0000 entropy=17.5775 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 28220] reward=-119084924.7 actor_loss=0.3355 critic_loss=157954877801.4118 entropy=17.5726 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 28220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439206.8 mean_steps=14.8
|
|
[Episode 28230] reward=-123543705.6 actor_loss=0.2104 critic_loss=160387739999.0857 entropy=17.5740 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 28240] reward=-123413767.4 actor_loss=0.2034 critic_loss=161554916966.4000 entropy=17.5624 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 28240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-416468.5 mean_steps=16.6
|
|
[Episode 28250] reward=-119554134.7 actor_loss=0.3531 critic_loss=152959312554.6667 entropy=17.5647 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 28260] reward=-121609199.6 actor_loss=0.2592 critic_loss=158236183210.6667 entropy=17.5531 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 28260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518264.5 mean_steps=14.1
|
|
[Episode 28270] reward=-119224754.8 actor_loss=0.3125 critic_loss=149964939264.0000 entropy=17.5521 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 28280] reward=-123762327.4 actor_loss=0.3101 critic_loss=163327773536.7111 entropy=17.5607 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 28280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-620216.3 mean_steps=12.2
|
|
[Episode 28290] reward=-120308799.1 actor_loss=0.2760 critic_loss=151885852398.9333 entropy=17.5691 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 28300] reward=-122634715.1 actor_loss=0.3146 critic_loss=182878731377.7778 entropy=17.5630 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 28300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545913.8 mean_steps=13.3
|
|
[Episode 28310] reward=-120881439.6 actor_loss=0.2376 critic_loss=153864587946.6667 entropy=17.5591 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 28320] reward=-118183211.3 actor_loss=0.3816 critic_loss=152038021012.2105 entropy=17.5565 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 28320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500541.5 mean_steps=14.2
|
|
[Episode 28330] reward=-118205972.0 actor_loss=0.4131 critic_loss=154993219291.4286 entropy=17.5593 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 28340] reward=-114111386.0 actor_loss=0.3177 critic_loss=145657373876.7059 entropy=17.5690 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 28340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470435.5 mean_steps=14.1
|
|
[Episode 28350] reward=-117171300.0 actor_loss=0.3507 critic_loss=152107427237.6471 entropy=17.5787 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 28360] reward=-121736614.9 actor_loss=0.3313 critic_loss=160479178536.4211 entropy=17.5807 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 28360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-576621.4 mean_steps=12.8
|
|
[Episode 28370] reward=-118961301.5 actor_loss=0.2377 critic_loss=153461975401.4118 entropy=17.5838 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 28380] reward=-120744435.6 actor_loss=0.2548 critic_loss=153235604626.2857 entropy=17.5885 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 28380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500383.7 mean_steps=15.5
|
|
[Episode 28390] reward=-121450488.1 actor_loss=0.2440 critic_loss=156616989478.7879 entropy=17.5688 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 28400] reward=-121983359.0 actor_loss=0.3042 critic_loss=160607448064.0000 entropy=17.5855 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 28400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553424.6 mean_steps=13.5
|
|
[Episode 28410] reward=-118342241.6 actor_loss=0.3385 critic_loss=248580081956.5714 entropy=17.5733 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 28420] reward=-121589340.1 actor_loss=0.2480 critic_loss=157326450991.4074 entropy=17.5718 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 28420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504429.1 mean_steps=15.2
|
|
[Episode 28430] reward=-118355364.5 actor_loss=0.3334 critic_loss=158842327625.1429 entropy=17.5719 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 28440] reward=-120857850.8 actor_loss=0.3733 critic_loss=154600612271.1579 entropy=17.5732 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 28440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528845.8 mean_steps=13.3
|
|
[Episode 28450] reward=-121979866.7 actor_loss=0.1473 critic_loss=159085388946.2857 entropy=17.5707 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 28460] reward=-114990467.7 actor_loss=0.3720 critic_loss=165305282344.4211 entropy=17.5746 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 28460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477634.1 mean_steps=14.1
|
|
[Episode 28470] reward=-117771232.4 actor_loss=0.3680 critic_loss=153356847786.6667 entropy=17.5785 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 28480] reward=-114539945.2 actor_loss=0.3302 critic_loss=147620627062.1538 entropy=17.5679 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 28480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-450646.6 mean_steps=16.8
|
|
[Episode 28490] reward=-119222007.4 actor_loss=0.1505 critic_loss=159666474188.8000 entropy=17.5572 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 28500] reward=-118884168.5 actor_loss=0.2617 critic_loss=151264703199.1795 entropy=17.5565 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 28500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523508.5 mean_steps=14.4
|
|
[Episode 28510] reward=-114886663.2 actor_loss=0.3200 critic_loss=145177968904.2581 entropy=17.5508 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 28520] reward=-120589375.7 actor_loss=0.2444 critic_loss=160057996447.2889 entropy=17.5426 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 28520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413796.0 mean_steps=15.4
|
|
[Episode 28530] reward=-121046007.3 actor_loss=0.3003 critic_loss=159117568318.5778 entropy=17.5418 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 28540] reward=-118176244.0 actor_loss=0.3579 critic_loss=150872816753.7778 entropy=17.5354 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 28540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-446604.8 mean_steps=13.9
|
|
[Episode 28550] reward=-124338943.7 actor_loss=0.2759 critic_loss=173714325504.0000 entropy=17.5358 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 28560] reward=-117562653.2 actor_loss=0.3774 critic_loss=154490345472.0000 entropy=17.5299 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 28560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-485897.6 mean_steps=16.2
|
|
[Episode 28570] reward=-117738523.4 actor_loss=0.3009 critic_loss=152117584262.0952 entropy=17.5300 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 28580] reward=-116718721.8 actor_loss=0.4291 critic_loss=153515240106.6667 entropy=17.5199 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 28580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-365293.4 mean_steps=16.5
|
|
[Episode 28590] reward=-113035248.7 actor_loss=0.3352 critic_loss=142399783470.5454 entropy=17.5330 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 28600] reward=-119920816.7 actor_loss=0.2762 critic_loss=155135311451.8974 entropy=17.5218 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 28600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444293.1 mean_steps=15.6
|
|
[Episode 28610] reward=-119868899.2 actor_loss=0.2524 critic_loss=160735532373.3333 entropy=17.5388 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 28620] reward=-120470060.5 actor_loss=0.2476 critic_loss=155487216360.7273 entropy=17.5448 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 28620] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-376942.2 mean_steps=17.3
|
|
[Episode 28630] reward=-118392847.9 actor_loss=0.2980 critic_loss=147890304812.1379 entropy=17.5564 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 28640] reward=-113869635.6 actor_loss=0.2268 critic_loss=154380254412.8000 entropy=17.5486 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 28640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530246.2 mean_steps=13.3
|
|
[Episode 28650] reward=-119690400.2 actor_loss=0.2489 critic_loss=149072094822.4000 entropy=17.5440 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 28660] reward=-119761036.8 actor_loss=0.3842 critic_loss=157147692635.8974 entropy=17.5399 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 28660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439482.6 mean_steps=15.8
|
|
[Episode 28670] reward=-121314752.7 actor_loss=0.2072 critic_loss=157368838046.4762 entropy=17.5274 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 28680] reward=-116121535.2 actor_loss=0.2850 critic_loss=152601892788.1482 entropy=17.5183 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 28680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-474867.8 mean_steps=15.9
|
|
[Episode 28690] reward=-119443956.2 actor_loss=0.2698 critic_loss=155275487339.7895 entropy=17.5031 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 28700] reward=-124410069.7 actor_loss=0.1621 critic_loss=164342200230.9565 entropy=17.5048 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 28700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425213.4 mean_steps=15.7
|
|
[Episode 28710] reward=-114569761.1 actor_loss=0.3488 critic_loss=152869420119.7714 entropy=17.4981 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 28720] reward=-120822204.8 actor_loss=0.2476 critic_loss=160359981537.8824 entropy=17.5174 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 28720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527957.3 mean_steps=14.4
|
|
[Episode 28730] reward=-120286658.5 actor_loss=0.3000 critic_loss=155259073783.1724 entropy=17.5370 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 28740] reward=-124957537.8 actor_loss=0.3304 critic_loss=165316722688.0000 entropy=17.5440 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 28740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-331519.1 mean_steps=16.9
|
|
[Episode 28750] reward=-115359782.2 actor_loss=0.2804 critic_loss=154256695296.0000 entropy=17.5500 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 28760] reward=-119531517.8 actor_loss=0.3362 critic_loss=154082830238.4762 entropy=17.5528 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 28760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-597232.3 mean_steps=13.4
|
|
[Episode 28770] reward=-124048760.5 actor_loss=0.2947 critic_loss=168227167709.8667 entropy=17.5347 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 28780] reward=-115502786.5 actor_loss=0.2749 critic_loss=149329110630.4000 entropy=17.5506 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 28780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436812.7 mean_steps=15.8
|
|
[Episode 28790] reward=-123007611.9 actor_loss=0.2582 critic_loss=164855067685.9259 entropy=17.5577 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 28800] reward=-119604737.7 actor_loss=0.3431 critic_loss=166617875212.1905 entropy=17.5751 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 28800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457827.1 mean_steps=16.4
|
|
[Episode 28810] reward=-125085123.7 actor_loss=0.3017 critic_loss=163714874709.3333 entropy=17.5793 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 28820] reward=-118178563.8 actor_loss=0.3091 critic_loss=149168858908.4445 entropy=17.5713 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 28820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-451456.3 mean_steps=16.9
|
|
[Episode 28830] reward=-115643043.6 actor_loss=0.1992 critic_loss=146538019603.6923 entropy=17.5709 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 28840] reward=-113989269.9 actor_loss=0.2921 critic_loss=144294722515.4783 entropy=17.5594 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 28840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-467902.8 mean_steps=15.6
|
|
[Episode 28850] reward=-119890862.5 actor_loss=0.2247 critic_loss=157130115936.7111 entropy=17.5578 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 28860] reward=-120211011.3 actor_loss=0.3128 critic_loss=155529578320.4572 entropy=17.5554 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 28860] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-661571.2 mean_steps=11.7
|
|
[Episode 28870] reward=-119340974.2 actor_loss=0.4017 critic_loss=155240068133.9259 entropy=17.5583 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 28880] reward=-126304092.5 actor_loss=0.2660 critic_loss=473364144559.1579 entropy=17.5587 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 28880] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-665492.4 mean_steps=12.3
|
|
[Episode 28890] reward=-113259162.4 actor_loss=0.3387 critic_loss=148629126686.1176 entropy=17.5483 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 28900] reward=-122169578.8 actor_loss=0.2970 critic_loss=159883982555.4286 entropy=17.5457 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 28900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-411973.4 mean_steps=15.6
|
|
[Episode 28910] reward=-121113369.7 actor_loss=0.2596 critic_loss=152513897358.2222 entropy=17.5340 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 28920] reward=-117512351.9 actor_loss=0.3270 critic_loss=146584318771.2000 entropy=17.5600 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 28920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521584.6 mean_steps=15.1
|
|
[Episode 28930] reward=-118479007.0 actor_loss=0.2749 critic_loss=149301612916.3636 entropy=17.5504 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 28940] reward=-121707604.8 actor_loss=0.2561 critic_loss=158288360711.3143 entropy=17.5489 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 28940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-562231.6 mean_steps=14.5
|
|
[Episode 28950] reward=-116305881.8 actor_loss=0.3285 critic_loss=145592431738.8800 entropy=17.5448 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 28960] reward=-112940673.9 actor_loss=0.3744 critic_loss=149647297461.0732 entropy=17.5367 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 28960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431184.0 mean_steps=15.8
|
|
[Episode 28970] reward=-123708589.9 actor_loss=0.1741 critic_loss=164088424920.6154 entropy=17.5326 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 28980] reward=-116784089.0 actor_loss=0.3444 critic_loss=152731759411.2000 entropy=17.5244 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 28980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504830.6 mean_steps=14.5
|
|
[Episode 28990] reward=-122165564.0 actor_loss=0.3106 critic_loss=161937031168.0000 entropy=17.5272 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 29000] reward=-116030077.1 actor_loss=0.2787 critic_loss=153850237106.0869 entropy=17.5331 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 29000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-591774.1 mean_steps=12.8
|
|
[Episode 29010] reward=-116066397.4 actor_loss=0.3538 critic_loss=146825486767.1579 entropy=17.5284 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 29020] reward=-117588833.6 actor_loss=0.3503 critic_loss=157494012586.6667 entropy=17.5421 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 29020] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-349559.6 mean_steps=17.9
|
|
[Episode 29030] reward=-119688694.1 actor_loss=0.3695 critic_loss=152871066009.6000 entropy=17.5371 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 29040] reward=-116433354.4 actor_loss=0.2650 critic_loss=290940960112.6400 entropy=17.5104 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 29040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395421.9 mean_steps=16.4
|
|
[Episode 29050] reward=-120590254.0 actor_loss=0.2279 critic_loss=153423314944.0000 entropy=17.5132 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 29060] reward=-120832221.2 actor_loss=0.2282 critic_loss=155398280169.2444 entropy=17.5124 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 29060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481777.0 mean_steps=14.6
|
|
[Episode 29070] reward=-115713741.4 actor_loss=0.3034 critic_loss=144312602715.0222 entropy=17.5265 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 29080] reward=-118734506.6 actor_loss=0.3806 critic_loss=157264828097.4222 entropy=17.5224 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 29080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402764.3 mean_steps=15.3
|
|
[Episode 29090] reward=-119637224.4 actor_loss=0.2695 critic_loss=155169879287.1724 entropy=17.5135 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 29100] reward=-118429880.5 actor_loss=0.2689 critic_loss=178438387712.0000 entropy=17.5155 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 29100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-532318.5 mean_steps=15.7
|
|
[Episode 29110] reward=-121006156.2 actor_loss=0.2974 critic_loss=157313238416.6956 entropy=17.5190 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 29120] reward=-123946106.8 actor_loss=0.2304 critic_loss=158499849284.2667 entropy=17.5345 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 29120] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-309675.1 mean_steps=16.4
|
|
[Episode 29130] reward=-113900721.0 actor_loss=0.2574 critic_loss=151305812591.3044 entropy=17.5328 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 29140] reward=-106847582.7 actor_loss=0.2880 critic_loss=141534028068.5714 entropy=17.5351 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 29140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599742.7 mean_steps=12.8
|
|
[Episode 29150] reward=-118710451.8 actor_loss=0.2735 critic_loss=160139267954.7586 entropy=17.5335 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 29160] reward=-111187673.4 actor_loss=0.3299 critic_loss=157829944442.8800 entropy=17.5355 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 29160] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-667703.6 mean_steps=11.6
|
|
[Episode 29170] reward=-119985558.0 actor_loss=0.1965 critic_loss=153040319186.8235 entropy=17.5397 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 29180] reward=-120216638.3 actor_loss=0.3207 critic_loss=156909128908.8000 entropy=17.5377 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 29180] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-396169.0 mean_steps=17.4
|
|
[Episode 29190] reward=-115449073.2 actor_loss=0.2585 critic_loss=148794249707.5200 entropy=17.5262 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 29200] reward=-125118975.4 actor_loss=0.3010 critic_loss=285562200808.7273 entropy=17.5428 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 29200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571494.4 mean_steps=12.9
|
|
[Episode 29210] reward=-117330168.1 actor_loss=0.2743 critic_loss=149420229089.8824 entropy=17.5330 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 29220] reward=-119399874.2 actor_loss=0.3481 critic_loss=155123913781.8947 entropy=17.5146 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 29220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571957.7 mean_steps=12.7
|
|
[Episode 29230] reward=-113432792.5 actor_loss=0.4044 critic_loss=145283818564.2667 entropy=17.5266 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 29240] reward=-120338220.9 actor_loss=0.2551 critic_loss=154515781924.5714 entropy=17.5306 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 29240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513065.1 mean_steps=13.7
|
|
[Episode 29250] reward=-120007105.2 actor_loss=0.2845 critic_loss=157490205354.6667 entropy=17.5405 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 29260] reward=-121398054.4 actor_loss=0.2032 critic_loss=154243416064.0000 entropy=17.5293 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 29260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577348.4 mean_steps=13.4
|
|
[Episode 29270] reward=-114820259.5 actor_loss=0.3340 critic_loss=149160259899.0769 entropy=17.5325 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 29280] reward=-115669397.6 actor_loss=0.3528 critic_loss=149524082211.7209 entropy=17.5113 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 29280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-537550.1 mean_steps=15.4
|
|
[Episode 29290] reward=-119957290.4 actor_loss=0.3010 critic_loss=149714142120.2286 entropy=17.5188 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 29300] reward=-115473355.4 actor_loss=0.3410 critic_loss=145257218048.0000 entropy=17.5240 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 29300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-432721.5 mean_steps=16.6
|
|
[Episode 29310] reward=-118211721.1 actor_loss=0.2626 critic_loss=148605462370.4615 entropy=17.5220 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 29320] reward=-118584009.3 actor_loss=0.3733 critic_loss=164851217050.7907 entropy=17.5284 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 29320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575208.1 mean_steps=12.8
|
|
[Episode 29330] reward=-117624467.1 actor_loss=0.3288 critic_loss=154757241124.5714 entropy=17.5173 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 29340] reward=-121087252.6 actor_loss=0.3259 critic_loss=150147915511.7419 entropy=17.5070 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 29340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-418823.1 mean_steps=16.4
|
|
[Episode 29350] reward=-120859495.6 actor_loss=0.2437 critic_loss=156776432745.9310 entropy=17.5329 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 29360] reward=-114358593.3 actor_loss=0.3485 critic_loss=146275382476.8000 entropy=17.5381 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 29360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400335.4 mean_steps=15.2
|
|
[Episode 29370] reward=-122791021.3 actor_loss=0.1778 critic_loss=155530308078.3448 entropy=17.5360 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 29380] reward=-121326257.8 actor_loss=0.2282 critic_loss=158663884435.9111 entropy=17.5419 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 29380] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-747914.8 mean_steps=10.7
|
|
[Episode 29390] reward=-111928718.6 actor_loss=0.3357 critic_loss=144546454127.3044 entropy=17.5445 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 29400] reward=-115426873.2 actor_loss=0.4802 critic_loss=142709505325.1765 entropy=17.5352 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1536 front_blocked=0
|
|
[Eval 29400] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-773147.4 mean_steps=10.8
|
|
[Episode 29410] reward=-116745046.0 actor_loss=0.3656 critic_loss=152983891321.2632 entropy=17.5379 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 29420] reward=-122539739.6 actor_loss=0.3208 critic_loss=155083889868.8000 entropy=17.5416 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 29420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391912.7 mean_steps=16.1
|
|
[Episode 29430] reward=-117962851.6 actor_loss=0.3563 critic_loss=151484238506.6667 entropy=17.5482 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 29440] reward=-119495781.4 actor_loss=0.2751 critic_loss=155205801984.0000 entropy=17.5397 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 29440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462162.6 mean_steps=15.8
|
|
[Episode 29450] reward=-122448226.3 actor_loss=0.2361 critic_loss=158723844050.4889 entropy=17.5518 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 29460] reward=-119594175.7 actor_loss=0.3428 critic_loss=157043544746.6667 entropy=17.5477 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 29460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454861.8 mean_steps=14.8
|
|
[Episode 29470] reward=-116920102.7 actor_loss=0.2861 critic_loss=147007506022.4000 entropy=17.5450 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 29480] reward=-135665848.2 actor_loss=0.3287 critic_loss=1222109965698.8445 entropy=17.5729 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 29480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-459395.0 mean_steps=16.5
|
|
[Episode 29490] reward=-118010969.0 actor_loss=0.3296 critic_loss=154025839820.8000 entropy=17.5730 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 29500] reward=-118032768.7 actor_loss=0.2661 critic_loss=170918167113.1429 entropy=17.5839 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 29500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-477750.0 mean_steps=16.1
|
|
[Episode 29510] reward=-118209791.1 actor_loss=0.3615 critic_loss=155935562865.7778 entropy=17.5800 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 29520] reward=-115794981.1 actor_loss=0.3732 critic_loss=147010627356.4445 entropy=17.5671 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 29520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-354340.5 mean_steps=17.0
|
|
[Episode 29530] reward=-118470759.4 actor_loss=0.3074 critic_loss=165312765470.1176 entropy=17.5801 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 29540] reward=-129654806.8 actor_loss=0.3058 critic_loss=616958965987.5555 entropy=17.5820 approx_kl=0.0038 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 29540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457806.3 mean_steps=15.2
|
|
[Episode 29550] reward=-120816780.1 actor_loss=0.2607 critic_loss=157795341516.8000 entropy=17.5785 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 29560] reward=-116425727.1 actor_loss=0.3465 critic_loss=149233159469.1765 entropy=17.5784 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 29560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-491727.1 mean_steps=16.1
|
|
[Episode 29570] reward=-118748929.8 actor_loss=0.2231 critic_loss=181226501605.0526 entropy=17.5798 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 29580] reward=-130850968.8 actor_loss=0.2770 critic_loss=752540053876.3636 entropy=17.5711 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 29580] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-406825.7 mean_steps=17.4
|
|
[Episode 29590] reward=-157876259.3 actor_loss=5.4085 critic_loss=2453367384425.4116 entropy=17.5743 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 29600] reward=-120915913.9 actor_loss=0.2228 critic_loss=152692258749.9355 entropy=17.5772 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 29600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490945.8 mean_steps=14.4
|
|
[Episode 29610] reward=-116463394.2 actor_loss=0.2391 critic_loss=151607091785.1429 entropy=17.5884 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 29620] reward=-112391755.3 actor_loss=0.3072 critic_loss=145280297642.6667 entropy=17.5882 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 29620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484303.6 mean_steps=15.2
|
|
[Episode 29630] reward=-115106976.3 actor_loss=0.3891 critic_loss=146897150279.6800 entropy=17.5864 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 29640] reward=-122033554.2 actor_loss=0.3031 critic_loss=158466241331.2000 entropy=17.5841 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 29640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448624.2 mean_steps=14.7
|
|
[Episode 29650] reward=-154092127.2 actor_loss=7.9229 critic_loss=4383887529797.8184 entropy=17.5831 approx_kl=0.0026 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 29660] reward=-116918179.4 actor_loss=0.3095 critic_loss=152197979297.6842 entropy=17.5673 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 29660] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-321928.0 mean_steps=16.7
|
|
[Episode 29670] reward=-117891336.5 actor_loss=0.2722 critic_loss=343945834390.9744 entropy=17.5880 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 29680] reward=-118992943.3 actor_loss=0.2472 critic_loss=155816032413.5385 entropy=17.5965 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 29680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-389040.9 mean_steps=16.7
|
|
[Episode 29690] reward=-121362162.7 actor_loss=0.3250 critic_loss=160828424936.7273 entropy=17.5980 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 29700] reward=-121502997.5 actor_loss=0.2582 critic_loss=157125838620.4445 entropy=17.5960 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 29700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-372857.1 mean_steps=16.3
|
|
[Episode 29710] reward=-124866259.9 actor_loss=0.2736 critic_loss=162414623675.7333 entropy=17.5808 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 29720] reward=-119394420.7 actor_loss=0.2802 critic_loss=158274335175.1111 entropy=17.5807 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 29720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470488.5 mean_steps=13.8
|
|
[Episode 29730] reward=-114913214.5 actor_loss=0.3466 critic_loss=145182248140.8000 entropy=17.5864 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 29740] reward=-112654333.2 actor_loss=0.3424 critic_loss=144290318654.5778 entropy=17.5756 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 29740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487966.1 mean_steps=15.1
|
|
[Episode 29750] reward=-119888460.5 actor_loss=0.2762 critic_loss=153715552451.0476 entropy=17.5727 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 29760] reward=-114408887.2 actor_loss=0.3294 critic_loss=143990864099.5555 entropy=17.5660 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 29760] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-309946.5 mean_steps=18.1
|
|
[Episode 29770] reward=-119262557.9 actor_loss=0.2397 critic_loss=153170102044.4445 entropy=17.5572 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 29780] reward=-115397154.3 actor_loss=0.3634 critic_loss=154981179392.0000 entropy=17.5480 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 29780] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-343024.1 mean_steps=17.8
|
|
[Episode 29790] reward=-126696663.2 actor_loss=0.3360 critic_loss=580149864448.0000 entropy=17.5504 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 29800] reward=-119739938.2 actor_loss=0.3212 critic_loss=155090043335.1111 entropy=17.5435 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 29800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518788.1 mean_steps=15.3
|
|
[Episode 29810] reward=-120049881.9 actor_loss=0.2737 critic_loss=157357203655.8049 entropy=17.5467 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 29820] reward=-124283306.5 actor_loss=0.2931 critic_loss=158360149767.7576 entropy=17.5403 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 29820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437574.1 mean_steps=15.5
|
|
[Episode 29830] reward=-114210994.0 actor_loss=0.4017 critic_loss=144981881287.1111 entropy=17.5588 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 29840] reward=-120137858.2 actor_loss=0.2564 critic_loss=152711739164.4445 entropy=17.5617 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 29840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-390522.7 mean_steps=16.3
|
|
[Episode 29850] reward=-120262630.7 actor_loss=0.1889 critic_loss=195113710855.3143 entropy=17.5645 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 29860] reward=-118028432.2 actor_loss=0.2863 critic_loss=150704894043.0222 entropy=17.5448 approx_kl=0.0054 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 29860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-433894.7 mean_steps=16.8
|
|
[Episode 29870] reward=-116723804.2 actor_loss=0.3066 critic_loss=146055656877.4193 entropy=17.5490 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 29880] reward=-120564485.3 actor_loss=0.2961 critic_loss=152500616123.7333 entropy=17.5368 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 29880] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-354841.2 mean_steps=17.1
|
|
[Episode 29890] reward=-124647960.5 actor_loss=0.2508 critic_loss=162242255005.5385 entropy=17.5392 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 29900] reward=-124945351.2 actor_loss=0.2089 critic_loss=167554011011.8788 entropy=17.5625 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 29900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-459093.1 mean_steps=14.1
|
|
[Episode 29910] reward=-117103774.3 actor_loss=0.2074 critic_loss=149736920117.8947 entropy=17.5654 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 29920] reward=-117180868.4 actor_loss=0.2066 critic_loss=145802060946.2857 entropy=17.5682 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 29920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522437.1 mean_steps=14.8
|
|
[Episode 29930] reward=-117533696.6 actor_loss=0.3779 critic_loss=149106908160.0000 entropy=17.5587 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 29940] reward=-126604128.1 actor_loss=0.1954 critic_loss=160890915939.0968 entropy=17.5777 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 29940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-580106.5 mean_steps=14.8
|
|
[Episode 29950] reward=-118167170.3 actor_loss=0.4117 critic_loss=147686120001.6410 entropy=17.5872 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 29960] reward=-120096400.6 actor_loss=0.3555 critic_loss=162451204778.6667 entropy=17.5895 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 29960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502477.3 mean_steps=15.6
|
|
[Episode 29970] reward=-118755768.2 actor_loss=0.3808 critic_loss=150476503121.9200 entropy=17.5785 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 29980] reward=-119517598.3 actor_loss=0.2931 critic_loss=148782786969.6000 entropy=17.5675 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 29980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467527.3 mean_steps=14.9
|
|
[Episode 29990] reward=-118757994.6 actor_loss=0.2384 critic_loss=167607024139.9070 entropy=17.5792 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 30000] reward=-118946948.8 actor_loss=0.2670 critic_loss=149405152779.3778 entropy=17.5887 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 30000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-485512.7 mean_steps=13.1
|
|
[Episode 30010] reward=-112382040.0 actor_loss=0.4756 critic_loss=146231693403.0222 entropy=17.5839 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 30020] reward=-120356959.0 actor_loss=0.2098 critic_loss=169840199452.4445 entropy=17.5884 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 30020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520500.0 mean_steps=14.3
|
|
[Episode 30030] reward=-115913239.4 actor_loss=0.3586 critic_loss=161956631717.1613 entropy=17.5950 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 30040] reward=-119105652.9 actor_loss=0.2779 critic_loss=153350621128.6487 entropy=17.6054 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 30040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-346970.4 mean_steps=16.1
|
|
[Episode 30050] reward=-122431662.4 actor_loss=0.3006 critic_loss=164282655464.7273 entropy=17.6074 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 30060] reward=-115181211.0 actor_loss=0.3740 critic_loss=144016193324.1379 entropy=17.5944 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 30060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493355.5 mean_steps=15.4
|
|
[Episode 30070] reward=-120429985.4 actor_loss=0.3437 critic_loss=151792213805.9487 entropy=17.6044 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 30080] reward=-123959968.8 actor_loss=0.2481 critic_loss=161420073005.5111 entropy=17.6209 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 30080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-639410.5 mean_steps=12.1
|
|
[Episode 30090] reward=-118210756.8 actor_loss=0.3210 critic_loss=147316128335.6444 entropy=17.6231 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 30100] reward=-123193064.8 actor_loss=0.2244 critic_loss=157575782096.5926 entropy=17.6330 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 30100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-470228.8 mean_steps=16.1
|
|
[Episode 30110] reward=-122824275.2 actor_loss=0.2238 critic_loss=158602893312.0000 entropy=17.6314 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 30120] reward=-120189815.7 actor_loss=0.2551 critic_loss=155593057621.3333 entropy=17.6253 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 30120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-652736.2 mean_steps=124.2
|
|
[Episode 30130] reward=-119070628.4 actor_loss=0.2819 critic_loss=155459332143.6279 entropy=17.6301 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 30140] reward=-120184999.7 actor_loss=0.2943 critic_loss=174915451617.2800 entropy=17.6367 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 30140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-420668.9 mean_steps=16.4
|
|
[Episode 30150] reward=-119779242.4 actor_loss=0.3436 critic_loss=153859881483.9070 entropy=17.6559 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 30160] reward=-123805479.0 actor_loss=0.2487 critic_loss=246953815341.1765 entropy=17.6577 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 30160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524826.5 mean_steps=14.1
|
|
[Episode 30170] reward=-117385213.5 actor_loss=0.2940 critic_loss=157157397065.1429 entropy=17.6603 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 30180] reward=-119595985.7 actor_loss=0.2439 critic_loss=151348561001.9310 entropy=17.6543 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 30180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507414.4 mean_steps=14.6
|
|
[Episode 30190] reward=-120632169.7 actor_loss=0.3488 critic_loss=159622661916.4445 entropy=17.6543 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 30200] reward=-122572819.6 actor_loss=0.3042 critic_loss=157322628995.8788 entropy=17.6578 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 30200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-595068.9 mean_steps=13.1
|
|
[Episode 30210] reward=-124755943.7 actor_loss=0.2340 critic_loss=164889589248.0000 entropy=17.6529 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 30220] reward=-117614367.3 actor_loss=0.3335 critic_loss=161084549438.5778 entropy=17.6572 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 30220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552871.6 mean_steps=13.3
|
|
[Episode 30230] reward=-123882692.5 actor_loss=0.2541 critic_loss=161963851776.0000 entropy=17.6589 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 30240] reward=-121603452.7 actor_loss=0.2948 critic_loss=151612083278.7692 entropy=17.6595 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 30240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-528571.2 mean_steps=16.1
|
|
[Episode 30250] reward=-119616657.0 actor_loss=0.3065 critic_loss=152256445719.2727 entropy=17.6485 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 30260] reward=-121175673.7 actor_loss=0.2439 critic_loss=152335548416.0000 entropy=17.6430 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 30260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543201.6 mean_steps=13.5
|
|
[Episode 30270] reward=-120992357.7 actor_loss=0.2859 critic_loss=158653490220.5217 entropy=17.6453 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 30280] reward=-117272737.1 actor_loss=0.3463 critic_loss=151185124693.3333 entropy=17.6550 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 30280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-644776.2 mean_steps=12.2
|
|
[Episode 30290] reward=-121071561.7 actor_loss=0.2811 critic_loss=151656356977.7778 entropy=17.6566 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 30300] reward=-126374576.7 actor_loss=0.2404 critic_loss=167378860987.7333 entropy=17.6581 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 30300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586906.0 mean_steps=12.8
|
|
[Episode 30310] reward=-120953639.3 actor_loss=0.3342 critic_loss=159128160467.8621 entropy=17.6506 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 30320] reward=-122074116.8 actor_loss=0.2346 critic_loss=156578580980.6222 entropy=17.6584 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 30320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510220.7 mean_steps=14.4
|
|
[Episode 30330] reward=-115247969.7 actor_loss=0.3500 critic_loss=146883228516.8485 entropy=17.6532 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 30340] reward=-118662188.6 actor_loss=0.3101 critic_loss=156708475392.0000 entropy=17.6616 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 30340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-512982.8 mean_steps=13.2
|
|
[Episode 30350] reward=-119092923.9 actor_loss=0.3420 critic_loss=161659122639.2381 entropy=17.6727 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 30360] reward=-122143197.0 actor_loss=0.3274 critic_loss=160159816817.7778 entropy=17.6882 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 30360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-584640.6 mean_steps=13.9
|
|
[Episode 30370] reward=-117184067.9 actor_loss=0.2814 critic_loss=150714450550.1538 entropy=17.6874 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 30380] reward=-112152121.9 actor_loss=0.2897 critic_loss=147585622926.2222 entropy=17.7007 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 30380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430945.4 mean_steps=14.7
|
|
[Episode 30390] reward=-123531103.3 actor_loss=0.2526 critic_loss=160107491028.2927 entropy=17.6900 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 30400] reward=-126813351.2 actor_loss=0.2524 critic_loss=165375608273.4546 entropy=17.6711 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 30400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513243.7 mean_steps=13.6
|
|
[Episode 30410] reward=-122715918.6 actor_loss=0.2693 critic_loss=164534946084.5714 entropy=17.6659 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 30420] reward=-125068752.7 actor_loss=0.2228 critic_loss=162436238155.2941 entropy=17.6603 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 30420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-390690.7 mean_steps=16.4
|
|
[Episode 30430] reward=-120073715.8 actor_loss=0.3350 critic_loss=153079874398.3158 entropy=17.6576 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 30440] reward=-116039513.1 actor_loss=0.2817 critic_loss=151560047993.2632 entropy=17.6617 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 30440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520227.4 mean_steps=14.3
|
|
[Episode 30450] reward=-122308990.7 actor_loss=0.2089 critic_loss=160878716369.4546 entropy=17.6732 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 30460] reward=-125411961.5 actor_loss=0.3044 critic_loss=218266782168.6154 entropy=17.6826 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 30460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541671.8 mean_steps=13.4
|
|
[Episode 30470] reward=-125474500.4 actor_loss=0.1806 critic_loss=163149684736.0000 entropy=17.6976 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 30480] reward=-123141721.6 actor_loss=0.2413 critic_loss=155506607340.3077 entropy=17.6808 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 30480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-544997.7 mean_steps=12.2
|
|
[Episode 30490] reward=-119164073.8 actor_loss=0.3710 critic_loss=153262446182.4000 entropy=17.6779 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 30500] reward=-126479898.1 actor_loss=0.3351 critic_loss=233147270609.4546 entropy=17.6753 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 30500] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-298514.7 mean_steps=17.8
|
|
[Episode 30510] reward=-119138587.8 actor_loss=0.2849 critic_loss=161493795328.0000 entropy=17.6711 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 30520] reward=-116886773.0 actor_loss=0.2421 critic_loss=144333420005.0526 entropy=17.6593 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 30520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-366545.1 mean_steps=17.1
|
|
[Episode 30530] reward=-117177323.7 actor_loss=0.3743 critic_loss=149656867089.0667 entropy=17.6683 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 30540] reward=-120065967.4 actor_loss=0.2543 critic_loss=153269479610.1818 entropy=17.6637 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 30540] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-579507.5 mean_steps=12.1
|
|
[Episode 30550] reward=-123260398.0 actor_loss=0.3048 critic_loss=165877809152.0000 entropy=17.6703 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 30560] reward=-122820870.0 actor_loss=0.2005 critic_loss=156402721450.6667 entropy=17.6693 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 30560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473834.0 mean_steps=15.1
|
|
[Episode 30570] reward=-114240478.9 actor_loss=0.3853 critic_loss=146599163851.4872 entropy=17.6670 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 30580] reward=-118105839.8 actor_loss=0.3895 critic_loss=176747481588.6222 entropy=17.6505 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 30580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463332.6 mean_steps=14.6
|
|
[Episode 30590] reward=-114051404.5 actor_loss=0.4309 critic_loss=155665257130.6667 entropy=17.6465 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 30600] reward=-118875572.3 actor_loss=0.2369 critic_loss=157310690918.4000 entropy=17.6450 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 30600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550333.8 mean_steps=13.6
|
|
[Episode 30610] reward=-123326170.0 actor_loss=0.2535 critic_loss=158580053333.3333 entropy=17.6450 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 30620] reward=-125001737.9 actor_loss=0.2018 critic_loss=159365171266.0645 entropy=17.6377 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 30620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-590351.3 mean_steps=14.7
|
|
[Episode 30630] reward=-120497824.7 actor_loss=0.2755 critic_loss=157494480804.9778 entropy=17.6348 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 30640] reward=-118946871.9 actor_loss=0.3481 critic_loss=153108592453.8182 entropy=17.6217 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 30640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560706.6 mean_steps=12.8
|
|
[Episode 30650] reward=-119594090.0 actor_loss=0.3087 critic_loss=153606517191.1111 entropy=17.6242 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 30660] reward=-116400494.9 actor_loss=0.2847 critic_loss=150662607583.1795 entropy=17.6144 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 30660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-519152.6 mean_steps=13.3
|
|
[Episode 30670] reward=-121058952.3 actor_loss=0.2930 critic_loss=151013850391.2727 entropy=17.6110 approx_kl=0.0125 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 30680] reward=-125065737.6 actor_loss=0.2360 critic_loss=165602011818.6667 entropy=17.6188 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 30680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-404966.0 mean_steps=15.7
|
|
[Episode 30690] reward=-121403377.5 actor_loss=0.2343 critic_loss=150660053947.7333 entropy=17.6192 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 30700] reward=-117476463.4 actor_loss=0.2506 critic_loss=146472925696.0000 entropy=17.6154 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 30700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539914.5 mean_steps=14.4
|
|
[Episode 30710] reward=-122211260.6 actor_loss=0.2922 critic_loss=259745810064.4102 entropy=17.6237 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 30720] reward=-126865117.0 actor_loss=0.2711 critic_loss=311059110661.6889 entropy=17.6340 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 30720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501165.7 mean_steps=15.3
|
|
[Episode 30730] reward=-118940297.8 actor_loss=0.3135 critic_loss=158660553386.6667 entropy=17.6370 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 30740] reward=-115614351.4 actor_loss=0.3307 critic_loss=151481807494.7368 entropy=17.6434 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 30740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463853.5 mean_steps=16.1
|
|
[Episode 30750] reward=-114034532.2 actor_loss=0.2487 critic_loss=147170841941.3333 entropy=17.6380 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 30760] reward=-115250983.6 actor_loss=0.3029 critic_loss=152534034750.5778 entropy=17.6272 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 30760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-514183.7 mean_steps=13.3
|
|
[Episode 30770] reward=-120779052.2 actor_loss=0.2533 critic_loss=158520170177.4222 entropy=17.6406 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 30780] reward=-118748691.9 actor_loss=0.2770 critic_loss=156228361849.9048 entropy=17.6374 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 30780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-545831.3 mean_steps=12.9
|
|
[Episode 30790] reward=-119149445.7 actor_loss=0.3802 critic_loss=154895688424.7273 entropy=17.6334 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 30800] reward=-122724468.2 actor_loss=0.3374 critic_loss=158682240474.5366 entropy=17.6266 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 30800] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-375348.6 mean_steps=17.7
|
|
[Episode 30810] reward=-115273373.5 actor_loss=0.3638 critic_loss=143198450119.1111 entropy=17.6201 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 30820] reward=-119857299.5 actor_loss=0.2808 critic_loss=153747143975.8222 entropy=17.6402 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 30820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437155.1 mean_steps=14.8
|
|
[Episode 30830] reward=-115087819.9 actor_loss=0.3486 critic_loss=156865078272.0000 entropy=17.6375 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 30840] reward=-119531823.9 actor_loss=0.2864 critic_loss=166603644928.0000 entropy=17.6524 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 30840] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-369539.1 mean_steps=19.4
|
|
[Episode 30850] reward=-123163433.7 actor_loss=0.2833 critic_loss=161097920603.0222 entropy=17.6597 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 30860] reward=-115559838.6 actor_loss=0.3128 critic_loss=144663065941.3333 entropy=17.6460 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 30860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523963.3 mean_steps=14.4
|
|
[Episode 30870] reward=-115077808.2 actor_loss=0.3727 critic_loss=144778947788.8000 entropy=17.6655 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 30880] reward=-118284542.7 actor_loss=0.3503 critic_loss=154276889356.1905 entropy=17.6517 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 30880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560477.7 mean_steps=12.9
|
|
[Episode 30890] reward=-123999543.8 actor_loss=0.2600 critic_loss=158900001905.7778 entropy=17.6459 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 30900] reward=-112100371.2 actor_loss=0.4270 critic_loss=137638555010.8445 entropy=17.6529 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 30900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-552837.7 mean_steps=12.8
|
|
[Episode 30910] reward=-114529156.0 actor_loss=0.3021 critic_loss=144699748998.7368 entropy=17.6506 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 30920] reward=-123254788.2 actor_loss=0.3028 critic_loss=160400846475.6364 entropy=17.6521 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 30920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526946.3 mean_steps=13.2
|
|
[Episode 30930] reward=-119349059.6 actor_loss=0.2000 critic_loss=153273004851.2000 entropy=17.6444 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 30940] reward=-116733928.4 actor_loss=0.3677 critic_loss=159817894461.4400 entropy=17.6358 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 30940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427542.3 mean_steps=15.2
|
|
[Episode 30950] reward=-124611788.9 actor_loss=0.1917 critic_loss=166646825441.8824 entropy=17.6228 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 30960] reward=-119603513.2 actor_loss=0.2993 critic_loss=152888894733.4737 entropy=17.6171 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 30960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-377340.8 mean_steps=16.7
|
|
[Episode 30970] reward=-117898963.0 actor_loss=0.3791 critic_loss=154760756519.8222 entropy=17.6205 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 30980] reward=-121205973.4 actor_loss=0.2942 critic_loss=159026658963.9111 entropy=17.6451 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 30980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468182.1 mean_steps=15.2
|
|
[Episode 30990] reward=-119520066.7 actor_loss=0.3577 critic_loss=156415307138.8445 entropy=17.6361 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 31000] reward=-121522804.8 actor_loss=0.3224 critic_loss=159946810254.2222 entropy=17.6468 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 31000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541671.3 mean_steps=14.3
|
|
[Episode 31010] reward=-122495626.6 actor_loss=0.2141 critic_loss=157440036329.7391 entropy=17.6403 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 31020] reward=-117668192.2 actor_loss=0.3240 critic_loss=147959223427.2820 entropy=17.6406 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 31020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410838.0 mean_steps=15.7
|
|
[Episode 31030] reward=-119792835.7 actor_loss=0.3360 critic_loss=156153498828.8000 entropy=17.6285 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 31040] reward=-115834916.3 actor_loss=0.2558 critic_loss=142414606973.1555 entropy=17.6267 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 31040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501442.9 mean_steps=14.2
|
|
[Episode 31050] reward=-119874784.9 actor_loss=0.1982 critic_loss=153014487540.6222 entropy=17.6543 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 31060] reward=-120349380.9 actor_loss=0.2245 critic_loss=156256508313.6000 entropy=17.6522 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 31060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517777.2 mean_steps=13.3
|
|
[Episode 31070] reward=-117894130.5 actor_loss=0.2869 critic_loss=151126007417.9048 entropy=17.6560 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 31080] reward=-118197188.8 actor_loss=0.3236 critic_loss=148554435361.3913 entropy=17.6528 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 31080] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-269287.9 mean_steps=17.4
|
|
[Episode 31090] reward=-110983345.4 actor_loss=0.4013 critic_loss=141990749289.9310 entropy=17.6469 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 31100] reward=-122174767.1 actor_loss=0.2865 critic_loss=161602835069.1555 entropy=17.6560 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 31100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447432.0 mean_steps=15.9
|
|
[Episode 31110] reward=-122023019.5 actor_loss=0.2605 critic_loss=160456090081.8824 entropy=17.6564 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 31120] reward=-118822910.4 actor_loss=0.3870 critic_loss=156212015344.9412 entropy=17.6446 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 31120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407446.9 mean_steps=16.7
|
|
[Episode 31130] reward=-118546193.5 actor_loss=0.2529 critic_loss=152353902807.5789 entropy=17.6492 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 31140] reward=-118568015.6 actor_loss=0.3419 critic_loss=154755014246.4000 entropy=17.6338 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 31140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-527272.2 mean_steps=15.8
|
|
[Episode 31150] reward=-122814267.9 actor_loss=0.2896 critic_loss=159221937493.3333 entropy=17.6263 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 31160] reward=-118389029.1 actor_loss=0.3257 critic_loss=149743050379.6364 entropy=17.6291 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 31160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541017.8 mean_steps=14.8
|
|
[Episode 31170] reward=-115722250.2 actor_loss=0.3402 critic_loss=147579229696.0000 entropy=17.6202 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 31180] reward=-119174986.9 actor_loss=0.2463 critic_loss=149115922432.0000 entropy=17.6296 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 31180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619712.4 mean_steps=13.6
|
|
[Episode 31190] reward=-110359376.3 actor_loss=0.3804 critic_loss=141880490914.9091 entropy=17.6297 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 31200] reward=-119579504.0 actor_loss=0.2451 critic_loss=155090677122.8445 entropy=17.6495 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 31200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435163.9 mean_steps=14.4
|
|
[Episode 31210] reward=-119459075.2 actor_loss=0.2549 critic_loss=154417778041.2632 entropy=17.6677 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 31220] reward=-122734095.7 actor_loss=0.2602 critic_loss=157073665325.1765 entropy=17.6752 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 31220] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-774039.8 mean_steps=10.9
|
|
[Episode 31230] reward=-119210695.7 actor_loss=0.3273 critic_loss=156904756565.3333 entropy=17.6932 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 31240] reward=-118760222.0 actor_loss=0.2856 critic_loss=150837553834.6667 entropy=17.6948 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 31240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-417030.9 mean_steps=14.6
|
|
[Episode 31250] reward=-121320807.5 actor_loss=0.2379 critic_loss=154800699099.4286 entropy=17.6740 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 31260] reward=-123709977.6 actor_loss=0.2603 critic_loss=163094883826.8718 entropy=17.6886 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 31260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440231.4 mean_steps=14.9
|
|
[Episode 31270] reward=-116423408.8 actor_loss=0.3411 critic_loss=150766798524.6316 entropy=17.6771 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 31280] reward=-122529099.2 actor_loss=0.2895 critic_loss=164174306645.3333 entropy=17.6659 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 31280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-611046.6 mean_steps=13.1
|
|
[Episode 31290] reward=-120774294.6 actor_loss=0.2901 critic_loss=153524083916.8000 entropy=17.6683 approx_kl=0.0111 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 31300] reward=-123953521.8 actor_loss=0.1630 critic_loss=158283277653.3333 entropy=17.6806 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 31300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-434350.2 mean_steps=16.6
|
|
[Episode 31310] reward=-113201835.0 actor_loss=0.3695 critic_loss=146352665629.2571 entropy=17.6644 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 31320] reward=-119722802.6 actor_loss=0.2517 critic_loss=155014158534.1935 entropy=17.6588 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 31320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-585978.8 mean_steps=11.7
|
|
[Episode 31330] reward=-122416907.4 actor_loss=0.3194 critic_loss=159116118308.5714 entropy=17.6619 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 31340] reward=-116737836.6 actor_loss=0.3214 critic_loss=148371861048.8889 entropy=17.6557 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 31340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468616.8 mean_steps=14.3
|
|
[Episode 31350] reward=-120457161.3 actor_loss=0.2698 critic_loss=152683642880.0000 entropy=17.6504 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 31360] reward=-120951336.6 actor_loss=0.2770 critic_loss=156493333904.6956 entropy=17.6366 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 31360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-506607.9 mean_steps=15.2
|
|
[Episode 31370] reward=-125955555.6 actor_loss=0.2119 critic_loss=162019226965.3333 entropy=17.6261 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 31380] reward=-114011196.9 actor_loss=0.2982 critic_loss=143650555611.4286 entropy=17.6251 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 31380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-372120.8 mean_steps=14.3
|
|
[Episode 31390] reward=-116844914.1 actor_loss=0.3269 critic_loss=149133585354.1053 entropy=17.6233 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 31400] reward=-117741213.6 actor_loss=0.2343 critic_loss=153178287445.3333 entropy=17.6128 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 31400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541850.7 mean_steps=14.4
|
|
[Episode 31410] reward=-122316751.3 actor_loss=0.2734 critic_loss=152851255296.0000 entropy=17.6025 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 31420] reward=-115650372.2 actor_loss=0.3677 critic_loss=145962952583.5294 entropy=17.6082 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 31420] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-330678.9 mean_steps=18.2
|
|
[Episode 31430] reward=-112139246.1 actor_loss=0.3154 critic_loss=145464774314.6667 entropy=17.6098 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 31440] reward=-124904317.7 actor_loss=0.2323 critic_loss=163002078208.0000 entropy=17.6167 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 31440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443612.8 mean_steps=15.3
|
|
[Episode 31450] reward=-124460803.4 actor_loss=0.3133 critic_loss=159735632817.2308 entropy=17.6146 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 31460] reward=-118438072.9 actor_loss=0.2684 critic_loss=148935565047.7419 entropy=17.6031 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 31460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534994.7 mean_steps=13.3
|
|
[Episode 31470] reward=-119439318.2 actor_loss=0.3456 critic_loss=150316818863.1579 entropy=17.6000 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 31480] reward=-120977798.0 actor_loss=0.2577 critic_loss=153533688490.6667 entropy=17.6004 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 31480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419104.1 mean_steps=15.6
|
|
[Episode 31490] reward=-122661387.4 actor_loss=0.2161 critic_loss=157194242048.0000 entropy=17.6012 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 31500] reward=-117259351.1 actor_loss=0.2801 critic_loss=150536722659.5555 entropy=17.6158 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 31500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498928.0 mean_steps=14.0
|
|
[Episode 31510] reward=-119958629.8 actor_loss=0.2936 critic_loss=151564944998.4000 entropy=17.6019 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 31520] reward=-120751726.7 actor_loss=0.3023 critic_loss=154049873920.0000 entropy=17.6072 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 31520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-398197.2 mean_steps=15.6
|
|
[Episode 31530] reward=-109834489.4 actor_loss=0.3047 critic_loss=141259508184.6154 entropy=17.5867 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 31540] reward=-120000042.6 actor_loss=0.3678 critic_loss=193108735707.4286 entropy=17.5920 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 31540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566668.3 mean_steps=12.4
|
|
[Episode 31550] reward=-123067001.4 actor_loss=0.2609 critic_loss=154084636847.5428 entropy=17.5963 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 31560] reward=-119143150.6 actor_loss=0.2658 critic_loss=155970763629.7143 entropy=17.5967 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 31560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492718.9 mean_steps=15.1
|
|
[Episode 31570] reward=-119789975.4 actor_loss=0.2684 critic_loss=150857870774.8571 entropy=17.5981 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 31580] reward=-120967527.9 actor_loss=0.3972 critic_loss=156406143730.5263 entropy=17.5999 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 31580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511602.4 mean_steps=14.8
|
|
[Episode 31590] reward=-120604817.4 actor_loss=0.3209 critic_loss=158346716119.0400 entropy=17.5946 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 31600] reward=-126435888.7 actor_loss=0.1582 critic_loss=160956290389.3333 entropy=17.5966 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 31600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490350.8 mean_steps=14.8
|
|
[Episode 31610] reward=-118913557.9 actor_loss=0.2620 critic_loss=148634315161.6000 entropy=17.5958 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 31620] reward=-120399344.0 actor_loss=0.2791 critic_loss=151983585603.3684 entropy=17.5941 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 31620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492708.3 mean_steps=14.0
|
|
[Episode 31630] reward=-117423178.9 actor_loss=0.3378 critic_loss=148378183972.5714 entropy=17.5886 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 31640] reward=-119842585.0 actor_loss=0.3288 critic_loss=152644584913.4546 entropy=17.5833 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 31640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-455020.4 mean_steps=13.2
|
|
[Episode 31650] reward=-123581527.7 actor_loss=0.2065 critic_loss=164125655040.0000 entropy=17.5833 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 31660] reward=-120776476.9 actor_loss=0.2308 critic_loss=150122784085.3333 entropy=17.5891 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 31660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-466565.4 mean_steps=16.3
|
|
[Episode 31670] reward=-124662414.1 actor_loss=0.3069 critic_loss=161154155941.6471 entropy=17.5900 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 31680] reward=-123166152.5 actor_loss=0.2681 critic_loss=159221637120.0000 entropy=17.6127 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 31680] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-368440.6 mean_steps=17.9
|
|
[Episode 31690] reward=-121116459.5 actor_loss=0.3024 critic_loss=154531450060.8000 entropy=17.6219 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 31700] reward=-116828286.5 actor_loss=0.2981 critic_loss=150642957854.1176 entropy=17.6376 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 31700] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-342320.1 mean_steps=18.1
|
|
[Episode 31710] reward=-124595231.4 actor_loss=0.2764 critic_loss=157822760029.0909 entropy=17.6429 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 31720] reward=-123260742.7 actor_loss=0.1899 critic_loss=159202003482.9474 entropy=17.6415 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 31720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472845.0 mean_steps=15.6
|
|
[Episode 31730] reward=-117219305.1 actor_loss=0.3472 critic_loss=149677060336.9412 entropy=17.6368 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 31740] reward=-118477292.4 actor_loss=0.3060 critic_loss=148009865431.5789 entropy=17.6303 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 31740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-580673.7 mean_steps=13.9
|
|
[Episode 31750] reward=-120440132.9 actor_loss=0.2623 critic_loss=155164664438.1538 entropy=17.6321 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 31760] reward=-120610717.3 actor_loss=0.2658 critic_loss=163849147733.3333 entropy=17.6362 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 31760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489149.2 mean_steps=15.1
|
|
[Episode 31770] reward=-118648384.0 actor_loss=0.3487 critic_loss=152554398956.3077 entropy=17.6261 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 31780] reward=-115482198.1 actor_loss=0.2517 critic_loss=145646682824.3478 entropy=17.6217 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 31780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570884.6 mean_steps=13.8
|
|
[Episode 31790] reward=-116202687.6 actor_loss=0.4025 critic_loss=151435167061.3333 entropy=17.6211 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 31800] reward=-121398749.8 actor_loss=0.3133 critic_loss=154694032497.7778 entropy=17.6184 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 31800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577561.8 mean_steps=13.7
|
|
[Episode 31810] reward=-121872331.9 actor_loss=0.3481 critic_loss=157829332445.8667 entropy=17.6358 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 31820] reward=-116854602.0 actor_loss=0.2768 critic_loss=147648062681.2121 entropy=17.6346 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 31820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-422687.2 mean_steps=17.6
|
|
[Episode 31830] reward=-120631039.8 actor_loss=0.2553 critic_loss=157646017877.3333 entropy=17.6433 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 31840] reward=-120057268.0 actor_loss=0.2549 critic_loss=154484837229.7143 entropy=17.6390 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 31840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497506.3 mean_steps=14.9
|
|
[Episode 31850] reward=-120417346.5 actor_loss=0.3473 critic_loss=156338902357.3333 entropy=17.6298 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 31860] reward=-119186812.8 actor_loss=0.2983 critic_loss=152693711238.0952 entropy=17.6207 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 31860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452786.6 mean_steps=15.7
|
|
[Episode 31870] reward=-123161350.9 actor_loss=0.2571 critic_loss=159398151782.4000 entropy=17.6271 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 31880] reward=-114985116.7 actor_loss=0.2503 critic_loss=146992054800.5161 entropy=17.6380 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 31880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457720.2 mean_steps=14.6
|
|
[Episode 31890] reward=-118152068.2 actor_loss=0.4048 critic_loss=150842140964.5714 entropy=17.6449 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 31900] reward=-126584788.8 actor_loss=0.2005 critic_loss=159441819461.8182 entropy=17.6487 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 31900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581346.8 mean_steps=13.7
|
|
[Episode 31910] reward=-120298900.7 actor_loss=0.3543 critic_loss=154065295769.6000 entropy=17.6500 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 31920] reward=-120326599.0 actor_loss=0.3020 critic_loss=155510727566.2222 entropy=17.6543 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 31920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480790.4 mean_steps=14.6
|
|
[Episode 31930] reward=-124125647.1 actor_loss=0.2639 critic_loss=170970677729.8824 entropy=17.6444 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 31940] reward=-122693314.0 actor_loss=0.2846 critic_loss=159850823680.0000 entropy=17.6517 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 31940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532346.3 mean_steps=13.9
|
|
[Episode 31950] reward=-116769788.0 actor_loss=0.2873 critic_loss=151758355543.7714 entropy=17.6599 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 31960] reward=-122640992.8 actor_loss=0.2412 critic_loss=204430403584.0000 entropy=17.6765 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 31960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478167.4 mean_steps=15.1
|
|
[Episode 31970] reward=-123709866.3 actor_loss=0.2260 critic_loss=162627957356.6060 entropy=17.6841 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 31980] reward=-116295534.6 actor_loss=0.4520 critic_loss=147015322880.0000 entropy=17.6791 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Eval 31980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527751.1 mean_steps=14.6
|
|
[Episode 31990] reward=-119764459.1 actor_loss=0.3067 critic_loss=157604963151.4483 entropy=17.6840 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 32000] reward=-119378862.2 actor_loss=0.2872 critic_loss=161827918002.0869 entropy=17.6953 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 32000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-596866.5 mean_steps=14.0
|
|
[Episode 32010] reward=-123120171.0 actor_loss=0.2583 critic_loss=158243592794.3529 entropy=17.7005 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 32020] reward=-118198434.7 actor_loss=0.2648 critic_loss=148636703493.6889 entropy=17.6993 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 32020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-436507.3 mean_steps=14.6
|
|
[Episode 32030] reward=-120379653.2 actor_loss=0.3115 critic_loss=157377142784.0000 entropy=17.6859 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 32040] reward=-124552220.9 actor_loss=0.2560 critic_loss=157251963866.0741 entropy=17.6932 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 32040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554068.7 mean_steps=13.3
|
|
[Episode 32050] reward=-119232685.4 actor_loss=0.3382 critic_loss=151940841472.0000 entropy=17.6802 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 32060] reward=-123364246.8 actor_loss=0.2233 critic_loss=157268313788.6316 entropy=17.6770 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 32060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-459772.0 mean_steps=13.8
|
|
[Episode 32070] reward=-121404879.8 actor_loss=0.2893 critic_loss=159656176298.6667 entropy=17.6804 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 32080] reward=-122255550.8 actor_loss=0.2032 critic_loss=155878627009.4222 entropy=17.6988 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 32080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548524.2 mean_steps=13.8
|
|
[Episode 32090] reward=-119830626.9 actor_loss=0.4184 critic_loss=161593551257.6000 entropy=17.6987 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 32100] reward=-122831790.7 actor_loss=0.2225 critic_loss=156894591096.4706 entropy=17.6870 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 32100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-460459.8 mean_steps=14.0
|
|
[Episode 32110] reward=-118022728.6 actor_loss=0.3007 critic_loss=151679598774.0444 entropy=17.6933 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 32120] reward=-120187395.6 actor_loss=0.2564 critic_loss=150761000406.4865 entropy=17.6781 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 32120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477619.2 mean_steps=14.9
|
|
[Episode 32130] reward=-119427167.4 actor_loss=0.2251 critic_loss=151366025580.0889 entropy=17.6803 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 32140] reward=-117751056.4 actor_loss=0.2525 critic_loss=150020056485.6471 entropy=17.6853 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 32140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-652967.0 mean_steps=13.6
|
|
[Episode 32150] reward=-119054329.8 actor_loss=0.2668 critic_loss=151514758204.2353 entropy=17.6805 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 32160] reward=-121600540.0 actor_loss=0.2648 critic_loss=165532826062.4516 entropy=17.6917 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 32160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-568763.6 mean_steps=12.8
|
|
[Episode 32170] reward=-120991278.5 actor_loss=0.2713 critic_loss=157708900165.8182 entropy=17.7030 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 32180] reward=-122739128.7 actor_loss=0.2924 critic_loss=184478154752.0000 entropy=17.6849 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 32180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472270.1 mean_steps=15.2
|
|
[Episode 32190] reward=-123212606.9 actor_loss=0.3603 critic_loss=159920213924.9778 entropy=17.6784 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 32200] reward=-120712877.5 actor_loss=0.3262 critic_loss=155234108254.3158 entropy=17.6940 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 32200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437767.5 mean_steps=15.9
|
|
[Episode 32210] reward=-119484536.5 actor_loss=0.2776 critic_loss=155216604066.9091 entropy=17.6900 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 32220] reward=-120639297.2 actor_loss=0.3870 critic_loss=190252322652.1600 entropy=17.6899 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 32220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402741.5 mean_steps=16.9
|
|
[Episode 32230] reward=-117355923.3 actor_loss=0.3258 critic_loss=150835647186.8235 entropy=17.6893 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 32240] reward=-115846493.0 actor_loss=0.2917 critic_loss=147670901217.8824 entropy=17.6916 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 32240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458049.5 mean_steps=16.3
|
|
[Episode 32250] reward=-117157780.6 actor_loss=0.4178 critic_loss=156780112802.9091 entropy=17.6989 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 32260] reward=-115305148.1 actor_loss=0.3233 critic_loss=143721186878.4390 entropy=17.6981 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 32260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477482.2 mean_steps=14.8
|
|
[Episode 32270] reward=-117723768.7 actor_loss=0.3127 critic_loss=153697186909.0909 entropy=17.6793 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 32280] reward=-115062797.4 actor_loss=0.2776 critic_loss=151534177484.8000 entropy=17.6791 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 32280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-430518.5 mean_steps=16.7
|
|
[Episode 32290] reward=-115853019.5 actor_loss=0.3620 critic_loss=150075943384.6154 entropy=17.6726 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 32300] reward=-116767118.9 actor_loss=0.3119 critic_loss=147425974317.5111 entropy=17.6667 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 32300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490376.7 mean_steps=14.2
|
|
[Episode 32310] reward=-116375951.6 actor_loss=0.4125 critic_loss=150127008699.7333 entropy=17.6571 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 32320] reward=-120745473.5 actor_loss=0.2158 critic_loss=155003474678.5185 entropy=17.6620 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 32320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459384.9 mean_steps=15.3
|
|
[Episode 32330] reward=-120518800.6 actor_loss=0.2469 critic_loss=155906643285.3333 entropy=17.6574 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 32340] reward=-117771366.5 actor_loss=0.3217 critic_loss=147566176135.5294 entropy=17.6620 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 32340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-351368.3 mean_steps=16.1
|
|
[Episode 32350] reward=-111681509.4 actor_loss=0.2072 critic_loss=138243031222.0444 entropy=17.6690 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 32360] reward=-118307392.8 actor_loss=0.3679 critic_loss=151278301804.6060 entropy=17.6572 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 32360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523876.5 mean_steps=13.3
|
|
[Episode 32370] reward=-115443082.5 actor_loss=0.2611 critic_loss=150268335809.4222 entropy=17.6680 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 32380] reward=-110776049.6 actor_loss=0.4868 critic_loss=143221566610.2857 entropy=17.6744 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 32380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-569505.6 mean_steps=12.6
|
|
[Episode 32390] reward=-121720131.7 actor_loss=0.1793 critic_loss=151717207582.1176 entropy=17.6703 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 32400] reward=-118210389.7 actor_loss=0.2596 critic_loss=152527521978.1818 entropy=17.6634 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 32400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-574644.5 mean_steps=12.9
|
|
[Episode 32410] reward=-123962697.9 actor_loss=0.2213 critic_loss=161603659776.0000 entropy=17.6593 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 32420] reward=-122084402.2 actor_loss=0.2866 critic_loss=154961653304.8889 entropy=17.6592 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 32420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-396337.1 mean_steps=15.8
|
|
[Episode 32430] reward=-117511329.9 actor_loss=0.3436 critic_loss=149702334951.6190 entropy=17.6643 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 32440] reward=-116829656.3 actor_loss=0.3263 critic_loss=149129956556.8000 entropy=17.6550 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 32440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-399351.1 mean_steps=16.1
|
|
[Episode 32450] reward=-117799975.9 actor_loss=0.2176 critic_loss=144203369472.0000 entropy=17.6549 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 32460] reward=-122102448.8 actor_loss=0.2689 critic_loss=156813217336.8889 entropy=17.6590 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 32460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-423380.0 mean_steps=14.6
|
|
[Episode 32470] reward=-121883889.4 actor_loss=0.1728 critic_loss=153497164185.6000 entropy=17.6568 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 32480] reward=-120780462.7 actor_loss=0.3156 critic_loss=157677607321.6000 entropy=17.6678 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 32480] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-396472.6 mean_steps=17.4
|
|
[Episode 32490] reward=-117396489.8 actor_loss=0.3683 critic_loss=162165006767.1579 entropy=17.6595 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 32500] reward=-120401707.5 actor_loss=0.2279 critic_loss=156158485504.0000 entropy=17.6657 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 32500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457505.9 mean_steps=16.1
|
|
[Episode 32510] reward=-119970509.0 actor_loss=0.2963 critic_loss=164277479014.4000 entropy=17.6696 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 32520] reward=-121201787.4 actor_loss=0.2827 critic_loss=162093648802.9091 entropy=17.6629 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 32520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-394642.8 mean_steps=15.4
|
|
[Episode 32530] reward=-116097617.8 actor_loss=0.2729 critic_loss=149340696120.8889 entropy=17.6687 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 32540] reward=-121744266.4 actor_loss=0.3133 critic_loss=154814111262.1176 entropy=17.6688 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 32540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469331.5 mean_steps=14.4
|
|
[Episode 32550] reward=-120206259.8 actor_loss=0.3009 critic_loss=160829108758.2609 entropy=17.6578 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 32560] reward=-117341195.3 actor_loss=0.3236 critic_loss=145824696222.4762 entropy=17.6640 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 32560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483670.2 mean_steps=15.1
|
|
[Episode 32570] reward=-119952291.3 actor_loss=0.3385 critic_loss=153530322488.8889 entropy=17.6813 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 32580] reward=-121261527.6 actor_loss=0.3190 critic_loss=151425516339.2000 entropy=17.6739 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 32580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576502.9 mean_steps=14.2
|
|
[Episode 32590] reward=-117944690.8 actor_loss=0.3853 critic_loss=155018304065.6410 entropy=17.6714 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 32600] reward=-117157627.6 actor_loss=0.3009 critic_loss=154165941589.3333 entropy=17.6752 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 32600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574940.2 mean_steps=14.1
|
|
[Episode 32610] reward=-117762624.6 actor_loss=0.2934 critic_loss=148463506525.0909 entropy=17.6813 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 32620] reward=-112836990.5 actor_loss=0.3563 critic_loss=144929863530.1463 entropy=17.6772 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 32620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459986.5 mean_steps=15.5
|
|
[Episode 32630] reward=-121129180.7 actor_loss=0.2734 critic_loss=152863205218.4615 entropy=17.6663 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 32640] reward=-118773731.0 actor_loss=0.2492 critic_loss=152105802865.7778 entropy=17.6814 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 32640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506782.3 mean_steps=14.3
|
|
[Episode 32650] reward=-111220325.7 actor_loss=0.3649 critic_loss=141608436986.3111 entropy=17.6873 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 32660] reward=-120421032.3 actor_loss=0.2593 critic_loss=161079413760.0000 entropy=17.6816 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 32660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378116.8 mean_steps=16.3
|
|
[Episode 32670] reward=-120922667.0 actor_loss=0.3105 critic_loss=153304212366.2222 entropy=17.6743 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 32680] reward=-117173825.0 actor_loss=0.2313 critic_loss=154736548717.7143 entropy=17.6706 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 32680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-589717.5 mean_steps=13.6
|
|
[Episode 32690] reward=-120696408.2 actor_loss=0.2222 critic_loss=154639335424.0000 entropy=17.6782 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 32700] reward=-113274890.4 actor_loss=0.2924 critic_loss=146145157120.0000 entropy=17.6869 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 32700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429063.8 mean_steps=15.8
|
|
[Episode 32710] reward=-121129945.8 actor_loss=0.1719 critic_loss=153108894913.7297 entropy=17.6751 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 32720] reward=-113207041.0 actor_loss=0.3134 critic_loss=142440446464.0000 entropy=17.6781 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 32720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545370.6 mean_steps=13.9
|
|
[Episode 32730] reward=-124712525.3 actor_loss=0.2816 critic_loss=164657197511.1111 entropy=17.6722 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 32740] reward=-118037822.2 actor_loss=0.3173 critic_loss=146917420236.8000 entropy=17.6728 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 32740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465017.9 mean_steps=14.8
|
|
[Episode 32750] reward=-119306453.6 actor_loss=0.3217 critic_loss=153122969413.8182 entropy=17.6666 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 32760] reward=-113310899.5 actor_loss=0.2431 critic_loss=149088701293.7143 entropy=17.6707 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 32760] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-349535.5 mean_steps=17.1
|
|
[Episode 32770] reward=-116497006.0 actor_loss=0.2271 critic_loss=146850790679.2727 entropy=17.6717 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 32780] reward=-110594738.5 actor_loss=0.3081 critic_loss=140567397351.0244 entropy=17.6826 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 32780] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-422634.9 mean_steps=16.9
|
|
[Episode 32790] reward=-117100508.9 actor_loss=0.3025 critic_loss=151748237721.6000 entropy=17.6759 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 32800] reward=-115470782.6 actor_loss=0.3027 critic_loss=154764923997.0909 entropy=17.6581 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 32800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-413858.8 mean_steps=16.9
|
|
[Episode 32810] reward=-121062814.5 actor_loss=0.2556 critic_loss=158765487133.2571 entropy=17.6534 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 32820] reward=-122928195.9 actor_loss=0.1766 critic_loss=158451405619.2000 entropy=17.6571 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 32820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-506724.1 mean_steps=15.2
|
|
[Episode 32830] reward=-116755136.6 actor_loss=0.1859 critic_loss=151375388672.0000 entropy=17.6607 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 32840] reward=-117525507.0 actor_loss=0.2864 critic_loss=151932042899.9111 entropy=17.6609 approx_kl=0.0103 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 32840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538947.8 mean_steps=14.3
|
|
[Episode 32850] reward=-123516917.8 actor_loss=0.3002 critic_loss=158250504192.0000 entropy=17.6588 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 32860] reward=-122015309.6 actor_loss=0.2767 critic_loss=160012021174.8571 entropy=17.6602 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 32860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395120.1 mean_steps=16.8
|
|
[Episode 32870] reward=-117160066.3 actor_loss=0.3587 critic_loss=151871650876.2353 entropy=17.6512 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 32880] reward=-119920331.6 actor_loss=0.2661 critic_loss=151944709120.0000 entropy=17.6558 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 32880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-402722.2 mean_steps=14.7
|
|
[Episode 32890] reward=-122161247.6 actor_loss=0.2441 critic_loss=157028191609.2632 entropy=17.6665 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 32900] reward=-122168495.7 actor_loss=0.3364 critic_loss=154479481978.8800 entropy=17.6614 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 32900] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-187662.7 mean_steps=18.2
|
|
[Episode 32910] reward=-121106195.6 actor_loss=0.2872 critic_loss=153281745169.0667 entropy=17.6477 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 32920] reward=-114408108.0 actor_loss=0.3738 critic_loss=141288568331.3778 entropy=17.6435 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 32920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-637993.8 mean_steps=12.2
|
|
[Episode 32930] reward=-124069224.2 actor_loss=0.2790 critic_loss=157161942944.7442 entropy=17.6430 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 32940] reward=-120602158.3 actor_loss=0.2504 critic_loss=154834662951.3846 entropy=17.6440 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 32940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-676160.9 mean_steps=11.9
|
|
[Episode 32950] reward=-119544048.7 actor_loss=0.2610 critic_loss=149757520956.2353 entropy=17.6469 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 32960] reward=-122634393.8 actor_loss=0.3179 critic_loss=155263665438.7200 entropy=17.6461 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 32960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-360091.4 mean_steps=16.1
|
|
[Episode 32970] reward=-123399441.6 actor_loss=0.3343 critic_loss=159395920164.5714 entropy=17.6544 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 32980] reward=-115587184.6 actor_loss=0.2813 critic_loss=149254021551.1579 entropy=17.6187 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 32980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470143.3 mean_steps=15.2
|
|
[Episode 32990] reward=-119598001.2 actor_loss=0.2955 critic_loss=154354808452.7408 entropy=17.6107 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 33000] reward=-118183415.4 actor_loss=0.3731 critic_loss=154091260705.3913 entropy=17.6162 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 33000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407948.8 mean_steps=16.6
|
|
[Episode 33010] reward=-115538199.0 actor_loss=0.3119 critic_loss=142094974976.0000 entropy=17.6182 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 33020] reward=-121362259.4 actor_loss=0.3250 critic_loss=156229124096.0000 entropy=17.6127 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 33020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540419.9 mean_steps=13.5
|
|
[Episode 33030] reward=-122937434.5 actor_loss=0.1915 critic_loss=153638905173.3333 entropy=17.6060 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 33040] reward=-118782166.4 actor_loss=0.3423 critic_loss=159121628553.8462 entropy=17.6001 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 33040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469011.7 mean_steps=14.6
|
|
[Episode 33050] reward=-118968005.8 actor_loss=0.3362 critic_loss=152063649867.8518 entropy=17.6043 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 33060] reward=-119779173.0 actor_loss=0.1782 critic_loss=152495090723.3103 entropy=17.6045 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 33060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392637.9 mean_steps=16.3
|
|
[Episode 33070] reward=-122819818.7 actor_loss=0.2895 critic_loss=157596275143.1111 entropy=17.6075 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 33080] reward=-128253502.2 actor_loss=0.3218 critic_loss=170273502108.9032 entropy=17.6060 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 33080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-343720.8 mean_steps=15.8
|
|
[Episode 33090] reward=-122683917.6 actor_loss=0.2828 critic_loss=156166956646.4000 entropy=17.6033 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 33100] reward=-117974565.8 actor_loss=0.2461 critic_loss=149691837741.1765 entropy=17.6022 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 33100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537812.9 mean_steps=13.5
|
|
[Episode 33110] reward=-118834812.2 actor_loss=0.2432 critic_loss=146257290308.2667 entropy=17.6073 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 33120] reward=-122963709.5 actor_loss=0.2576 critic_loss=156984716194.9091 entropy=17.6147 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 33120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-624891.9 mean_steps=13.1
|
|
[Episode 33130] reward=-117027969.7 actor_loss=0.4237 critic_loss=149479224964.7408 entropy=17.6218 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 33140] reward=-121344060.4 actor_loss=0.3637 critic_loss=156570573027.5555 entropy=17.6336 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 33140] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-360100.8 mean_steps=17.4
|
|
[Episode 33150] reward=-118568939.9 actor_loss=0.2982 critic_loss=147697967104.0000 entropy=17.6403 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 33160] reward=-118153848.7 actor_loss=0.2842 critic_loss=148661219523.0476 entropy=17.6416 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 33160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518070.4 mean_steps=15.6
|
|
[Episode 33170] reward=-120782912.3 actor_loss=0.2570 critic_loss=154481986937.2632 entropy=17.6485 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 33180] reward=-123840198.0 actor_loss=0.2354 critic_loss=156573249677.2414 entropy=17.6485 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 33180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-563442.3 mean_steps=14.9
|
|
[Episode 33190] reward=-124660023.1 actor_loss=0.2910 critic_loss=162219400192.0000 entropy=17.6526 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 33200] reward=-122657847.0 actor_loss=0.2120 critic_loss=166827112510.0606 entropy=17.6490 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 33200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487437.8 mean_steps=15.3
|
|
[Episode 33210] reward=-116444214.9 actor_loss=0.2515 critic_loss=146989838336.0000 entropy=17.6560 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 33220] reward=-122522832.8 actor_loss=0.3232 critic_loss=158338917420.5217 entropy=17.6730 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 33220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537177.2 mean_steps=14.3
|
|
[Episode 33230] reward=-117816884.6 actor_loss=0.3364 critic_loss=155330578350.0800 entropy=17.6803 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 33240] reward=-129404344.8 actor_loss=0.2632 critic_loss=167861186104.8889 entropy=17.6731 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 33240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417814.9 mean_steps=15.8
|
|
[Episode 33250] reward=-118071718.3 actor_loss=0.3418 critic_loss=151185746250.3226 entropy=17.6594 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 33260] reward=-114657844.1 actor_loss=0.3275 critic_loss=147412865609.1429 entropy=17.6665 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 33260] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-392278.9 mean_steps=17.2
|
|
[Episode 33270] reward=-117192008.7 actor_loss=0.2377 critic_loss=153937477914.4828 entropy=17.6698 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 33280] reward=-119965390.0 actor_loss=0.2694 critic_loss=154454811945.2903 entropy=17.6644 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 33280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-638390.8 mean_steps=12.1
|
|
[Episode 33290] reward=-124577540.4 actor_loss=0.1806 critic_loss=157216830727.3143 entropy=17.6631 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 33300] reward=-116108395.5 actor_loss=0.3262 critic_loss=151434216387.7647 entropy=17.6602 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 33300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549126.7 mean_steps=14.8
|
|
[Episode 33310] reward=-120186632.3 actor_loss=0.2165 critic_loss=150759115161.6000 entropy=17.6667 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 33320] reward=-119012761.7 actor_loss=0.3725 critic_loss=148910363209.1429 entropy=17.6680 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 33320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450564.7 mean_steps=15.8
|
|
[Episode 33330] reward=-118323333.1 actor_loss=0.2380 critic_loss=149340479223.7419 entropy=17.6708 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 33340] reward=-118655862.3 actor_loss=0.3095 critic_loss=157458873986.9767 entropy=17.6778 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 33340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-349000.6 mean_steps=17.0
|
|
[Episode 33350] reward=-117580467.4 actor_loss=0.3233 critic_loss=148427532288.0000 entropy=17.6784 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 33360] reward=-118070253.8 actor_loss=0.3437 critic_loss=147037668588.3077 entropy=17.6752 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 33360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-543399.6 mean_steps=14.2
|
|
[Episode 33370] reward=-118464508.1 actor_loss=0.3806 critic_loss=149340275712.0000 entropy=17.6884 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 33380] reward=-124337407.7 actor_loss=0.3284 critic_loss=157270986536.4211 entropy=17.6815 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 33380] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-281710.6 mean_steps=17.4
|
|
[Episode 33390] reward=-122363650.9 actor_loss=0.3050 critic_loss=157526020004.9778 entropy=17.6841 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 33400] reward=-121449052.2 actor_loss=0.2341 critic_loss=157726446569.2444 entropy=17.6901 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 33400] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-318761.9 mean_steps=17.6
|
|
[Episode 33410] reward=-120404908.7 actor_loss=0.2486 critic_loss=151936553332.3636 entropy=17.6939 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 33420] reward=-121321562.1 actor_loss=0.2403 critic_loss=152802810105.7561 entropy=17.6843 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 33420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590493.4 mean_steps=13.7
|
|
[Episode 33430] reward=-118338666.9 actor_loss=0.2798 critic_loss=149516233386.6667 entropy=17.6998 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 33440] reward=-122311329.0 actor_loss=0.2581 critic_loss=156876505088.0000 entropy=17.6936 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 33440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504698.6 mean_steps=14.1
|
|
[Episode 33450] reward=-121490808.7 actor_loss=0.3912 critic_loss=155366907904.0000 entropy=17.7047 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 33460] reward=-121084133.8 actor_loss=0.3007 critic_loss=159646387541.3333 entropy=17.7078 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 33460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507827.2 mean_steps=13.2
|
|
[Episode 33470] reward=-114996974.0 actor_loss=0.3995 critic_loss=145902072445.1555 entropy=17.7139 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 33480] reward=-115199806.2 actor_loss=0.3494 critic_loss=147699527875.0476 entropy=17.7141 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 33480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458258.2 mean_steps=15.1
|
|
[Episode 33490] reward=-119585119.2 actor_loss=0.2812 critic_loss=152938864375.7419 entropy=17.7195 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 33500] reward=-120907326.9 actor_loss=0.3524 critic_loss=152973252289.4222 entropy=17.7100 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 33500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481202.9 mean_steps=14.1
|
|
[Episode 33510] reward=-117393718.2 actor_loss=0.2508 critic_loss=150091385405.4400 entropy=17.6995 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 33520] reward=-117035997.2 actor_loss=0.2918 critic_loss=154957455360.0000 entropy=17.7085 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 33520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451777.5 mean_steps=15.0
|
|
[Episode 33530] reward=-116508951.1 actor_loss=0.3362 critic_loss=147414476288.0000 entropy=17.7048 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 33540] reward=-121906563.9 actor_loss=0.3342 critic_loss=156047104773.6889 entropy=17.7073 approx_kl=0.0055 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 33540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-343601.9 mean_steps=15.9
|
|
[Episode 33550] reward=-123407963.0 actor_loss=0.2454 critic_loss=155650700083.2000 entropy=17.7033 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 33560] reward=-117804048.6 actor_loss=0.2908 critic_loss=148371610737.7778 entropy=17.6858 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 33560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-364204.2 mean_steps=15.3
|
|
[Episode 33570] reward=-119611027.8 actor_loss=0.2602 critic_loss=154973234062.2222 entropy=17.6885 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 33580] reward=-119170835.5 actor_loss=0.3417 critic_loss=152991406762.6667 entropy=17.6959 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 33580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-417082.4 mean_steps=16.7
|
|
[Episode 33590] reward=-111740220.2 actor_loss=0.3258 critic_loss=138397962240.0000 entropy=17.6900 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 33600] reward=-117946515.8 actor_loss=0.1740 critic_loss=151190729386.6667 entropy=17.6935 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 33600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521471.9 mean_steps=14.7
|
|
[Episode 33610] reward=-120750779.3 actor_loss=0.3477 critic_loss=160804805395.6923 entropy=17.6802 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 33620] reward=-119897298.3 actor_loss=0.2722 critic_loss=157328476296.5333 entropy=17.6945 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 33620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-363168.8 mean_steps=16.3
|
|
[Episode 33630] reward=-115904222.1 actor_loss=0.3267 critic_loss=147196552442.3111 entropy=17.7000 approx_kl=0.0108 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 33640] reward=-117598727.3 actor_loss=0.2402 critic_loss=145897486090.2400 entropy=17.7166 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 33640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-609131.5 mean_steps=13.3
|
|
[Episode 33650] reward=-115554447.3 actor_loss=0.2805 critic_loss=146019538625.4222 entropy=17.6965 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 33660] reward=-115521577.1 actor_loss=0.3085 critic_loss=149242744964.1290 entropy=17.6881 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 33660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484424.8 mean_steps=15.5
|
|
[Episode 33670] reward=-119179888.9 actor_loss=0.2896 critic_loss=150147047033.9048 entropy=17.6826 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 33680] reward=-120560311.9 actor_loss=0.2704 critic_loss=153246317706.3784 entropy=17.7018 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 33680] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-346192.0 mean_steps=17.6
|
|
[Episode 33690] reward=-118368955.3 actor_loss=0.3339 critic_loss=151098423919.3044 entropy=17.7050 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 33700] reward=-120066671.9 actor_loss=0.2884 critic_loss=150345880991.1351 entropy=17.6993 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 33700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543586.6 mean_steps=13.5
|
|
[Episode 33710] reward=-124556831.9 actor_loss=0.2846 critic_loss=168747017604.4138 entropy=17.7016 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 33720] reward=-116442732.5 actor_loss=0.3241 critic_loss=149857373024.7111 entropy=17.7079 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 33720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529961.3 mean_steps=14.6
|
|
[Episode 33730] reward=-119978314.0 actor_loss=0.3077 critic_loss=152526277451.2941 entropy=17.7083 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 33740] reward=-120533561.2 actor_loss=0.2367 critic_loss=156503024360.7273 entropy=17.7088 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 33740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-653188.5 mean_steps=11.8
|
|
[Episode 33750] reward=-118137618.5 actor_loss=0.1967 critic_loss=145447072381.1555 entropy=17.7115 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 33760] reward=-114915423.3 actor_loss=0.2840 critic_loss=141868941552.9412 entropy=17.7215 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 33760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-406064.9 mean_steps=15.6
|
|
[Episode 33770] reward=-124267448.7 actor_loss=0.2206 critic_loss=162892286464.0000 entropy=17.7230 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 33780] reward=-121161768.1 actor_loss=0.2175 critic_loss=155075650082.1333 entropy=17.7335 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 33780] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-354166.3 mean_steps=17.4
|
|
[Episode 33790] reward=-118801770.2 actor_loss=0.2403 critic_loss=151883035158.2609 entropy=17.7222 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 33800] reward=-120785356.7 actor_loss=0.2252 critic_loss=153858316180.2105 entropy=17.7360 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 33800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-626983.5 mean_steps=11.9
|
|
[Episode 33810] reward=-118215371.4 actor_loss=0.2974 critic_loss=146040622080.0000 entropy=17.7332 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 33820] reward=-114629610.9 actor_loss=0.2508 critic_loss=149274819047.6190 entropy=17.7217 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 33820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-640999.8 mean_steps=13.5
|
|
[Episode 33830] reward=-120380899.7 actor_loss=0.2340 critic_loss=153214748113.4546 entropy=17.7142 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 33840] reward=-116959187.6 actor_loss=0.3083 critic_loss=149116672577.6410 entropy=17.7160 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 33840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407145.1 mean_steps=16.6
|
|
[Episode 33850] reward=-116882585.0 actor_loss=0.2865 critic_loss=148958610500.2667 entropy=17.7126 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 33860] reward=-117944227.2 actor_loss=0.2993 critic_loss=150598426441.9556 entropy=17.6846 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 33860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623943.7 mean_steps=12.2
|
|
[Episode 33870] reward=-122179244.0 actor_loss=0.2829 critic_loss=160900263757.9131 entropy=17.6811 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 33880] reward=-122830419.8 actor_loss=0.2558 critic_loss=158565914669.5111 entropy=17.6724 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 33880] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-290117.4 mean_steps=17.9
|
|
[Episode 33890] reward=-120144870.7 actor_loss=0.3026 critic_loss=151269301288.9600 entropy=17.6708 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 33900] reward=-123567938.0 actor_loss=0.2921 critic_loss=154874343739.0769 entropy=17.6726 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 33900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501054.8 mean_steps=13.4
|
|
[Episode 33910] reward=-117215167.1 actor_loss=0.4129 critic_loss=150147161829.5172 entropy=17.6605 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 33920] reward=-117675985.6 actor_loss=0.2644 critic_loss=152668458866.7586 entropy=17.6628 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 33920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473356.8 mean_steps=14.2
|
|
[Episode 33930] reward=-116443428.7 actor_loss=0.2430 critic_loss=152932491745.8824 entropy=17.6618 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 33940] reward=-121015431.4 actor_loss=0.2379 critic_loss=154049942528.0000 entropy=17.6494 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 33940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-450307.8 mean_steps=13.9
|
|
[Episode 33950] reward=-119291250.5 actor_loss=0.2473 critic_loss=151223789158.4000 entropy=17.6297 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 33960] reward=-114101065.3 actor_loss=0.3554 critic_loss=143350496987.4286 entropy=17.6215 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 33960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581360.2 mean_steps=13.8
|
|
[Episode 33970] reward=-116984922.8 actor_loss=0.3000 critic_loss=158496445644.8000 entropy=17.6207 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 33980] reward=-118020078.2 actor_loss=0.2018 critic_loss=154640299493.0526 entropy=17.6080 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 33980] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-261928.5 mean_steps=16.6
|
|
[Episode 33990] reward=-122162607.4 actor_loss=0.2920 critic_loss=157560860672.0000 entropy=17.6073 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 34000] reward=-121654766.7 actor_loss=0.2383 critic_loss=154985883045.6471 entropy=17.6060 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 34000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510628.3 mean_steps=14.2
|
|
[Episode 34010] reward=-116912271.8 actor_loss=0.3204 critic_loss=144860454456.8889 entropy=17.5986 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 34020] reward=-120027095.0 actor_loss=0.1825 critic_loss=148586614889.0256 entropy=17.5936 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 34020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-389709.5 mean_steps=16.2
|
|
[Episode 34030] reward=-116612950.8 actor_loss=0.2143 critic_loss=147341926400.0000 entropy=17.5951 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 34040] reward=-116982087.7 actor_loss=0.2220 critic_loss=148649757144.6154 entropy=17.5752 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 34040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-444192.7 mean_steps=14.6
|
|
[Episode 34050] reward=-121881558.6 actor_loss=0.2229 critic_loss=161915863752.3478 entropy=17.5710 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 34060] reward=-118109717.5 actor_loss=0.2475 critic_loss=145780088597.9429 entropy=17.5716 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 34060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-479421.2 mean_steps=15.7
|
|
[Episode 34070] reward=-118526375.2 actor_loss=0.2961 critic_loss=145322505947.4286 entropy=17.5805 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 34080] reward=-111456007.8 actor_loss=0.2975 critic_loss=136968358689.3913 entropy=17.5866 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 34080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430901.7 mean_steps=15.2
|
|
[Episode 34090] reward=-121293349.0 actor_loss=0.3053 critic_loss=157682171904.0000 entropy=17.5853 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 34100] reward=-118123458.6 actor_loss=0.2882 critic_loss=145100655360.0000 entropy=17.5783 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 34100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453842.5 mean_steps=15.6
|
|
[Episode 34110] reward=-114171729.5 actor_loss=0.3569 critic_loss=153082744246.8571 entropy=17.5745 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 34120] reward=-118364364.2 actor_loss=0.2368 critic_loss=153378359296.0000 entropy=17.5956 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 34120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423332.0 mean_steps=15.6
|
|
[Episode 34130] reward=-120386130.8 actor_loss=0.2706 critic_loss=150058489173.3333 entropy=17.5981 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 34140] reward=-117474407.2 actor_loss=0.3140 critic_loss=151230494247.3846 entropy=17.6088 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 34140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484828.1 mean_steps=15.3
|
|
[Episode 34150] reward=-119690261.1 actor_loss=0.3860 critic_loss=149925269655.7037 entropy=17.6157 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 34160] reward=-112313019.8 actor_loss=0.3865 critic_loss=145821799765.3333 entropy=17.6168 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 34160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494295.6 mean_steps=14.0
|
|
[Episode 34170] reward=-120915255.8 actor_loss=0.2308 critic_loss=156933950668.8000 entropy=17.6120 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 34180] reward=-117071865.1 actor_loss=0.3603 critic_loss=158141704601.6000 entropy=17.6199 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 34180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-492192.8 mean_steps=15.9
|
|
[Episode 34190] reward=-114906201.3 actor_loss=0.3678 critic_loss=153960910848.0000 entropy=17.6177 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 34200] reward=-121062374.7 actor_loss=0.2603 critic_loss=156036432964.2667 entropy=17.6166 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 34200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-471282.3 mean_steps=13.2
|
|
[Episode 34210] reward=-118126519.9 actor_loss=0.2767 critic_loss=147422922384.4102 entropy=17.6307 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 34220] reward=-119664555.2 actor_loss=0.3029 critic_loss=149030415661.1765 entropy=17.6328 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 34220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548414.7 mean_steps=14.4
|
|
[Episode 34230] reward=-121547577.7 actor_loss=0.3091 critic_loss=156505184148.2105 entropy=17.6266 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 34240] reward=-121442508.8 actor_loss=0.3263 critic_loss=155768982186.6667 entropy=17.6247 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 34240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510826.1 mean_steps=14.7
|
|
[Episode 34250] reward=-121155937.6 actor_loss=0.2775 critic_loss=149450225891.5555 entropy=17.6208 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 34260] reward=-117398029.6 actor_loss=0.3611 critic_loss=145123564657.7778 entropy=17.6070 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 34260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-516410.4 mean_steps=15.1
|
|
[Episode 34270] reward=-113379217.3 actor_loss=0.3927 critic_loss=145961177460.3636 entropy=17.5955 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 34280] reward=-124112335.0 actor_loss=0.2943 critic_loss=151983597977.6000 entropy=17.6003 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 34280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399644.1 mean_steps=15.5
|
|
[Episode 34290] reward=-122018402.6 actor_loss=0.1880 critic_loss=151487496192.0000 entropy=17.6012 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 34300] reward=-121472393.5 actor_loss=0.2877 critic_loss=149750396084.7059 entropy=17.5938 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 34300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-563601.3 mean_steps=12.6
|
|
[Episode 34310] reward=-118232711.4 actor_loss=0.2713 critic_loss=148574145194.6667 entropy=17.5892 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 34320] reward=-120861388.8 actor_loss=0.2809 critic_loss=152095276032.0000 entropy=17.5934 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 34320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441331.4 mean_steps=15.8
|
|
[Episode 34330] reward=-121751852.6 actor_loss=0.2828 critic_loss=155686420480.0000 entropy=17.6031 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 34340] reward=-126560824.1 actor_loss=0.1751 critic_loss=160275684894.1176 entropy=17.5946 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 34340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420060.8 mean_steps=15.7
|
|
[Episode 34350] reward=-120307409.0 actor_loss=0.3331 critic_loss=151755999524.5714 entropy=17.5986 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 34360] reward=-120385429.2 actor_loss=0.3449 critic_loss=155175655833.6000 entropy=17.6085 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 34360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541120.5 mean_steps=14.4
|
|
[Episode 34370] reward=-119631750.1 actor_loss=0.2472 critic_loss=151104428786.5263 entropy=17.6033 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 34380] reward=-110772663.6 actor_loss=0.3423 critic_loss=143341561232.6956 entropy=17.6041 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 34380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450933.5 mean_steps=14.7
|
|
[Episode 34390] reward=-122968219.1 actor_loss=0.2696 critic_loss=161201455668.9655 entropy=17.6135 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 34400] reward=-118685677.9 actor_loss=0.3137 critic_loss=152559061284.5714 entropy=17.6121 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 34400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535794.0 mean_steps=13.5
|
|
[Episode 34410] reward=-119788174.4 actor_loss=0.1693 critic_loss=147475702559.2195 entropy=17.6262 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 34420] reward=-116503959.7 actor_loss=0.4143 critic_loss=143137901681.7778 entropy=17.6339 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 34420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-456803.7 mean_steps=15.8
|
|
[Episode 34430] reward=-118484781.6 actor_loss=0.2209 critic_loss=144999558277.5652 entropy=17.6201 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 34440] reward=-122336784.9 actor_loss=0.3024 critic_loss=159391390999.2727 entropy=17.6250 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 34440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454261.0 mean_steps=14.8
|
|
[Episode 34450] reward=-123321062.3 actor_loss=0.3272 critic_loss=163230103552.0000 entropy=17.6174 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 34460] reward=-119632664.0 actor_loss=0.2870 critic_loss=149165015040.0000 entropy=17.6069 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 34460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572336.3 mean_steps=12.4
|
|
[Episode 34470] reward=-117826131.5 actor_loss=0.3504 critic_loss=146992958781.7931 entropy=17.6084 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 34480] reward=-118319825.9 actor_loss=0.3325 critic_loss=147710496475.4286 entropy=17.6081 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 34480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-489450.7 mean_steps=16.1
|
|
[Episode 34490] reward=-124243599.4 actor_loss=0.2724 critic_loss=157042686634.6667 entropy=17.6040 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 34500] reward=-113112105.3 actor_loss=0.3353 critic_loss=149675239833.6000 entropy=17.6072 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 34500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562331.2 mean_steps=13.3
|
|
[Episode 34510] reward=-117417316.1 actor_loss=0.3703 critic_loss=159525433344.0000 entropy=17.6030 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 34520] reward=-114592315.3 actor_loss=0.2647 critic_loss=147199040365.7143 entropy=17.5998 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 34520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-532462.5 mean_steps=14.2
|
|
[Episode 34530] reward=-122254091.3 actor_loss=0.3288 critic_loss=154056296537.0435 entropy=17.6025 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 34540] reward=-116292982.5 actor_loss=0.3370 critic_loss=142715199698.0513 entropy=17.5814 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 34540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490105.7 mean_steps=14.1
|
|
[Episode 34550] reward=-119290550.2 actor_loss=0.2538 critic_loss=150079195570.4243 entropy=17.5765 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 34560] reward=-117957759.9 actor_loss=0.2881 critic_loss=150621307699.2000 entropy=17.5757 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 34560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462740.7 mean_steps=14.8
|
|
[Episode 34570] reward=-118143971.2 actor_loss=0.3127 critic_loss=149302280192.0000 entropy=17.5734 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 34580] reward=-122609232.1 actor_loss=0.2627 critic_loss=156306629238.1538 entropy=17.5806 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 34580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-405839.4 mean_steps=14.8
|
|
[Episode 34590] reward=-114765840.0 actor_loss=0.2914 critic_loss=145396139559.3846 entropy=17.5724 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 34600] reward=-126303209.5 actor_loss=0.2953 critic_loss=265477685854.8148 entropy=17.5824 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 34600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-412426.8 mean_steps=16.4
|
|
[Episode 34610] reward=-113900364.5 actor_loss=0.2272 critic_loss=140998694229.3333 entropy=17.5892 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 34620] reward=-122563448.2 actor_loss=0.2198 critic_loss=154492230509.7143 entropy=17.5864 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 34620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519622.8 mean_steps=14.4
|
|
[Episode 34630] reward=-119771601.5 actor_loss=0.1751 critic_loss=150660629342.3158 entropy=17.5882 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 34640] reward=-117729281.5 actor_loss=0.2690 critic_loss=151601101677.7143 entropy=17.5736 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 34640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397288.5 mean_steps=15.4
|
|
[Episode 34650] reward=-119153464.7 actor_loss=0.2759 critic_loss=152178839155.6129 entropy=17.5684 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 34660] reward=-116542252.9 actor_loss=0.3014 critic_loss=144220119586.1333 entropy=17.5594 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 34660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488013.5 mean_steps=14.3
|
|
[Episode 34670] reward=-119012590.1 actor_loss=0.2943 critic_loss=152091478488.6154 entropy=17.5757 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 34680] reward=-114708606.1 actor_loss=0.3656 critic_loss=144562354930.5263 entropy=17.5832 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 34680] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-278057.7 mean_steps=16.9
|
|
[Episode 34690] reward=-114873526.7 actor_loss=0.2891 critic_loss=143917817173.3333 entropy=17.5695 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 34700] reward=-121445383.6 actor_loss=0.3294 critic_loss=149124686506.6667 entropy=17.5714 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 34700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510405.0 mean_steps=13.3
|
|
[Episode 34710] reward=-117048475.2 actor_loss=0.3660 critic_loss=144887831015.6190 entropy=17.5739 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 34720] reward=-117831661.3 actor_loss=0.2383 critic_loss=146034766643.2000 entropy=17.5761 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 34720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-613169.3 mean_steps=12.1
|
|
[Episode 34730] reward=-116688238.5 actor_loss=0.3751 critic_loss=144075250688.0000 entropy=17.5778 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 34740] reward=-115587967.3 actor_loss=0.3039 critic_loss=145078906060.8000 entropy=17.5696 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 34740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-359265.8 mean_steps=16.9
|
|
[Episode 34750] reward=-115673548.6 actor_loss=0.2715 critic_loss=141492697586.1622 entropy=17.5697 approx_kl=0.0109 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 34760] reward=-123753752.1 actor_loss=0.2529 critic_loss=154922418176.0000 entropy=17.5755 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 34760] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-410854.4 mean_steps=17.4
|
|
[Episode 34770] reward=-121064485.5 actor_loss=0.3017 critic_loss=148517123959.4667 entropy=17.5759 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 34780] reward=-120735476.1 actor_loss=0.2798 critic_loss=148591964475.0769 entropy=17.5759 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 34780] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-232048.1 mean_steps=19.4
|
|
[Episode 34790] reward=-120771896.9 actor_loss=0.3915 critic_loss=158937080135.6800 entropy=17.5930 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 34800] reward=-118340016.5 actor_loss=0.2748 critic_loss=149525097858.8445 entropy=17.5844 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 34800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446822.4 mean_steps=15.7
|
|
[Episode 34810] reward=-116181161.4 actor_loss=0.3338 critic_loss=145816196189.0909 entropy=17.5782 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 34820] reward=-120790954.8 actor_loss=0.2436 critic_loss=149822402651.0222 entropy=17.5811 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 34820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547861.8 mean_steps=13.4
|
|
[Episode 34830] reward=-123234647.4 actor_loss=0.2775 critic_loss=157694160440.8889 entropy=17.5830 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 34840] reward=-118821597.6 actor_loss=0.3290 critic_loss=153713642458.0741 entropy=17.5777 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 34840] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-306813.0 mean_steps=16.8
|
|
[Episode 34850] reward=-119763758.1 actor_loss=0.2451 critic_loss=147869334291.6923 entropy=17.5778 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 34860] reward=-119746253.2 actor_loss=0.2579 critic_loss=150408412091.7333 entropy=17.5730 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 34860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465960.6 mean_steps=14.9
|
|
[Episode 34870] reward=-113087107.7 actor_loss=0.2977 critic_loss=138527632822.8571 entropy=17.5689 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 34880] reward=-119521142.2 actor_loss=0.1743 critic_loss=149831385088.0000 entropy=17.5653 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 34880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-465272.8 mean_steps=16.0
|
|
[Episode 34890] reward=-119904785.5 actor_loss=0.2477 critic_loss=153012877019.4286 entropy=17.5603 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 34900] reward=-117097115.3 actor_loss=0.2773 critic_loss=143042275689.4118 entropy=17.5463 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 34900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-535567.0 mean_steps=12.6
|
|
[Episode 34910] reward=-112574617.5 actor_loss=0.3286 critic_loss=149590156902.4000 entropy=17.5442 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 34920] reward=-115681847.7 actor_loss=0.3587 critic_loss=140845922424.4706 entropy=17.5574 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 34920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-542250.3 mean_steps=12.7
|
|
[Episode 34930] reward=-115009730.4 actor_loss=0.2879 critic_loss=148538836204.3077 entropy=17.5533 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 34940] reward=-121630797.3 actor_loss=0.2366 critic_loss=152028170406.0540 entropy=17.5591 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 34940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570793.8 mean_steps=13.4
|
|
[Episode 34950] reward=-123275132.9 actor_loss=0.2364 critic_loss=498733312361.4117 entropy=17.5491 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 34960] reward=-120348353.0 actor_loss=0.2891 critic_loss=152562883242.6667 entropy=17.5577 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 34960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553431.2 mean_steps=13.4
|
|
[Episode 34970] reward=-119686812.8 actor_loss=0.3004 critic_loss=152414700612.2667 entropy=17.5413 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 34980] reward=-122315541.8 actor_loss=0.3195 critic_loss=158863410412.3077 entropy=17.5302 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 34980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-627572.5 mean_steps=13.3
|
|
[Episode 34990] reward=-118382646.8 actor_loss=0.2552 critic_loss=145379247718.4000 entropy=17.5369 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 35000] reward=-114166224.7 actor_loss=0.4499 critic_loss=149956256488.7273 entropy=17.5465 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 35000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511405.5 mean_steps=14.2
|
|
[Episode 35010] reward=-120125765.2 actor_loss=0.2752 critic_loss=151930738005.3333 entropy=17.5540 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 35020] reward=-117859354.6 actor_loss=0.3013 critic_loss=149760147968.0000 entropy=17.5681 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 35020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-611116.5 mean_steps=14.1
|
|
[Episode 35030] reward=-117531275.9 actor_loss=0.2737 critic_loss=148847414649.2632 entropy=17.5806 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 35040] reward=-115839550.8 actor_loss=0.1713 critic_loss=148905074980.5714 entropy=17.5713 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Eval 35040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395861.8 mean_steps=16.6
|
|
[Episode 35050] reward=-115341526.5 actor_loss=0.2879 critic_loss=146013387807.0303 entropy=17.5779 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 35060] reward=-119115540.2 actor_loss=0.3010 critic_loss=148966360350.7200 entropy=17.5892 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 35060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-588997.9 mean_steps=12.8
|
|
[Episode 35070] reward=-118244429.6 actor_loss=0.3018 critic_loss=149348126185.7391 entropy=17.5950 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 35080] reward=-117106815.5 actor_loss=0.2571 critic_loss=147833986161.7778 entropy=17.5960 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 35080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619824.8 mean_steps=13.2
|
|
[Episode 35090] reward=-122396811.4 actor_loss=0.3536 critic_loss=157397246935.0400 entropy=17.5909 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 35100] reward=-120157048.2 actor_loss=0.2691 critic_loss=151410301952.0000 entropy=17.6000 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 35100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-531298.4 mean_steps=15.1
|
|
[Episode 35110] reward=-123932247.5 actor_loss=0.2824 critic_loss=164365628967.3846 entropy=17.6139 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 35120] reward=-119387850.7 actor_loss=0.2313 critic_loss=153016469094.4000 entropy=17.6314 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 35120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-582998.2 mean_steps=12.0
|
|
[Episode 35130] reward=-117083201.3 actor_loss=0.3145 critic_loss=145989148407.7419 entropy=17.6331 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 35140] reward=-116994913.9 actor_loss=0.3376 critic_loss=146550486445.4193 entropy=17.6283 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 35140] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-404119.7 mean_steps=17.1
|
|
[Episode 35150] reward=-121476584.8 actor_loss=0.2511 critic_loss=159365336441.2632 entropy=17.6165 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 35160] reward=-118433265.1 actor_loss=0.3311 critic_loss=151789741812.8696 entropy=17.6229 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 35160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504344.3 mean_steps=15.2
|
|
[Episode 35170] reward=-121885234.8 actor_loss=0.2346 critic_loss=158390273489.4546 entropy=17.6216 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 35180] reward=-117147894.7 actor_loss=0.2571 critic_loss=149171606869.3333 entropy=17.6281 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 35180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523201.5 mean_steps=13.5
|
|
[Episode 35190] reward=-116351969.7 actor_loss=0.3514 critic_loss=148885100357.8182 entropy=17.6269 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 35200] reward=-120533841.2 actor_loss=0.2590 critic_loss=157061643806.1176 entropy=17.6233 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 35200] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-361779.4 mean_steps=16.9
|
|
[Episode 35210] reward=-119945465.5 actor_loss=0.2251 critic_loss=150472844676.4138 entropy=17.6150 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 35220] reward=-119088589.6 actor_loss=0.3641 critic_loss=148227049244.4445 entropy=17.6068 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 35220] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-332401.4 mean_steps=17.6
|
|
[Episode 35230] reward=-117794012.3 actor_loss=0.2905 critic_loss=152955329299.6923 entropy=17.6097 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 35240] reward=-119471365.6 actor_loss=0.2278 critic_loss=152225138777.0435 entropy=17.5923 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 35240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452140.5 mean_steps=15.6
|
|
[Episode 35250] reward=-117982444.7 actor_loss=0.3207 critic_loss=142037816349.2571 entropy=17.5858 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 35260] reward=-114532217.4 actor_loss=0.4011 critic_loss=137326611611.1515 entropy=17.6045 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 35260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-398250.2 mean_steps=16.4
|
|
[Episode 35270] reward=-119064049.4 actor_loss=0.3462 critic_loss=172585930379.6364 entropy=17.6060 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 35280] reward=-120014974.6 actor_loss=0.1983 critic_loss=146118157700.4138 entropy=17.6065 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 35280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438015.8 mean_steps=14.5
|
|
[Episode 35290] reward=-115751393.1 actor_loss=0.3032 critic_loss=144134148505.6000 entropy=17.6039 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 35300] reward=-119305556.1 actor_loss=0.1784 critic_loss=153767756458.6667 entropy=17.6123 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 35300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399805.9 mean_steps=15.2
|
|
[Episode 35310] reward=-121381348.1 actor_loss=0.2870 critic_loss=148509851283.9111 entropy=17.6268 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 35320] reward=-114202429.9 actor_loss=0.2957 critic_loss=140788788489.4815 entropy=17.6267 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 35320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-457661.0 mean_steps=14.2
|
|
[Episode 35330] reward=-123118100.1 actor_loss=0.2636 critic_loss=154799820093.7931 entropy=17.6227 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 35340] reward=-118503904.6 actor_loss=0.2984 critic_loss=148425782110.3158 entropy=17.6218 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 35340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532672.3 mean_steps=13.2
|
|
[Episode 35350] reward=-122499533.6 actor_loss=0.2506 critic_loss=152360362356.3636 entropy=17.6139 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 35360] reward=-117997985.0 actor_loss=0.2733 critic_loss=147175822677.3333 entropy=17.6097 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 35360] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-685544.5 mean_steps=11.8
|
|
[Episode 35370] reward=-121503113.6 actor_loss=0.2154 critic_loss=151324560854.4865 entropy=17.6019 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 35380] reward=-112200080.7 actor_loss=0.3454 critic_loss=141810957425.7778 entropy=17.6169 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 35380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468711.6 mean_steps=13.9
|
|
[Episode 35390] reward=-117448263.5 actor_loss=0.3483 critic_loss=148829935479.4667 entropy=17.6244 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 35400] reward=-113965912.0 actor_loss=0.2223 critic_loss=148131365608.7273 entropy=17.6226 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 35400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-594733.9 mean_steps=12.8
|
|
[Episode 35410] reward=-118523360.2 actor_loss=0.2433 critic_loss=149237683253.8947 entropy=17.6353 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 35420] reward=-122485757.0 actor_loss=0.2991 critic_loss=159349442560.0000 entropy=17.6333 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 35420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553626.7 mean_steps=13.7
|
|
[Episode 35430] reward=-118546746.5 actor_loss=0.2148 critic_loss=149510545152.0000 entropy=17.6443 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 35440] reward=-122560398.9 actor_loss=0.2239 critic_loss=151113486921.1429 entropy=17.6479 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 35440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385634.1 mean_steps=16.1
|
|
[Episode 35450] reward=-119278591.2 actor_loss=0.4584 critic_loss=155834544368.9412 entropy=17.6447 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 35460] reward=-126236144.5 actor_loss=0.2427 critic_loss=161054950088.3478 entropy=17.6569 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 35460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-691414.3 mean_steps=12.3
|
|
[Episode 35470] reward=-124857427.6 actor_loss=0.2438 critic_loss=157500014592.0000 entropy=17.6589 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 35480] reward=-116152570.2 actor_loss=0.4101 critic_loss=148865669643.9070 entropy=17.6600 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 35480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470610.8 mean_steps=15.1
|
|
[Episode 35490] reward=-118475742.2 actor_loss=0.3008 critic_loss=150598758400.0000 entropy=17.6488 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 35500] reward=-122490866.1 actor_loss=0.3032 critic_loss=156139944423.6190 entropy=17.6531 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 35500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-393967.7 mean_steps=16.1
|
|
[Episode 35510] reward=-120031425.0 actor_loss=0.2567 critic_loss=153543712950.0444 entropy=17.6484 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 35520] reward=-119487190.3 actor_loss=0.3496 critic_loss=156394352054.8571 entropy=17.6522 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 35520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385160.0 mean_steps=16.6
|
|
[Episode 35530] reward=-117695475.1 actor_loss=0.2262 critic_loss=152670383854.9333 entropy=17.6320 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 35540] reward=-121098588.1 actor_loss=0.2777 critic_loss=149826812391.6190 entropy=17.6278 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 35540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540273.9 mean_steps=14.0
|
|
[Episode 35550] reward=-117619314.3 actor_loss=0.3752 critic_loss=165659185834.6667 entropy=17.6253 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 35560] reward=-116731455.5 actor_loss=0.3687 critic_loss=150155513036.8000 entropy=17.6285 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 35560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-538377.5 mean_steps=12.7
|
|
[Episode 35570] reward=-118858418.0 actor_loss=0.2453 critic_loss=150659741696.0000 entropy=17.6311 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 35580] reward=-115747029.1 actor_loss=0.3593 critic_loss=145335225314.7429 entropy=17.6314 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 35580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-626071.7 mean_steps=12.4
|
|
[Episode 35590] reward=-115429736.5 actor_loss=0.3421 critic_loss=151376951854.5454 entropy=17.6317 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 35600] reward=-120961950.9 actor_loss=0.2838 critic_loss=155802580805.8182 entropy=17.6472 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 35600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-367881.5 mean_steps=16.5
|
|
[Episode 35610] reward=-123942237.6 actor_loss=0.2877 critic_loss=161928088700.8781 entropy=17.6452 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 35620] reward=-118925944.6 actor_loss=0.3914 critic_loss=150519245902.7692 entropy=17.6357 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 35620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430989.1 mean_steps=14.8
|
|
[Episode 35630] reward=-116529096.0 actor_loss=0.2667 critic_loss=147598465181.5385 entropy=17.6456 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 35640] reward=-122392257.9 actor_loss=0.2698 critic_loss=151676062626.9091 entropy=17.6582 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 35640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455550.8 mean_steps=15.9
|
|
[Episode 35650] reward=-113262718.7 actor_loss=0.2645 critic_loss=134266785300.4800 entropy=17.6590 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 35660] reward=-118435993.4 actor_loss=0.2990 critic_loss=149167905555.6923 entropy=17.6543 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 35660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479227.6 mean_steps=15.4
|
|
[Episode 35670] reward=-120268939.8 actor_loss=0.2942 critic_loss=153205661461.9429 entropy=17.6527 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 35680] reward=-120272974.7 actor_loss=0.2209 critic_loss=154536531441.3714 entropy=17.6508 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 35680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544270.9 mean_steps=13.8
|
|
[Episode 35690] reward=-122234677.1 actor_loss=0.2557 critic_loss=155739603412.1143 entropy=17.6464 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 35700] reward=-118241246.1 actor_loss=0.2928 critic_loss=150085206016.0000 entropy=17.6421 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 35700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-364968.9 mean_steps=16.4
|
|
[Episode 35710] reward=-119721324.7 actor_loss=0.2916 critic_loss=151425993634.9091 entropy=17.6267 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 35720] reward=-116812919.3 actor_loss=0.2441 critic_loss=154236536508.6316 entropy=17.6301 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 35720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432811.0 mean_steps=15.9
|
|
[Episode 35730] reward=-127666688.5 actor_loss=0.2547 critic_loss=158474028974.0800 entropy=17.6349 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 35740] reward=-123312779.8 actor_loss=0.1469 critic_loss=293763434632.5333 entropy=17.6378 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 35740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-446861.6 mean_steps=15.8
|
|
[Episode 35750] reward=-117584568.5 actor_loss=0.3222 critic_loss=151646832286.8965 entropy=17.6404 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 35760] reward=-116658335.8 actor_loss=0.3050 critic_loss=142414232780.8000 entropy=17.6417 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 35760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-417728.0 mean_steps=16.6
|
|
[Episode 35770] reward=-119652112.3 actor_loss=0.2917 critic_loss=152859076198.4000 entropy=17.6448 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 35780] reward=-118754785.4 actor_loss=0.3300 critic_loss=147976443221.3333 entropy=17.6158 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 35780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-458596.1 mean_steps=14.2
|
|
[Episode 35790] reward=-119887146.2 actor_loss=0.2492 critic_loss=151328575703.5789 entropy=17.6175 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 35800] reward=-120705899.6 actor_loss=0.3079 critic_loss=150716966034.2857 entropy=17.6061 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 35800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383444.1 mean_steps=16.5
|
|
[Episode 35810] reward=-122838039.8 actor_loss=0.2695 critic_loss=157847599591.6190 entropy=17.5896 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 35820] reward=-124515694.7 actor_loss=0.1803 critic_loss=159187239367.1111 entropy=17.5865 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 35820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566735.0 mean_steps=12.8
|
|
[Episode 35830] reward=-117217998.0 actor_loss=0.2696 critic_loss=157280091504.6400 entropy=17.5904 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 35840] reward=-119539162.5 actor_loss=0.3061 critic_loss=151483116677.5652 entropy=17.5947 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 35840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-432275.2 mean_steps=13.7
|
|
[Episode 35850] reward=-117952093.8 actor_loss=0.3211 critic_loss=142986881280.0000 entropy=17.5995 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 35860] reward=-119764540.6 actor_loss=0.3117 critic_loss=152854291692.3077 entropy=17.5907 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 35860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545355.3 mean_steps=13.8
|
|
[Episode 35870] reward=-120759239.5 actor_loss=0.2709 critic_loss=150386989810.5263 entropy=17.5893 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 35880] reward=-120316959.5 actor_loss=0.3632 critic_loss=149423194697.1429 entropy=17.5943 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 35880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599848.3 mean_steps=13.1
|
|
[Episode 35890] reward=-121423101.8 actor_loss=0.3196 critic_loss=169018493220.5714 entropy=17.5942 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 35900] reward=-114765493.4 actor_loss=0.2144 critic_loss=142455533568.0000 entropy=17.5995 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 35900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-511013.3 mean_steps=12.7
|
|
[Episode 35910] reward=-122481119.4 actor_loss=0.3017 critic_loss=173701679880.8276 entropy=17.5889 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 35920] reward=-117449436.7 actor_loss=0.2274 critic_loss=146534699380.3636 entropy=17.6030 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 35920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585711.3 mean_steps=12.8
|
|
[Episode 35930] reward=-125436775.0 actor_loss=0.1861 critic_loss=161531856542.8965 entropy=17.6036 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 35940] reward=-123849337.9 actor_loss=0.2675 critic_loss=161581059389.7931 entropy=17.6007 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 35940] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-687422.4 mean_steps=11.5
|
|
[Episode 35950] reward=-117841035.5 actor_loss=0.3517 critic_loss=149752771677.0909 entropy=17.6154 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 35960] reward=-117769399.3 actor_loss=0.4419 critic_loss=150357068458.6667 entropy=17.6127 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 35960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-511489.4 mean_steps=13.4
|
|
[Episode 35970] reward=-121842819.2 actor_loss=0.2964 critic_loss=151796789069.9131 entropy=17.6201 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 35980] reward=-122152764.2 actor_loss=0.2993 critic_loss=157096356864.0000 entropy=17.6236 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 35980] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-306894.2 mean_steps=18.1
|
|
[Episode 35990] reward=-118291941.4 actor_loss=0.2763 critic_loss=161886537240.3810 entropy=17.6277 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 36000] reward=-120298718.0 actor_loss=0.2434 critic_loss=156505341152.7805 entropy=17.6388 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 36000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510611.1 mean_steps=14.6
|
|
[Episode 36010] reward=-120102013.4 actor_loss=0.3486 critic_loss=163961903149.5111 entropy=17.6378 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 36020] reward=-118764394.0 actor_loss=0.3469 critic_loss=146858825045.3333 entropy=17.6495 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 36020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517061.9 mean_steps=14.2
|
|
[Episode 36030] reward=-120830017.7 actor_loss=0.1869 critic_loss=153229310464.0000 entropy=17.6425 approx_kl=0.0113 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 36040] reward=-119943432.7 actor_loss=0.3049 critic_loss=157723555653.8182 entropy=17.6367 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 36040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-421970.2 mean_steps=16.5
|
|
[Episode 36050] reward=-118419069.9 actor_loss=0.4137 critic_loss=149236889127.3846 entropy=17.6362 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1504 front_blocked=0
|
|
[Episode 36060] reward=-124218280.2 actor_loss=0.2268 critic_loss=159170002944.0000 entropy=17.6402 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 36060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-519354.3 mean_steps=16.4
|
|
[Episode 36070] reward=-119230471.3 actor_loss=0.3577 critic_loss=150846122302.5778 entropy=17.6450 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 36080] reward=-119978512.9 actor_loss=0.3883 critic_loss=151941210112.0000 entropy=17.6471 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 36080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513008.4 mean_steps=13.6
|
|
[Episode 36090] reward=-123898400.9 actor_loss=0.2529 critic_loss=151407966435.5555 entropy=17.6430 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 36100] reward=-120893200.7 actor_loss=0.2291 critic_loss=153226044666.3111 entropy=17.6628 approx_kl=0.0107 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 36100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528792.9 mean_steps=14.6
|
|
[Episode 36110] reward=-125728896.1 actor_loss=0.2906 critic_loss=160816779537.0667 entropy=17.6637 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 36120] reward=-119541506.9 actor_loss=0.3021 critic_loss=154453376581.1892 entropy=17.6556 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 36120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556937.9 mean_steps=13.4
|
|
[Episode 36130] reward=-119380341.1 actor_loss=0.3652 critic_loss=146495237142.7556 entropy=17.6646 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 36140] reward=-116765389.7 actor_loss=0.3282 critic_loss=144630116352.0000 entropy=17.6662 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 36140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540181.1 mean_steps=13.8
|
|
[Episode 36150] reward=-128901020.5 actor_loss=0.2934 critic_loss=259773109589.3333 entropy=17.6612 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 36160] reward=-118204586.2 actor_loss=0.3575 critic_loss=146064440631.6522 entropy=17.6796 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 36160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-545328.7 mean_steps=14.8
|
|
[Episode 36170] reward=-119747919.6 actor_loss=0.3483 critic_loss=149423588752.6956 entropy=17.6835 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 36180] reward=-123682340.0 actor_loss=0.2801 critic_loss=156315381760.0000 entropy=17.6822 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 36180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392659.9 mean_steps=16.6
|
|
[Episode 36190] reward=-118162872.2 actor_loss=0.3610 critic_loss=150714105856.0000 entropy=17.6836 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 36200] reward=-121213491.0 actor_loss=0.2201 critic_loss=146274515968.0000 entropy=17.7004 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 36200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577203.9 mean_steps=12.8
|
|
[Episode 36210] reward=-123100690.9 actor_loss=0.3057 critic_loss=164786684586.6667 entropy=17.7035 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 36220] reward=-118246673.3 actor_loss=0.2501 critic_loss=155115334602.1053 entropy=17.7142 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 36220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-382388.8 mean_steps=16.4
|
|
[Episode 36230] reward=-125792233.7 actor_loss=0.2237 critic_loss=160783681182.8965 entropy=17.7079 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 36240] reward=-121091681.5 actor_loss=0.4092 critic_loss=167691529216.0000 entropy=17.7129 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 36240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535167.3 mean_steps=14.4
|
|
[Episode 36250] reward=-119253097.2 actor_loss=0.2917 critic_loss=143459625642.6667 entropy=17.7265 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 36260] reward=-112320439.5 actor_loss=0.2679 critic_loss=138972806582.8571 entropy=17.7304 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 36260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-605653.7 mean_steps=12.8
|
|
[Episode 36270] reward=-122312763.6 actor_loss=0.3282 critic_loss=160173725403.4286 entropy=17.7368 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 36280] reward=-119399103.7 actor_loss=0.2447 critic_loss=145366506074.3529 entropy=17.7432 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 36280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-610469.5 mean_steps=13.9
|
|
[Episode 36290] reward=-118781644.6 actor_loss=0.4063 critic_loss=151676772352.0000 entropy=17.7314 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 36300] reward=-123466987.9 actor_loss=0.2302 critic_loss=159783699160.1778 entropy=17.7254 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 36300] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-340672.0 mean_steps=17.0
|
|
[Episode 36310] reward=-120865575.2 actor_loss=0.2921 critic_loss=154232258078.1176 entropy=17.7369 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 36320] reward=-121981731.8 actor_loss=0.3129 critic_loss=153144724992.0000 entropy=17.7348 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 36320] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-382695.6 mean_steps=17.5
|
|
[Episode 36330] reward=-123947696.7 actor_loss=0.2948 critic_loss=158041257797.8182 entropy=17.7423 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 36340] reward=-118244413.1 actor_loss=0.1508 critic_loss=145844615031.4667 entropy=17.7477 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 36340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-360940.6 mean_steps=16.2
|
|
[Episode 36350] reward=-125080682.5 actor_loss=0.3346 critic_loss=406267296481.2800 entropy=17.7380 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 36360] reward=-122281132.2 actor_loss=0.2386 critic_loss=150832336310.8571 entropy=17.7551 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 36360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578012.9 mean_steps=13.8
|
|
[Episode 36370] reward=-119518511.2 actor_loss=0.3249 critic_loss=147165875248.7619 entropy=17.7567 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 36380] reward=-115444922.2 actor_loss=0.2863 critic_loss=147301114402.1333 entropy=17.7536 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 36380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508855.2 mean_steps=14.7
|
|
[Episode 36390] reward=-123519047.3 actor_loss=0.2373 critic_loss=155947816810.1463 entropy=17.7496 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 36400] reward=-121045082.3 actor_loss=0.2322 critic_loss=178757462698.6667 entropy=17.7395 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 36400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407783.1 mean_steps=16.6
|
|
[Episode 36410] reward=-119316343.1 actor_loss=0.3784 critic_loss=161307032289.2800 entropy=17.7344 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 36420] reward=-120909028.9 actor_loss=0.3427 critic_loss=156277102273.4222 entropy=17.7340 approx_kl=0.0108 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 36420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568130.4 mean_steps=13.4
|
|
[Episode 36430] reward=-115673059.9 actor_loss=0.3715 critic_loss=139882784954.1818 entropy=17.7307 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 36440] reward=-120025694.0 actor_loss=0.3656 critic_loss=154372407296.0000 entropy=17.7237 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 36440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-483689.6 mean_steps=12.9
|
|
[Episode 36450] reward=-118590280.3 actor_loss=0.1819 critic_loss=150270985648.3556 entropy=17.7159 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 36460] reward=-114798519.5 actor_loss=0.2603 critic_loss=155981769750.7556 entropy=17.7087 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 36460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433118.3 mean_steps=14.7
|
|
[Episode 36470] reward=-118649306.1 actor_loss=0.2144 critic_loss=155889048064.0000 entropy=17.6905 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 36480] reward=-116609813.5 actor_loss=0.3208 critic_loss=143054507520.0000 entropy=17.6838 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 36480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491220.4 mean_steps=15.0
|
|
[Episode 36490] reward=-123873862.6 actor_loss=0.2245 critic_loss=151401905265.7778 entropy=17.6943 approx_kl=0.0117 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 36500] reward=-118353597.2 actor_loss=0.3759 critic_loss=147817598976.0000 entropy=17.7012 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 36500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434689.6 mean_steps=15.8
|
|
[Episode 36510] reward=-114864788.4 actor_loss=0.3643 critic_loss=144110806285.4737 entropy=17.6934 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 36520] reward=-117250631.5 actor_loss=0.2250 critic_loss=138897947528.9302 entropy=17.6897 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 36520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547492.2 mean_steps=14.2
|
|
[Episode 36530] reward=-116862185.7 actor_loss=0.2310 critic_loss=144156688384.0000 entropy=17.6982 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 36540] reward=-117708152.1 actor_loss=0.3619 critic_loss=146092713707.2433 entropy=17.6871 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 36540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554661.1 mean_steps=13.6
|
|
[Episode 36550] reward=-121201368.6 actor_loss=0.2422 critic_loss=153229739349.3333 entropy=17.6947 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 36560] reward=-120061530.1 actor_loss=0.2891 critic_loss=149980978471.8222 entropy=17.7018 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 36560] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-320989.0 mean_steps=17.1
|
|
[Episode 36570] reward=-119313440.1 actor_loss=0.3503 critic_loss=145130587386.3111 entropy=17.7043 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 36580] reward=-118890095.0 actor_loss=0.1731 critic_loss=142639238348.8000 entropy=17.7041 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 36580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-429617.5 mean_steps=14.8
|
|
[Episode 36590] reward=-117862125.4 actor_loss=0.2735 critic_loss=148404099395.3684 entropy=17.6981 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 36600] reward=-117848502.7 actor_loss=0.2876 critic_loss=144568919381.3333 entropy=17.7096 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 36600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507949.9 mean_steps=14.6
|
|
[Episode 36610] reward=-121221828.5 actor_loss=0.1458 critic_loss=148137542144.0000 entropy=17.7170 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 36620] reward=-126208057.1 actor_loss=0.2370 critic_loss=187311194112.0000 entropy=17.7150 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 36620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501089.5 mean_steps=14.1
|
|
[Episode 36630] reward=-117146959.5 actor_loss=0.2604 critic_loss=146249131961.3793 entropy=17.7130 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 36640] reward=-121044629.1 actor_loss=0.3284 critic_loss=167536206180.1739 entropy=17.7230 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 36640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561342.7 mean_steps=13.3
|
|
[Episode 36650] reward=-130053280.9 actor_loss=0.3386 critic_loss=349942054912.0000 entropy=17.7142 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 36660] reward=-125089279.9 actor_loss=0.2623 critic_loss=161058394772.6452 entropy=17.7112 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 36660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486715.6 mean_steps=14.1
|
|
[Episode 36670] reward=-120309614.9 actor_loss=0.2764 critic_loss=148587485076.2105 entropy=17.7063 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 36680] reward=-119682985.4 actor_loss=0.3570 critic_loss=151907588388.5714 entropy=17.7132 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 36680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498760.8 mean_steps=14.4
|
|
[Episode 36690] reward=-122315583.7 actor_loss=0.2865 critic_loss=161204911010.9091 entropy=17.7152 approx_kl=0.0117 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 36700] reward=-119491555.2 actor_loss=0.2890 critic_loss=148496073728.0000 entropy=17.7280 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 36700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-553964.9 mean_steps=14.2
|
|
[Episode 36710] reward=-117470462.0 actor_loss=0.3212 critic_loss=149183636275.2000 entropy=17.7246 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 36720] reward=-121197517.9 actor_loss=0.3655 critic_loss=149797630634.6667 entropy=17.7206 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 36720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-336062.4 mean_steps=15.8
|
|
[Episode 36730] reward=-118049147.9 actor_loss=0.2726 critic_loss=142802884608.0000 entropy=17.7142 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 36740] reward=-118683364.4 actor_loss=0.3318 critic_loss=152229996544.0000 entropy=17.7101 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 36740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-479770.0 mean_steps=15.8
|
|
[Episode 36750] reward=-118854161.2 actor_loss=0.2962 critic_loss=147996612113.6552 entropy=17.7047 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 36760] reward=-122328895.6 actor_loss=0.3027 critic_loss=150789295308.8000 entropy=17.7141 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 36760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525825.0 mean_steps=14.3
|
|
[Episode 36770] reward=-119271850.0 actor_loss=0.3048 critic_loss=147632014872.3810 entropy=17.7199 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 36780] reward=-124421950.0 actor_loss=0.2530 critic_loss=158897504256.0000 entropy=17.7270 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 36780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-588093.2 mean_steps=11.9
|
|
[Episode 36790] reward=-119330670.3 actor_loss=0.3911 critic_loss=148871019373.7143 entropy=17.7230 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 36800] reward=-119379647.1 actor_loss=0.2544 critic_loss=146906390528.0000 entropy=17.7194 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 36800] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-272580.3 mean_steps=18.6
|
|
[Episode 36810] reward=-126242126.8 actor_loss=0.2745 critic_loss=160058395033.6000 entropy=17.7314 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 36820] reward=-119886528.3 actor_loss=0.2729 critic_loss=161210322670.9333 entropy=17.7238 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 36820] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-282119.3 mean_steps=18.2
|
|
[Episode 36830] reward=-115802912.1 actor_loss=0.3438 critic_loss=148427590860.8000 entropy=17.7151 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 36840] reward=-125129056.9 actor_loss=0.2812 critic_loss=157391507671.5789 entropy=17.7315 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 36840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-618063.6 mean_steps=12.3
|
|
[Episode 36850] reward=-122149705.8 actor_loss=0.2930 critic_loss=156631348175.2381 entropy=17.7270 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 36860] reward=-125185671.2 actor_loss=0.3283 critic_loss=249508831514.4828 entropy=17.7293 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 36860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-363616.9 mean_steps=15.3
|
|
[Episode 36870] reward=-117433212.1 actor_loss=0.3513 critic_loss=147595935744.0000 entropy=17.7326 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 36880] reward=-122462987.0 actor_loss=0.3228 critic_loss=176328987382.5185 entropy=17.7272 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 36880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523884.3 mean_steps=14.3
|
|
[Episode 36890] reward=-119849493.7 actor_loss=0.3044 critic_loss=149051654144.0000 entropy=17.7277 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 36900] reward=-123120119.3 actor_loss=0.2679 critic_loss=156564020857.9048 entropy=17.7245 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 36900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567594.3 mean_steps=13.8
|
|
[Episode 36910] reward=-121800697.3 actor_loss=0.2689 critic_loss=148360866762.1053 entropy=17.7237 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 36920] reward=-121320234.6 actor_loss=0.3252 critic_loss=152451413333.3333 entropy=17.7279 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 36920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450905.3 mean_steps=16.1
|
|
[Episode 36930] reward=-119359574.6 actor_loss=0.2562 critic_loss=147216580823.5789 entropy=17.7239 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 36940] reward=-122129478.5 actor_loss=0.2783 critic_loss=150978027237.5172 entropy=17.7447 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 36940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518729.8 mean_steps=14.4
|
|
[Episode 36950] reward=-122377142.9 actor_loss=0.2584 critic_loss=148557010167.1724 entropy=17.7521 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 36960] reward=-120896039.4 actor_loss=0.3147 critic_loss=154973000704.0000 entropy=17.7506 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 36960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-510673.8 mean_steps=14.9
|
|
[Episode 36970] reward=-121027131.7 actor_loss=0.2419 critic_loss=150775651328.0000 entropy=17.7538 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 36980] reward=-123399164.2 actor_loss=0.3330 critic_loss=159969443840.0000 entropy=17.7595 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 36980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552019.1 mean_steps=13.6
|
|
[Episode 36990] reward=-117680874.3 actor_loss=0.4328 critic_loss=144612856263.1111 entropy=17.7565 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 37000] reward=-120540089.2 actor_loss=0.3541 critic_loss=153856557056.0000 entropy=17.7674 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 37000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451989.2 mean_steps=14.8
|
|
[Episode 37010] reward=-120605633.8 actor_loss=0.2741 critic_loss=159137695185.4546 entropy=17.7636 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 37020] reward=-126096809.2 actor_loss=0.1882 critic_loss=156847690805.8947 entropy=17.7630 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 37020] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-300476.5 mean_steps=17.6
|
|
[Episode 37030] reward=-119012059.7 actor_loss=0.1894 critic_loss=157346745995.6364 entropy=17.7638 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1217 front_blocked=0
|
|
[Episode 37040] reward=-120934845.6 actor_loss=0.5936 critic_loss=292711081886.4762 entropy=17.7623 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 37040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-525580.0 mean_steps=15.2
|
|
[Episode 37050] reward=-121450913.6 actor_loss=0.2800 critic_loss=153211988650.6667 entropy=17.7427 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 37060] reward=-119113329.2 actor_loss=0.2226 critic_loss=147180715287.2727 entropy=17.7299 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 37060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465871.1 mean_steps=14.9
|
|
[Episode 37070] reward=-121777467.3 actor_loss=0.3160 critic_loss=153213307904.0000 entropy=17.7115 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 37080] reward=-121243878.9 actor_loss=0.2672 critic_loss=150350462976.0000 entropy=17.7108 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 37080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540941.9 mean_steps=13.4
|
|
[Episode 37090] reward=-120591354.0 actor_loss=0.2512 critic_loss=148909564928.0000 entropy=17.7143 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 37100] reward=-123931818.8 actor_loss=0.2651 critic_loss=154619120088.6154 entropy=17.7235 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 37100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-519669.3 mean_steps=15.2
|
|
[Episode 37110] reward=-116066914.8 actor_loss=0.3240 critic_loss=146728119266.7429 entropy=17.7279 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 37120] reward=-118656395.1 actor_loss=0.2395 critic_loss=145322474556.2353 entropy=17.7454 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 37120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-490729.0 mean_steps=13.4
|
|
[Episode 37130] reward=-117844436.9 actor_loss=0.2603 critic_loss=141837359695.6444 entropy=17.7491 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 37140] reward=-122602939.0 actor_loss=0.3330 critic_loss=155117270109.0909 entropy=17.7459 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 37140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497339.5 mean_steps=14.4
|
|
[Episode 37150] reward=-125366244.0 actor_loss=0.1935 critic_loss=154832304355.5555 entropy=17.7501 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 37160] reward=-121213313.8 actor_loss=0.2341 critic_loss=151892212004.5714 entropy=17.7431 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 37160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597745.7 mean_steps=12.8
|
|
[Episode 37170] reward=-118595259.6 actor_loss=0.3255 critic_loss=164070750208.0000 entropy=17.7307 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 37180] reward=-121938078.4 actor_loss=0.3029 critic_loss=184450985301.3333 entropy=17.7359 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 37180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421798.1 mean_steps=15.5
|
|
[Episode 37190] reward=-120511100.1 actor_loss=0.2064 critic_loss=147223424099.0968 entropy=17.7353 approx_kl=0.0123 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 37200] reward=-121126185.4 actor_loss=0.2769 critic_loss=154194243725.2414 entropy=17.7446 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 37200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449629.3 mean_steps=15.6
|
|
[Episode 37210] reward=-118623496.6 actor_loss=0.3013 critic_loss=153759116537.0811 entropy=17.7465 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 37220] reward=-120679214.6 actor_loss=0.2236 critic_loss=150311524244.2105 entropy=17.7645 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 37220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489977.7 mean_steps=14.7
|
|
[Episode 37230] reward=-121986686.7 actor_loss=0.2719 critic_loss=159187025264.6400 entropy=17.7768 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 37240] reward=-122064712.5 actor_loss=0.3032 critic_loss=161321177978.4348 entropy=17.7752 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 37240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-618971.7 mean_steps=13.0
|
|
[Episode 37250] reward=-119069208.9 actor_loss=0.2059 critic_loss=155006661427.2000 entropy=17.7824 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 37260] reward=-120000881.1 actor_loss=0.4104 critic_loss=150216132987.2592 entropy=17.8047 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 37260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538485.1 mean_steps=13.4
|
|
[Episode 37270] reward=-120478161.1 actor_loss=0.2031 critic_loss=154128974336.0000 entropy=17.7889 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 37280] reward=-119591924.0 actor_loss=0.2844 critic_loss=151229140796.9524 entropy=17.7926 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 37280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481408.9 mean_steps=14.2
|
|
[Episode 37290] reward=-119193525.7 actor_loss=0.2390 critic_loss=157376170522.9474 entropy=17.7895 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 37300] reward=-120189599.0 actor_loss=0.2607 critic_loss=153932689603.0476 entropy=17.7957 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 37300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-480473.2 mean_steps=15.9
|
|
[Episode 37310] reward=-114517846.2 actor_loss=0.3105 critic_loss=154965875097.6000 entropy=17.7812 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 37320] reward=-118891258.4 actor_loss=0.3064 critic_loss=151845431068.4445 entropy=17.7627 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 37320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503385.8 mean_steps=15.5
|
|
[Episode 37330] reward=-121538129.2 actor_loss=0.3008 critic_loss=154301563562.6667 entropy=17.7599 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 37340] reward=-120873121.9 actor_loss=0.2416 critic_loss=152153297884.6897 entropy=17.7487 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 37340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438187.4 mean_steps=15.5
|
|
[Episode 37350] reward=-124574855.7 actor_loss=0.3295 critic_loss=171337607122.4889 entropy=17.7414 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 37360] reward=-117568193.0 actor_loss=0.2538 critic_loss=150186265195.1628 entropy=17.7207 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 37360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-327517.6 mean_steps=16.0
|
|
[Episode 37370] reward=-118154756.9 actor_loss=0.3425 critic_loss=147536189440.0000 entropy=17.7221 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 37380] reward=-115083628.9 actor_loss=0.2963 critic_loss=150433420209.2308 entropy=17.7358 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 37380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-479256.3 mean_steps=16.1
|
|
[Episode 37390] reward=-117935135.0 actor_loss=0.3767 critic_loss=148861992215.2727 entropy=17.7477 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 37400] reward=-115167453.9 actor_loss=0.3621 critic_loss=147640849905.3714 entropy=17.7643 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 37400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-628733.8 mean_steps=14.2
|
|
[Episode 37410] reward=-119426672.6 actor_loss=0.2968 critic_loss=154117295152.7619 entropy=17.7573 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 37420] reward=-119084505.3 actor_loss=0.2889 critic_loss=145031253125.5652 entropy=17.7574 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 37420] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-349143.1 mean_steps=17.6
|
|
[Episode 37430] reward=-122897667.0 actor_loss=0.3211 critic_loss=157820306897.4546 entropy=17.7424 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 37440] reward=-118115771.0 actor_loss=0.3249 critic_loss=153334085093.0526 entropy=17.7283 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 37440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534964.1 mean_steps=13.3
|
|
[Episode 37450] reward=-120835967.3 actor_loss=0.2590 critic_loss=154167982984.9302 entropy=17.7233 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 37460] reward=-116539863.3 actor_loss=0.4555 critic_loss=146235906785.2800 entropy=17.7304 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 37460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439291.7 mean_steps=15.7
|
|
[Episode 37470] reward=-114482564.1 actor_loss=0.2514 critic_loss=136958820923.5349 entropy=17.7316 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 37480] reward=-123783530.0 actor_loss=0.3302 critic_loss=273012422883.5555 entropy=17.7340 approx_kl=0.0060 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 37480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451076.3 mean_steps=14.8
|
|
[Episode 37490] reward=-186499841.5 actor_loss=0.3327 critic_loss=15499335186659.5547 entropy=17.7377 approx_kl=0.0041 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 37500] reward=-131514264.5 actor_loss=0.3033 critic_loss=961068561959.3846 entropy=17.7521 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 37500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572734.8 mean_steps=12.7
|
|
[Episode 37510] reward=-124363488.3 actor_loss=0.1940 critic_loss=154312557012.1143 entropy=17.7596 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 37520] reward=-123105884.0 actor_loss=0.2576 critic_loss=157351670579.2000 entropy=17.7883 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 37520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444152.6 mean_steps=16.0
|
|
[Episode 37530] reward=-121042189.2 actor_loss=0.2714 critic_loss=156928510004.5128 entropy=17.7592 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 37540] reward=-123842038.1 actor_loss=0.2987 critic_loss=173985110173.5385 entropy=17.7524 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 37540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-486722.3 mean_steps=15.6
|
|
[Episode 37550] reward=-120580200.4 actor_loss=0.2139 critic_loss=160715062710.8571 entropy=17.7397 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 37560] reward=-123678523.2 actor_loss=0.2403 critic_loss=154651958227.4783 entropy=17.7354 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 37560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553003.6 mean_steps=13.4
|
|
[Episode 37570] reward=-111858001.8 actor_loss=0.3761 critic_loss=141910517504.0000 entropy=17.7113 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 37580] reward=-120776449.2 actor_loss=0.2616 critic_loss=157500523625.9310 entropy=17.7036 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 37580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-359037.6 mean_steps=16.1
|
|
[Episode 37590] reward=-116063636.1 actor_loss=0.4043 critic_loss=146840042291.2000 entropy=17.7028 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 37600] reward=-122604398.6 actor_loss=0.2599 critic_loss=153048150126.7027 entropy=17.6891 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 37600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475543.6 mean_steps=14.9
|
|
[Episode 37610] reward=-118270234.5 actor_loss=0.2667 critic_loss=251719920298.6667 entropy=17.6982 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 37620] reward=-118200648.6 actor_loss=0.3411 critic_loss=146828219572.7059 entropy=17.6880 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 37620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441020.2 mean_steps=15.7
|
|
[Episode 37630] reward=-119101085.0 actor_loss=0.2893 critic_loss=151466027690.6667 entropy=17.6812 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 37640] reward=-120455747.5 actor_loss=0.2147 critic_loss=149232900388.5714 entropy=17.6985 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 37640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-543171.2 mean_steps=15.5
|
|
[Episode 37650] reward=-116681439.5 actor_loss=0.3048 critic_loss=148822329116.4445 entropy=17.6784 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 37660] reward=-121906062.1 actor_loss=0.3278 critic_loss=156920232398.4516 entropy=17.6805 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 37660] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-456003.3 mean_steps=16.9
|
|
[Episode 37670] reward=-116567720.9 actor_loss=0.3699 critic_loss=142719061138.2857 entropy=17.6818 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 37680] reward=-116222075.2 actor_loss=0.2414 critic_loss=146860637608.5854 entropy=17.6710 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 37680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524787.1 mean_steps=13.9
|
|
[Episode 37690] reward=-113376200.7 actor_loss=0.2930 critic_loss=137847751773.0909 entropy=17.6528 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 37700] reward=-119526124.1 actor_loss=0.2657 critic_loss=146526127970.4615 entropy=17.6594 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 37700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-349585.5 mean_steps=17.0
|
|
[Episode 37710] reward=-122812314.5 actor_loss=0.2328 critic_loss=155298553309.8667 entropy=17.6462 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 37720] reward=-119325276.2 actor_loss=0.3537 critic_loss=147759053423.3044 entropy=17.6650 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 37720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442858.3 mean_steps=14.9
|
|
[Episode 37730] reward=-114749467.0 actor_loss=0.2864 critic_loss=146629919890.2857 entropy=17.6891 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 37740] reward=-122236938.0 actor_loss=0.1859 critic_loss=155610154780.4445 entropy=17.6871 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 37740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-344323.2 mean_steps=15.8
|
|
[Episode 37750] reward=-121460363.8 actor_loss=0.1412 critic_loss=146149558784.0000 entropy=17.6752 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 37760] reward=-121017221.6 actor_loss=0.3678 critic_loss=152328447441.4546 entropy=17.6719 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 37760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-487375.9 mean_steps=15.2
|
|
[Episode 37770] reward=-117293444.8 actor_loss=0.2467 critic_loss=139617753861.6889 entropy=17.6663 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 37780] reward=-119973929.1 actor_loss=0.2058 critic_loss=150398661142.2609 entropy=17.6712 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 37780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442494.4 mean_steps=15.8
|
|
[Episode 37790] reward=-121396896.8 actor_loss=0.3044 critic_loss=155348750336.0000 entropy=17.6691 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 37800] reward=-121954241.1 actor_loss=0.1546 critic_loss=154045244837.6471 entropy=17.6686 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 37800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536221.5 mean_steps=14.6
|
|
[Episode 37810] reward=-122148398.2 actor_loss=0.2654 critic_loss=153779743766.7556 entropy=17.6782 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 37820] reward=-118682268.8 actor_loss=0.3150 critic_loss=148580551338.6667 entropy=17.6814 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 37820] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-317675.8 mean_steps=17.5
|
|
[Episode 37830] reward=-119082369.3 actor_loss=0.2174 critic_loss=147775975424.0000 entropy=17.6751 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 37840] reward=-114957790.9 actor_loss=0.2221 critic_loss=145608716072.4211 entropy=17.6746 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 37840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448439.2 mean_steps=14.6
|
|
[Episode 37850] reward=-115327005.7 actor_loss=0.3861 critic_loss=141381472138.9714 entropy=17.6729 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 37860] reward=-139348079.0 actor_loss=0.2572 critic_loss=1696365304685.7144 entropy=17.6621 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 37860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-419071.0 mean_steps=14.7
|
|
[Episode 37870] reward=-119454741.4 actor_loss=0.1807 critic_loss=166329535977.7391 entropy=17.6530 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 37880] reward=-122343483.3 actor_loss=0.2848 critic_loss=159701673004.5217 entropy=17.6485 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 37880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454091.3 mean_steps=14.8
|
|
[Episode 37890] reward=-117549755.7 actor_loss=0.3678 critic_loss=149313649825.6842 entropy=17.6388 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 37900] reward=-118011124.1 actor_loss=0.2666 critic_loss=151817211708.9524 entropy=17.6423 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 37900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-575034.5 mean_steps=14.6
|
|
[Episode 37910] reward=-120160930.7 actor_loss=0.2292 critic_loss=147018879720.7273 entropy=17.6413 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 37920] reward=-121316983.7 actor_loss=0.2620 critic_loss=150220286397.2174 entropy=17.6447 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 37920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-511601.3 mean_steps=15.2
|
|
[Episode 37930] reward=-122777435.5 actor_loss=0.3086 critic_loss=152403938707.3940 entropy=17.6497 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 37940] reward=-119781670.2 actor_loss=0.2542 critic_loss=147988848861.4054 entropy=17.6374 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 37940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537171.9 mean_steps=13.2
|
|
[Episode 37950] reward=-119919687.2 actor_loss=0.3056 critic_loss=151878429144.6154 entropy=17.6335 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 37960] reward=-119622884.8 actor_loss=0.3059 critic_loss=153786371150.7692 entropy=17.6299 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 37960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-366407.1 mean_steps=16.2
|
|
[Episode 37970] reward=-118089022.9 actor_loss=0.2732 critic_loss=146106830848.0000 entropy=17.6227 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 37980] reward=-119678802.5 actor_loss=0.3021 critic_loss=158009874724.5714 entropy=17.6281 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 37980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500323.0 mean_steps=13.6
|
|
[Episode 37990] reward=-119321321.6 actor_loss=0.3082 critic_loss=152438087680.0000 entropy=17.6374 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 38000] reward=-123508593.9 actor_loss=0.2674 critic_loss=154943725021.8667 entropy=17.6381 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 38000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481242.9 mean_steps=14.7
|
|
[Episode 38010] reward=-120250780.1 actor_loss=0.2747 critic_loss=147549127368.3478 entropy=17.6322 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 38020] reward=-115243638.2 actor_loss=0.3436 critic_loss=142824281793.4222 entropy=17.6336 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 38020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546849.1 mean_steps=13.6
|
|
[Episode 38030] reward=-123799853.8 actor_loss=0.2761 critic_loss=153575652631.2727 entropy=17.6354 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 38040] reward=-118850493.6 actor_loss=0.3166 critic_loss=146017113281.7297 entropy=17.6302 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 38040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-508204.2 mean_steps=15.0
|
|
[Episode 38050] reward=-124427113.4 actor_loss=0.3500 critic_loss=161594044731.0769 entropy=17.6360 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 38060] reward=-116843640.3 actor_loss=0.3443 critic_loss=150492953941.3333 entropy=17.6256 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 38060] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-319883.4 mean_steps=17.9
|
|
[Episode 38070] reward=-112200694.0 actor_loss=0.3111 critic_loss=144364310869.3333 entropy=17.6215 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 38080] reward=-119493558.8 actor_loss=0.2505 critic_loss=144520493283.5555 entropy=17.6398 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 38080] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-353038.8 mean_steps=17.6
|
|
[Episode 38090] reward=-119901847.0 actor_loss=0.2709 critic_loss=145463142809.6000 entropy=17.6283 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 38100] reward=-120833568.0 actor_loss=0.3139 critic_loss=158806907859.4783 entropy=17.6260 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 38100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-562365.2 mean_steps=13.8
|
|
[Episode 38110] reward=-119529547.0 actor_loss=0.2806 critic_loss=157393427894.8571 entropy=17.6236 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 38120] reward=-124700838.2 actor_loss=0.3695 critic_loss=264617400729.6000 entropy=17.6127 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 38120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499575.6 mean_steps=14.1
|
|
[Episode 38130] reward=-118361038.6 actor_loss=0.2844 critic_loss=146445622665.8462 entropy=17.6133 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 38140] reward=-123123380.7 actor_loss=0.3157 critic_loss=153873652849.7778 entropy=17.6264 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 38140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-526140.6 mean_steps=15.8
|
|
[Episode 38150] reward=-117381882.1 actor_loss=0.2294 critic_loss=156287137412.7408 entropy=17.6328 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 38160] reward=-117729723.3 actor_loss=0.2881 critic_loss=149584296618.6667 entropy=17.6483 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 38160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517698.5 mean_steps=14.7
|
|
[Episode 38170] reward=-116716616.7 actor_loss=0.3214 critic_loss=150733184341.3333 entropy=17.6547 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 38180] reward=-120839783.0 actor_loss=0.3212 critic_loss=153558189592.3810 entropy=17.6544 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 38180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501673.9 mean_steps=14.0
|
|
[Episode 38190] reward=-122651186.5 actor_loss=0.3437 critic_loss=153734103040.0000 entropy=17.6558 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 38200] reward=-119130195.7 actor_loss=0.3071 critic_loss=153989181067.6364 entropy=17.6553 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 38200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-496848.1 mean_steps=16.2
|
|
[Episode 38210] reward=-117786916.9 actor_loss=0.3202 critic_loss=149338092495.2381 entropy=17.6465 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 38220] reward=-116535914.6 actor_loss=0.2723 critic_loss=160818770505.1429 entropy=17.6413 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 38220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-429865.5 mean_steps=13.4
|
|
[Episode 38230] reward=-124090066.8 actor_loss=0.2025 critic_loss=163649636644.5714 entropy=17.6413 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 38240] reward=-115930040.4 actor_loss=0.3435 critic_loss=149391029733.0526 entropy=17.6462 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 38240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431078.3 mean_steps=15.2
|
|
[Episode 38250] reward=-117413112.2 actor_loss=0.2684 critic_loss=148650868345.9048 entropy=17.6562 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 38260] reward=-113378996.7 actor_loss=0.4761 critic_loss=144725756928.0000 entropy=17.6469 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Eval 38260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549289.8 mean_steps=14.3
|
|
[Episode 38270] reward=-119268039.4 actor_loss=0.3619 critic_loss=151112482071.2727 entropy=17.6518 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 38280] reward=-118319876.4 actor_loss=0.2494 critic_loss=152205123256.3200 entropy=17.6536 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 38280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555290.8 mean_steps=12.7
|
|
[Episode 38290] reward=-119761575.1 actor_loss=0.2215 critic_loss=150020993558.2609 entropy=17.6585 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 38300] reward=-118128542.3 actor_loss=0.2109 critic_loss=152610666945.5610 entropy=17.6652 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 38300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431253.5 mean_steps=14.6
|
|
[Episode 38310] reward=-123829002.4 actor_loss=0.1884 critic_loss=165438065198.5454 entropy=17.6822 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 38320] reward=-121915817.2 actor_loss=0.3092 critic_loss=152203762619.7333 entropy=17.6812 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 38320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433926.5 mean_steps=15.6
|
|
[Episode 38330] reward=-118566121.5 actor_loss=0.3513 critic_loss=154422556829.5385 entropy=17.6887 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 38340] reward=-120798099.8 actor_loss=0.4332 critic_loss=158633089778.5263 entropy=17.6871 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 38340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-631369.4 mean_steps=11.9
|
|
[Episode 38350] reward=-117711774.5 actor_loss=0.3564 critic_loss=148297995185.2308 entropy=17.6941 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 38360] reward=-117865248.3 actor_loss=0.2696 critic_loss=149918497555.6923 entropy=17.6853 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 38360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-499434.8 mean_steps=13.6
|
|
[Episode 38370] reward=-116929620.4 actor_loss=0.3988 critic_loss=147529250523.4286 entropy=17.6704 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 38380] reward=-118837129.3 actor_loss=0.2384 critic_loss=153053015381.3333 entropy=17.6689 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 38380] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-350119.8 mean_steps=17.4
|
|
[Episode 38390] reward=-119322735.2 actor_loss=0.3848 critic_loss=152003485878.0444 entropy=17.6703 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 38400] reward=-125194810.4 actor_loss=0.3275 critic_loss=159479220435.8621 entropy=17.6599 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 38400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-521054.5 mean_steps=13.4
|
|
[Episode 38410] reward=-112524665.0 actor_loss=0.3809 critic_loss=140129979112.7273 entropy=17.6557 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 38420] reward=-118919162.4 actor_loss=0.2401 critic_loss=146647233194.6667 entropy=17.6483 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 38420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-627914.3 mean_steps=13.2
|
|
[Episode 38430] reward=-119664358.8 actor_loss=0.2965 critic_loss=154160797509.8182 entropy=17.6489 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 38440] reward=-123188505.8 actor_loss=0.2271 critic_loss=171360768585.1429 entropy=17.6406 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 38440] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-683484.5 mean_steps=11.4
|
|
[Episode 38450] reward=-124149479.7 actor_loss=0.2602 critic_loss=164760963832.6857 entropy=17.6422 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 38460] reward=-125802966.4 actor_loss=0.2822 critic_loss=175500178612.7059 entropy=17.6389 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 38460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520002.7 mean_steps=15.3
|
|
[Episode 38470] reward=-116745352.8 actor_loss=0.2712 critic_loss=145762369536.0000 entropy=17.6316 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 38480] reward=-122026931.3 actor_loss=0.2541 critic_loss=158075534995.9111 entropy=17.6313 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 38480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435495.0 mean_steps=15.4
|
|
[Episode 38490] reward=-119016370.7 actor_loss=0.2021 critic_loss=153644296601.6000 entropy=17.6385 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1243 front_blocked=0
|
|
[Episode 38500] reward=-115436194.5 actor_loss=0.3283 critic_loss=149339928486.9565 entropy=17.6127 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 38500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-664175.5 mean_steps=12.6
|
|
[Episode 38510] reward=-122142697.1 actor_loss=0.2947 critic_loss=155358790412.1905 entropy=17.6123 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 38520] reward=-123104539.9 actor_loss=0.2985 critic_loss=220971216987.0222 entropy=17.6322 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 38520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531382.1 mean_steps=13.6
|
|
[Episode 38530] reward=-120706843.9 actor_loss=0.3898 critic_loss=157002780299.6364 entropy=17.6317 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 38540] reward=-115299511.4 actor_loss=0.3066 critic_loss=145763538582.5882 entropy=17.6278 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 38540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386763.3 mean_steps=15.9
|
|
[Episode 38550] reward=-117852185.6 actor_loss=0.2478 critic_loss=151318247911.6190 entropy=17.6421 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 38560] reward=-121550113.6 actor_loss=0.2177 critic_loss=158392174778.1818 entropy=17.6461 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 38560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-469317.4 mean_steps=15.8
|
|
[Episode 38570] reward=-120115034.4 actor_loss=0.1828 critic_loss=150912923710.0606 entropy=17.6474 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 38580] reward=-117960197.7 actor_loss=0.3904 critic_loss=151637517107.2000 entropy=17.6440 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 38580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571105.5 mean_steps=14.2
|
|
[Episode 38590] reward=-120113550.1 actor_loss=0.3166 critic_loss=148192963242.6667 entropy=17.6495 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 38600] reward=-117914780.3 actor_loss=0.2852 critic_loss=146418195186.5263 entropy=17.6590 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 38600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-377305.0 mean_steps=15.5
|
|
[Episode 38610] reward=-119015680.0 actor_loss=0.3518 critic_loss=148167411898.1818 entropy=17.6656 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 38620] reward=-114473456.9 actor_loss=0.3434 critic_loss=143582049460.7059 entropy=17.6622 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 38620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537068.5 mean_steps=14.8
|
|
[Episode 38630] reward=-120425794.6 actor_loss=0.4141 critic_loss=158791790376.4211 entropy=17.6582 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 38640] reward=-121561211.3 actor_loss=0.2158 critic_loss=152475770880.0000 entropy=17.6571 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 38640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466905.4 mean_steps=15.1
|
|
[Episode 38650] reward=-117731464.3 actor_loss=0.2891 critic_loss=149093325677.7143 entropy=17.6487 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 38660] reward=-121226937.1 actor_loss=0.2034 critic_loss=149221100384.7111 entropy=17.6359 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 38660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-596498.0 mean_steps=13.1
|
|
[Episode 38670] reward=-114670421.2 actor_loss=0.2524 critic_loss=145703060318.3158 entropy=17.6288 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 38680] reward=-123658152.0 actor_loss=0.2447 critic_loss=156906252190.4762 entropy=17.6349 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 38680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-440119.2 mean_steps=16.4
|
|
[Episode 38690] reward=-120506539.1 actor_loss=0.2932 critic_loss=149669125351.2258 entropy=17.6655 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 38700] reward=-123264488.1 actor_loss=0.1927 critic_loss=161697567305.1429 entropy=17.6724 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 38700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500786.6 mean_steps=15.2
|
|
[Episode 38710] reward=-121221594.0 actor_loss=0.3395 critic_loss=155862626986.6667 entropy=17.6638 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 38720] reward=-121079070.2 actor_loss=0.1429 critic_loss=152529549312.0000 entropy=17.6476 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 38720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513232.9 mean_steps=14.2
|
|
[Episode 38730] reward=-125923940.0 actor_loss=0.2523 critic_loss=158062445256.3478 entropy=17.6529 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 38740] reward=-121372830.9 actor_loss=0.2599 critic_loss=151816609792.0000 entropy=17.6557 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 38740] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-689942.1 mean_steps=11.7
|
|
[Episode 38750] reward=-119547820.5 actor_loss=0.3050 critic_loss=153237585920.0000 entropy=17.6745 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 38760] reward=-121463611.3 actor_loss=0.2071 critic_loss=149209401116.4445 entropy=17.6875 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 38760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419531.9 mean_steps=15.5
|
|
[Episode 38770] reward=-120181234.1 actor_loss=0.3960 critic_loss=149384727990.8571 entropy=17.6812 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 38780] reward=-122863261.0 actor_loss=0.3365 critic_loss=157525000556.0889 entropy=17.6925 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 38780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-483199.8 mean_steps=15.9
|
|
[Episode 38790] reward=-126126143.5 actor_loss=0.2178 critic_loss=267360501760.0000 entropy=17.6936 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 38800] reward=-120815556.6 actor_loss=0.2644 critic_loss=157850221961.8462 entropy=17.6957 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 38800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507312.8 mean_steps=14.1
|
|
[Episode 38810] reward=-115397756.0 actor_loss=0.3257 critic_loss=144939728896.0000 entropy=17.6851 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 38820] reward=-122343654.7 actor_loss=0.3500 critic_loss=150298779830.0444 entropy=17.6687 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 38820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-403977.0 mean_steps=15.6
|
|
[Episode 38830] reward=-117500585.0 actor_loss=0.3452 critic_loss=143878497894.4000 entropy=17.6861 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 38840] reward=-121874674.9 actor_loss=0.3351 critic_loss=151349660186.9474 entropy=17.7009 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 38840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-467645.8 mean_steps=15.6
|
|
[Episode 38850] reward=-120660985.2 actor_loss=0.3074 critic_loss=148762495549.4400 entropy=17.6968 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 38860] reward=-124564234.2 actor_loss=0.3132 critic_loss=156890797085.2571 entropy=17.7041 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 38860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552571.0 mean_steps=14.2
|
|
[Episode 38870] reward=-120808273.5 actor_loss=0.2053 critic_loss=153304402392.6154 entropy=17.6912 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 38880] reward=-119626129.0 actor_loss=0.2809 critic_loss=148968639450.0741 entropy=17.6968 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 38880] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-705996.9 mean_steps=11.8
|
|
[Episode 38890] reward=-124511911.2 actor_loss=0.2023 critic_loss=155819284480.0000 entropy=17.6875 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 38900] reward=-116999096.4 actor_loss=0.2983 critic_loss=139974945060.5714 entropy=17.6873 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 38900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544375.6 mean_steps=13.2
|
|
[Episode 38910] reward=-112970452.0 actor_loss=0.3763 critic_loss=136436240501.0286 entropy=17.6874 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 38920] reward=-119947115.4 actor_loss=0.2730 critic_loss=148725707676.9032 entropy=17.6870 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 38920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-380171.6 mean_steps=15.1
|
|
[Episode 38930] reward=-122286365.7 actor_loss=0.2469 critic_loss=157781994859.3548 entropy=17.6887 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 38940] reward=-125479364.0 actor_loss=0.1662 critic_loss=159447106846.7200 entropy=17.6990 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 38940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525214.4 mean_steps=14.4
|
|
[Episode 38950] reward=-118717337.2 actor_loss=0.2284 critic_loss=144986593962.6667 entropy=17.7139 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 38960] reward=-119538158.0 actor_loss=0.2707 critic_loss=145074202985.4118 entropy=17.7045 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 38960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525262.0 mean_steps=13.8
|
|
[Episode 38970] reward=-122147361.1 actor_loss=0.2119 critic_loss=154321447594.6667 entropy=17.7087 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 38980] reward=-119500567.2 actor_loss=0.1932 critic_loss=142752473415.6800 entropy=17.7042 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 38980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-440867.7 mean_steps=13.4
|
|
[Episode 38990] reward=-121141670.4 actor_loss=0.2303 critic_loss=154154429098.6667 entropy=17.7084 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 39000] reward=-123360596.8 actor_loss=0.3013 critic_loss=154808348867.0476 entropy=17.7090 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 39000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500481.0 mean_steps=14.1
|
|
[Episode 39010] reward=-121741207.0 actor_loss=0.2646 critic_loss=155912623226.8800 entropy=17.7023 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 39020] reward=-122187701.4 actor_loss=0.3316 critic_loss=153694124152.4706 entropy=17.6979 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 39020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-616767.9 mean_steps=12.1
|
|
[Episode 39030] reward=-120933820.6 actor_loss=0.1835 critic_loss=152921000618.6667 entropy=17.7060 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 39040] reward=-124497484.0 actor_loss=0.1862 critic_loss=153088607337.9310 entropy=17.7057 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 39040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530574.2 mean_steps=13.1
|
|
[Episode 39050] reward=-118962142.3 actor_loss=0.2574 critic_loss=147740362524.4445 entropy=17.7056 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 39060] reward=-117193619.8 actor_loss=0.3301 critic_loss=142715791509.8537 entropy=17.7107 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 39060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-497254.3 mean_steps=14.9
|
|
[Episode 39070] reward=-123158659.1 actor_loss=0.2687 critic_loss=148985143842.1333 entropy=17.7210 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 39080] reward=-119694090.9 actor_loss=0.2877 critic_loss=152378273955.8400 entropy=17.7153 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 39080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-499502.1 mean_steps=15.0
|
|
[Episode 39090] reward=-120650394.8 actor_loss=0.2859 critic_loss=150571793703.8222 entropy=17.7256 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 39100] reward=-122858123.5 actor_loss=0.3228 critic_loss=151670079123.9111 entropy=17.7118 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 39100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512790.7 mean_steps=13.9
|
|
[Episode 39110] reward=-119977804.1 actor_loss=0.2361 critic_loss=149613859188.3636 entropy=17.7077 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 39120] reward=-120951569.9 actor_loss=0.3517 critic_loss=150143574497.8824 entropy=17.7159 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 39120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-640085.6 mean_steps=13.1
|
|
[Episode 39130] reward=-116592966.3 actor_loss=0.2795 critic_loss=139302860998.1935 entropy=17.7017 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 39140] reward=-120685768.4 actor_loss=0.3252 critic_loss=158675491756.9730 entropy=17.6948 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 39140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-415737.1 mean_steps=16.0
|
|
[Episode 39150] reward=-124322375.1 actor_loss=0.3006 critic_loss=152449063321.6000 entropy=17.6919 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 39160] reward=-119786530.3 actor_loss=0.2759 critic_loss=151972065006.9333 entropy=17.7054 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 39160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-515956.6 mean_steps=14.7
|
|
[Episode 39170] reward=-122658373.0 actor_loss=0.2925 critic_loss=152384656564.7059 entropy=17.7004 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 39180] reward=-125447806.2 actor_loss=0.2599 critic_loss=155072583725.5111 entropy=17.6949 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 39180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-382777.5 mean_steps=14.8
|
|
[Episode 39190] reward=-122270626.8 actor_loss=0.2982 critic_loss=153371474602.6667 entropy=17.6891 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 39200] reward=-121083054.7 actor_loss=0.2629 critic_loss=153278355137.4222 entropy=17.6926 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 39200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588110.1 mean_steps=13.4
|
|
[Episode 39210] reward=-126336792.7 actor_loss=0.2078 critic_loss=159816464197.8182 entropy=17.6902 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 39220] reward=-119647380.7 actor_loss=0.2901 critic_loss=151950901794.1333 entropy=17.6952 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 39220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419216.5 mean_steps=15.2
|
|
[Episode 39230] reward=-117129045.0 actor_loss=0.2542 critic_loss=146529570360.8889 entropy=17.6935 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 39240] reward=-123971319.7 actor_loss=0.2866 critic_loss=152622516724.6222 entropy=17.6913 approx_kl=0.0109 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 39240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-545584.0 mean_steps=13.8
|
|
[Episode 39250] reward=-119502378.8 actor_loss=0.3341 critic_loss=152306772413.2174 entropy=17.6826 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 39260] reward=-126048866.8 actor_loss=0.2226 critic_loss=160394990569.2444 entropy=17.6777 approx_kl=0.0100 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 39260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-521873.2 mean_steps=12.4
|
|
[Episode 39270] reward=-119971813.4 actor_loss=0.2347 critic_loss=145113889450.6667 entropy=17.6677 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 39280] reward=-120641434.2 actor_loss=0.3454 critic_loss=152224075776.0000 entropy=17.6512 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 39280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487289.1 mean_steps=14.1
|
|
[Episode 39290] reward=-122132651.7 actor_loss=0.2590 critic_loss=154744988647.0244 entropy=17.6622 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 39300] reward=-124571621.7 actor_loss=0.2745 critic_loss=155962058938.1818 entropy=17.6602 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 39300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-413501.3 mean_steps=16.6
|
|
[Episode 39310] reward=-119720297.2 actor_loss=0.3683 critic_loss=150583049420.8000 entropy=17.6636 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 39320] reward=-125577953.6 actor_loss=0.2762 critic_loss=159853203634.0869 entropy=17.6622 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 39320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-418748.6 mean_steps=14.7
|
|
[Episode 39330] reward=-125221175.1 actor_loss=0.2390 critic_loss=156479438483.9111 entropy=17.6602 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 39340] reward=-118919031.4 actor_loss=0.2806 critic_loss=150309058641.9200 entropy=17.6629 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 39340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-460164.4 mean_steps=16.8
|
|
[Episode 39350] reward=-119032321.4 actor_loss=0.3130 critic_loss=164079277634.7826 entropy=17.6645 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 39360] reward=-114990757.7 actor_loss=0.3779 critic_loss=139554267136.0000 entropy=17.6615 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 39360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-350447.2 mean_steps=16.2
|
|
[Episode 39370] reward=-127726684.2 actor_loss=0.3414 critic_loss=303460900864.0000 entropy=17.6598 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 39380] reward=-115450098.5 actor_loss=0.2513 critic_loss=194042106493.1555 entropy=17.6713 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 39380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560656.2 mean_steps=12.8
|
|
[Episode 39390] reward=-120506117.1 actor_loss=0.2005 critic_loss=152510536817.7778 entropy=17.6613 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 39400] reward=-119590289.1 actor_loss=0.3618 critic_loss=149320293034.6667 entropy=17.6661 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 39400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520790.9 mean_steps=14.2
|
|
[Episode 39410] reward=-123164511.2 actor_loss=0.2544 critic_loss=154332297126.9565 entropy=17.6607 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 39420] reward=-121715660.1 actor_loss=0.2562 critic_loss=144746899956.6222 entropy=17.6474 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 39420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-380720.1 mean_steps=15.9
|
|
[Episode 39430] reward=-121591909.2 actor_loss=0.3161 critic_loss=153666275494.0540 entropy=17.6483 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 39440] reward=-125404033.0 actor_loss=0.2677 critic_loss=157422253093.9259 entropy=17.6528 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 39440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-493783.3 mean_steps=33.5
|
|
[Episode 39450] reward=-123737348.1 actor_loss=0.2517 critic_loss=150817982964.6222 entropy=17.6435 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 39460] reward=-117291263.7 actor_loss=0.4792 critic_loss=195822920499.2000 entropy=17.6419 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 39460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368795.9 mean_steps=15.9
|
|
[Episode 39470] reward=-119877467.0 actor_loss=0.3076 critic_loss=152819737897.2903 entropy=17.6297 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 39480] reward=-116594440.9 actor_loss=0.3158 critic_loss=141114470084.9231 entropy=17.6323 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 39480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435134.1 mean_steps=15.7
|
|
[Episode 39490] reward=-117432660.3 actor_loss=0.3722 critic_loss=147116562245.8182 entropy=17.6200 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 39500] reward=-115812761.9 actor_loss=0.3064 critic_loss=145363519867.2592 entropy=17.6347 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 39500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460752.2 mean_steps=15.1
|
|
[Episode 39510] reward=-122168694.2 actor_loss=0.3160 critic_loss=155521376477.4054 entropy=17.6198 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 39520] reward=-119197353.9 actor_loss=0.3338 critic_loss=153561299353.6000 entropy=17.6202 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 39520] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-661943.8 mean_steps=11.1
|
|
[Episode 39530] reward=-121981574.6 actor_loss=0.3490 critic_loss=153660240691.2000 entropy=17.6235 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 39540] reward=-123441677.0 actor_loss=0.1601 critic_loss=151353492184.1778 entropy=17.6240 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 39540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-493320.5 mean_steps=15.8
|
|
[Episode 39550] reward=-122793846.3 actor_loss=0.2841 critic_loss=154258948096.0000 entropy=17.6145 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 39560] reward=-120998599.0 actor_loss=0.3042 critic_loss=151692755656.3478 entropy=17.6271 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 39560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-409856.4 mean_steps=14.7
|
|
[Episode 39570] reward=-113969532.0 actor_loss=0.3447 critic_loss=135013009362.4889 entropy=17.6189 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 39580] reward=-124301446.8 actor_loss=0.3471 critic_loss=158741176320.0000 entropy=17.6238 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 39580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-622862.4 mean_steps=13.3
|
|
[Episode 39590] reward=-124243571.8 actor_loss=0.2377 critic_loss=152581673292.1081 entropy=17.6164 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 39600] reward=-120955850.4 actor_loss=0.2845 critic_loss=148980415087.3044 entropy=17.6197 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 39600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547862.3 mean_steps=13.6
|
|
[Episode 39610] reward=-121946531.6 actor_loss=0.3247 critic_loss=152033684480.0000 entropy=17.6261 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 39620] reward=-125710015.5 actor_loss=0.2116 critic_loss=152648882585.6000 entropy=17.6216 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 39620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536586.4 mean_steps=13.7
|
|
[Episode 39630] reward=-116170457.8 actor_loss=0.2885 critic_loss=143120123676.4445 entropy=17.6192 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 39640] reward=-119411608.0 actor_loss=0.3917 critic_loss=229986950099.4783 entropy=17.6210 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 39640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523113.7 mean_steps=14.2
|
|
[Episode 39650] reward=-122211135.6 actor_loss=0.2178 critic_loss=146303810323.6923 entropy=17.6203 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 39660] reward=-122318001.0 actor_loss=0.3375 critic_loss=150680457974.5185 entropy=17.6216 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 39660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488605.7 mean_steps=14.2
|
|
[Episode 39670] reward=-122301898.1 actor_loss=0.2968 critic_loss=152373730344.9600 entropy=17.6180 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 39680] reward=-124654434.1 actor_loss=0.2283 critic_loss=153194471424.0000 entropy=17.5998 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 39680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391741.0 mean_steps=16.1
|
|
[Episode 39690] reward=-120251052.4 actor_loss=0.2850 critic_loss=149828851939.5555 entropy=17.6066 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 39700] reward=-120661617.7 actor_loss=0.2486 critic_loss=148094405500.7180 entropy=17.6048 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 39700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-575452.6 mean_steps=14.8
|
|
[Episode 39710] reward=-119222227.7 actor_loss=0.2871 critic_loss=148733840203.2941 entropy=17.6045 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 39720] reward=-123573413.9 actor_loss=0.2814 critic_loss=149986937370.9474 entropy=17.5935 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 39720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470609.6 mean_steps=13.9
|
|
[Episode 39730] reward=-118722205.7 actor_loss=0.3259 critic_loss=145857450257.0667 entropy=17.5879 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 39740] reward=-119147741.5 actor_loss=0.2689 critic_loss=146846181108.8696 entropy=17.5864 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 39740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-352060.9 mean_steps=17.3
|
|
[Episode 39750] reward=-122299898.1 actor_loss=0.2787 critic_loss=148755886665.1429 entropy=17.5919 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 39760] reward=-117391988.6 actor_loss=0.2514 critic_loss=146001592320.0000 entropy=17.6010 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 39760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-551239.1 mean_steps=13.8
|
|
[Episode 39770] reward=-123061127.0 actor_loss=0.2816 critic_loss=149857815756.8000 entropy=17.6025 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 39780] reward=-116050361.0 actor_loss=0.2979 critic_loss=145089764101.6889 entropy=17.5935 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 39780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449570.2 mean_steps=16.1
|
|
[Episode 39790] reward=-126627487.2 actor_loss=0.2172 critic_loss=157958179659.2941 entropy=17.6112 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 39800] reward=-120233175.9 actor_loss=0.3220 critic_loss=149253496285.8667 entropy=17.6070 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 39800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432485.6 mean_steps=15.8
|
|
[Episode 39810] reward=-119198093.5 actor_loss=0.2425 critic_loss=149981137245.6585 entropy=17.6084 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 39820] reward=-118257150.5 actor_loss=0.3334 critic_loss=147001672704.0000 entropy=17.6145 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 39820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-420347.1 mean_steps=14.8
|
|
[Episode 39830] reward=-117117502.6 actor_loss=0.3362 critic_loss=146565560818.1622 entropy=17.6300 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 39840] reward=-119628382.8 actor_loss=0.1786 critic_loss=146666559232.0000 entropy=17.6303 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 39840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498871.5 mean_steps=15.2
|
|
[Episode 39850] reward=-117940629.8 actor_loss=0.2354 critic_loss=144229694707.8095 entropy=17.6333 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 39860] reward=-120779005.9 actor_loss=0.3707 critic_loss=146602553716.3636 entropy=17.6343 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 39860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-383647.1 mean_steps=15.2
|
|
[Episode 39870] reward=-122642676.7 actor_loss=0.2840 critic_loss=153408220968.4211 entropy=17.6303 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 39880] reward=-111495922.9 actor_loss=0.4136 critic_loss=138892702586.4348 entropy=17.6230 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 39880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-426463.2 mean_steps=14.6
|
|
[Episode 39890] reward=-114896900.4 actor_loss=0.2806 critic_loss=143404261586.0513 entropy=17.6238 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 39900] reward=-124550485.9 actor_loss=0.2852 critic_loss=156192285842.2857 entropy=17.6191 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 39900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-500669.0 mean_steps=13.4
|
|
[Episode 39910] reward=-120071669.3 actor_loss=0.2850 critic_loss=156450839756.8000 entropy=17.6135 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 39920] reward=-129042390.3 actor_loss=1.8697 critic_loss=339431520337.9200 entropy=17.6153 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 39920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-560799.7 mean_steps=14.4
|
|
[Episode 39930] reward=-124586487.9 actor_loss=0.2230 critic_loss=150965665792.0000 entropy=17.6285 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 39940] reward=-119968483.1 actor_loss=0.2779 critic_loss=144289776857.2121 entropy=17.6227 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 39940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-562562.4 mean_steps=14.6
|
|
[Episode 39950] reward=-123211066.4 actor_loss=0.2465 critic_loss=153859445396.6452 entropy=17.6262 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 39960] reward=-118640452.5 actor_loss=0.1872 critic_loss=145265039951.6444 entropy=17.6265 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 39960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-499394.1 mean_steps=16.4
|
|
[Episode 39970] reward=-115247596.6 actor_loss=0.3751 critic_loss=142398346945.4222 entropy=17.6466 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 39980] reward=-122202200.1 actor_loss=0.2932 critic_loss=148264306777.0435 entropy=17.6392 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 39980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479555.0 mean_steps=14.1
|
|
[Episode 39990] reward=-122060616.5 actor_loss=0.3400 critic_loss=150145823175.1111 entropy=17.6361 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 40000] reward=-114916531.0 actor_loss=0.3094 critic_loss=147625805470.8965 entropy=17.6356 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 40000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409942.6 mean_steps=15.8
|
|
[Episode 40010] reward=-121020284.6 actor_loss=0.2977 critic_loss=156645130240.0000 entropy=17.6288 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 40020] reward=-121098229.9 actor_loss=0.2354 critic_loss=149002897408.0000 entropy=17.6236 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 40020] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-680173.5 mean_steps=11.6
|
|
[Episode 40030] reward=-123189499.5 actor_loss=0.3283 critic_loss=155672351665.2308 entropy=17.6210 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 40040] reward=-124699903.0 actor_loss=0.3444 critic_loss=154737214220.1905 entropy=17.6331 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 40040] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-638778.2 mean_steps=11.6
|
|
[Episode 40050] reward=-121424845.6 actor_loss=0.2719 critic_loss=149197235712.0000 entropy=17.6194 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 40060] reward=-115995445.1 actor_loss=0.2568 critic_loss=138978382994.2857 entropy=17.6233 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 40060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-620710.7 mean_steps=12.3
|
|
[Episode 40070] reward=-121228931.8 actor_loss=0.2577 critic_loss=149286221687.4667 entropy=17.5963 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 40080] reward=-121335897.6 actor_loss=0.2294 critic_loss=149103713393.7778 entropy=17.6075 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 40080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542184.8 mean_steps=14.0
|
|
[Episode 40090] reward=-121231776.4 actor_loss=0.2514 critic_loss=150362456792.1778 entropy=17.6181 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 40100] reward=-121658535.2 actor_loss=0.2935 critic_loss=145244301691.2592 entropy=17.6136 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 40100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421835.5 mean_steps=15.7
|
|
[Episode 40110] reward=-114695906.8 actor_loss=0.4822 critic_loss=143459316349.1555 entropy=17.6147 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1530 front_blocked=0
|
|
[Episode 40120] reward=-116991222.4 actor_loss=0.3847 critic_loss=139512655346.8718 entropy=17.6121 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 40120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-375068.3 mean_steps=15.8
|
|
[Episode 40130] reward=-118125292.6 actor_loss=0.3037 critic_loss=144079272891.7333 entropy=17.6194 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 40140] reward=-114917087.0 actor_loss=0.3729 critic_loss=134106245256.5333 entropy=17.6302 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 40140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561592.7 mean_steps=14.2
|
|
[Episode 40150] reward=-119085370.3 actor_loss=0.2439 critic_loss=147773336312.6857 entropy=17.6290 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 40160] reward=-121041839.8 actor_loss=0.2752 critic_loss=146872311326.1176 entropy=17.6308 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 40160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-538593.3 mean_steps=12.4
|
|
[Episode 40170] reward=-119418287.2 actor_loss=0.2478 critic_loss=145287036459.8857 entropy=17.6358 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 40180] reward=-118606741.4 actor_loss=0.3508 critic_loss=145294049280.0000 entropy=17.6341 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 40180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568741.0 mean_steps=13.7
|
|
[Episode 40190] reward=-121485942.4 actor_loss=0.2822 critic_loss=150912539033.6000 entropy=17.6212 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 40200] reward=-120864414.7 actor_loss=0.3332 critic_loss=151043511030.5185 entropy=17.6170 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 40200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575711.2 mean_steps=13.6
|
|
[Episode 40210] reward=-126550300.4 actor_loss=0.1191 critic_loss=154512407210.6667 entropy=17.6134 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 40220] reward=-113562252.1 actor_loss=0.2844 critic_loss=140095328347.0222 entropy=17.6195 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 40220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531644.6 mean_steps=14.6
|
|
[Episode 40230] reward=-120632867.9 actor_loss=0.3197 critic_loss=148023150182.4000 entropy=17.6164 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 40240] reward=-121412902.5 actor_loss=0.2265 critic_loss=151622003214.6286 entropy=17.6215 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 40240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506432.3 mean_steps=14.2
|
|
[Episode 40250] reward=-121611454.4 actor_loss=0.2454 critic_loss=153922886899.8095 entropy=17.6270 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 40260] reward=-116177318.6 actor_loss=0.2685 critic_loss=145103890793.4118 entropy=17.6179 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 40260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540886.4 mean_steps=13.9
|
|
[Episode 40270] reward=-120180738.5 actor_loss=0.2699 critic_loss=149693099030.7556 entropy=17.6240 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 40280] reward=-113378204.1 actor_loss=0.3479 critic_loss=139010158006.8571 entropy=17.5956 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 40280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546622.8 mean_steps=13.9
|
|
[Episode 40290] reward=-122247564.1 actor_loss=0.2621 critic_loss=153156116289.4884 entropy=17.5817 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 40300] reward=-110191428.8 actor_loss=0.2986 critic_loss=131484574068.3636 entropy=17.5711 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 40300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473004.8 mean_steps=15.2
|
|
[Episode 40310] reward=-121926640.2 actor_loss=0.3304 critic_loss=150783129510.9565 entropy=17.5703 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 40320] reward=-115506609.0 actor_loss=0.2454 critic_loss=143918352933.4634 entropy=17.5644 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 40320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-652043.4 mean_steps=12.3
|
|
[Episode 40330] reward=-122156834.3 actor_loss=0.2794 critic_loss=151680525562.3111 entropy=17.5681 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 40340] reward=-118294022.6 actor_loss=0.2124 critic_loss=148648301909.3333 entropy=17.5644 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 40340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537054.4 mean_steps=13.3
|
|
[Episode 40350] reward=-121888146.1 actor_loss=0.3674 critic_loss=472679532633.0435 entropy=17.5581 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 40360] reward=-108952951.2 actor_loss=0.4801 critic_loss=130815953214.5778 entropy=17.5491 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1504 front_blocked=0
|
|
[Eval 40360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490794.9 mean_steps=14.2
|
|
[Episode 40370] reward=-120027317.4 actor_loss=0.3850 critic_loss=151278901172.1482 entropy=17.5630 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 40380] reward=-119786418.0 actor_loss=0.2350 critic_loss=161492532224.0000 entropy=17.5609 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 40380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386484.7 mean_steps=16.5
|
|
[Episode 40390] reward=-119367375.5 actor_loss=0.2440 critic_loss=145351237996.0889 entropy=17.5688 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 40400] reward=-120123189.3 actor_loss=0.2393 critic_loss=146778574723.8788 entropy=17.5772 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 40400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498105.1 mean_steps=14.1
|
|
[Episode 40410] reward=-113884113.8 actor_loss=0.3575 critic_loss=138635480268.8000 entropy=17.5739 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 40420] reward=-117857539.0 actor_loss=0.3205 critic_loss=148423314016.8649 entropy=17.5642 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 40420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-454303.3 mean_steps=14.2
|
|
[Episode 40430] reward=-115369285.4 actor_loss=0.3134 critic_loss=139326878011.0769 entropy=17.5572 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 40440] reward=-120139801.6 actor_loss=0.1659 critic_loss=146202368000.0000 entropy=17.5716 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 40440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508101.0 mean_steps=14.2
|
|
[Episode 40450] reward=-123153788.9 actor_loss=0.2474 critic_loss=158265279244.1905 entropy=17.5715 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 40460] reward=-117900612.2 actor_loss=0.3771 critic_loss=145627392682.6667 entropy=17.5720 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 40460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-645207.9 mean_steps=13.8
|
|
[Episode 40470] reward=-114798631.3 actor_loss=0.2748 critic_loss=146086842368.0000 entropy=17.5734 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 40480] reward=-120634607.5 actor_loss=0.2907 critic_loss=146739161770.6667 entropy=17.5708 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 40480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522677.1 mean_steps=14.2
|
|
[Episode 40490] reward=-120319010.0 actor_loss=0.3474 critic_loss=152514025026.7826 entropy=17.5696 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 40500] reward=-120435128.3 actor_loss=0.2989 critic_loss=145694646587.0769 entropy=17.5704 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 40500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571444.5 mean_steps=13.7
|
|
[Episode 40510] reward=-116659186.3 actor_loss=0.3261 critic_loss=143020699921.0667 entropy=17.5754 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 40520] reward=-121223860.7 actor_loss=0.3319 critic_loss=151464049394.5263 entropy=17.5948 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 40520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554752.5 mean_steps=12.7
|
|
[Episode 40530] reward=-115232214.6 actor_loss=0.3400 critic_loss=147694311131.4286 entropy=17.5980 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 40540] reward=-118308154.6 actor_loss=0.4068 critic_loss=143509509649.6552 entropy=17.5977 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 40540] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-672969.7 mean_steps=11.4
|
|
[Episode 40550] reward=-122954962.9 actor_loss=0.2181 critic_loss=150731832083.6923 entropy=17.5964 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 40560] reward=-117213824.2 actor_loss=0.2281 critic_loss=142783653478.4000 entropy=17.5762 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 40560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-527818.2 mean_steps=11.9
|
|
[Episode 40570] reward=-119471296.0 actor_loss=0.2921 critic_loss=148818467693.7143 entropy=17.5854 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 40580] reward=-117072967.0 actor_loss=0.3078 critic_loss=139859826635.4872 entropy=17.5913 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 40580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-558293.4 mean_steps=14.4
|
|
[Episode 40590] reward=-121378921.4 actor_loss=0.2841 critic_loss=150296662439.7242 entropy=17.5942 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 40600] reward=-121488085.7 actor_loss=0.2499 critic_loss=144898687180.8000 entropy=17.6068 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 40600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503839.4 mean_steps=14.3
|
|
[Episode 40610] reward=-121762647.7 actor_loss=0.2404 critic_loss=145804952399.4483 entropy=17.6048 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 40620] reward=-124607001.2 actor_loss=0.2735 critic_loss=153087328559.4074 entropy=17.5937 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 40620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438881.7 mean_steps=14.8
|
|
[Episode 40630] reward=-122068328.9 actor_loss=0.3340 critic_loss=149800394436.9231 entropy=17.5936 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 40640] reward=-118040814.9 actor_loss=0.3088 critic_loss=164305183151.1579 entropy=17.6033 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 40640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497904.4 mean_steps=14.1
|
|
[Episode 40650] reward=-111785296.5 actor_loss=0.3517 critic_loss=133815203430.4000 entropy=17.6046 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 40660] reward=-116561272.8 actor_loss=0.3783 critic_loss=143964461738.6667 entropy=17.6179 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 40660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408865.9 mean_steps=15.5
|
|
[Episode 40670] reward=-123314322.7 actor_loss=0.2433 critic_loss=156202864772.1290 entropy=17.6283 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 40680] reward=-109956124.0 actor_loss=0.3672 critic_loss=146987289048.6154 entropy=17.6302 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 40680] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-628527.5 mean_steps=12.7
|
|
[Episode 40690] reward=-119649201.8 actor_loss=0.2627 critic_loss=147383187269.8182 entropy=17.6289 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 40700] reward=-123151590.4 actor_loss=0.3258 critic_loss=150447521011.8095 entropy=17.6316 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 40700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-476847.4 mean_steps=15.8
|
|
[Episode 40710] reward=-121648624.1 actor_loss=0.2357 critic_loss=150688576605.0909 entropy=17.6303 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 40720] reward=-124488088.6 actor_loss=0.2870 critic_loss=158133368030.6087 entropy=17.6235 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 40720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515033.7 mean_steps=14.4
|
|
[Episode 40730] reward=-117011128.6 actor_loss=0.2532 critic_loss=141001809139.8095 entropy=17.6226 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 40740] reward=-111950162.4 actor_loss=0.4058 critic_loss=143680070494.3158 entropy=17.6261 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 40740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462513.3 mean_steps=14.6
|
|
[Episode 40750] reward=-115493159.9 actor_loss=0.3930 critic_loss=138908558222.2222 entropy=17.6224 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 40760] reward=-115122481.8 actor_loss=0.2967 critic_loss=143180813365.8947 entropy=17.5986 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 40760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469754.3 mean_steps=15.1
|
|
[Episode 40770] reward=-120723043.8 actor_loss=0.2799 critic_loss=148588875069.7931 entropy=17.5993 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 40780] reward=-112940752.5 actor_loss=0.2863 critic_loss=149872403342.2222 entropy=17.6049 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 40780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555956.1 mean_steps=12.7
|
|
[Episode 40790] reward=-121561069.2 actor_loss=0.2637 critic_loss=148107597329.6552 entropy=17.6061 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 40800] reward=-126252725.2 actor_loss=0.2658 critic_loss=160273047005.8667 entropy=17.6097 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 40800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-400368.6 mean_steps=14.7
|
|
[Episode 40810] reward=-114881398.5 actor_loss=0.3534 critic_loss=140822721604.2667 entropy=17.6076 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 40820] reward=-119333368.5 actor_loss=0.2814 critic_loss=145963278080.0000 entropy=17.6146 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 40820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532368.7 mean_steps=13.2
|
|
[Episode 40830] reward=-113441028.1 actor_loss=0.4097 critic_loss=136415214652.2353 entropy=17.6029 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 40840] reward=-119227945.4 actor_loss=0.3168 critic_loss=145693941369.9048 entropy=17.5870 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 40840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-423456.4 mean_steps=16.8
|
|
[Episode 40850] reward=-125554836.1 actor_loss=0.2040 critic_loss=159312243097.6000 entropy=17.5902 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 40860] reward=-120437167.6 actor_loss=0.2465 critic_loss=150570857267.2000 entropy=17.5781 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 40860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-419482.8 mean_steps=16.4
|
|
[Episode 40870] reward=-125636983.3 actor_loss=0.1801 critic_loss=163078228811.2941 entropy=17.5783 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 40880] reward=-122817593.9 actor_loss=0.2175 critic_loss=209114440310.1538 entropy=17.5864 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 40880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465237.7 mean_steps=15.1
|
|
[Episode 40890] reward=-115693230.5 actor_loss=0.3443 critic_loss=148269980135.6190 entropy=17.5876 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 40900] reward=-121533910.8 actor_loss=0.3457 critic_loss=150565479310.2222 entropy=17.5858 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 40900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-516365.3 mean_steps=15.2
|
|
[Episode 40910] reward=-121444870.4 actor_loss=0.2897 critic_loss=150475823331.5555 entropy=17.5946 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 40920] reward=-120371686.1 actor_loss=0.2761 critic_loss=150842714794.6667 entropy=17.5861 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 40920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528801.8 mean_steps=13.2
|
|
[Episode 40930] reward=-119294326.5 actor_loss=0.3298 critic_loss=156681397394.2857 entropy=17.5714 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 40940] reward=-115027388.5 actor_loss=0.3305 critic_loss=140202050796.3077 entropy=17.5715 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 40940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544994.9 mean_steps=14.2
|
|
[Episode 40950] reward=-124019175.6 actor_loss=0.3229 critic_loss=157537020586.6667 entropy=17.5629 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 40960] reward=-114546407.2 actor_loss=0.3033 critic_loss=143189847244.8000 entropy=17.5578 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 40960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-546571.5 mean_steps=13.8
|
|
[Episode 40970] reward=-120252862.1 actor_loss=0.2362 critic_loss=150377664512.0000 entropy=17.5672 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 40980] reward=-119064638.2 actor_loss=0.1896 critic_loss=151888317644.8000 entropy=17.5631 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 40980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-633083.3 mean_steps=12.1
|
|
[Episode 40990] reward=-117001620.6 actor_loss=0.4343 critic_loss=149194799427.3684 entropy=17.5677 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 41000] reward=-117441587.3 actor_loss=0.2620 critic_loss=143135047962.4828 entropy=17.5710 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 41000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553685.5 mean_steps=13.1
|
|
[Episode 41010] reward=-119128795.0 actor_loss=0.3249 critic_loss=148618211913.1429 entropy=17.5678 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 41020] reward=-122002346.7 actor_loss=0.3014 critic_loss=151183854592.0000 entropy=17.5761 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 41020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575932.5 mean_steps=13.7
|
|
[Episode 41030] reward=-119211588.5 actor_loss=0.3415 critic_loss=146637644024.2424 entropy=17.5784 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 41040] reward=-116102693.5 actor_loss=0.3089 critic_loss=146566700311.2727 entropy=17.5852 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 41040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517407.7 mean_steps=14.1
|
|
[Episode 41050] reward=-118496392.7 actor_loss=0.2952 critic_loss=144513443430.4000 entropy=17.5778 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 41060] reward=-112462490.9 actor_loss=0.2858 critic_loss=142760199782.4000 entropy=17.5733 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 41060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-416542.5 mean_steps=14.5
|
|
[Episode 41070] reward=-113135994.6 actor_loss=0.3581 critic_loss=140926093642.3226 entropy=17.5738 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 41080] reward=-116122299.7 actor_loss=0.3341 critic_loss=139120696599.2727 entropy=17.5738 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 41080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468400.0 mean_steps=14.9
|
|
[Episode 41090] reward=-119353658.6 actor_loss=0.2791 critic_loss=146298807364.2667 entropy=17.5876 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 41100] reward=-112809297.9 actor_loss=0.3515 critic_loss=142570997005.4737 entropy=17.5907 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 41100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409163.4 mean_steps=15.2
|
|
[Episode 41110] reward=-114544285.7 actor_loss=0.2551 critic_loss=138994014071.4667 entropy=17.5912 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 41120] reward=-119873381.4 actor_loss=0.2550 critic_loss=149768608153.6000 entropy=17.5766 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 41120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616520.1 mean_steps=12.5
|
|
[Episode 41130] reward=-116956599.0 actor_loss=0.3347 critic_loss=143958940876.8000 entropy=17.5722 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 41140] reward=-120462467.3 actor_loss=0.3197 critic_loss=152720587776.0000 entropy=17.5777 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 41140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-429910.8 mean_steps=16.2
|
|
[Episode 41150] reward=-116156522.6 actor_loss=0.3214 critic_loss=141967469772.8000 entropy=17.5690 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 41160] reward=-122217946.8 actor_loss=0.2047 critic_loss=176078427750.4000 entropy=17.5697 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Eval 41160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500722.2 mean_steps=14.2
|
|
[Episode 41170] reward=-114819108.8 actor_loss=0.2639 critic_loss=145139343360.0000 entropy=17.5960 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 41180] reward=-113294716.1 actor_loss=0.1871 critic_loss=131342389156.9778 entropy=17.5974 approx_kl=0.0106 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 41180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-460339.0 mean_steps=13.7
|
|
[Episode 41190] reward=-117002989.5 actor_loss=0.3164 critic_loss=149984744009.1429 entropy=17.5848 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 41200] reward=-121281607.2 actor_loss=0.3046 critic_loss=149380846933.3333 entropy=17.5811 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 41200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-646331.4 mean_steps=12.3
|
|
[Episode 41210] reward=-118227993.9 actor_loss=0.2974 critic_loss=143765541355.5200 entropy=17.5752 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 41220] reward=-120669075.3 actor_loss=0.3004 critic_loss=151669013845.3333 entropy=17.5760 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 41220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-355368.4 mean_steps=16.1
|
|
[Episode 41230] reward=-110498984.9 actor_loss=0.3409 critic_loss=132714997760.0000 entropy=17.5840 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 41240] reward=-119268419.4 actor_loss=0.1889 critic_loss=144350261248.0000 entropy=17.5842 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 41240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-589111.0 mean_steps=13.8
|
|
[Episode 41250] reward=-120420924.0 actor_loss=0.2315 critic_loss=147261438138.1818 entropy=17.5818 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 41260] reward=-117596781.7 actor_loss=0.3834 critic_loss=139658147612.4445 entropy=17.5849 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 41260] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-352352.6 mean_steps=17.1
|
|
[Episode 41270] reward=-115183324.9 actor_loss=0.2116 critic_loss=146332524544.0000 entropy=17.5927 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 41280] reward=-121163116.1 actor_loss=0.4084 critic_loss=152487961486.2222 entropy=17.5873 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 41280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581147.7 mean_steps=13.9
|
|
[Episode 41290] reward=-123637512.1 actor_loss=0.2431 critic_loss=156876070001.7778 entropy=17.5860 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 41300] reward=-117138967.8 actor_loss=0.2862 critic_loss=149784671894.5882 entropy=17.5812 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 41300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544760.4 mean_steps=14.7
|
|
[Episode 41310] reward=-115362492.7 actor_loss=0.2025 critic_loss=141847470942.3158 entropy=17.5706 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 41320] reward=-118991408.6 actor_loss=0.2404 critic_loss=145454211072.0000 entropy=17.5663 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 41320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-311836.9 mean_steps=15.8
|
|
[Episode 41330] reward=-121915124.7 actor_loss=0.3057 critic_loss=152061636152.8889 entropy=17.5778 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 41340] reward=-120814355.4 actor_loss=0.2518 critic_loss=149846343680.0000 entropy=17.5826 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 41340] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-280067.7 mean_steps=18.8
|
|
[Episode 41350] reward=-115479282.0 actor_loss=0.3717 critic_loss=147768790220.8000 entropy=17.5805 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 41360] reward=-121254335.0 actor_loss=0.2373 critic_loss=176617699409.9200 entropy=17.5748 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 41360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-622837.6 mean_steps=11.9
|
|
[Episode 41370] reward=-115096312.7 actor_loss=0.3183 critic_loss=145453844322.4615 entropy=17.5825 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 41380] reward=-113082118.2 actor_loss=0.3187 critic_loss=135837038136.8889 entropy=17.5951 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 41380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496444.0 mean_steps=14.2
|
|
[Episode 41390] reward=-123456699.7 actor_loss=0.3183 critic_loss=151447958674.2857 entropy=17.5779 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 41400] reward=-121747833.9 actor_loss=0.3216 critic_loss=156116010449.4546 entropy=17.5928 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 41400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549258.8 mean_steps=13.3
|
|
[Episode 41410] reward=-120457028.2 actor_loss=0.2969 critic_loss=146977553703.8222 entropy=17.5820 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 41420] reward=-118392303.1 actor_loss=0.2874 critic_loss=144550736607.1795 entropy=17.5767 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 41420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-522799.3 mean_steps=13.7
|
|
[Episode 41430] reward=-119619260.3 actor_loss=0.1723 critic_loss=145271491032.6154 entropy=17.5884 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 41440] reward=-118924921.9 actor_loss=0.2496 critic_loss=143682116539.7333 entropy=17.5865 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 41440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-530543.7 mean_steps=15.2
|
|
[Episode 41450] reward=-113898747.6 actor_loss=0.2688 critic_loss=140688430609.6552 entropy=17.5947 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 41460] reward=-122117569.2 actor_loss=0.2525 critic_loss=152175225162.3226 entropy=17.5895 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 41460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-389175.5 mean_steps=16.4
|
|
[Episode 41470] reward=-120757893.8 actor_loss=0.2915 critic_loss=151685854373.1613 entropy=17.5838 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 41480] reward=-119860245.3 actor_loss=0.2358 critic_loss=154597199708.1600 entropy=17.5781 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 41480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-423062.3 mean_steps=14.5
|
|
[Episode 41490] reward=-116014925.6 actor_loss=0.3941 critic_loss=134364159453.8667 entropy=17.5818 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 41500] reward=-116701820.8 actor_loss=0.2901 critic_loss=144215641115.6757 entropy=17.5864 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 41500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517342.1 mean_steps=15.2
|
|
[Episode 41510] reward=-120547853.4 actor_loss=0.2424 critic_loss=146931345544.5333 entropy=17.5904 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 41520] reward=-116519038.3 actor_loss=0.2983 critic_loss=144174893832.8276 entropy=17.5940 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 41520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552401.0 mean_steps=14.4
|
|
[Episode 41530] reward=-116081644.8 actor_loss=0.3163 critic_loss=140057425165.4737 entropy=17.5770 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 41540] reward=-114518683.9 actor_loss=0.3339 critic_loss=140650590208.0000 entropy=17.5650 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 41540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-507755.0 mean_steps=15.3
|
|
[Episode 41550] reward=-121281115.8 actor_loss=0.2281 critic_loss=148291823348.8696 entropy=17.5797 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 41560] reward=-119834884.4 actor_loss=0.3108 critic_loss=147737199528.2286 entropy=17.5791 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 41560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571820.5 mean_steps=13.6
|
|
[Episode 41570] reward=-119339218.7 actor_loss=0.3087 critic_loss=143225566003.2000 entropy=17.5881 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 41580] reward=-121166635.2 actor_loss=0.3156 critic_loss=147587727360.0000 entropy=17.6012 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 41580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-612135.0 mean_steps=13.6
|
|
[Episode 41590] reward=-121790531.9 actor_loss=0.2024 critic_loss=145460922525.5385 entropy=17.5891 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 41600] reward=-123734243.6 actor_loss=0.2828 critic_loss=155895095808.0000 entropy=17.6083 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 41600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476543.9 mean_steps=14.9
|
|
[Episode 41610] reward=-121509158.3 actor_loss=0.2333 critic_loss=149755853358.5454 entropy=17.6123 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 41620] reward=-120873643.8 actor_loss=0.3960 critic_loss=152082478495.1351 entropy=17.6100 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 41620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-582400.5 mean_steps=12.5
|
|
[Episode 41630] reward=-120310562.5 actor_loss=0.3868 critic_loss=145453889536.0000 entropy=17.6060 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 41640] reward=-118990431.9 actor_loss=0.2342 critic_loss=142165015315.6923 entropy=17.6009 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 41640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518066.2 mean_steps=13.2
|
|
[Episode 41650] reward=-119616192.5 actor_loss=0.2194 critic_loss=146809125487.3044 entropy=17.6086 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 41660] reward=-120678039.6 actor_loss=0.3010 critic_loss=146978460233.1429 entropy=17.6115 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 41660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502942.1 mean_steps=14.0
|
|
[Episode 41670] reward=-119567475.4 actor_loss=0.2938 critic_loss=149984627489.3913 entropy=17.6197 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 41680] reward=-117746882.1 actor_loss=0.2522 critic_loss=152883984022.5882 entropy=17.6115 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 41680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510700.4 mean_steps=13.3
|
|
[Episode 41690] reward=-123748163.0 actor_loss=0.2659 critic_loss=148048284120.6154 entropy=17.6112 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 41700] reward=-114216827.5 actor_loss=0.4122 critic_loss=147086519828.4800 entropy=17.6035 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 41700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-527423.3 mean_steps=14.4
|
|
[Episode 41710] reward=-120350145.5 actor_loss=0.3248 critic_loss=144148645806.0800 entropy=17.6181 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 41720] reward=-117988161.1 actor_loss=0.2910 critic_loss=142894817490.0513 entropy=17.6244 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 41720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476340.0 mean_steps=14.6
|
|
[Episode 41730] reward=-117471883.4 actor_loss=0.4161 critic_loss=146443646789.8182 entropy=17.6320 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 41740] reward=-117891826.6 actor_loss=0.3324 critic_loss=147296352665.6000 entropy=17.6419 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 41740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467724.8 mean_steps=13.5
|
|
[Episode 41750] reward=-122547079.4 actor_loss=0.3050 critic_loss=148488899154.5807 entropy=17.6534 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 41760] reward=-121209415.2 actor_loss=0.2805 critic_loss=153677927453.2571 entropy=17.6413 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 41760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-573257.5 mean_steps=13.5
|
|
[Episode 41770] reward=-119957411.5 actor_loss=0.2820 critic_loss=142699878324.1482 entropy=17.6422 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 41780] reward=-121862746.4 actor_loss=0.2291 critic_loss=148345279829.3333 entropy=17.6329 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 41780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493394.8 mean_steps=14.0
|
|
[Episode 41790] reward=-111677013.6 actor_loss=0.2605 critic_loss=136369195331.3684 entropy=17.6253 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 41800] reward=-117909289.1 actor_loss=0.3277 critic_loss=143887405465.6000 entropy=17.6207 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 41800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409889.2 mean_steps=16.6
|
|
[Episode 41810] reward=-121844603.4 actor_loss=0.2824 critic_loss=147680393746.9630 entropy=17.6065 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 41820] reward=-115919289.6 actor_loss=0.3647 critic_loss=142303081338.4348 entropy=17.6081 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 41820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492036.9 mean_steps=14.3
|
|
[Episode 41830] reward=-116805928.5 actor_loss=0.3676 critic_loss=143954567623.1111 entropy=17.6082 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 41840] reward=-121929148.1 actor_loss=0.2598 critic_loss=150830888525.5757 entropy=17.5857 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 41840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555971.4 mean_steps=13.6
|
|
[Episode 41850] reward=-115986796.5 actor_loss=0.2095 critic_loss=139471987745.0323 entropy=17.5887 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 41860] reward=-118086056.3 actor_loss=0.3399 critic_loss=142549561461.0286 entropy=17.5922 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 41860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552761.5 mean_steps=13.9
|
|
[Episode 41870] reward=-119353765.2 actor_loss=0.3432 critic_loss=146886186037.8947 entropy=17.5804 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 41880] reward=-121179441.7 actor_loss=0.3138 critic_loss=149462356690.8235 entropy=17.5872 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 41880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428434.4 mean_steps=15.6
|
|
[Episode 41890] reward=-120402563.6 actor_loss=0.3334 critic_loss=148285736487.3846 entropy=17.5856 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 41900] reward=-114525729.1 actor_loss=0.3267 critic_loss=134937967859.8095 entropy=17.5930 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 41900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532043.3 mean_steps=13.6
|
|
[Episode 41910] reward=-116886196.7 actor_loss=0.3325 critic_loss=142955124578.4615 entropy=17.5948 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 41920] reward=-120733237.1 actor_loss=0.3493 critic_loss=148550651318.8571 entropy=17.6115 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 41920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483772.4 mean_steps=14.2
|
|
[Episode 41930] reward=-118318138.6 actor_loss=0.3514 critic_loss=147274307811.5555 entropy=17.6077 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 41940] reward=-120240384.5 actor_loss=0.3367 critic_loss=142183402761.4815 entropy=17.6097 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 41940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-574633.6 mean_steps=12.1
|
|
[Episode 41950] reward=-120543994.9 actor_loss=0.2926 critic_loss=147789571868.4445 entropy=17.6049 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 41960] reward=-118235975.1 actor_loss=0.1776 critic_loss=141783988451.5555 entropy=17.6079 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 41960] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-287805.1 mean_steps=17.9
|
|
[Episode 41970] reward=-121656801.7 actor_loss=0.2834 critic_loss=149366448310.0444 entropy=17.6094 approx_kl=0.0104 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 41980] reward=-120392710.8 actor_loss=0.2113 critic_loss=146545141561.8065 entropy=17.6155 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 41980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528611.4 mean_steps=14.3
|
|
[Episode 41990] reward=-118747326.7 actor_loss=0.2885 critic_loss=145776011709.2174 entropy=17.6075 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 42000] reward=-121153261.5 actor_loss=0.2855 critic_loss=145088823296.0000 entropy=17.6234 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 42000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-578739.6 mean_steps=12.7
|
|
[Episode 42010] reward=-120228715.1 actor_loss=0.3370 critic_loss=149936998692.5714 entropy=17.6223 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 42020] reward=-120610331.8 actor_loss=0.3561 critic_loss=145067140517.6471 entropy=17.6475 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 42020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-596252.7 mean_steps=13.6
|
|
[Episode 42030] reward=-118330232.2 actor_loss=0.3285 critic_loss=140369567516.4445 entropy=17.6493 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 42040] reward=-121406355.6 actor_loss=0.3607 critic_loss=145419352291.5555 entropy=17.6365 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Eval 42040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528185.2 mean_steps=14.1
|
|
[Episode 42050] reward=-117495276.1 actor_loss=0.3205 critic_loss=150293373474.1333 entropy=17.6250 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 42060] reward=-123359115.2 actor_loss=0.1725 critic_loss=155917287424.0000 entropy=17.6326 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 42060] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-281470.4 mean_steps=16.6
|
|
[Episode 42070] reward=-120376795.4 actor_loss=0.2627 critic_loss=144777073012.3636 entropy=17.6282 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 42080] reward=-119026677.8 actor_loss=0.2569 critic_loss=144734839808.0000 entropy=17.6237 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 42080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519322.3 mean_steps=14.4
|
|
[Episode 42090] reward=-115866102.1 actor_loss=0.3717 critic_loss=137420922880.0000 entropy=17.6230 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 42100] reward=-118403671.1 actor_loss=0.2284 critic_loss=146909707507.8095 entropy=17.6195 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 42100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525085.6 mean_steps=14.5
|
|
[Episode 42110] reward=-113745651.4 actor_loss=0.3663 critic_loss=133772023222.8571 entropy=17.6189 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 42120] reward=-124479694.0 actor_loss=0.2740 critic_loss=160025223168.0000 entropy=17.6174 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 42120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502466.2 mean_steps=15.1
|
|
[Episode 42130] reward=-121878498.5 actor_loss=0.3176 critic_loss=169487097856.0000 entropy=17.6202 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 42140] reward=-122692774.0 actor_loss=0.2902 critic_loss=148510753353.1429 entropy=17.6260 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 42140] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-704243.3 mean_steps=10.8
|
|
[Episode 42150] reward=-122687290.9 actor_loss=0.2777 critic_loss=146999852714.6667 entropy=17.6443 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 42160] reward=-123467459.9 actor_loss=0.2887 critic_loss=153297216512.0000 entropy=17.6438 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 42160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-664619.8 mean_steps=12.2
|
|
[Episode 42170] reward=-119010595.2 actor_loss=0.2841 critic_loss=152378318848.0000 entropy=17.6475 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 42180] reward=-116662784.2 actor_loss=0.2023 critic_loss=201763772974.5454 entropy=17.6461 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 42180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-602006.1 mean_steps=13.8
|
|
[Episode 42190] reward=-118497669.2 actor_loss=0.3624 critic_loss=144417411287.5789 entropy=17.6386 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 42200] reward=-118131767.3 actor_loss=0.2756 critic_loss=140445063213.5111 entropy=17.6156 approx_kl=0.0102 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 42200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489807.4 mean_steps=14.2
|
|
[Episode 42210] reward=-120777585.3 actor_loss=0.1924 critic_loss=146668001962.6667 entropy=17.6261 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 42220] reward=-121432608.9 actor_loss=0.3663 critic_loss=149398509041.3714 entropy=17.6252 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 42220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-633105.4 mean_steps=13.3
|
|
[Episode 42230] reward=-115324007.9 actor_loss=0.2138 critic_loss=140152778260.4800 entropy=17.6254 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 42240] reward=-117783728.5 actor_loss=0.3208 critic_loss=149118924117.3333 entropy=17.6299 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 42240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490210.9 mean_steps=14.1
|
|
[Episode 42250] reward=-119411187.4 actor_loss=0.2567 critic_loss=144277436825.6000 entropy=17.6138 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 42260] reward=-121788310.7 actor_loss=0.2101 critic_loss=150095148889.9460 entropy=17.6168 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 42260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567128.0 mean_steps=13.8
|
|
[Episode 42270] reward=-122882379.7 actor_loss=0.2621 critic_loss=150740721232.8421 entropy=17.6187 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 42280] reward=-125936842.5 actor_loss=0.2490 critic_loss=152774909561.9048 entropy=17.6131 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 42280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-443132.4 mean_steps=14.5
|
|
[Episode 42290] reward=-120527940.9 actor_loss=0.3394 critic_loss=145221692757.3333 entropy=17.6163 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 42300] reward=-121034536.5 actor_loss=0.3727 critic_loss=144426992058.8108 entropy=17.6073 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Eval 42300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-703778.0 mean_steps=13.8
|
|
[Episode 42310] reward=-117229460.7 actor_loss=0.1703 critic_loss=143225439325.0909 entropy=17.6125 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 42320] reward=-116229133.7 actor_loss=0.3143 critic_loss=139444516337.3714 entropy=17.6178 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 42320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489047.8 mean_steps=15.0
|
|
[Episode 42330] reward=-120692073.3 actor_loss=0.2619 critic_loss=143039442571.6364 entropy=17.6106 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 42340] reward=-118104292.0 actor_loss=0.2697 critic_loss=142713902031.2381 entropy=17.6038 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 42340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-639241.4 mean_steps=13.1
|
|
[Episode 42350] reward=-114931627.6 actor_loss=0.3192 critic_loss=136533690686.5778 entropy=17.6030 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 42360] reward=-127225162.8 actor_loss=0.2044 critic_loss=155774640128.0000 entropy=17.5932 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 42360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512581.9 mean_steps=14.1
|
|
[Episode 42370] reward=-113755922.0 actor_loss=0.2865 critic_loss=134725561088.0000 entropy=17.5926 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 42380] reward=-117594612.6 actor_loss=0.3073 critic_loss=144594101799.3846 entropy=17.5846 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 42380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489571.2 mean_steps=14.0
|
|
[Episode 42390] reward=-111497335.7 actor_loss=0.2847 critic_loss=139593141899.6364 entropy=17.5959 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 42400] reward=-118968624.7 actor_loss=0.2520 critic_loss=144237773917.0909 entropy=17.6119 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 42400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-380162.2 mean_steps=15.2
|
|
[Episode 42410] reward=-117631452.5 actor_loss=0.3059 critic_loss=141858351891.6923 entropy=17.6126 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 42420] reward=-114289436.1 actor_loss=0.3182 critic_loss=184980048164.5714 entropy=17.6238 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 42420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-589395.6 mean_steps=14.7
|
|
[Episode 42430] reward=-120291589.0 actor_loss=0.2135 critic_loss=142996181937.2308 entropy=17.6119 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 42440] reward=-119672474.2 actor_loss=0.2792 critic_loss=144961772050.9630 entropy=17.6132 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 42440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591484.3 mean_steps=13.6
|
|
[Episode 42450] reward=-113874268.4 actor_loss=0.2962 critic_loss=135545010468.5714 entropy=17.6186 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 42460] reward=-119437208.0 actor_loss=0.3227 critic_loss=141430860276.6222 entropy=17.6114 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 42460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-521562.1 mean_steps=15.2
|
|
[Episode 42470] reward=-122328863.3 actor_loss=0.3043 critic_loss=149379573760.0000 entropy=17.6143 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 42480] reward=-114526381.7 actor_loss=0.3083 critic_loss=154330261346.4615 entropy=17.6097 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 42480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543822.8 mean_steps=13.7
|
|
[Episode 42490] reward=-122380874.8 actor_loss=0.2138 critic_loss=173522904003.7647 entropy=17.6192 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 42500] reward=-113491293.6 actor_loss=0.4019 critic_loss=136549176040.7273 entropy=17.6123 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 42500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521110.4 mean_steps=14.4
|
|
[Episode 42510] reward=-126994019.4 actor_loss=0.2809 critic_loss=1059351731593.8462 entropy=17.6159 approx_kl=0.0041 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 42520] reward=-122742748.5 actor_loss=0.3037 critic_loss=148361509187.3684 entropy=17.6152 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 42520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521997.9 mean_steps=14.3
|
|
[Episode 42530] reward=-119908394.0 actor_loss=0.2477 critic_loss=144834153835.3548 entropy=17.6157 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 42540] reward=-118937117.2 actor_loss=0.3683 critic_loss=146667593142.8571 entropy=17.6073 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 42540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524952.7 mean_steps=13.5
|
|
[Episode 42550] reward=-112070924.2 actor_loss=0.2923 critic_loss=133709504184.3200 entropy=17.6112 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 42560] reward=-118713252.6 actor_loss=0.2241 critic_loss=150452203760.9412 entropy=17.6155 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 42560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430251.4 mean_steps=15.8
|
|
[Episode 42570] reward=-115036121.2 actor_loss=0.3679 critic_loss=148200432786.2857 entropy=17.6253 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 42580] reward=-126485997.3 actor_loss=0.2285 critic_loss=159878300194.1333 entropy=17.6135 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 42580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450642.9 mean_steps=15.1
|
|
[Episode 42590] reward=-118030877.5 actor_loss=0.3208 critic_loss=146740730993.7778 entropy=17.6161 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 42600] reward=-121057372.4 actor_loss=0.2628 critic_loss=148032630925.2414 entropy=17.6104 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 42600] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-305095.7 mean_steps=16.9
|
|
[Episode 42610] reward=-121211165.5 actor_loss=0.2808 critic_loss=151820922197.3333 entropy=17.6189 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 42620] reward=-133434455.9 actor_loss=0.2574 critic_loss=723532080742.4000 entropy=17.6280 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 42620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-348153.3 mean_steps=16.2
|
|
[Episode 42630] reward=-130741476.7 actor_loss=0.3160 critic_loss=713710245205.3334 entropy=17.6230 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 42640] reward=-119547437.6 actor_loss=0.3279 critic_loss=146228194417.7778 entropy=17.6084 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 42640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-464154.4 mean_steps=16.1
|
|
[Episode 42650] reward=-117308221.1 actor_loss=0.3478 critic_loss=136050840189.1555 entropy=17.6187 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 42660] reward=-113731258.8 actor_loss=0.3551 critic_loss=140610558361.6000 entropy=17.6154 approx_kl=0.0056 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 42660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468408.2 mean_steps=15.1
|
|
[Episode 42670] reward=-121580027.1 actor_loss=0.3124 critic_loss=151432900803.0476 entropy=17.6209 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 42680] reward=-121530460.7 actor_loss=0.3059 critic_loss=171490594542.9333 entropy=17.6247 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 42680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-473795.9 mean_steps=13.7
|
|
[Episode 42690] reward=-119053012.0 actor_loss=0.3156 critic_loss=148222706192.5161 entropy=17.6279 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 42700] reward=-118099136.2 actor_loss=0.2650 critic_loss=146235504867.5555 entropy=17.6349 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 42700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-615828.7 mean_steps=14.0
|
|
[Episode 42710] reward=-124287279.5 actor_loss=0.1461 critic_loss=148270743161.9048 entropy=17.6370 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 42720] reward=-119574244.5 actor_loss=0.3395 critic_loss=163870084437.3333 entropy=17.6415 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 42720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-540989.6 mean_steps=12.8
|
|
[Episode 42730] reward=-117538134.8 actor_loss=0.2068 critic_loss=139095791802.1818 entropy=17.6363 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 42740] reward=-123055471.8 actor_loss=0.2582 critic_loss=267948488411.4286 entropy=17.6329 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 42740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386574.3 mean_steps=16.4
|
|
[Episode 42750] reward=-122334924.9 actor_loss=0.2603 critic_loss=177134074479.3044 entropy=17.6275 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 42760] reward=-124227363.3 actor_loss=0.2798 critic_loss=169737414332.6316 entropy=17.6186 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 42760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-478100.2 mean_steps=14.9
|
|
[Episode 42770] reward=-124637654.5 actor_loss=0.2559 critic_loss=155959180947.9111 entropy=17.6318 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 42780] reward=-119169152.8 actor_loss=0.2827 critic_loss=143203625779.2000 entropy=17.6250 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 42780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576779.7 mean_steps=13.5
|
|
[Episode 42790] reward=-115452476.2 actor_loss=0.3401 critic_loss=141294015692.8000 entropy=17.6137 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 42800] reward=-111447282.9 actor_loss=0.2865 critic_loss=135597847732.7059 entropy=17.6145 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 42800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-426602.0 mean_steps=17.2
|
|
[Episode 42810] reward=-119006172.4 actor_loss=0.1402 critic_loss=143802790083.0476 entropy=17.6011 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 42820] reward=-122651643.6 actor_loss=0.2349 critic_loss=159031358805.3333 entropy=17.5928 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 42820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-443669.0 mean_steps=14.9
|
|
[Episode 42830] reward=-121076155.8 actor_loss=0.2588 critic_loss=147641475072.0000 entropy=17.6022 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 42840] reward=-126481708.0 actor_loss=0.2799 critic_loss=229022090854.4000 entropy=17.5876 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 42840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-461516.7 mean_steps=14.3
|
|
[Episode 42850] reward=-115774080.8 actor_loss=0.4229 critic_loss=136740225987.7647 entropy=17.5868 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 42860] reward=-121640426.6 actor_loss=0.3278 critic_loss=207001944064.0000 entropy=17.5889 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 42860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445100.7 mean_steps=15.2
|
|
[Episode 42870] reward=-116600461.1 actor_loss=0.3151 critic_loss=138566263226.8108 entropy=17.6121 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 42880] reward=-117952453.0 actor_loss=0.3497 critic_loss=170466012760.2758 entropy=17.6060 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 42880] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-302474.7 mean_steps=17.2
|
|
[Episode 42890] reward=-121145767.1 actor_loss=0.2566 critic_loss=152128880640.0000 entropy=17.6140 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 42900] reward=-121207866.1 actor_loss=0.2480 critic_loss=143410794086.4000 entropy=17.6259 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 42900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-502226.5 mean_steps=13.5
|
|
[Episode 42910] reward=-118248117.8 actor_loss=0.3893 critic_loss=141732619150.2222 entropy=17.6319 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 42920] reward=-118842512.0 actor_loss=0.2553 critic_loss=145063178240.0000 entropy=17.6446 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 42920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490446.2 mean_steps=14.4
|
|
[Episode 42930] reward=-118117224.8 actor_loss=0.2410 critic_loss=141138270720.0000 entropy=17.6434 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 42940] reward=-115933813.1 actor_loss=0.3485 critic_loss=139541946368.0000 entropy=17.6577 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 42940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438770.7 mean_steps=15.8
|
|
[Episode 42950] reward=-117634773.5 actor_loss=0.3420 critic_loss=146623072548.5714 entropy=17.6467 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 42960] reward=-113488257.4 actor_loss=0.3392 critic_loss=142960767658.6667 entropy=17.6448 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 42960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423818.9 mean_steps=15.6
|
|
[Episode 42970] reward=-124891116.8 actor_loss=0.3041 critic_loss=184670387511.6522 entropy=17.6481 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 42980] reward=-119843500.5 actor_loss=0.3060 critic_loss=141499491864.3810 entropy=17.6573 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 42980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-646639.9 mean_steps=13.7
|
|
[Episode 42990] reward=-125569626.9 actor_loss=0.3084 critic_loss=158341606219.2941 entropy=17.6433 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 43000] reward=-120603848.3 actor_loss=0.2362 critic_loss=154425005585.6552 entropy=17.6315 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 43000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-400069.1 mean_steps=14.5
|
|
[Episode 43010] reward=-121478432.1 actor_loss=0.2652 critic_loss=199633184013.4737 entropy=17.6312 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 43020] reward=-120964469.3 actor_loss=0.3021 critic_loss=145005127149.0370 entropy=17.6278 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 43020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481818.2 mean_steps=14.8
|
|
[Episode 43030] reward=-120983883.9 actor_loss=0.3015 critic_loss=148067526585.3793 entropy=17.6356 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 43040] reward=-119475851.3 actor_loss=0.3233 critic_loss=149241776971.2941 entropy=17.6286 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 43040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580836.7 mean_steps=12.8
|
|
[Episode 43050] reward=-118074256.3 actor_loss=0.2772 critic_loss=153495392471.5789 entropy=17.6256 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 43060] reward=-121962109.7 actor_loss=0.3966 critic_loss=175075896621.1765 entropy=17.6208 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 43060] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-662806.9 mean_steps=11.5
|
|
[Episode 43070] reward=-116170802.4 actor_loss=0.3678 critic_loss=148097159346.0869 entropy=17.6313 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 43080] reward=-118964585.4 actor_loss=0.1956 critic_loss=143012772193.1035 entropy=17.6307 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 43080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592099.2 mean_steps=12.9
|
|
[Episode 43090] reward=-120340593.3 actor_loss=0.3389 critic_loss=184088578667.1628 entropy=17.6384 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 43100] reward=-122906672.4 actor_loss=0.2791 critic_loss=152065019904.0000 entropy=17.6289 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 43100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507660.1 mean_steps=14.1
|
|
[Episode 43110] reward=-117251407.0 actor_loss=0.3358 critic_loss=150380266184.3478 entropy=17.6302 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 43120] reward=-123540426.5 actor_loss=0.3099 critic_loss=166539785947.4286 entropy=17.6272 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 43120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-660541.8 mean_steps=12.4
|
|
[Episode 43130] reward=-122936411.9 actor_loss=0.2601 critic_loss=153483831198.4762 entropy=17.6287 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 43140] reward=-122575021.4 actor_loss=0.1986 critic_loss=149691168995.5555 entropy=17.6296 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 43140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-428796.2 mean_steps=16.8
|
|
[Episode 43150] reward=-121624618.6 actor_loss=0.4234 critic_loss=154277468023.4667 entropy=17.6358 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1510 front_blocked=0
|
|
[Episode 43160] reward=-115642135.8 actor_loss=0.3970 critic_loss=140640167209.2903 entropy=17.6323 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 43160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488245.3 mean_steps=14.1
|
|
[Episode 43170] reward=-114737072.5 actor_loss=0.2985 critic_loss=138193973365.0286 entropy=17.6267 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 43180] reward=-119216638.0 actor_loss=0.3067 critic_loss=145566484126.8965 entropy=17.6285 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 43180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-413658.9 mean_steps=15.3
|
|
[Episode 43190] reward=-122503297.8 actor_loss=0.3525 critic_loss=148736934980.2667 entropy=17.6200 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 43200] reward=-123243766.2 actor_loss=0.3147 critic_loss=154659916762.0741 entropy=17.6220 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 43200] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-303901.3 mean_steps=18.2
|
|
[Episode 43210] reward=-117856367.0 actor_loss=0.3469 critic_loss=139755480678.4000 entropy=17.6194 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 43220] reward=-120760164.1 actor_loss=0.3173 critic_loss=147090630610.4889 entropy=17.6172 approx_kl=0.0077 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 43220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-494459.6 mean_steps=15.2
|
|
[Episode 43230] reward=-122379709.6 actor_loss=0.2807 critic_loss=145162300695.2727 entropy=17.6143 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 43240] reward=-121822657.3 actor_loss=0.2586 critic_loss=147625676344.8889 entropy=17.6134 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 43240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-590356.6 mean_steps=12.1
|
|
[Episode 43250] reward=-123583873.8 actor_loss=0.1776 critic_loss=178017105237.3333 entropy=17.6165 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 43260] reward=-120612190.9 actor_loss=0.3602 critic_loss=283900197888.0000 entropy=17.6176 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 43260] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-604785.1 mean_steps=11.9
|
|
[Episode 43270] reward=-118330993.2 actor_loss=0.3529 critic_loss=146789259673.6000 entropy=17.6164 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 43280] reward=-115590103.8 actor_loss=0.1989 critic_loss=140327781262.2222 entropy=17.6265 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 43280] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-336143.0 mean_steps=17.2
|
|
[Episode 43290] reward=-115990833.9 actor_loss=0.2894 critic_loss=139841026785.2800 entropy=17.6421 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 43300] reward=-121107193.9 actor_loss=0.2430 critic_loss=152368971145.8462 entropy=17.6508 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 43300] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-274627.6 mean_steps=17.8
|
|
[Episode 43310] reward=-120610023.0 actor_loss=0.2406 critic_loss=149120987376.9412 entropy=17.6489 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 43320] reward=-117430544.8 actor_loss=0.3060 critic_loss=153719474242.0645 entropy=17.6537 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 43320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-577557.1 mean_steps=12.2
|
|
[Episode 43330] reward=-120937095.2 actor_loss=0.2872 critic_loss=146520393794.0645 entropy=17.6545 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 43340] reward=-122429763.6 actor_loss=0.2984 critic_loss=149343400755.2000 entropy=17.6590 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 43340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-543445.9 mean_steps=15.5
|
|
[Episode 43350] reward=-115809245.5 actor_loss=0.3917 critic_loss=141379877707.2941 entropy=17.6575 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 43360] reward=-119517500.7 actor_loss=0.2446 critic_loss=145128762338.7429 entropy=17.6625 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 43360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581992.0 mean_steps=13.2
|
|
[Episode 43370] reward=-115933214.8 actor_loss=0.2097 critic_loss=140454981795.8400 entropy=17.6583 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 43380] reward=-118331527.3 actor_loss=0.2132 critic_loss=151080012458.6667 entropy=17.6681 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 43380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458582.2 mean_steps=15.3
|
|
[Episode 43390] reward=-121568263.3 actor_loss=0.2708 critic_loss=152398919590.9565 entropy=17.6581 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 43400] reward=-122870970.9 actor_loss=0.1953 critic_loss=148080506103.1724 entropy=17.6712 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 43400] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-654859.2 mean_steps=12.7
|
|
[Episode 43410] reward=-120994920.1 actor_loss=0.2836 critic_loss=147166598485.3333 entropy=17.6752 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 43420] reward=-118298648.3 actor_loss=0.3071 critic_loss=144659214172.1600 entropy=17.6616 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 43420] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-350031.5 mean_steps=18.9
|
|
[Episode 43430] reward=-117099379.7 actor_loss=0.2187 critic_loss=177148783820.8000 entropy=17.6612 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 43440] reward=-119416794.3 actor_loss=0.2063 critic_loss=146040553914.8108 entropy=17.6512 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 43440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386877.5 mean_steps=16.8
|
|
[Episode 43450] reward=-123212200.7 actor_loss=0.2983 critic_loss=223840980992.0000 entropy=17.6609 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 43460] reward=-115723118.6 actor_loss=0.2074 critic_loss=156435265035.3778 entropy=17.6457 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 43460] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-349075.3 mean_steps=17.8
|
|
[Episode 43470] reward=-120979602.3 actor_loss=0.3501 critic_loss=148137245627.7333 entropy=17.6456 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 43480] reward=-120594752.8 actor_loss=0.3651 critic_loss=155121807711.0857 entropy=17.6516 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 43480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468502.6 mean_steps=15.7
|
|
[Episode 43490] reward=-122152092.9 actor_loss=0.2417 critic_loss=153191976374.8571 entropy=17.6560 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 43500] reward=-116717531.7 actor_loss=0.3154 critic_loss=151300335691.8518 entropy=17.6466 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 43500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423056.0 mean_steps=15.8
|
|
[Episode 43510] reward=-115424800.3 actor_loss=0.3051 critic_loss=139419842332.4445 entropy=17.6373 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 43520] reward=-124853120.8 actor_loss=0.2543 critic_loss=151947764622.2222 entropy=17.6358 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 43520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-558975.1 mean_steps=14.8
|
|
[Episode 43530] reward=-120458719.1 actor_loss=0.2600 critic_loss=150807203104.8205 entropy=17.6372 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 43540] reward=-115978976.6 actor_loss=0.2488 critic_loss=142852336675.3103 entropy=17.6313 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 43540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521171.5 mean_steps=14.7
|
|
[Episode 43550] reward=-118911208.0 actor_loss=0.3114 critic_loss=142368196120.3810 entropy=17.6210 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 43560] reward=-122940790.7 actor_loss=0.2254 critic_loss=150023501047.1724 entropy=17.6233 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 43560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-496384.0 mean_steps=16.4
|
|
[Episode 43570] reward=-112437660.9 actor_loss=0.3201 critic_loss=139088870513.7778 entropy=17.6206 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 43580] reward=-112278882.9 actor_loss=0.3136 critic_loss=134308278272.0000 entropy=17.6100 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 43580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-517202.1 mean_steps=12.4
|
|
[Episode 43590] reward=-114402559.7 actor_loss=0.3222 critic_loss=139044389632.0000 entropy=17.6014 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 43600] reward=-116597776.5 actor_loss=0.2698 critic_loss=140825286997.3333 entropy=17.5914 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 43600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501398.1 mean_steps=13.6
|
|
[Episode 43610] reward=-120670380.2 actor_loss=0.2955 critic_loss=194640561341.6296 entropy=17.5930 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 43620] reward=-121020022.6 actor_loss=0.2550 critic_loss=146215293588.6452 entropy=17.5871 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 43620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-600180.9 mean_steps=14.1
|
|
[Episode 43630] reward=-120765757.0 actor_loss=0.2218 critic_loss=151353332349.1555 entropy=17.5835 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 43640] reward=-117473617.4 actor_loss=0.2864 critic_loss=142675247286.0444 entropy=17.5766 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 43640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-503471.7 mean_steps=13.4
|
|
[Episode 43650] reward=-119333047.7 actor_loss=0.1708 critic_loss=142653403858.8235 entropy=17.5863 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 43660] reward=-120004675.1 actor_loss=0.2649 critic_loss=149021104537.6000 entropy=17.5762 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 43660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485326.0 mean_steps=14.6
|
|
[Episode 43670] reward=-116390609.8 actor_loss=0.3319 critic_loss=141526230812.4445 entropy=17.5700 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 43680] reward=-118939960.2 actor_loss=0.2952 critic_loss=141263631397.9259 entropy=17.5774 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 43680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449671.3 mean_steps=14.8
|
|
[Episode 43690] reward=-121509805.1 actor_loss=0.2601 critic_loss=149158658404.1739 entropy=17.5684 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 43700] reward=-122718177.8 actor_loss=0.3528 critic_loss=254848307655.1111 entropy=17.5664 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 43700] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-365977.0 mean_steps=16.6
|
|
[Episode 43710] reward=-116072785.6 actor_loss=0.3597 critic_loss=144648932352.0000 entropy=17.5732 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 43720] reward=-120408155.6 actor_loss=0.2972 critic_loss=145293196902.4000 entropy=17.5911 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 43720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488358.7 mean_steps=15.6
|
|
[Episode 43730] reward=-120035572.0 actor_loss=0.3004 critic_loss=145313482160.3556 entropy=17.6010 approx_kl=0.0108 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 43740] reward=-117729504.5 actor_loss=0.2420 critic_loss=140895049386.6667 entropy=17.6088 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 43740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501127.8 mean_steps=14.4
|
|
[Episode 43750] reward=-119937600.3 actor_loss=0.2643 critic_loss=145168490811.0769 entropy=17.6213 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 43760] reward=-116740889.6 actor_loss=0.3365 critic_loss=189294812501.3333 entropy=17.6245 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 43760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503929.8 mean_steps=15.7
|
|
[Episode 43770] reward=-121744119.4 actor_loss=0.3011 critic_loss=148384702242.5946 entropy=17.6412 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 43780] reward=-122950887.9 actor_loss=0.2249 critic_loss=148416532889.6000 entropy=17.6445 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 43780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-504701.3 mean_steps=16.6
|
|
[Episode 43790] reward=-117609901.5 actor_loss=0.3594 critic_loss=258987997742.5454 entropy=17.6436 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 43800] reward=-122836593.1 actor_loss=0.3229 critic_loss=203367889408.0000 entropy=17.6562 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 43800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530273.5 mean_steps=13.6
|
|
[Episode 43810] reward=-113154040.0 actor_loss=0.3008 critic_loss=139901597882.1818 entropy=17.6393 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 43820] reward=-125624245.6 actor_loss=0.3152 critic_loss=156210519433.8462 entropy=17.6546 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 43820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402531.6 mean_steps=15.7
|
|
[Episode 43830] reward=-121549764.1 actor_loss=0.2938 critic_loss=147207076717.7143 entropy=17.6494 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 43840] reward=-116662660.1 actor_loss=0.2668 critic_loss=148964291806.6087 entropy=17.6504 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 43840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518287.5 mean_steps=13.8
|
|
[Episode 43850] reward=-116216760.7 actor_loss=0.3453 critic_loss=141454980437.3333 entropy=17.6496 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 43860] reward=-113771939.4 actor_loss=0.3090 critic_loss=140742489429.3333 entropy=17.6550 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 43860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449470.3 mean_steps=15.8
|
|
[Episode 43870] reward=-119733277.5 actor_loss=0.2981 critic_loss=307307013266.2857 entropy=17.6483 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 43880] reward=-117312034.3 actor_loss=0.3067 critic_loss=146881987788.8000 entropy=17.6418 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 43880] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612472.1 mean_steps=12.2
|
|
[Episode 43890] reward=-243306368.0 actor_loss=0.2088 critic_loss=37417444005205.3359 entropy=17.6405 approx_kl=0.0057 kl_stop=0 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 43900] reward=-118424004.8 actor_loss=0.2679 critic_loss=144556346900.4800 entropy=17.6481 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 43900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641619.4 mean_steps=12.2
|
|
[Episode 43910] reward=-122446877.5 actor_loss=0.3267 critic_loss=177802536082.2857 entropy=17.6415 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 43920] reward=-116593180.5 actor_loss=0.2332 critic_loss=142175867790.2222 entropy=17.6510 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 43920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-407524.1 mean_steps=14.7
|
|
[Episode 43930] reward=-116451356.9 actor_loss=0.2826 critic_loss=140666727330.9091 entropy=17.6383 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 43940] reward=-121490630.1 actor_loss=0.2653 critic_loss=152577372694.2609 entropy=17.6254 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 43940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-663633.9 mean_steps=12.7
|
|
[Episode 43950] reward=-113582704.4 actor_loss=0.3812 critic_loss=138195931287.7037 entropy=17.6307 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 43960] reward=-114207131.9 actor_loss=0.3146 critic_loss=135472351027.2000 entropy=17.6150 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 43960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448107.9 mean_steps=16.0
|
|
[Episode 43970] reward=-117399344.8 actor_loss=0.2693 critic_loss=141013797156.5714 entropy=17.6210 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 43980] reward=-121229926.9 actor_loss=0.2877 critic_loss=147019852458.6667 entropy=17.6094 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 43980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480203.3 mean_steps=14.4
|
|
[Episode 43990] reward=-120784317.1 actor_loss=0.2584 critic_loss=145245987752.2286 entropy=17.6004 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 44000] reward=-115376880.4 actor_loss=0.3031 critic_loss=142736591282.4243 entropy=17.6223 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 44000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-591705.2 mean_steps=11.8
|
|
[Episode 44010] reward=-115852920.2 actor_loss=0.3034 critic_loss=142124228139.8857 entropy=17.6159 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 44020] reward=-119466453.4 actor_loss=0.3837 critic_loss=162578153040.8421 entropy=17.6144 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 44020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-412604.5 mean_steps=15.6
|
|
[Episode 44030] reward=-116368119.1 actor_loss=0.4097 critic_loss=149421211648.0000 entropy=17.6086 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 44040] reward=-128579846.8 actor_loss=0.3320 critic_loss=504985795242.6667 entropy=17.6140 approx_kl=0.0050 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 44040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439619.1 mean_steps=15.1
|
|
[Episode 44050] reward=-118807599.3 actor_loss=0.3443 critic_loss=154131420645.0526 entropy=17.6136 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 44060] reward=-124510670.0 actor_loss=0.4192 critic_loss=452873623365.8182 entropy=17.6242 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 44060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555236.5 mean_steps=13.9
|
|
[Episode 44070] reward=-117824918.5 actor_loss=0.2067 critic_loss=141771214028.8000 entropy=17.6267 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 44080] reward=-115441239.3 actor_loss=0.3649 critic_loss=140356021760.0000 entropy=17.6387 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 44080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467870.2 mean_steps=14.3
|
|
[Episode 44090] reward=-120227886.2 actor_loss=0.2711 critic_loss=146269404715.8857 entropy=17.6440 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 44100] reward=-118257867.0 actor_loss=0.2763 critic_loss=144891352157.0909 entropy=17.6422 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 44100] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-652338.2 mean_steps=12.3
|
|
[Episode 44110] reward=-116995482.4 actor_loss=0.3396 critic_loss=157722982400.0000 entropy=17.6409 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 44120] reward=-120237181.7 actor_loss=0.3808 critic_loss=148618976460.8000 entropy=17.6408 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 44120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-484358.4 mean_steps=16.4
|
|
[Episode 44130] reward=-119034816.1 actor_loss=0.4185 critic_loss=225524213532.4445 entropy=17.6484 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 44140] reward=-130124164.2 actor_loss=0.3025 critic_loss=559362685415.6190 entropy=17.6624 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 44140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488210.5 mean_steps=14.2
|
|
[Episode 44150] reward=-118164428.6 actor_loss=0.3314 critic_loss=145313627447.6522 entropy=17.6697 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 44160] reward=-124527131.9 actor_loss=0.3520 critic_loss=156807007072.7111 entropy=17.6725 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 44160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-464213.6 mean_steps=16.1
|
|
[Episode 44170] reward=-120788431.2 actor_loss=0.2419 critic_loss=143709321688.6154 entropy=17.6800 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 44180] reward=-121364349.8 actor_loss=0.3002 critic_loss=142669564222.5778 entropy=17.6791 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 44180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537974.7 mean_steps=13.6
|
|
[Episode 44190] reward=-119773569.1 actor_loss=0.3121 critic_loss=152623046109.8667 entropy=17.6917 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 44200] reward=-124500914.6 actor_loss=0.2413 critic_loss=156374403572.6222 entropy=17.7079 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 44200] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-619769.7 mean_steps=13.1
|
|
[Episode 44210] reward=-118313868.4 actor_loss=0.2355 critic_loss=154350713241.6000 entropy=17.6913 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 44220] reward=-185248404.7 actor_loss=15.9681 critic_loss=15714963982472.5332 entropy=17.6769 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 44220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486767.6 mean_steps=14.3
|
|
[Episode 44230] reward=-117364600.1 actor_loss=0.3389 critic_loss=143379371417.6000 entropy=17.6785 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 44240] reward=-117279661.4 actor_loss=0.2609 critic_loss=158584796891.4286 entropy=17.6682 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 44240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-437600.1 mean_steps=16.4
|
|
[Episode 44250] reward=-116605062.7 actor_loss=0.2846 critic_loss=145290895177.9556 entropy=17.6514 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 44260] reward=-115979576.9 actor_loss=0.2643 critic_loss=140682963990.7556 entropy=17.6469 approx_kl=0.0068 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 44260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504936.8 mean_steps=14.6
|
|
[Episode 44270] reward=-120870467.3 actor_loss=0.3366 critic_loss=145991196672.0000 entropy=17.6398 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 44280] reward=-128753178.3 actor_loss=0.2824 critic_loss=372836718405.8182 entropy=17.6438 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 44280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544175.8 mean_steps=14.8
|
|
[Episode 44290] reward=-116447921.7 actor_loss=0.3419 critic_loss=144223799019.2433 entropy=17.6382 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 44300] reward=-105263376.5 actor_loss=0.3486 critic_loss=132101720994.9091 entropy=17.6180 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 44300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462003.9 mean_steps=15.9
|
|
[Episode 44310] reward=-120693165.2 actor_loss=0.1550 critic_loss=174948044153.2632 entropy=17.6195 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Episode 44320] reward=-116497697.3 actor_loss=0.3442 critic_loss=139434241930.9714 entropy=17.6264 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 44320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462029.5 mean_steps=15.2
|
|
[Episode 44330] reward=-111135246.0 actor_loss=0.2368 critic_loss=135321804409.9048 entropy=17.6261 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Episode 44340] reward=-118469680.6 actor_loss=0.1984 critic_loss=167730221371.0769 entropy=17.6355 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 44340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-659695.3 mean_steps=12.4
|
|
[Episode 44350] reward=-114325981.0 actor_loss=0.3181 critic_loss=138436139506.1622 entropy=17.6483 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 44360] reward=-122406402.2 actor_loss=0.2667 critic_loss=145426537585.7778 entropy=17.6541 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 44360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468405.4 mean_steps=14.2
|
|
[Episode 44370] reward=-120254842.9 actor_loss=0.3462 critic_loss=150386423718.9565 entropy=17.6384 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 44380] reward=-117150978.6 actor_loss=0.2466 critic_loss=140427626968.6154 entropy=17.6446 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 44380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-367549.2 mean_steps=16.7
|
|
[Episode 44390] reward=-116736958.3 actor_loss=0.2893 critic_loss=148034733444.4138 entropy=17.6408 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 44400] reward=-114066511.0 actor_loss=0.3684 critic_loss=145863747334.2439 entropy=17.6369 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 44400] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-368267.0 mean_steps=18.6
|
|
[Episode 44410] reward=-119548928.0 actor_loss=0.2867 critic_loss=145222863530.6667 entropy=17.6366 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 44420] reward=-117889493.4 actor_loss=0.2769 critic_loss=146316509790.8148 entropy=17.6201 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 44420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503806.5 mean_steps=14.6
|
|
[Episode 44430] reward=-123186173.4 actor_loss=0.1840 critic_loss=150118497441.6842 entropy=17.6167 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 44440] reward=-114982589.6 actor_loss=0.3125 critic_loss=134596574102.0690 entropy=17.6223 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 44440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478327.0 mean_steps=14.2
|
|
[Episode 44450] reward=-117587304.5 actor_loss=0.3633 critic_loss=140942716776.2963 entropy=17.6229 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 44460] reward=-121433672.6 actor_loss=0.2910 critic_loss=175289034752.0000 entropy=17.6213 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 44460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-432379.7 mean_steps=16.9
|
|
[Episode 44470] reward=-121908178.7 actor_loss=0.2785 critic_loss=144947997448.8276 entropy=17.6335 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 44480] reward=-119238829.7 actor_loss=0.2569 critic_loss=139750397815.4667 entropy=17.6410 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 44480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484696.4 mean_steps=14.2
|
|
[Episode 44490] reward=-120068430.7 actor_loss=0.3067 critic_loss=144232892482.0645 entropy=17.6424 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 44500] reward=-121554906.5 actor_loss=0.2296 critic_loss=150124053904.6956 entropy=17.6347 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 44500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-550300.9 mean_steps=12.8
|
|
[Episode 44510] reward=-117404296.4 actor_loss=0.3680 critic_loss=149440964794.1818 entropy=17.6208 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 44520] reward=-116498914.8 actor_loss=0.2768 critic_loss=140480361946.5366 entropy=17.6351 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 44520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462699.6 mean_steps=14.9
|
|
[Episode 44530] reward=-116215124.8 actor_loss=0.2272 critic_loss=135821922655.0857 entropy=17.6293 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 44540] reward=-118523513.0 actor_loss=0.3188 critic_loss=138742469017.6000 entropy=17.6292 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 44540] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-325966.9 mean_steps=16.6
|
|
[Episode 44550] reward=-117140183.0 actor_loss=0.4010 critic_loss=141691252584.2963 entropy=17.6259 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1491 front_blocked=0
|
|
[Episode 44560] reward=-124002814.4 actor_loss=0.2036 critic_loss=149644557555.8095 entropy=17.6287 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 44560] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-405997.5 mean_steps=16.1
|
|
[Episode 44570] reward=-122707825.8 actor_loss=0.2947 critic_loss=158313116103.1111 entropy=17.6288 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 44580] reward=-118301227.4 actor_loss=0.3030 critic_loss=153114386432.0000 entropy=17.6344 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 44580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461951.5 mean_steps=15.3
|
|
[Episode 44590] reward=-117313767.7 actor_loss=0.2886 critic_loss=139467404811.3778 entropy=17.6371 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 44600] reward=-123984021.1 actor_loss=0.2231 critic_loss=150643981839.5151 entropy=17.6558 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 44600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-352808.7 mean_steps=16.1
|
|
[Episode 44610] reward=-117776893.1 actor_loss=0.3268 critic_loss=143448343473.2308 entropy=17.6521 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 44620] reward=-128469450.7 actor_loss=0.2370 critic_loss=334287253325.9130 entropy=17.6486 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 44620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-372180.9 mean_steps=14.6
|
|
[Episode 44630] reward=-121347323.2 actor_loss=0.3272 critic_loss=161566441472.0000 entropy=17.6509 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 44640] reward=-116329192.0 actor_loss=0.3818 critic_loss=141383188935.1111 entropy=17.6539 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 44640] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-360183.6 mean_steps=16.4
|
|
[Episode 44650] reward=-118031765.4 actor_loss=0.2447 critic_loss=150916830354.2857 entropy=17.6624 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 44660] reward=-123312777.7 actor_loss=0.2453 critic_loss=153530834616.3200 entropy=17.6616 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 44660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612311.3 mean_steps=12.0
|
|
[Episode 44670] reward=-117826698.8 actor_loss=0.2405 critic_loss=145980794880.0000 entropy=17.6650 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 44680] reward=-116067176.9 actor_loss=0.3076 critic_loss=146063966208.0000 entropy=17.6622 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 44680] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-712154.4 mean_steps=11.2
|
|
[Episode 44690] reward=-120454182.1 actor_loss=0.2790 critic_loss=149509959033.2632 entropy=17.6605 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 44700] reward=-118160663.0 actor_loss=0.2824 critic_loss=144914869283.3103 entropy=17.6646 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 44700] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-708247.4 mean_steps=11.7
|
|
[Episode 44710] reward=-123913146.2 actor_loss=0.2512 critic_loss=147929877552.7619 entropy=17.6637 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 44720] reward=-115653503.4 actor_loss=0.3021 critic_loss=154703892695.5789 entropy=17.6569 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 44720] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-770832.5 mean_steps=10.2
|
|
[Episode 44730] reward=-122215779.9 actor_loss=0.3152 critic_loss=150236822732.8000 entropy=17.6476 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 44740] reward=-121498966.2 actor_loss=0.2443 critic_loss=156624602978.4615 entropy=17.6375 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 44740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531131.2 mean_steps=14.7
|
|
[Episode 44750] reward=-123777890.3 actor_loss=0.3370 critic_loss=297076283284.2105 entropy=17.6435 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 44760] reward=-112853696.1 actor_loss=0.3087 critic_loss=154108187807.2889 entropy=17.6553 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 44760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-505875.6 mean_steps=14.8
|
|
[Episode 44770] reward=-119208275.3 actor_loss=0.3577 critic_loss=147793402265.6000 entropy=17.6811 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 44780] reward=-122500277.6 actor_loss=0.2519 critic_loss=154689921258.0571 entropy=17.6800 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 44780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493916.4 mean_steps=14.9
|
|
[Episode 44790] reward=-117091285.1 actor_loss=0.2511 critic_loss=144760754995.2000 entropy=17.6895 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 44800] reward=-115347176.8 actor_loss=0.3834 critic_loss=143306336938.6667 entropy=17.6950 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 44800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-644722.2 mean_steps=12.6
|
|
[Episode 44810] reward=-123985721.4 actor_loss=0.3797 critic_loss=167790004797.4400 entropy=17.6937 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 44820] reward=-117311419.0 actor_loss=0.3418 critic_loss=210335478837.8947 entropy=17.7020 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 44820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497991.4 mean_steps=14.3
|
|
[Episode 44830] reward=-118821943.9 actor_loss=0.2854 critic_loss=151983387922.7317 entropy=17.6966 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 44840] reward=-117654209.2 actor_loss=0.2579 critic_loss=146784238250.6667 entropy=17.6930 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 44840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478824.1 mean_steps=14.2
|
|
[Episode 44850] reward=-115402492.9 actor_loss=0.3568 critic_loss=144809217807.0588 entropy=17.7042 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 44860] reward=-122670645.0 actor_loss=0.2563 critic_loss=195376975052.8000 entropy=17.6941 approx_kl=0.0049 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 44860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559522.4 mean_steps=12.8
|
|
[Episode 44870] reward=-116171150.8 actor_loss=0.3980 critic_loss=141272209115.4286 entropy=17.6920 approx_kl=0.0108 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 44880] reward=-117103089.9 actor_loss=0.2525 critic_loss=142968884766.1176 entropy=17.6924 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 44880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484129.0 mean_steps=14.3
|
|
[Episode 44890] reward=-121137759.5 actor_loss=0.3428 critic_loss=147019879765.3333 entropy=17.6941 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 44900] reward=-122297137.1 actor_loss=0.2022 critic_loss=161274600106.6667 entropy=17.6823 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 44900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450308.9 mean_steps=15.9
|
|
[Episode 44910] reward=-120852467.4 actor_loss=0.1893 critic_loss=146767197962.2400 entropy=17.7003 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 44920] reward=-115338424.1 actor_loss=0.3182 critic_loss=172481419309.5111 entropy=17.7116 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 44920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410287.1 mean_steps=15.6
|
|
[Episode 44930] reward=-117294190.0 actor_loss=0.3257 critic_loss=143572883456.0000 entropy=17.7008 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 44940] reward=-121762803.3 actor_loss=0.1988 critic_loss=149155515596.8000 entropy=17.7025 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 44940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-596509.4 mean_steps=12.7
|
|
[Episode 44950] reward=-122879920.5 actor_loss=0.1996 critic_loss=145284426301.4400 entropy=17.6942 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 44960] reward=-121144736.8 actor_loss=0.2291 critic_loss=180708774260.3636 entropy=17.6939 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 44960] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-652927.9 mean_steps=11.5
|
|
[Episode 44970] reward=-115162424.1 actor_loss=0.3511 critic_loss=136253595648.0000 entropy=17.6914 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 44980] reward=-121672517.4 actor_loss=0.1982 critic_loss=141615286810.9474 entropy=17.6877 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 44980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-435855.9 mean_steps=17.1
|
|
[Episode 44990] reward=-119582579.8 actor_loss=0.2820 critic_loss=182386186295.3513 entropy=17.6812 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 45000] reward=-119454057.2 actor_loss=0.3217 critic_loss=153015915178.6667 entropy=17.6846 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 45000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470019.9 mean_steps=14.9
|
|
[Episode 45010] reward=-118883559.9 actor_loss=0.2718 critic_loss=139638601570.4615 entropy=17.6956 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 45020] reward=-115776948.9 actor_loss=0.2143 critic_loss=140488113395.8095 entropy=17.6816 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 45020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556484.0 mean_steps=13.8
|
|
[Episode 45030] reward=-123111074.6 actor_loss=0.3225 critic_loss=214205047603.2000 entropy=17.6744 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 45040] reward=-119893511.5 actor_loss=0.2817 critic_loss=143907448077.4737 entropy=17.6567 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 45040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-438086.6 mean_steps=15.8
|
|
[Episode 45050] reward=-113975575.8 actor_loss=0.3330 critic_loss=163789056186.1818 entropy=17.6417 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 45060] reward=-110851539.6 actor_loss=0.3086 critic_loss=133330461416.7273 entropy=17.6403 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 45060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-493242.8 mean_steps=15.3
|
|
[Episode 45070] reward=-118355366.1 actor_loss=0.2649 critic_loss=142273773958.0952 entropy=17.6345 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 45080] reward=-118103878.1 actor_loss=0.1254 critic_loss=143513597269.3333 entropy=17.6359 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Eval 45080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-548362.9 mean_steps=12.8
|
|
[Episode 45090] reward=-115038156.9 actor_loss=0.2776 critic_loss=150421327189.3333 entropy=17.6382 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 45100] reward=-116747785.2 actor_loss=0.4054 critic_loss=145967994197.3333 entropy=17.6343 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 45100] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-276947.7 mean_steps=17.0
|
|
[Episode 45110] reward=-114946096.7 actor_loss=0.2705 critic_loss=140928027232.8649 entropy=17.6320 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 45120] reward=-121889849.4 actor_loss=0.2743 critic_loss=194075374214.7368 entropy=17.6369 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 45120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-344024.7 mean_steps=15.8
|
|
[Episode 45130] reward=-121787188.2 actor_loss=0.1876 critic_loss=165026609015.4667 entropy=17.6434 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 45140] reward=-114466466.1 actor_loss=0.3983 critic_loss=139624332449.6842 entropy=17.6576 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 45140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509196.0 mean_steps=14.3
|
|
[Episode 45150] reward=-112268659.4 actor_loss=0.3089 critic_loss=133977512043.7895 entropy=17.6702 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 45160] reward=-121620651.3 actor_loss=0.2780 critic_loss=248300998200.8889 entropy=17.6762 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 45160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452301.6 mean_steps=15.9
|
|
[Episode 45170] reward=-119668310.3 actor_loss=0.2710 critic_loss=148706706090.6667 entropy=17.6791 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 45180] reward=-120876912.7 actor_loss=0.2662 critic_loss=154134038820.5714 entropy=17.6923 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 45180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-486330.7 mean_steps=16.1
|
|
[Episode 45190] reward=-118224410.9 actor_loss=0.1526 critic_loss=141505783552.0000 entropy=17.6828 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 45200] reward=-117165037.8 actor_loss=0.2173 critic_loss=143664805205.3333 entropy=17.6880 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 45200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509597.4 mean_steps=14.2
|
|
[Episode 45210] reward=-121917603.6 actor_loss=0.1123 critic_loss=162027695812.9231 entropy=17.6763 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 45220] reward=-116845062.3 actor_loss=0.3439 critic_loss=151043995298.3415 entropy=17.6583 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 45220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502387.1 mean_steps=14.5
|
|
[Episode 45230] reward=-121800266.8 actor_loss=0.3771 critic_loss=149780764717.5111 entropy=17.6583 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 45240] reward=-116467768.1 actor_loss=0.4001 critic_loss=147256636939.3778 entropy=17.6406 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 45240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421058.4 mean_steps=15.8
|
|
[Episode 45250] reward=-115409843.7 actor_loss=0.3248 critic_loss=141152539270.7368 entropy=17.6398 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 45260] reward=-120321546.5 actor_loss=0.2577 critic_loss=146537113736.5333 entropy=17.6411 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 45260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-561794.5 mean_steps=14.9
|
|
[Episode 45270] reward=-115578302.5 actor_loss=0.2148 critic_loss=144504696376.8889 entropy=17.6356 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 45280] reward=-132986263.1 actor_loss=0.3912 critic_loss=1551996767940.9231 entropy=17.6269 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 45280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-333882.9 mean_steps=16.6
|
|
[Episode 45290] reward=-116595385.3 actor_loss=0.2318 critic_loss=167248205894.6207 entropy=17.6166 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 45300] reward=-117367808.5 actor_loss=0.3100 critic_loss=140787909700.2667 entropy=17.6219 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 45300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-517989.5 mean_steps=15.6
|
|
[Episode 45310] reward=-112757677.2 actor_loss=0.2632 critic_loss=139014304563.2000 entropy=17.6296 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 45320] reward=-1392574758.5 actor_loss=66.2622 critic_loss=1863912175464903.0000 entropy=17.6147 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1003 front_blocked=0
|
|
[Eval 45320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-611016.5 mean_steps=12.9
|
|
[Episode 45330] reward=-124293936.9 actor_loss=0.3912 critic_loss=532028984368.7619 entropy=17.6157 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 45340] reward=-132944830.6 actor_loss=0.2815 critic_loss=1000024665034.1052 entropy=17.6220 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 45340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535898.4 mean_steps=13.3
|
|
[Episode 45350] reward=-250895502.4 actor_loss=54.6066 critic_loss=43145606900394.6641 entropy=17.6162 approx_kl=0.0043 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 45360] reward=-231629080.4 actor_loss=0.2238 critic_loss=21842932551364.9219 entropy=17.6272 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1126 front_blocked=0
|
|
[Eval 45360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-654534.8 mean_steps=12.8
|
|
[Episode 45370] reward=-114221309.4 actor_loss=0.2837 critic_loss=141224769846.3030 entropy=17.6181 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 45380] reward=-121761177.6 actor_loss=0.2544 critic_loss=149993536079.6444 entropy=17.6391 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 45380] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-381672.5 mean_steps=16.2
|
|
[Episode 45390] reward=-113934199.6 actor_loss=0.3369 critic_loss=136593875353.6000 entropy=17.6348 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 45400] reward=-128484452.4 actor_loss=0.2689 critic_loss=161466874774.0690 entropy=17.6510 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 45400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455062.4 mean_steps=15.1
|
|
[Episode 45410] reward=-113793687.9 actor_loss=0.2741 critic_loss=133710812364.8000 entropy=17.6420 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 45420] reward=-119714508.5 actor_loss=0.2459 critic_loss=148269769031.6800 entropy=17.6557 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 45420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523464.3 mean_steps=13.2
|
|
[Episode 45430] reward=-121115419.2 actor_loss=0.2699 critic_loss=146481832846.2222 entropy=17.6706 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 45440] reward=-113125710.7 actor_loss=0.2781 critic_loss=132295273033.1429 entropy=17.6663 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 45440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-549771.0 mean_steps=14.6
|
|
[Episode 45450] reward=-121061561.3 actor_loss=0.3131 critic_loss=151630123593.1429 entropy=17.6774 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 45460] reward=-121470258.3 actor_loss=0.2780 critic_loss=145384559537.2308 entropy=17.6850 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 45460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486112.2 mean_steps=15.2
|
|
[Episode 45470] reward=-121136096.0 actor_loss=0.2286 critic_loss=166007643415.2727 entropy=17.6823 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 45480] reward=-111228121.1 actor_loss=0.2752 critic_loss=138396930867.2000 entropy=17.6897 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 45480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-386132.0 mean_steps=15.1
|
|
[Episode 45490] reward=-115900983.8 actor_loss=0.3552 critic_loss=144937465537.4222 entropy=17.6898 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 45500] reward=-120204570.5 actor_loss=0.2865 critic_loss=152874492723.2000 entropy=17.7005 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 45500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428108.1 mean_steps=15.4
|
|
[Episode 45510] reward=-127175011.2 actor_loss=0.2318 critic_loss=246379341768.6487 entropy=17.6975 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 45520] reward=-233316176.8 actor_loss=1.5483 critic_loss=37948812549597.8672 entropy=17.7064 approx_kl=0.0054 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 45520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-432699.8 mean_steps=14.9
|
|
[Episode 45530] reward=-122164416.3 actor_loss=0.1286 critic_loss=155912763255.4667 entropy=17.6863 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 45540] reward=-117746270.8 actor_loss=0.3231 critic_loss=188718323782.6207 entropy=17.6986 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 45540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-513011.6 mean_steps=14.8
|
|
[Episode 45550] reward=-122237281.3 actor_loss=0.3027 critic_loss=358135190869.3333 entropy=17.6855 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 45560] reward=-125721078.3 actor_loss=0.3347 critic_loss=154196361216.0000 entropy=17.6882 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 45560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502304.9 mean_steps=14.5
|
|
[Episode 45570] reward=-119775715.9 actor_loss=0.3258 critic_loss=149882798735.3600 entropy=17.6935 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 45580] reward=-116403676.7 actor_loss=0.2406 critic_loss=132650570159.1579 entropy=17.6831 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 45580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584636.5 mean_steps=12.5
|
|
[Episode 45590] reward=-123399171.1 actor_loss=0.3218 critic_loss=425215704225.6842 entropy=17.6899 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 45600] reward=-120396828.7 actor_loss=0.2770 critic_loss=146614597339.4286 entropy=17.6987 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 45600] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-348677.2 mean_steps=18.1
|
|
[Episode 45610] reward=-117599381.1 actor_loss=0.2619 critic_loss=155569669734.4000 entropy=17.6951 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 45620] reward=-115043027.0 actor_loss=0.2797 critic_loss=154415993651.2000 entropy=17.7039 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 45620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449856.1 mean_steps=14.7
|
|
[Episode 45630] reward=-3613184834.2 actor_loss=58.3778 critic_loss=10670776577586728.0000 entropy=17.7038 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1061 front_blocked=0
|
|
[Episode 45640] reward=-125606188.4 actor_loss=0.2117 critic_loss=158096115120.3556 entropy=17.7080 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 45640] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-330018.7 mean_steps=17.6
|
|
[Episode 45650] reward=-121024818.5 actor_loss=0.2382 critic_loss=236818935170.8445 entropy=17.7186 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 45660] reward=-119433992.9 actor_loss=0.2090 critic_loss=147638846168.1778 entropy=17.7165 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 45660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441171.0 mean_steps=15.6
|
|
[Episode 45670] reward=-117721114.1 actor_loss=0.2907 critic_loss=152119927694.2222 entropy=17.7202 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 45680] reward=-117659373.0 actor_loss=0.2264 critic_loss=143287838786.0645 entropy=17.7225 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 45680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495868.3 mean_steps=15.5
|
|
[Episode 45690] reward=-115952793.5 actor_loss=0.3244 critic_loss=145059176820.3636 entropy=17.7074 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 45700] reward=-121599498.9 actor_loss=0.2337 critic_loss=150705009459.2000 entropy=17.7002 approx_kl=0.0070 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 45700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-605864.2 mean_steps=12.1
|
|
[Episode 45710] reward=-125612191.3 actor_loss=0.2635 critic_loss=392778934681.6000 entropy=17.6882 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 45720] reward=-200957712.1 actor_loss=9.7935 critic_loss=14954820347026.2852 entropy=17.7028 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 45720] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-262617.1 mean_steps=18.3
|
|
[Episode 45730] reward=-121165891.3 actor_loss=0.3773 critic_loss=146895152593.4546 entropy=17.6996 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Episode 45740] reward=-121400716.4 actor_loss=0.2097 critic_loss=149210306059.3778 entropy=17.7125 approx_kl=0.0047 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 45740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431545.5 mean_steps=15.9
|
|
[Episode 45750] reward=-141338500.6 actor_loss=0.4322 critic_loss=2579620233216.0000 entropy=17.7041 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1185 front_blocked=0
|
|
[Episode 45760] reward=-117769158.1 actor_loss=0.2524 critic_loss=141989976687.3044 entropy=17.6983 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 45760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-554562.4 mean_steps=15.5
|
|
[Episode 45770] reward=-118365731.2 actor_loss=0.3080 critic_loss=168326027556.5714 entropy=17.6931 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 45780] reward=-750305925.1 actor_loss=6.5046 critic_loss=1014540746411212.7500 entropy=17.7105 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 45780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417933.2 mean_steps=14.9
|
|
[Episode 45790] reward=-122346102.5 actor_loss=0.2220 critic_loss=172080852659.8919 entropy=17.7184 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 45800] reward=-123662522.4 actor_loss=0.2340 critic_loss=173534496209.4546 entropy=17.7162 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 45800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465094.9 mean_steps=14.4
|
|
[Episode 45810] reward=-110784431.5 actor_loss=0.4378 critic_loss=135788948138.6667 entropy=17.7225 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 45820] reward=-115463896.7 actor_loss=0.4178 critic_loss=139030864457.1429 entropy=17.7205 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 45820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532838.7 mean_steps=13.2
|
|
[Episode 45830] reward=-120246443.7 actor_loss=0.3784 critic_loss=169797722468.1739 entropy=17.7283 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 45840] reward=-119185534.6 actor_loss=0.2509 critic_loss=146898570891.6364 entropy=17.7099 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 45840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581525.0 mean_steps=12.4
|
|
[Episode 45850] reward=-124314119.8 actor_loss=0.3141 critic_loss=251153594691.3684 entropy=17.7030 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 45860] reward=-116123324.3 actor_loss=0.4628 critic_loss=186479597613.5111 entropy=17.6975 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 45860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515649.2 mean_steps=13.8
|
|
[Episode 45870] reward=-121140365.3 actor_loss=0.1943 critic_loss=145033512732.4445 entropy=17.6826 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 45880] reward=-118813899.5 actor_loss=0.2555 critic_loss=144811066800.3556 entropy=17.6621 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 45880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-422009.7 mean_steps=14.4
|
|
[Episode 45890] reward=-122732554.0 actor_loss=0.2255 critic_loss=147767547699.2000 entropy=17.6706 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 45900] reward=-120893557.1 actor_loss=0.3164 critic_loss=157895558212.2667 entropy=17.6693 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 45900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-427213.0 mean_steps=16.6
|
|
[Episode 45910] reward=-119671632.2 actor_loss=0.3059 critic_loss=149770925787.4286 entropy=17.6670 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 45920] reward=-116743811.8 actor_loss=0.2757 critic_loss=148429968725.3333 entropy=17.6746 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 45920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-672464.6 mean_steps=13.1
|
|
[Episode 45930] reward=-151552469.6 actor_loss=0.2844 critic_loss=3294070065444.5713 entropy=17.6737 approx_kl=0.0048 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 45940] reward=-114732346.1 actor_loss=0.3987 critic_loss=167996632678.4000 entropy=17.6790 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 45940] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-430607.7 mean_steps=17.7
|
|
[Episode 45950] reward=-119388558.5 actor_loss=0.3157 critic_loss=210006660892.4445 entropy=17.6859 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 45960] reward=-120678356.8 actor_loss=0.2325 critic_loss=148220312689.7778 entropy=17.6949 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 45960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-359180.2 mean_steps=16.1
|
|
[Episode 45970] reward=-120246368.9 actor_loss=0.3067 critic_loss=148793812012.5217 entropy=17.6778 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 45980] reward=-123381309.8 actor_loss=0.1700 critic_loss=151125631340.0889 entropy=17.6552 approx_kl=0.0095 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 45980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-389726.4 mean_steps=16.4
|
|
[Episode 45990] reward=-113737915.3 actor_loss=0.2680 critic_loss=132935788710.0540 entropy=17.6632 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 46000] reward=-118007891.9 actor_loss=0.3542 critic_loss=145840781448.5333 entropy=17.6646 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 46000] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-444672.5 mean_steps=14.7
|
|
[Episode 46010] reward=-126728977.8 actor_loss=0.2268 critic_loss=152393877454.0488 entropy=17.6629 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 46020] reward=-121138435.9 actor_loss=0.2399 critic_loss=149878569642.6667 entropy=17.6745 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 46020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-307283.3 mean_steps=16.7
|
|
[Episode 46030] reward=-115948637.6 actor_loss=0.3241 critic_loss=184611734674.2857 entropy=17.6899 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 46040] reward=-116693965.1 actor_loss=0.3127 critic_loss=146620183893.3333 entropy=17.6923 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 46040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453652.4 mean_steps=15.8
|
|
[Episode 46050] reward=-116632236.1 actor_loss=0.3312 critic_loss=140669759172.9231 entropy=17.6881 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 46060] reward=-117284965.2 actor_loss=0.2810 critic_loss=144646734892.5217 entropy=17.6879 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 46060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530848.2 mean_steps=14.1
|
|
[Episode 46070] reward=-117325438.9 actor_loss=0.2621 critic_loss=144367747072.0000 entropy=17.6797 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 46080] reward=-122460565.5 actor_loss=0.2348 critic_loss=145928621134.7692 entropy=17.6768 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 46080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488025.0 mean_steps=14.0
|
|
[Episode 46090] reward=-120696870.9 actor_loss=0.2924 critic_loss=148421630275.3684 entropy=17.6675 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 46100] reward=-116775324.4 actor_loss=0.2271 critic_loss=137242953500.4444 entropy=17.6608 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 46100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455127.7 mean_steps=14.8
|
|
[Episode 46110] reward=-118063311.9 actor_loss=0.3299 critic_loss=143318735803.7333 entropy=17.6625 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 46120] reward=-114765149.7 actor_loss=0.2692 critic_loss=139631673974.1538 entropy=17.6703 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 46120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-626443.3 mean_steps=13.0
|
|
[Episode 46130] reward=-124337312.6 actor_loss=0.2417 critic_loss=155812579866.9474 entropy=17.6789 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 46140] reward=-112801024.6 actor_loss=0.2826 critic_loss=136607171677.0909 entropy=17.6859 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 46140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572226.3 mean_steps=12.4
|
|
[Episode 46150] reward=-118349985.3 actor_loss=0.3590 critic_loss=143019185766.4000 entropy=17.6818 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Episode 46160] reward=-114831570.9 actor_loss=0.3218 critic_loss=147003655964.4445 entropy=17.6906 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 46160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-457519.6 mean_steps=14.0
|
|
[Episode 46170] reward=-119949806.3 actor_loss=0.2318 critic_loss=155240833934.2222 entropy=17.7048 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 46180] reward=-120321171.0 actor_loss=0.2898 critic_loss=150926195678.9677 entropy=17.7079 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 46180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501840.3 mean_steps=15.3
|
|
[Episode 46190] reward=-118968652.9 actor_loss=0.3403 critic_loss=166260017995.2941 entropy=17.7106 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 46200] reward=-122413752.2 actor_loss=0.2104 critic_loss=186423057729.8286 entropy=17.7405 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1270 front_blocked=0
|
|
[Eval 46200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-614534.0 mean_steps=12.1
|
|
[Episode 46210] reward=-125926705.7 actor_loss=0.3284 critic_loss=1330532743168.0000 entropy=17.7374 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1172 front_blocked=0
|
|
[Episode 46220] reward=-118083737.9 actor_loss=0.2478 critic_loss=173902820034.2069 entropy=17.7416 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 46220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-601523.8 mean_steps=14.1
|
|
[Episode 46230] reward=-118592433.7 actor_loss=0.2690 critic_loss=150047031926.1538 entropy=17.7393 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 46240] reward=-115878043.6 actor_loss=0.4215 critic_loss=141889179506.7586 entropy=17.7492 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 46240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-626488.3 mean_steps=13.2
|
|
[Episode 46250] reward=-123157649.0 actor_loss=0.3063 critic_loss=261234918175.2195 entropy=17.7352 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 46260] reward=-122110722.4 actor_loss=0.2709 critic_loss=226699562188.8000 entropy=17.7390 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 46260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501669.7 mean_steps=14.4
|
|
[Episode 46270] reward=-124455857.0 actor_loss=0.2731 critic_loss=295680039789.7143 entropy=17.7389 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 46280] reward=-119643524.4 actor_loss=0.3068 critic_loss=150482064020.6452 entropy=17.7532 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 46280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490889.6 mean_steps=14.4
|
|
[Episode 46290] reward=-115075552.4 actor_loss=0.2605 critic_loss=143884976640.0000 entropy=17.7423 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 46300] reward=-114758752.8 actor_loss=0.2332 critic_loss=139008911018.6667 entropy=17.7383 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 46300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422077.5 mean_steps=15.9
|
|
[Episode 46310] reward=-113370020.5 actor_loss=0.2127 critic_loss=150580902115.5555 entropy=17.7361 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Episode 46320] reward=-116970187.7 actor_loss=0.2838 critic_loss=143477451239.6190 entropy=17.7126 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 46320] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-657471.2 mean_steps=10.2
|
|
[Episode 46330] reward=-116329108.1 actor_loss=0.2659 critic_loss=139428251966.5778 entropy=17.7132 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 46340] reward=-115813762.7 actor_loss=0.3187 critic_loss=143843964499.3488 entropy=17.7123 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 46340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-618924.8 mean_steps=12.0
|
|
[Episode 46350] reward=-118657690.1 actor_loss=0.3092 critic_loss=166802683904.0000 entropy=17.7282 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 46360] reward=-121271836.1 actor_loss=0.2790 critic_loss=144545619337.8462 entropy=17.7275 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 46360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417445.6 mean_steps=15.9
|
|
[Episode 46370] reward=-120148792.6 actor_loss=0.3081 critic_loss=148188603609.2121 entropy=17.7246 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 46380] reward=-117782565.7 actor_loss=0.3295 critic_loss=143410111409.2308 entropy=17.7090 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 46380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502123.0 mean_steps=14.6
|
|
[Episode 46390] reward=-120781004.6 actor_loss=0.2956 critic_loss=147721979728.4572 entropy=17.6924 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 46400] reward=-120617531.6 actor_loss=0.2224 critic_loss=144933004256.9697 entropy=17.6765 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 46400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435555.1 mean_steps=15.9
|
|
[Episode 46410] reward=-117871283.3 actor_loss=0.2905 critic_loss=141075269950.5778 entropy=17.6715 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 46420] reward=-116007656.2 actor_loss=0.3598 critic_loss=145692380774.4000 entropy=17.6681 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 46420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437989.3 mean_steps=15.0
|
|
[Episode 46430] reward=-116490735.9 actor_loss=0.2608 critic_loss=141785897537.6410 entropy=17.6645 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 46440] reward=-106532089.4 actor_loss=0.3222 critic_loss=133264597647.3600 entropy=17.6718 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 46440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-518851.9 mean_steps=15.2
|
|
[Episode 46450] reward=-118677988.8 actor_loss=0.3585 critic_loss=142273981644.8000 entropy=17.6734 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 46460] reward=-114219627.8 actor_loss=0.2369 critic_loss=139342620113.4546 entropy=17.6752 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 46460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512283.8 mean_steps=13.8
|
|
[Episode 46470] reward=-115342922.8 actor_loss=0.3066 critic_loss=137754158545.4546 entropy=17.6713 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 46480] reward=-120719405.4 actor_loss=0.2063 critic_loss=144248461448.5333 entropy=17.6795 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 46480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570605.2 mean_steps=13.3
|
|
[Episode 46490] reward=-116609328.8 actor_loss=0.3442 critic_loss=155124267546.9474 entropy=17.6865 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 46500] reward=-117454136.3 actor_loss=0.2829 critic_loss=145908269238.0444 entropy=17.6901 approx_kl=0.0074 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 46500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-418294.5 mean_steps=14.4
|
|
[Episode 46510] reward=-118885461.5 actor_loss=0.2722 critic_loss=142928231628.8000 entropy=17.6696 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 46520] reward=-119541442.6 actor_loss=0.2978 critic_loss=145317636388.5714 entropy=17.6841 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 46520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-573219.6 mean_steps=14.8
|
|
[Episode 46530] reward=-120340810.8 actor_loss=0.3166 critic_loss=144865252693.3333 entropy=17.6863 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 46540] reward=-119579572.5 actor_loss=0.3214 critic_loss=140210685033.9310 entropy=17.6787 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 46540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-764528.2 mean_steps=13.8
|
|
[Episode 46550] reward=-119209436.9 actor_loss=0.2922 critic_loss=141339885568.0000 entropy=17.6762 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 46560] reward=-115312073.6 actor_loss=0.2792 critic_loss=140287021875.2000 entropy=17.6774 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 46560] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-880501.7 mean_steps=13.2
|
|
[Episode 46570] reward=-112772075.4 actor_loss=0.3062 critic_loss=161310211584.0000 entropy=17.6726 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 46580] reward=-120959786.6 actor_loss=0.3796 critic_loss=262072487116.8000 entropy=17.6721 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 46580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529186.2 mean_steps=13.3
|
|
[Episode 46590] reward=-122140627.2 actor_loss=0.2781 critic_loss=147825120051.2000 entropy=17.6806 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 46600] reward=-119060291.0 actor_loss=0.3034 critic_loss=142024738224.3556 entropy=17.6471 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 46600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437056.5 mean_steps=14.9
|
|
[Episode 46610] reward=-113226585.7 actor_loss=0.2928 critic_loss=134382110674.4889 entropy=17.6527 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 46620] reward=-122819673.3 actor_loss=0.2578 critic_loss=157348583365.4857 entropy=17.6465 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 46620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405048.8 mean_steps=15.9
|
|
[Episode 46630] reward=-121613112.0 actor_loss=0.1795 critic_loss=149723673258.6667 entropy=17.6336 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 46640] reward=-134925785.3 actor_loss=0.2916 critic_loss=1177970665917.2173 entropy=17.6441 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 46640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506477.6 mean_steps=14.6
|
|
[Episode 46650] reward=-113196420.7 actor_loss=0.3198 critic_loss=136247452740.2667 entropy=17.6197 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 46660] reward=-161339497.7 actor_loss=0.5323 critic_loss=6609900987278.2227 entropy=17.6232 approx_kl=0.0064 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 46660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559129.2 mean_steps=13.8
|
|
[Episode 46670] reward=-120437430.4 actor_loss=0.3021 critic_loss=150459780956.1600 entropy=17.6272 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 46680] reward=-118009498.5 actor_loss=0.2338 critic_loss=139045513716.6222 entropy=17.6407 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 46680] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-359835.3 mean_steps=17.4
|
|
[Episode 46690] reward=-120687074.3 actor_loss=0.2525 critic_loss=147139926546.9630 entropy=17.6381 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 46700] reward=-129608420.1 actor_loss=0.3075 critic_loss=1117917381336.1777 entropy=17.6416 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 46700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502703.4 mean_steps=15.4
|
|
[Episode 46710] reward=-117903105.2 actor_loss=0.3988 critic_loss=147959190016.0000 entropy=17.6340 approx_kl=0.0053 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 46720] reward=-121066851.8 actor_loss=0.2060 critic_loss=145716447368.5333 entropy=17.6359 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 46720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592516.9 mean_steps=12.9
|
|
[Episode 46730] reward=-119493154.9 actor_loss=0.3084 critic_loss=142821961728.0000 entropy=17.6382 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 46740] reward=-115762799.2 actor_loss=0.3087 critic_loss=141537073906.5263 entropy=17.6308 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 46740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576822.4 mean_steps=13.7
|
|
[Episode 46750] reward=-122318380.0 actor_loss=0.3200 critic_loss=165378593223.1111 entropy=17.6437 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 46760] reward=-113538758.1 actor_loss=0.2020 critic_loss=133827226510.2222 entropy=17.6482 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 46760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-556175.6 mean_steps=14.4
|
|
[Episode 46770] reward=-123071993.5 actor_loss=0.3266 critic_loss=147082125743.1579 entropy=17.6588 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 46780] reward=-117699435.0 actor_loss=0.3587 critic_loss=138245663792.7619 entropy=17.6618 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 46780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422139.2 mean_steps=15.6
|
|
[Episode 46790] reward=-113951917.6 actor_loss=0.2628 critic_loss=141753932399.3044 entropy=17.6649 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 46800] reward=-120426507.6 actor_loss=0.2925 critic_loss=156774028950.5882 entropy=17.6673 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 46800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-395307.4 mean_steps=16.4
|
|
[Episode 46810] reward=-115624439.5 actor_loss=0.2586 critic_loss=138109828513.1852 entropy=17.6666 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 46820] reward=-116382444.9 actor_loss=0.3667 critic_loss=135548866078.1176 entropy=17.6623 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 46820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486225.5 mean_steps=14.1
|
|
[Episode 46830] reward=-112825883.2 actor_loss=0.3364 critic_loss=132945095813.5652 entropy=17.6797 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 46840] reward=-113516618.3 actor_loss=0.3187 critic_loss=139950607661.1765 entropy=17.6779 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 46840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518500.3 mean_steps=13.1
|
|
[Episode 46850] reward=-119138688.5 actor_loss=0.2590 critic_loss=142600203299.3103 entropy=17.6900 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 46860] reward=-113973178.6 actor_loss=0.3481 critic_loss=131190658533.0526 entropy=17.6905 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 46860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-605999.9 mean_steps=13.8
|
|
[Episode 46870] reward=-117383323.0 actor_loss=0.2903 critic_loss=142828703861.0286 entropy=17.6892 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 46880] reward=-122918349.2 actor_loss=0.2803 critic_loss=148627897685.3333 entropy=17.7051 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 46880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-565991.9 mean_steps=12.7
|
|
[Episode 46890] reward=-121396468.8 actor_loss=0.3078 critic_loss=152155292861.6296 entropy=17.7078 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 46900] reward=-116779027.7 actor_loss=0.2550 critic_loss=143717695488.0000 entropy=17.6905 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 46900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401371.6 mean_steps=16.4
|
|
[Episode 46910] reward=-121849701.1 actor_loss=0.2311 critic_loss=155832623786.6667 entropy=17.6984 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 46920] reward=-120109849.0 actor_loss=0.2453 critic_loss=143586135244.8000 entropy=17.6858 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 46920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474434.1 mean_steps=14.1
|
|
[Episode 46930] reward=-116973204.6 actor_loss=0.3099 critic_loss=143925625651.2000 entropy=17.6750 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 46940] reward=-116557057.3 actor_loss=0.2319 critic_loss=140052581580.8000 entropy=17.6749 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 46940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504552.2 mean_steps=14.0
|
|
[Episode 46950] reward=-118617953.1 actor_loss=0.3139 critic_loss=146488904704.0000 entropy=17.6781 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 46960] reward=-122727394.8 actor_loss=0.3056 critic_loss=156043758933.3333 entropy=17.6698 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 46960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-364804.1 mean_steps=16.2
|
|
[Episode 46970] reward=-123233817.2 actor_loss=0.3494 critic_loss=146538055452.4445 entropy=17.6569 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 46980] reward=-132228444.1 actor_loss=0.3008 critic_loss=1185214024908.8000 entropy=17.6530 approx_kl=0.0046 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 46980] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-273956.1 mean_steps=17.6
|
|
[Episode 46990] reward=-119142898.5 actor_loss=0.3794 critic_loss=190718076245.3333 entropy=17.6542 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 47000] reward=-114784196.7 actor_loss=0.2863 critic_loss=140532563968.0000 entropy=17.6396 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 47000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-581608.6 mean_steps=13.5
|
|
[Episode 47010] reward=-118691083.5 actor_loss=0.1709 critic_loss=143281149633.4222 entropy=17.6469 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 47020] reward=-121395850.0 actor_loss=0.3182 critic_loss=150589892450.4615 entropy=17.6637 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 47020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507932.9 mean_steps=14.0
|
|
[Episode 47030] reward=-123915540.5 actor_loss=0.3171 critic_loss=425962308312.1778 entropy=17.6702 approx_kl=0.0053 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 47040] reward=-116635189.9 actor_loss=0.4173 critic_loss=142396415122.2857 entropy=17.6982 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 47040] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-332522.1 mean_steps=17.4
|
|
[Episode 47050] reward=-649310441.4 actor_loss=10.1641 critic_loss=478488406523904.0000 entropy=17.7077 approx_kl=0.0038 kl_stop=1 intervention_rate=0.1152 front_blocked=0
|
|
[Episode 47060] reward=-115382001.2 actor_loss=0.3541 critic_loss=146972745337.9048 entropy=17.6997 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 47060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536059.8 mean_steps=13.4
|
|
[Episode 47070] reward=-119710409.0 actor_loss=0.2619 critic_loss=148275742257.5484 entropy=17.6960 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47080] reward=-119232537.3 actor_loss=0.2753 critic_loss=144549672459.3778 entropy=17.6911 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 47080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407292.5 mean_steps=16.8
|
|
[Episode 47090] reward=-119346622.4 actor_loss=0.2488 critic_loss=141305268435.8621 entropy=17.6903 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 47100] reward=-116402741.3 actor_loss=0.3022 critic_loss=140716438802.7317 entropy=17.7100 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 47100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-563876.3 mean_steps=14.7
|
|
[Episode 47110] reward=-118500051.7 actor_loss=0.2815 critic_loss=146136849019.5862 entropy=17.7029 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 47120] reward=-120710509.5 actor_loss=0.3054 critic_loss=147794942361.6000 entropy=17.7110 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 47120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590636.4 mean_steps=13.6
|
|
[Episode 47130] reward=-113011240.6 actor_loss=0.2857 critic_loss=132506943670.0444 entropy=17.7066 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47140] reward=-116862756.6 actor_loss=0.2295 critic_loss=143852853475.5555 entropy=17.7224 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 47140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-582078.6 mean_steps=13.7
|
|
[Episode 47150] reward=-111602810.5 actor_loss=0.3152 critic_loss=142324135749.8182 entropy=17.7202 approx_kl=0.0047 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 47160] reward=-113194395.3 actor_loss=0.3750 critic_loss=139612808862.8965 entropy=17.7256 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 47160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473202.7 mean_steps=14.9
|
|
[Episode 47170] reward=-121362220.8 actor_loss=0.2644 critic_loss=154594590720.0000 entropy=17.7057 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47180] reward=-124402175.0 actor_loss=0.2946 critic_loss=149055607239.1111 entropy=17.7138 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 47180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-450747.4 mean_steps=14.3
|
|
[Episode 47190] reward=-125364382.4 actor_loss=0.1988 critic_loss=154453819392.0000 entropy=17.7153 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 47200] reward=-119682139.7 actor_loss=0.2836 critic_loss=143187566299.4286 entropy=17.6966 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 47200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473354.4 mean_steps=14.1
|
|
[Episode 47210] reward=-119325173.7 actor_loss=0.2480 critic_loss=141482460592.3556 entropy=17.6943 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47220] reward=-120673965.0 actor_loss=0.2653 critic_loss=143819413857.1035 entropy=17.6971 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 47220] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-688816.5 mean_steps=11.6
|
|
[Episode 47230] reward=-118942515.0 actor_loss=0.2260 critic_loss=141329001715.8095 entropy=17.6972 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 47240] reward=-116575278.0 actor_loss=0.2627 critic_loss=143320607129.6000 entropy=17.6850 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 47240] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-623837.3 mean_steps=11.1
|
|
[Episode 47250] reward=-117283174.3 actor_loss=0.2928 critic_loss=149820637184.0000 entropy=17.6751 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 47260] reward=-117487988.5 actor_loss=0.4288 critic_loss=167369763810.7429 entropy=17.6712 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 47260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-464964.5 mean_steps=16.1
|
|
[Episode 47270] reward=-117146478.0 actor_loss=0.3002 critic_loss=138458666469.0526 entropy=17.6533 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 47280] reward=-118509647.4 actor_loss=0.3176 critic_loss=142281052797.1555 entropy=17.6584 approx_kl=0.0065 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 47280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-353166.0 mean_steps=16.5
|
|
[Episode 47290] reward=-119274522.8 actor_loss=0.2776 critic_loss=141854623901.5385 entropy=17.6549 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 47300] reward=-114757020.1 actor_loss=0.2707 critic_loss=135780839915.5200 entropy=17.6673 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 47300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-389937.7 mean_steps=15.3
|
|
[Episode 47310] reward=-112537825.6 actor_loss=0.4206 critic_loss=135536514935.4667 entropy=17.6766 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 47320] reward=-118413640.4 actor_loss=0.3454 critic_loss=148308972202.6667 entropy=17.6650 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 47320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480721.8 mean_steps=15.2
|
|
[Episode 47330] reward=-122370699.6 actor_loss=0.2878 critic_loss=156046413004.8000 entropy=17.6669 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 47340] reward=-124569330.5 actor_loss=0.2833 critic_loss=151324631950.2222 entropy=17.6551 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 47340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417223.3 mean_steps=15.7
|
|
[Episode 47350] reward=-112976801.9 actor_loss=0.4060 critic_loss=138094938112.0000 entropy=17.6616 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 47360] reward=-123861280.7 actor_loss=0.2676 critic_loss=156828387620.5714 entropy=17.6511 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 47360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-528262.9 mean_steps=13.2
|
|
[Episode 47370] reward=-118492428.9 actor_loss=0.2946 critic_loss=147056321008.4849 entropy=17.6557 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 47380] reward=-118358114.8 actor_loss=0.2942 critic_loss=139774494134.8571 entropy=17.6473 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 47380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439337.3 mean_steps=15.1
|
|
[Episode 47390] reward=-125142205.6 actor_loss=0.3485 critic_loss=284604138291.2000 entropy=17.6509 approx_kl=0.0052 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 47400] reward=-121250678.6 actor_loss=0.2269 critic_loss=147332965034.6667 entropy=17.6389 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 47400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523970.2 mean_steps=14.2
|
|
[Episode 47410] reward=-122297734.2 actor_loss=0.2690 critic_loss=150618802468.5714 entropy=17.6315 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47420] reward=-122065130.3 actor_loss=0.3804 critic_loss=146889140906.6667 entropy=17.6399 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1471 front_blocked=0
|
|
[Eval 47420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-421243.7 mean_steps=16.7
|
|
[Episode 47430] reward=-115367635.5 actor_loss=0.3074 critic_loss=135569779097.6000 entropy=17.6332 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 47440] reward=-121684460.5 actor_loss=0.2654 critic_loss=142814918246.4000 entropy=17.6406 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 47440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487890.4 mean_steps=14.2
|
|
[Episode 47450] reward=-116144479.8 actor_loss=0.3502 critic_loss=138085997681.7778 entropy=17.6575 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 47460] reward=-114613842.7 actor_loss=0.4465 critic_loss=141668528128.0000 entropy=17.6434 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 47460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580345.2 mean_steps=12.7
|
|
[Episode 47470] reward=-121679227.8 actor_loss=0.3053 critic_loss=145147375748.1290 entropy=17.6481 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 47480] reward=-115432091.8 actor_loss=0.3298 critic_loss=137578434150.4000 entropy=17.6504 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 47480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431505.4 mean_steps=15.8
|
|
[Episode 47490] reward=-122188264.6 actor_loss=0.2371 critic_loss=147452421006.2222 entropy=17.6414 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 47500] reward=-117048213.4 actor_loss=0.1793 critic_loss=144391624614.9565 entropy=17.6449 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 47500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535447.4 mean_steps=14.4
|
|
[Episode 47510] reward=-112102006.2 actor_loss=0.3672 critic_loss=141700845663.2558 entropy=17.6555 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47520] reward=-123640976.5 actor_loss=0.2146 critic_loss=147704838054.9565 entropy=17.6498 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 47520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-606244.0 mean_steps=13.2
|
|
[Episode 47530] reward=-123576790.2 actor_loss=0.3602 critic_loss=294356921344.0000 entropy=17.6431 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 47540] reward=-116307342.5 actor_loss=0.3690 critic_loss=139010277717.3333 entropy=17.6380 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 47540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589985.9 mean_steps=13.2
|
|
[Episode 47550] reward=-113851879.6 actor_loss=0.3407 critic_loss=151380171434.6667 entropy=17.6294 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 47560] reward=-117645751.5 actor_loss=0.2019 critic_loss=138972866402.4615 entropy=17.6355 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 47560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505813.8 mean_steps=14.7
|
|
[Episode 47570] reward=-116784659.4 actor_loss=0.3975 critic_loss=139406293219.5555 entropy=17.6378 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 47580] reward=-117064524.4 actor_loss=0.2701 critic_loss=138396536012.8000 entropy=17.6343 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 47580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451178.2 mean_steps=14.6
|
|
[Episode 47590] reward=-118710039.9 actor_loss=0.3015 critic_loss=154459748608.0000 entropy=17.6317 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 47600] reward=-119757576.5 actor_loss=0.3200 critic_loss=145672245248.0000 entropy=17.6313 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 47600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510290.9 mean_steps=14.6
|
|
[Episode 47610] reward=-119615246.1 actor_loss=0.2480 critic_loss=144039366144.0000 entropy=17.6339 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 47620] reward=-112873253.9 actor_loss=0.1795 critic_loss=133307268871.7576 entropy=17.6260 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 47620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476668.3 mean_steps=15.1
|
|
[Episode 47630] reward=-122106868.1 actor_loss=0.2165 critic_loss=147440541696.0000 entropy=17.6228 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 47640] reward=-117910713.2 actor_loss=0.3777 critic_loss=139936141056.0000 entropy=17.6215 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 47640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-535224.1 mean_steps=15.6
|
|
[Episode 47650] reward=-116568839.4 actor_loss=0.2256 critic_loss=140284092043.6364 entropy=17.6353 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 47660] reward=-114648225.1 actor_loss=0.3385 critic_loss=133561998729.8462 entropy=17.6300 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 47660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435274.1 mean_steps=15.8
|
|
[Episode 47670] reward=-116680429.3 actor_loss=0.3214 critic_loss=135248827538.2857 entropy=17.6329 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 47680] reward=-116450284.6 actor_loss=0.4235 critic_loss=139687091260.2353 entropy=17.6221 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 47680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-582895.0 mean_steps=13.9
|
|
[Episode 47690] reward=-119427435.5 actor_loss=0.2103 critic_loss=148693583248.6956 entropy=17.6114 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 47700] reward=-116095445.1 actor_loss=0.3069 critic_loss=137976811341.9131 entropy=17.6046 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 47700] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-254005.6 mean_steps=17.9
|
|
[Episode 47710] reward=-119591977.5 actor_loss=0.1432 critic_loss=265008466850.9091 entropy=17.6196 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1191 front_blocked=0
|
|
[Episode 47720] reward=-117057405.3 actor_loss=0.3566 critic_loss=140079224877.5111 entropy=17.6360 approx_kl=0.0083 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 47720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463196.0 mean_steps=15.1
|
|
[Episode 47730] reward=-116214624.0 actor_loss=0.2885 critic_loss=143589105664.0000 entropy=17.6382 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 47740] reward=-116481860.6 actor_loss=0.3684 critic_loss=142200211206.2439 entropy=17.6463 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 47740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566396.5 mean_steps=13.8
|
|
[Episode 47750] reward=-117730567.7 actor_loss=0.3633 critic_loss=139875655115.0345 entropy=17.6567 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 47760] reward=-118668829.8 actor_loss=0.2417 critic_loss=148338550663.5294 entropy=17.6571 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 47760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515504.5 mean_steps=14.2
|
|
[Episode 47770] reward=-119371197.1 actor_loss=0.3423 critic_loss=146438569398.8571 entropy=17.6541 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 47780] reward=-114555774.1 actor_loss=0.2361 critic_loss=145224359025.7778 entropy=17.6589 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 47780] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-422928.8 mean_steps=16.4
|
|
[Episode 47790] reward=-119128309.9 actor_loss=0.2727 critic_loss=154420102212.2667 entropy=17.6586 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47800] reward=-119628503.6 actor_loss=0.3019 critic_loss=148458037551.4074 entropy=17.6628 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 47800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-583295.2 mean_steps=12.8
|
|
[Episode 47810] reward=-116449606.5 actor_loss=0.2635 critic_loss=136792698606.9333 entropy=17.6443 approx_kl=0.0093 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 47820] reward=-120244501.2 actor_loss=0.2165 critic_loss=146855453416.7273 entropy=17.6491 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 47820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-468457.6 mean_steps=13.1
|
|
[Episode 47830] reward=-121319337.4 actor_loss=0.2603 critic_loss=153345898723.5555 entropy=17.6467 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 47840] reward=-119776012.7 actor_loss=0.3329 critic_loss=150026169548.8000 entropy=17.6499 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 47840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592970.4 mean_steps=12.5
|
|
[Episode 47850] reward=-117126338.8 actor_loss=0.3038 critic_loss=153138487777.8824 entropy=17.6527 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 47860] reward=-118250521.0 actor_loss=0.3494 critic_loss=160231651328.0000 entropy=17.6601 approx_kl=0.0059 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 47860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-364415.7 mean_steps=15.7
|
|
[Episode 47870] reward=-117791154.0 actor_loss=0.3500 critic_loss=142583485676.3077 entropy=17.6716 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 47880] reward=-118002671.5 actor_loss=0.3745 critic_loss=162077445168.7619 entropy=17.6746 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 47880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-334551.5 mean_steps=16.5
|
|
[Episode 47890] reward=-113168847.8 actor_loss=0.3336 critic_loss=136767480685.7143 entropy=17.6670 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 47900] reward=-114495079.6 actor_loss=0.3319 critic_loss=140830174916.9231 entropy=17.6614 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 47900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-577744.3 mean_steps=13.7
|
|
[Episode 47910] reward=-117029682.5 actor_loss=0.2726 critic_loss=145375712256.0000 entropy=17.6596 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 47920] reward=-122680912.0 actor_loss=0.2287 critic_loss=144840523385.9048 entropy=17.6615 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 47920] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-222186.4 mean_steps=19.1
|
|
[Episode 47930] reward=-121603929.2 actor_loss=0.2501 critic_loss=143884226755.0476 entropy=17.6535 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 47940] reward=-120432303.2 actor_loss=0.2427 critic_loss=155155074947.8788 entropy=17.6336 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 47940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525689.1 mean_steps=14.3
|
|
[Episode 47950] reward=-111177692.7 actor_loss=0.3926 critic_loss=129482043782.0952 entropy=17.6294 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 47960] reward=-122415861.7 actor_loss=0.2839 critic_loss=148054706555.2592 entropy=17.6375 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 47960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-577135.8 mean_steps=15.8
|
|
[Episode 47970] reward=-122878162.8 actor_loss=0.1922 critic_loss=144240878842.3111 entropy=17.6439 approx_kl=0.0097 kl_stop=0 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 47980] reward=-117208907.3 actor_loss=0.2724 critic_loss=143152573274.8387 entropy=17.6417 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 47980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-703579.8 mean_steps=13.1
|
|
[Episode 47990] reward=-117915409.9 actor_loss=0.2485 critic_loss=143738758212.2667 entropy=17.6498 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 48000] reward=-117077135.2 actor_loss=0.3158 critic_loss=140130853428.9655 entropy=17.6475 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 48000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572417.5 mean_steps=12.9
|
|
[Episode 48010] reward=-120276320.1 actor_loss=0.2560 critic_loss=142330472220.4445 entropy=17.6379 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 48020] reward=-121624785.4 actor_loss=0.2029 critic_loss=144304811804.4445 entropy=17.6283 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 48020] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-618908.6 mean_steps=12.6
|
|
[Episode 48030] reward=-112342335.1 actor_loss=0.3081 critic_loss=139866291313.7778 entropy=17.6384 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 48040] reward=-120556440.3 actor_loss=0.2520 critic_loss=142644411202.3704 entropy=17.6343 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 48040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-377422.2 mean_steps=16.2
|
|
[Episode 48050] reward=-121235051.5 actor_loss=0.3157 critic_loss=141763259392.0000 entropy=17.6129 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 48060] reward=-113312354.1 actor_loss=0.3806 critic_loss=134920060507.8974 entropy=17.6122 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 48060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-365338.7 mean_steps=16.4
|
|
[Episode 48070] reward=-117152274.8 actor_loss=0.2753 critic_loss=143337818014.4762 entropy=17.6174 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 48080] reward=-113810993.0 actor_loss=0.3419 critic_loss=131879210734.9333 entropy=17.6260 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 48080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528630.7 mean_steps=14.2
|
|
[Episode 48090] reward=-114898073.9 actor_loss=0.3296 critic_loss=136488373134.2222 entropy=17.6207 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 48100] reward=-117472626.6 actor_loss=0.2739 critic_loss=134986559272.4211 entropy=17.6148 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 48100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511578.2 mean_steps=14.2
|
|
[Episode 48110] reward=-159581340.2 actor_loss=0.3499 critic_loss=7519019289170.5811 entropy=17.6166 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1224 front_blocked=0
|
|
[Episode 48120] reward=-115636799.6 actor_loss=0.2369 critic_loss=142672637132.8000 entropy=17.6230 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 48120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474948.2 mean_steps=15.5
|
|
[Episode 48130] reward=-116772650.2 actor_loss=0.2311 critic_loss=173022626520.1778 entropy=17.6255 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 48140] reward=-119098603.4 actor_loss=0.2948 critic_loss=140891067970.7826 entropy=17.6356 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 48140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-376904.8 mean_steps=15.6
|
|
[Episode 48150] reward=-116153365.1 actor_loss=0.2983 critic_loss=138692332014.3448 entropy=17.6294 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 48160] reward=-124458872.3 actor_loss=0.2199 critic_loss=180687544623.4074 entropy=17.6290 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 48160] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-334784.7 mean_steps=16.9
|
|
[Episode 48170] reward=-119171372.5 actor_loss=0.3201 critic_loss=148011838668.8000 entropy=17.6366 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 48180] reward=-119939951.3 actor_loss=0.2850 critic_loss=148448297324.0889 entropy=17.6389 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 48180] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612195.2 mean_steps=11.9
|
|
[Episode 48190] reward=-118463384.9 actor_loss=0.1983 critic_loss=158035658384.4102 entropy=17.6539 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Episode 48200] reward=-114427031.4 actor_loss=0.3050 critic_loss=139973099155.9111 entropy=17.6714 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 48200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548916.9 mean_steps=13.9
|
|
[Episode 48210] reward=-120714543.2 actor_loss=0.3231 critic_loss=150436276585.4118 entropy=17.6761 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 48220] reward=-112669295.7 actor_loss=0.2753 critic_loss=139191398985.1429 entropy=17.6713 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 48220] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-404670.3 mean_steps=17.6
|
|
[Episode 48230] reward=-115214522.5 actor_loss=0.3224 critic_loss=140243378878.1714 entropy=17.6735 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 48240] reward=-126801971.6 actor_loss=0.2736 critic_loss=229766802711.2727 entropy=17.6689 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 48240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448455.5 mean_steps=14.9
|
|
[Episode 48250] reward=-118967865.6 actor_loss=0.1580 critic_loss=145002016085.3333 entropy=17.6733 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 48260] reward=-120316221.4 actor_loss=0.2678 critic_loss=153394626560.0000 entropy=17.6723 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 48260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-541618.9 mean_steps=15.3
|
|
[Episode 48270] reward=-117138602.8 actor_loss=0.2883 critic_loss=135920089861.6889 entropy=17.6635 approx_kl=0.0062 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 48280] reward=-115702706.9 actor_loss=0.3106 critic_loss=139341446375.2258 entropy=17.6675 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 48280] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-588808.4 mean_steps=13.1
|
|
[Episode 48290] reward=-115533292.7 actor_loss=0.2767 critic_loss=151977283762.0869 entropy=17.6544 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 48300] reward=-119624707.3 actor_loss=0.1878 critic_loss=143003234544.9412 entropy=17.6515 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 48300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-422782.8 mean_steps=16.7
|
|
[Episode 48310] reward=-116893914.7 actor_loss=0.2403 critic_loss=138845933476.9778 entropy=17.6623 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 48320] reward=-121098011.5 actor_loss=0.2875 critic_loss=184984021178.1818 entropy=17.6790 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 48320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-588127.9 mean_steps=13.9
|
|
[Episode 48330] reward=-116301944.9 actor_loss=0.3996 critic_loss=137687975058.2857 entropy=17.6785 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 48340] reward=-113584291.8 actor_loss=0.3382 critic_loss=129213593941.3333 entropy=17.6855 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 48340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506573.4 mean_steps=14.3
|
|
[Episode 48350] reward=-114408265.0 actor_loss=0.2930 critic_loss=139502931148.8000 entropy=17.6859 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 48360] reward=-117240504.4 actor_loss=0.2684 critic_loss=145077077918.4762 entropy=17.6702 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 48360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-564758.6 mean_steps=13.5
|
|
[Episode 48370] reward=-114978025.7 actor_loss=0.3453 critic_loss=148170893401.0435 entropy=17.6695 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 48380] reward=-122215599.0 actor_loss=0.2739 critic_loss=147148339609.6000 entropy=17.6809 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 48380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-491474.4 mean_steps=15.2
|
|
[Episode 48390] reward=-121508704.0 actor_loss=0.2075 critic_loss=226420132912.7619 entropy=17.6752 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Episode 48400] reward=-112977200.8 actor_loss=0.3513 critic_loss=148408718677.3333 entropy=17.6832 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 48400] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-539006.5 mean_steps=12.5
|
|
[Episode 48410] reward=-115924536.6 actor_loss=0.3455 critic_loss=141887150535.1111 entropy=17.6827 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 48420] reward=-122093539.7 actor_loss=0.2445 critic_loss=153995800576.0000 entropy=17.6787 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 48420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-458261.3 mean_steps=16.7
|
|
[Episode 48430] reward=-123470096.1 actor_loss=0.2176 critic_loss=150081414413.4737 entropy=17.6866 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 48440] reward=-121205047.7 actor_loss=0.1643 critic_loss=155432173568.0000 entropy=17.6867 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 48440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500486.3 mean_steps=15.4
|
|
[Episode 48450] reward=-115799961.3 actor_loss=0.3265 critic_loss=146692563907.7647 entropy=17.6938 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 48460] reward=-121777509.3 actor_loss=0.2823 critic_loss=154298891759.4839 entropy=17.6757 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 48460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517934.1 mean_steps=14.2
|
|
[Episode 48470] reward=-118486187.0 actor_loss=0.2515 critic_loss=141489190725.8182 entropy=17.6813 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 48480] reward=-114124995.6 actor_loss=0.2868 critic_loss=138583458669.7143 entropy=17.6678 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 48480] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-676230.0 mean_steps=11.4
|
|
[Episode 48490] reward=-117767210.2 actor_loss=0.2681 critic_loss=151228739405.9131 entropy=17.6885 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 48500] reward=-121106387.1 actor_loss=0.3460 critic_loss=150573898137.6000 entropy=17.6726 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 48500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540662.8 mean_steps=13.7
|
|
[Episode 48510] reward=-114864949.4 actor_loss=0.2593 critic_loss=150761786026.6667 entropy=17.6706 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 48520] reward=-117058878.9 actor_loss=0.2754 critic_loss=138328041062.4000 entropy=17.6700 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 48520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542777.2 mean_steps=14.6
|
|
[Episode 48530] reward=-119347304.9 actor_loss=0.3462 critic_loss=142916133228.0889 entropy=17.6613 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 48540] reward=-116810044.3 actor_loss=0.3381 critic_loss=147631015448.3810 entropy=17.6658 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 48540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496943.1 mean_steps=14.2
|
|
[Episode 48550] reward=-121403119.1 actor_loss=0.3780 critic_loss=150189090952.5333 entropy=17.6706 approx_kl=0.0076 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Episode 48560] reward=-115861706.0 actor_loss=0.2629 critic_loss=139412634387.6923 entropy=17.6565 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 48560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426851.8 mean_steps=15.9
|
|
[Episode 48570] reward=-119005504.5 actor_loss=0.2976 critic_loss=139830180522.6667 entropy=17.6700 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 48580] reward=-118521455.0 actor_loss=0.3310 critic_loss=160593647206.4000 entropy=17.6935 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 48580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-411128.2 mean_steps=15.2
|
|
[Episode 48590] reward=-113330240.3 actor_loss=0.3597 critic_loss=138365445356.3077 entropy=17.6992 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 48600] reward=-120607224.0 actor_loss=0.3059 critic_loss=146860771901.4400 entropy=17.7011 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 48600] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571329.2 mean_steps=12.8
|
|
[Episode 48610] reward=-117767287.4 actor_loss=0.3507 critic_loss=145928706650.3529 entropy=17.6967 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 48620] reward=-116540609.3 actor_loss=0.3451 critic_loss=141111849779.2000 entropy=17.6997 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 48620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484433.3 mean_steps=14.3
|
|
[Episode 48630] reward=-123572346.0 actor_loss=0.2130 critic_loss=151117087500.1905 entropy=17.6896 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 48640] reward=-117190271.8 actor_loss=0.3469 critic_loss=140722215087.5428 entropy=17.7035 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 48640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579999.4 mean_steps=12.9
|
|
[Episode 48650] reward=-118838887.5 actor_loss=0.3171 critic_loss=144721451643.5862 entropy=17.7174 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 48660] reward=-121287076.2 actor_loss=0.3121 critic_loss=153269273486.2222 entropy=17.7087 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 48660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-679467.5 mean_steps=12.3
|
|
[Episode 48670] reward=-115733323.1 actor_loss=0.3656 critic_loss=137826756380.4445 entropy=17.7153 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 48680] reward=-119716694.9 actor_loss=0.3282 critic_loss=142368853125.5652 entropy=17.7010 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 48680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459635.9 mean_steps=15.2
|
|
[Episode 48690] reward=-118674472.2 actor_loss=0.2487 critic_loss=140453940955.4286 entropy=17.6955 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 48700] reward=-117134086.0 actor_loss=0.3291 critic_loss=134787587218.2857 entropy=17.7108 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 48700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-454057.1 mean_steps=14.1
|
|
[Episode 48710] reward=-123186042.3 actor_loss=0.3124 critic_loss=146559301495.4667 entropy=17.7170 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 48720] reward=-118351766.2 actor_loss=0.2973 critic_loss=141913242935.6522 entropy=17.6913 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 48720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-596874.5 mean_steps=13.8
|
|
[Episode 48730] reward=-119373712.6 actor_loss=0.2696 critic_loss=144855978985.2444 entropy=17.6905 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 48740] reward=-118367657.3 actor_loss=0.2781 critic_loss=138346980649.2903 entropy=17.6983 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 48740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-638096.7 mean_steps=12.2
|
|
[Episode 48750] reward=-122238278.1 actor_loss=0.2758 critic_loss=144221886600.5333 entropy=17.6958 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 48760] reward=-119093872.6 actor_loss=0.3213 critic_loss=143327082496.0000 entropy=17.6926 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 48760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-452860.3 mean_steps=13.7
|
|
[Episode 48770] reward=-121730526.4 actor_loss=0.2252 critic_loss=153097794653.0909 entropy=17.6895 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 48780] reward=-110666079.6 actor_loss=0.3227 critic_loss=132464882408.7273 entropy=17.6868 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 48780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557697.6 mean_steps=13.8
|
|
[Episode 48790] reward=-120514620.2 actor_loss=0.1842 critic_loss=144435607259.4286 entropy=17.6755 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 48800] reward=-115519832.0 actor_loss=0.3617 critic_loss=140335077729.1035 entropy=17.6834 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 48800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-629541.0 mean_steps=12.2
|
|
[Episode 48810] reward=-117864081.4 actor_loss=0.3055 critic_loss=142858333440.0000 entropy=17.6797 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 48820] reward=-113057842.6 actor_loss=0.3076 critic_loss=133161475731.9111 entropy=17.7027 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 48820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-451513.0 mean_steps=13.9
|
|
[Episode 48830] reward=-118416054.7 actor_loss=0.3080 critic_loss=147084788076.0889 entropy=17.7224 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 48840] reward=-118469911.0 actor_loss=0.3178 critic_loss=139596684947.9111 entropy=17.7135 approx_kl=0.0075 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 48840] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-708085.9 mean_steps=10.7
|
|
[Episode 48850] reward=-118708095.0 actor_loss=0.2817 critic_loss=143836180164.9231 entropy=17.7148 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 48860] reward=-119359695.5 actor_loss=0.3210 critic_loss=144592438067.2000 entropy=17.7191 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 48860] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-572572.6 mean_steps=11.8
|
|
[Episode 48870] reward=-119713868.6 actor_loss=0.2509 critic_loss=150789337239.7037 entropy=17.7194 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 48880] reward=-117840003.9 actor_loss=0.2632 critic_loss=145589460601.9048 entropy=17.7189 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 48880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-462403.3 mean_steps=15.2
|
|
[Episode 48890] reward=-116956630.4 actor_loss=0.3267 critic_loss=137813190535.5294 entropy=17.7181 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 48900] reward=-119861005.5 actor_loss=0.3087 critic_loss=149656591473.7778 entropy=17.7096 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 48900] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-713796.6 mean_steps=10.7
|
|
[Episode 48910] reward=-122997930.7 actor_loss=0.2569 critic_loss=148343312068.9231 entropy=17.7015 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 48920] reward=-118017506.4 actor_loss=0.3277 critic_loss=138599484211.2000 entropy=17.7046 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 48920] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-392533.6 mean_steps=17.6
|
|
[Episode 48930] reward=-119746246.9 actor_loss=0.3291 critic_loss=142132654633.5135 entropy=17.7030 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 48940] reward=-119655884.4 actor_loss=0.3649 critic_loss=148934462841.2632 entropy=17.7048 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Eval 48940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-640816.1 mean_steps=12.2
|
|
[Episode 48950] reward=-118133697.8 actor_loss=0.2592 critic_loss=136480896341.3333 entropy=17.6911 approx_kl=0.0084 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 48960] reward=-118662154.8 actor_loss=0.2200 critic_loss=137275174291.3939 entropy=17.6951 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 48960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-585690.4 mean_steps=12.9
|
|
[Episode 48970] reward=-117598233.7 actor_loss=0.3184 critic_loss=146355858272.7111 entropy=17.6787 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 48980] reward=-120402982.0 actor_loss=0.3000 critic_loss=147311528779.2941 entropy=17.6571 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 48980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509907.2 mean_steps=14.1
|
|
[Episode 48990] reward=-122747348.4 actor_loss=0.1883 critic_loss=147432661447.1111 entropy=17.6491 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 49000] reward=-120302211.8 actor_loss=0.2506 critic_loss=147562894373.9259 entropy=17.6552 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 49000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-620748.4 mean_steps=12.9
|
|
[Episode 49010] reward=-118769594.5 actor_loss=0.3700 critic_loss=143487451477.3333 entropy=17.6382 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 49020] reward=-122212802.7 actor_loss=0.2781 critic_loss=150164139659.6364 entropy=17.6183 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 49020] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-681897.6 mean_steps=11.7
|
|
[Episode 49030] reward=-114518613.7 actor_loss=0.4399 critic_loss=135938186386.2857 entropy=17.6084 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1484 front_blocked=0
|
|
[Episode 49040] reward=-121755345.4 actor_loss=0.2512 critic_loss=143234132286.5778 entropy=17.6078 approx_kl=0.0091 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 49040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489514.3 mean_steps=14.9
|
|
[Episode 49050] reward=-117850500.5 actor_loss=0.2710 critic_loss=137800831249.0667 entropy=17.6035 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 49060] reward=-123874403.3 actor_loss=0.2410 critic_loss=148061403363.5555 entropy=17.6104 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 49060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-483405.1 mean_steps=13.5
|
|
[Episode 49070] reward=-121363814.3 actor_loss=0.3371 critic_loss=144672895522.1333 entropy=17.6159 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 49080] reward=-122141348.8 actor_loss=0.2568 critic_loss=146550560358.4000 entropy=17.6224 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 49080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-601125.2 mean_steps=13.7
|
|
[Episode 49090] reward=-114702001.7 actor_loss=0.3693 critic_loss=137266682733.7143 entropy=17.6302 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 49100] reward=-122846741.8 actor_loss=0.2157 critic_loss=147035175755.2941 entropy=17.6202 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 49100] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-329357.0 mean_steps=18.0
|
|
[Episode 49110] reward=-119421924.7 actor_loss=0.3854 critic_loss=153342627840.0000 entropy=17.6313 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 49120] reward=-118442231.4 actor_loss=0.2406 critic_loss=138947766347.8518 entropy=17.6286 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 49120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-537512.1 mean_steps=12.8
|
|
[Episode 49130] reward=-125292213.1 actor_loss=0.2996 critic_loss=154129762596.5714 entropy=17.6085 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 49140] reward=-118972900.9 actor_loss=0.2338 critic_loss=138945594368.0000 entropy=17.6030 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 49140] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-648029.0 mean_steps=11.4
|
|
[Episode 49150] reward=-121454618.4 actor_loss=0.2377 critic_loss=143556955574.8571 entropy=17.6049 approx_kl=0.0055 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 49160] reward=-120125277.1 actor_loss=0.2845 critic_loss=141859168069.8182 entropy=17.6003 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 49160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371508.0 mean_steps=16.1
|
|
[Episode 49170] reward=-119455723.5 actor_loss=0.2168 critic_loss=139120302200.4706 entropy=17.6046 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 49180] reward=-115007815.4 actor_loss=0.3182 critic_loss=133852907640.4706 entropy=17.5972 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 49180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-659535.0 mean_steps=13.1
|
|
[Episode 49190] reward=-121183650.7 actor_loss=0.2060 critic_loss=148767107451.2592 entropy=17.6004 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 49200] reward=-113841891.4 actor_loss=0.3726 critic_loss=133162804645.6471 entropy=17.6051 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 49200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-514235.9 mean_steps=15.4
|
|
[Episode 49210] reward=-121674722.1 actor_loss=0.2970 critic_loss=144270580874.3784 entropy=17.6106 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 49220] reward=-117374518.1 actor_loss=0.3583 critic_loss=135310302841.9048 entropy=17.6108 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 49220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-598017.6 mean_steps=14.8
|
|
[Episode 49230] reward=-122784275.3 actor_loss=0.3554 critic_loss=148375304192.0000 entropy=17.5895 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 49240] reward=-117354915.0 actor_loss=0.3961 critic_loss=138577216580.2667 entropy=17.5953 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 49240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424806.4 mean_steps=15.8
|
|
[Episode 49250] reward=-119694265.6 actor_loss=0.3088 critic_loss=140871265315.3103 entropy=17.6039 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 49260] reward=-117959553.6 actor_loss=0.3135 critic_loss=133562788285.2174 entropy=17.5988 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 49260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484867.2 mean_steps=15.3
|
|
[Episode 49270] reward=-126532386.2 actor_loss=0.3131 critic_loss=152743166680.1778 entropy=17.5893 approx_kl=0.0081 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 49280] reward=-115492351.2 actor_loss=0.4118 critic_loss=138546885099.5200 entropy=17.5749 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 49280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490904.9 mean_steps=14.2
|
|
[Episode 49290] reward=-120127272.3 actor_loss=0.3229 critic_loss=145791366485.3333 entropy=17.5800 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 49300] reward=-122875482.1 actor_loss=0.3415 critic_loss=148923246324.8696 entropy=17.5852 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 49300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501009.5 mean_steps=13.9
|
|
[Episode 49310] reward=-117769516.1 actor_loss=0.2921 critic_loss=140375044505.6000 entropy=17.5858 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 49320] reward=-118518342.2 actor_loss=0.1450 critic_loss=137636363702.8571 entropy=17.5994 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 49320] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-314119.4 mean_steps=16.9
|
|
[Episode 49330] reward=-115380854.3 actor_loss=0.2450 critic_loss=130935198326.1538 entropy=17.6029 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 49340] reward=-121775756.4 actor_loss=0.1793 critic_loss=145467285147.8261 entropy=17.5963 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 49340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-523917.7 mean_steps=15.7
|
|
[Episode 49350] reward=-122239288.3 actor_loss=0.3199 critic_loss=160152841122.9091 entropy=17.5872 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 49360] reward=-121364510.4 actor_loss=0.3197 critic_loss=144972286853.1200 entropy=17.5837 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 49360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480478.0 mean_steps=14.1
|
|
[Episode 49370] reward=-113228481.0 actor_loss=0.2562 critic_loss=136577124165.8182 entropy=17.5906 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 49380] reward=-115040153.2 actor_loss=0.2952 critic_loss=146030572573.2571 entropy=17.5955 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 49380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-534229.6 mean_steps=12.1
|
|
[Episode 49390] reward=-120222472.2 actor_loss=0.1965 critic_loss=141033312256.0000 entropy=17.5975 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 49400] reward=-115497462.1 actor_loss=0.3893 critic_loss=130543794176.0000 entropy=17.6201 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 49400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478608.3 mean_steps=14.0
|
|
[Episode 49410] reward=-121129905.4 actor_loss=0.2056 critic_loss=140319002272.9143 entropy=17.6278 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 49420] reward=-116650333.3 actor_loss=0.1939 critic_loss=134700694771.8095 entropy=17.6310 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 49420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-548068.8 mean_steps=14.4
|
|
[Episode 49430] reward=-121442970.6 actor_loss=0.2858 critic_loss=143735494580.1482 entropy=17.6231 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 49440] reward=-143886337.6 actor_loss=0.2736 critic_loss=2599326746760.5332 entropy=17.6243 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 49440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-366089.4 mean_steps=16.1
|
|
[Episode 49450] reward=-117491353.3 actor_loss=0.2829 critic_loss=138843902582.1538 entropy=17.6384 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 49460] reward=-158315314.6 actor_loss=0.2528 critic_loss=3835510709101.7144 entropy=17.6452 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 49460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-492101.1 mean_steps=15.4
|
|
[Episode 49470] reward=-121134919.4 actor_loss=0.2195 critic_loss=146969969459.2000 entropy=17.6435 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 49480] reward=-117572691.1 actor_loss=0.2505 critic_loss=139345980074.6667 entropy=17.6506 approx_kl=0.0088 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 49480] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-693532.4 mean_steps=12.5
|
|
[Episode 49490] reward=-117541457.8 actor_loss=0.3266 critic_loss=147994209757.8667 entropy=17.6473 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 49500] reward=-118738759.6 actor_loss=0.2922 critic_loss=151051901701.6889 entropy=17.6484 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 49500] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-626382.4 mean_steps=11.0
|
|
[Episode 49510] reward=-117938809.4 actor_loss=0.3077 critic_loss=139909469525.3333 entropy=17.6632 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 49520] reward=-122697437.8 actor_loss=0.2820 critic_loss=146375754069.3333 entropy=17.6421 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 49520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440909.9 mean_steps=15.4
|
|
[Episode 49530] reward=-115886803.0 actor_loss=0.4181 critic_loss=140147727473.7778 entropy=17.6308 approx_kl=0.0061 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Episode 49540] reward=-122041379.0 actor_loss=0.3209 critic_loss=151361408099.0968 entropy=17.6291 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 49540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-508590.6 mean_steps=12.7
|
|
[Episode 49550] reward=-118892131.6 actor_loss=0.2308 critic_loss=164438513931.1304 entropy=17.6206 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 49560] reward=-115688357.5 actor_loss=0.2701 critic_loss=136273589187.7647 entropy=17.6244 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 49560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451216.8 mean_steps=15.5
|
|
[Episode 49570] reward=-121538377.5 actor_loss=0.2685 critic_loss=163573280085.3333 entropy=17.6298 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 49580] reward=-122543508.3 actor_loss=0.2115 critic_loss=145884417774.9333 entropy=17.6138 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 49580] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575707.2 mean_steps=12.2
|
|
[Episode 49590] reward=-120801606.1 actor_loss=0.2062 critic_loss=170500763863.5789 entropy=17.6191 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 49600] reward=-120787535.2 actor_loss=0.3802 critic_loss=142505692910.9333 entropy=17.6120 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 49600] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-312036.3 mean_steps=17.1
|
|
[Episode 49610] reward=-119801753.4 actor_loss=0.4057 critic_loss=143378199815.3143 entropy=17.6183 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 49620] reward=-123959957.2 actor_loss=0.2192 critic_loss=148558635752.7273 entropy=17.6087 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 49620] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-685735.1 mean_steps=11.4
|
|
[Episode 49630] reward=-120280824.1 actor_loss=0.3519 critic_loss=147046249995.3778 entropy=17.6155 approx_kl=0.0079 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 49640] reward=-121222003.3 actor_loss=0.2572 critic_loss=144702202985.9310 entropy=17.6187 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 49640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476059.2 mean_steps=13.6
|
|
[Episode 49650] reward=-116114822.1 actor_loss=0.3173 critic_loss=140454136854.7556 entropy=17.6120 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 49660] reward=-122289724.4 actor_loss=0.2357 critic_loss=194706046156.8000 entropy=17.6137 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 49660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484093.1 mean_steps=14.8
|
|
[Episode 49670] reward=-205447136.6 actor_loss=1.4725 critic_loss=19355471341158.3984 entropy=17.6233 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 49680] reward=-119812297.0 actor_loss=0.2193 critic_loss=133809211741.6585 entropy=17.6400 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 49680] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-404221.8 mean_steps=16.6
|
|
[Episode 49690] reward=-112500514.5 actor_loss=0.2982 critic_loss=153840653653.3333 entropy=17.6434 approx_kl=0.0057 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 49700] reward=-116915208.6 actor_loss=0.3587 critic_loss=139469234540.0889 entropy=17.6414 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 49700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540972.0 mean_steps=13.2
|
|
[Episode 49710] reward=-117593372.0 actor_loss=0.4657 critic_loss=145425722026.6667 entropy=17.6404 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1517 front_blocked=0
|
|
[Episode 49720] reward=-117456621.3 actor_loss=0.3104 critic_loss=138330947349.9429 entropy=17.6636 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 49720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-505812.3 mean_steps=14.3
|
|
[Episode 49730] reward=-121888493.0 actor_loss=0.2159 critic_loss=148660172920.4706 entropy=17.6704 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 49740] reward=-119093597.6 actor_loss=0.3354 critic_loss=166685091157.3333 entropy=17.6712 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 49740] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-388402.5 mean_steps=17.1
|
|
[Episode 49750] reward=-118349144.1 actor_loss=0.2493 critic_loss=142397320760.8889 entropy=17.6824 approx_kl=0.0056 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 49760] reward=-114708458.5 actor_loss=0.3403 critic_loss=141514817740.8000 entropy=17.6833 approx_kl=0.0117 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 49760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-615005.5 mean_steps=12.7
|
|
[Episode 49770] reward=-121757759.7 actor_loss=0.2648 critic_loss=152927433076.3636 entropy=17.6884 approx_kl=0.0063 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 49780] reward=-121656294.4 actor_loss=0.3344 critic_loss=146648559856.9412 entropy=17.6713 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 49780] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-303143.7 mean_steps=17.6
|
|
[Episode 49790] reward=-123124947.3 actor_loss=0.2311 critic_loss=148647933021.0909 entropy=17.6869 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 49800] reward=-119531438.8 actor_loss=0.2239 critic_loss=134669318371.5556 entropy=17.6851 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 49800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379982.5 mean_steps=15.9
|
|
[Episode 49810] reward=-121241424.5 actor_loss=0.3512 critic_loss=150392904557.7143 entropy=17.6765 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 49820] reward=-124254866.6 actor_loss=0.1466 critic_loss=143923550851.6571 entropy=17.6680 approx_kl=0.0103 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 49820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567853.6 mean_steps=12.4
|
|
[Episode 49830] reward=-126669090.7 actor_loss=0.2600 critic_loss=223871445530.9474 entropy=17.6600 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 49840] reward=-119968208.4 actor_loss=0.2762 critic_loss=137817423689.9556 entropy=17.6505 approx_kl=0.0092 kl_stop=0 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 49840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-437333.9 mean_steps=13.7
|
|
[Episode 49850] reward=-121657287.6 actor_loss=0.3005 critic_loss=139276445575.5294 entropy=17.6627 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 49860] reward=-119310892.2 actor_loss=0.2988 critic_loss=139985351475.2000 entropy=17.6612 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 49860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-470646.9 mean_steps=15.5
|
|
[Episode 49870] reward=-117901834.6 actor_loss=0.2949 critic_loss=135633950515.2000 entropy=17.6558 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 49880] reward=-119993022.7 actor_loss=0.3855 critic_loss=138432804571.4286 entropy=17.6559 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1478 front_blocked=0
|
|
[Eval 49880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586103.4 mean_steps=13.2
|
|
[Episode 49890] reward=-115457907.0 actor_loss=0.2752 critic_loss=136512292971.7895 entropy=17.6503 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 49900] reward=-118632587.9 actor_loss=0.3253 critic_loss=138045496433.7778 entropy=17.6452 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 49900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468102.4 mean_steps=14.8
|
|
[Episode 49910] reward=-117808970.0 actor_loss=0.3051 critic_loss=140357773498.1818 entropy=17.6420 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 49920] reward=-121327395.9 actor_loss=0.3763 critic_loss=152359747349.9429 entropy=17.6417 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 49920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537527.4 mean_steps=14.6
|
|
[Episode 49930] reward=-119239134.2 actor_loss=0.3141 critic_loss=170162724278.8571 entropy=17.6473 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 49940] reward=-110854638.0 actor_loss=0.3614 critic_loss=139812980825.0435 entropy=17.6435 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 49940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-547737.3 mean_steps=14.8
|
|
[Episode 49950] reward=-117829994.2 actor_loss=0.2861 critic_loss=142568816275.9111 entropy=17.6456 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 49960] reward=-118260940.5 actor_loss=0.2971 critic_loss=134256303217.7778 entropy=17.6581 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 49960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-393951.6 mean_steps=17.3
|
|
[Episode 49970] reward=-114666877.6 actor_loss=0.2465 critic_loss=149274068504.3810 entropy=17.6655 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 49980] reward=-119011474.2 actor_loss=0.2711 critic_loss=154741424128.0000 entropy=17.6748 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 49980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400029.9 mean_steps=15.9
|
|
[Episode 49990] reward=-119101228.9 actor_loss=0.3031 critic_loss=138858414899.2000 entropy=17.6645 approx_kl=0.0112 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 50000] reward=-121533684.7 actor_loss=0.2873 critic_loss=146583591561.3658 entropy=17.6588 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 50000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-371020.2 mean_steps=16.2
|
|
[Episode 50010] reward=-109982639.0 actor_loss=0.3749 critic_loss=132438968858.9474 entropy=17.6526 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 50020] reward=-120847135.0 actor_loss=0.2770 critic_loss=141977998034.8235 entropy=17.6699 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 50020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-405082.3 mean_steps=16.9
|
|
[Episode 50030] reward=-121302907.2 actor_loss=0.3140 critic_loss=145853031517.0909 entropy=17.6702 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 50040] reward=-116254305.1 actor_loss=0.2832 critic_loss=143679314522.3529 entropy=17.6745 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 50040] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495827.7 mean_steps=15.0
|
|
[Episode 50050] reward=-117321910.2 actor_loss=0.3304 critic_loss=138262097768.2963 entropy=17.6727 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 50060] reward=-122704789.8 actor_loss=0.2653 critic_loss=159802064896.0000 entropy=17.6646 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 50060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-477091.7 mean_steps=15.7
|
|
[Episode 50070] reward=-116125830.8 actor_loss=0.3173 critic_loss=139022796572.4445 entropy=17.6724 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 50080] reward=-114461872.3 actor_loss=0.3316 critic_loss=163038714643.6923 entropy=17.6810 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 50080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-587672.3 mean_steps=12.8
|
|
[Episode 50090] reward=-120261041.3 actor_loss=0.2567 critic_loss=144508115698.5263 entropy=17.6733 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 50100] reward=-117347297.2 actor_loss=0.3591 critic_loss=140672688947.2000 entropy=17.6743 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 50100] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-582155.9 mean_steps=11.8
|
|
[Episode 50110] reward=-117825253.3 actor_loss=0.3112 critic_loss=148589357641.1429 entropy=17.6625 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 50120] reward=-122005975.5 actor_loss=0.2422 critic_loss=146029834513.0667 entropy=17.6542 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 50120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-421311.5 mean_steps=14.7
|
|
[Episode 50130] reward=-120698261.5 actor_loss=0.3614 critic_loss=145070756546.2069 entropy=17.6453 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 50140] reward=-116356517.0 actor_loss=0.3301 critic_loss=137096682934.8571 entropy=17.6543 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 50140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-573255.0 mean_steps=12.8
|
|
[Episode 50150] reward=-123055144.8 actor_loss=0.2777 critic_loss=150589351526.4000 entropy=17.6566 approx_kl=0.0085 kl_stop=0 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 50160] reward=-119454016.3 actor_loss=0.1825 critic_loss=144326335218.5263 entropy=17.6505 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1230 front_blocked=0
|
|
[Eval 50160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-603000.9 mean_steps=13.6
|
|
[Episode 50170] reward=-119693779.7 actor_loss=0.2990 critic_loss=137848078020.9231 entropy=17.6557 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 50180] reward=-117235575.2 actor_loss=0.2505 critic_loss=138567283446.5185 entropy=17.6523 approx_kl=0.0104 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 50180] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-380684.6 mean_steps=16.8
|
|
[Episode 50190] reward=-122555232.9 actor_loss=0.2698 critic_loss=148300815902.1176 entropy=17.6464 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 50200] reward=-118850204.8 actor_loss=0.3189 critic_loss=143007767347.2000 entropy=17.6501 approx_kl=0.0092 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 50200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478845.0 mean_steps=13.8
|
|
[Episode 50210] reward=-120865402.3 actor_loss=0.2654 critic_loss=142016342334.5778 entropy=17.6447 approx_kl=0.0080 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 50220] reward=-117531876.3 actor_loss=0.3036 critic_loss=138309860101.6889 entropy=17.6462 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 50220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473257.6 mean_steps=13.8
|
|
[Episode 50230] reward=-121551249.5 actor_loss=0.2450 critic_loss=144603193016.3200 entropy=17.6501 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 50240] reward=-118220774.0 actor_loss=0.2836 critic_loss=135213623657.4118 entropy=17.6430 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 50240] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-693907.4 mean_steps=12.4
|
|
[Episode 50250] reward=-118975398.6 actor_loss=0.3707 critic_loss=144194221278.6087 entropy=17.6385 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 50260] reward=-122017157.4 actor_loss=0.2948 critic_loss=142296098343.3846 entropy=17.6338 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 50260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528591.3 mean_steps=13.8
|
|
[Episode 50270] reward=-118813498.5 actor_loss=0.2566 critic_loss=141173612885.3333 entropy=17.6482 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 50280] reward=-118272601.3 actor_loss=0.2396 critic_loss=141256705170.2857 entropy=17.6517 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 50280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-566782.6 mean_steps=14.2
|
|
[Episode 50290] reward=-115645724.9 actor_loss=0.2887 critic_loss=139648155320.3200 entropy=17.6471 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 50300] reward=-118687599.4 actor_loss=0.2629 critic_loss=139039196081.2308 entropy=17.6476 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 50300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-577072.9 mean_steps=14.4
|
|
[Episode 50310] reward=-118052272.7 actor_loss=0.3308 critic_loss=139875747196.3429 entropy=17.6622 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 50320] reward=-126805640.7 actor_loss=0.3026 critic_loss=152529842995.2000 entropy=17.6916 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 50320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-594873.9 mean_steps=13.1
|
|
[Episode 50330] reward=-118328437.6 actor_loss=0.3398 critic_loss=143010510758.9565 entropy=17.7089 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 50340] reward=-122217751.1 actor_loss=0.3597 critic_loss=148035391605.0286 entropy=17.7115 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1465 front_blocked=0
|
|
[Eval 50340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-615849.6 mean_steps=12.0
|
|
[Episode 50350] reward=-119726408.3 actor_loss=0.2301 critic_loss=147093702451.2000 entropy=17.7300 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Episode 50360] reward=-120898074.8 actor_loss=0.2616 critic_loss=145767530496.0000 entropy=17.7301 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 50360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550926.5 mean_steps=13.3
|
|
[Episode 50370] reward=-121621238.8 actor_loss=0.3200 critic_loss=142359868547.2820 entropy=17.7349 approx_kl=0.0107 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 50380] reward=-117542389.9 actor_loss=0.3502 critic_loss=152312438519.7419 entropy=17.7219 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 50380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492305.7 mean_steps=14.6
|
|
[Episode 50390] reward=-123441066.0 actor_loss=0.3300 critic_loss=174675201228.8000 entropy=17.7249 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 50400] reward=-116602915.4 actor_loss=0.3424 critic_loss=143500256768.0000 entropy=17.7287 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 50400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529002.4 mean_steps=14.2
|
|
[Episode 50410] reward=-120166325.6 actor_loss=0.2309 critic_loss=141715981616.4324 entropy=17.7443 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 50420] reward=-120049301.7 actor_loss=0.3406 critic_loss=139946082668.0889 entropy=17.7525 approx_kl=0.0069 kl_stop=0 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 50420] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-694887.9 mean_steps=11.4
|
|
[Episode 50430] reward=-118216940.2 actor_loss=0.2588 critic_loss=139329662113.6842 entropy=17.7469 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 50440] reward=-120372939.1 actor_loss=0.2849 critic_loss=139286734060.3077 entropy=17.7388 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 50440] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-570779.6 mean_steps=11.8
|
|
[Episode 50450] reward=-118127890.9 actor_loss=0.3646 critic_loss=155913220986.4348 entropy=17.7341 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 50460] reward=-117176746.8 actor_loss=0.3682 critic_loss=151090742541.4737 entropy=17.7361 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 50460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511734.0 mean_steps=13.8
|
|
[Episode 50470] reward=-120009288.9 actor_loss=0.1906 critic_loss=140006701528.6154 entropy=17.7325 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 50480] reward=-117799699.1 actor_loss=0.3372 critic_loss=146954872890.5143 entropy=17.7327 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 50480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428919.6 mean_steps=15.7
|
|
[Episode 50490] reward=-119833844.5 actor_loss=0.3009 critic_loss=143136479744.0000 entropy=17.7397 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 50500] reward=-124985947.5 actor_loss=0.2049 critic_loss=148666629513.8462 entropy=17.7428 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 50500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368269.1 mean_steps=15.8
|
|
[Episode 50510] reward=-116260644.4 actor_loss=0.2956 critic_loss=137753901056.0000 entropy=17.7422 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 50520] reward=-123428028.6 actor_loss=0.1774 critic_loss=145934904569.0811 entropy=17.7439 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 50520] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419696.4 mean_steps=15.3
|
|
[Episode 50530] reward=-116569693.9 actor_loss=0.3830 critic_loss=140572880523.6364 entropy=17.7404 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Episode 50540] reward=-120441177.6 actor_loss=0.1824 critic_loss=139864132686.7692 entropy=17.7284 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1289 front_blocked=0
|
|
[Eval 50540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450049.9 mean_steps=15.8
|
|
[Episode 50550] reward=-121597309.0 actor_loss=0.3010 critic_loss=142943382272.0000 entropy=17.7079 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 50560] reward=-111797689.5 actor_loss=0.3332 critic_loss=128384760338.9630 entropy=17.6853 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 50560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481029.4 mean_steps=14.9
|
|
[Episode 50570] reward=-122884570.8 actor_loss=0.2669 critic_loss=181106196844.0889 entropy=17.6629 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 50580] reward=-123619482.2 actor_loss=0.2030 critic_loss=154875412844.0889 entropy=17.6682 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 50580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-391547.2 mean_steps=14.7
|
|
[Episode 50590] reward=-118014092.4 actor_loss=0.2469 critic_loss=139177723904.0000 entropy=17.6647 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 50600] reward=-123417664.7 actor_loss=0.2497 critic_loss=159345215674.1818 entropy=17.6564 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 50600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473358.2 mean_steps=14.4
|
|
[Episode 50610] reward=-118201573.4 actor_loss=0.1342 critic_loss=143770564835.5555 entropy=17.6692 approx_kl=0.0096 kl_stop=0 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 50620] reward=-123285983.0 actor_loss=0.3398 critic_loss=148464895537.5484 entropy=17.6570 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Eval 50620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-406173.0 mean_steps=14.1
|
|
[Episode 50630] reward=-118463928.0 actor_loss=0.3384 critic_loss=140299996811.6364 entropy=17.6688 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 50640] reward=-118295791.8 actor_loss=0.3794 critic_loss=158916352097.5238 entropy=17.6456 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 50640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-514990.8 mean_steps=15.6
|
|
[Episode 50650] reward=-122474845.0 actor_loss=0.3447 critic_loss=161511849515.8857 entropy=17.6349 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 50660] reward=-124532609.0 actor_loss=0.2749 critic_loss=148909575736.8889 entropy=17.6229 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 50660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-514807.3 mean_steps=14.6
|
|
[Episode 50670] reward=-119078866.5 actor_loss=0.3392 critic_loss=139137222974.5778 entropy=17.6172 approx_kl=0.0099 kl_stop=0 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 50680] reward=-120491383.6 actor_loss=0.3063 critic_loss=151943435605.3333 entropy=17.6175 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 50680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504453.8 mean_steps=13.7
|
|
[Episode 50690] reward=-117129466.3 actor_loss=0.3906 critic_loss=135507926584.8889 entropy=17.6117 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1497 front_blocked=0
|
|
[Episode 50700] reward=-119529593.0 actor_loss=0.3100 critic_loss=134329752143.6444 entropy=17.5987 approx_kl=0.0061 kl_stop=0 intervention_rate=0.1432 front_blocked=0
|
|
[Eval 50700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426060.8 mean_steps=15.0
|
|
[Episode 50710] reward=-113596077.2 actor_loss=0.3494 critic_loss=131888460823.8139 entropy=17.5909 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 50720] reward=-115195901.4 actor_loss=0.3254 critic_loss=137524437772.1905 entropy=17.6121 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 50720] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-682988.8 mean_steps=11.3
|
|
[Episode 50730] reward=-121424698.7 actor_loss=0.2199 critic_loss=141191342938.8387 entropy=17.6139 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 50740] reward=-121248200.9 actor_loss=0.3332 critic_loss=139986845354.6667 entropy=17.6126 approx_kl=0.0101 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 50740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-444297.2 mean_steps=15.8
|
|
[Episode 50750] reward=-123450455.9 actor_loss=0.1870 critic_loss=150450667910.0952 entropy=17.6195 approx_kl=0.0105 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 50760] reward=-120533000.2 actor_loss=0.2625 critic_loss=142724093621.6774 entropy=17.6216 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 50760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-452058.4 mean_steps=14.1
|
|
[Episode 50770] reward=-119170468.8 actor_loss=0.3800 critic_loss=140331125485.2683 entropy=17.6248 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1445 front_blocked=0
|
|
[Episode 50780] reward=-118137560.7 actor_loss=0.2631 critic_loss=135046525383.1111 entropy=17.6369 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 50780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-387384.0 mean_steps=15.2
|
|
[Episode 50790] reward=-117188831.7 actor_loss=0.2380 critic_loss=140743333595.4286 entropy=17.6300 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 50800] reward=-116866444.1 actor_loss=0.2818 critic_loss=136800433493.3333 entropy=17.6291 approx_kl=0.0089 kl_stop=0 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 50800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463682.8 mean_steps=14.3
|
|
[Episode 50810] reward=-119291610.0 actor_loss=0.2649 critic_loss=135387200768.0000 entropy=17.6165 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 50820] reward=-120742176.0 actor_loss=0.2040 critic_loss=139191811627.8857 entropy=17.6001 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 50820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417382.1 mean_steps=15.3
|
|
[Episode 50830] reward=-116789840.7 actor_loss=0.2428 critic_loss=132608023483.7333 entropy=17.5770 approx_kl=0.0090 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 50840] reward=-118632147.8 actor_loss=0.2957 critic_loss=141436746547.2000 entropy=17.5626 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 50840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392054.4 mean_steps=15.8
|
|
[Episode 50850] reward=-123179779.2 actor_loss=0.2402 critic_loss=145410404503.7037 entropy=17.5726 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 50860] reward=-124450809.5 actor_loss=0.2432 critic_loss=193198270366.4762 entropy=17.5902 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 50860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430143.0 mean_steps=14.4
|
|
[Episode 50870] reward=-118460555.0 actor_loss=0.3425 critic_loss=186154123264.0000 entropy=17.5846 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 50880] reward=-121307107.1 actor_loss=0.2704 critic_loss=144854163846.0952 entropy=17.5949 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 50880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-582354.5 mean_steps=13.4
|
|
[Episode 50890] reward=-115869453.6 actor_loss=0.3456 critic_loss=139007763212.1905 entropy=17.5913 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 50900] reward=-120035126.0 actor_loss=0.2633 critic_loss=153125304807.6190 entropy=17.5860 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 50900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483401.6 mean_steps=14.6
|
|
[Episode 50910] reward=-116101886.5 actor_loss=0.2410 critic_loss=138188352625.7778 entropy=17.5868 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 50920] reward=-119718446.0 actor_loss=0.2505 critic_loss=137985186201.6000 entropy=17.5784 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 50920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-642049.1 mean_steps=12.8
|
|
[Episode 50930] reward=-116885334.5 actor_loss=0.3157 critic_loss=133040970200.6154 entropy=17.5773 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 50940] reward=-123987512.3 actor_loss=0.2664 critic_loss=174738411520.0000 entropy=17.5810 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 50940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535764.9 mean_steps=13.2
|
|
[Episode 50950] reward=-115861796.1 actor_loss=0.2310 critic_loss=148520710567.7242 entropy=17.5772 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 50960] reward=-111615673.1 actor_loss=0.3696 critic_loss=133519272960.0000 entropy=17.5892 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 50960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422879.9 mean_steps=15.6
|
|
[Episode 50970] reward=-122895764.0 actor_loss=0.2812 critic_loss=152296414439.2258 entropy=17.5793 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 50980] reward=-111285858.2 actor_loss=0.4292 critic_loss=126395456079.6444 entropy=17.5681 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1452 front_blocked=0
|
|
[Eval 50980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-393598.9 mean_steps=15.7
|
|
[Episode 50990] reward=-123012375.9 actor_loss=0.2837 critic_loss=144830617413.8182 entropy=17.5671 approx_kl=0.0098 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 51000] reward=-111758811.8 actor_loss=0.3080 critic_loss=128121387987.4783 entropy=17.5674 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 51000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-611183.3 mean_steps=12.0
|
|
[Episode 51010] reward=-123723181.8 actor_loss=0.3090 critic_loss=145487436913.7778 entropy=17.5724 approx_kl=0.0072 kl_stop=0 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 51020] reward=-116981048.3 actor_loss=0.2966 critic_loss=146672732754.5807 entropy=17.5552 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Eval 51020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-504549.2 mean_steps=14.8
|
|
[Episode 51030] reward=-114256258.0 actor_loss=0.2945 critic_loss=135156996029.9355 entropy=17.5584 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 51040] reward=-123213496.9 actor_loss=0.2559 critic_loss=154819617751.0400 entropy=17.5684 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 51040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560420.9 mean_steps=13.3
|
|
[Episode 51050] reward=-123057746.4 actor_loss=0.2683 critic_loss=336654564647.8222 entropy=17.5841 approx_kl=0.0058 kl_stop=0 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 51060] reward=-120225607.0 actor_loss=0.2724 critic_loss=141854675945.2444 entropy=17.5943 approx_kl=0.0082 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 51060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560808.9 mean_steps=13.4
|
|
[Episode 51070] reward=-121586945.5 actor_loss=0.3302 critic_loss=142855537071.1579 entropy=17.5807 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1413 front_blocked=0
|
|
[Episode 51080] reward=-121952024.1 actor_loss=0.2393 critic_loss=143716711719.8222 entropy=17.5814 approx_kl=0.0063 kl_stop=0 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 51080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-523460.9 mean_steps=14.9
|
|
[Episode 51090] reward=-124042082.3 actor_loss=0.2324 critic_loss=160656018773.3333 entropy=17.5810 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 51100] reward=-115683944.3 actor_loss=0.2536 critic_loss=138019765479.2258 entropy=17.5832 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 51100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-595719.0 mean_steps=13.8
|
|
[Episode 51110] reward=-118444407.3 actor_loss=0.3116 critic_loss=153172141056.0000 entropy=17.5913 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 51120] reward=-117029140.4 actor_loss=0.3120 critic_loss=134276866479.1579 entropy=17.5954 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 51120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-538252.6 mean_steps=12.4
|
|
[Episode 51130] reward=-113466818.8 actor_loss=0.3431 critic_loss=133406961827.8400 entropy=17.6052 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 51140] reward=-115939165.0 actor_loss=0.2971 critic_loss=141231036643.5555 entropy=17.6123 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 51140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-473258.4 mean_steps=14.9
|
|
[Episode 51150] reward=-117758844.1 actor_loss=0.2928 critic_loss=137714130944.0000 entropy=17.6196 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 51160] reward=-123575235.7 actor_loss=0.2348 critic_loss=153888946663.6190 entropy=17.6138 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 51160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-585069.6 mean_steps=14.4
|
|
[Episode 51170] reward=-115836612.1 actor_loss=0.3249 critic_loss=140167526520.4706 entropy=17.6142 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 51180] reward=-113366700.8 actor_loss=0.3484 critic_loss=131638864554.6667 entropy=17.6183 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 51180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-568926.2 mean_steps=14.1
|
|
[Episode 51190] reward=-117473229.8 actor_loss=0.2770 critic_loss=135405882208.7111 entropy=17.6123 approx_kl=0.0066 kl_stop=0 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 51200] reward=-115539735.4 actor_loss=0.2132 critic_loss=148222207422.3590 entropy=17.5990 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Eval 51200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-612459.5 mean_steps=11.8
|
|
[Episode 51210] reward=-113659240.7 actor_loss=0.3103 critic_loss=138846271260.4445 entropy=17.5844 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 51220] reward=-116512203.4 actor_loss=0.2500 critic_loss=142671662102.7556 entropy=17.5608 approx_kl=0.0094 kl_stop=0 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 51220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-528522.2 mean_steps=14.1
|
|
[Episode 51230] reward=-119251099.1 actor_loss=0.2159 critic_loss=147588304005.5652 entropy=17.5584 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 51240] reward=-116939140.8 actor_loss=0.3361 critic_loss=143354528654.2222 entropy=17.5639 approx_kl=0.0096 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 51240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-462749.2 mean_steps=15.8
|
|
[Episode 51250] reward=-125232566.6 actor_loss=0.2387 critic_loss=155230838039.2727 entropy=17.5633 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 51260] reward=-120906344.8 actor_loss=0.2864 critic_loss=144749770279.3846 entropy=17.5567 approx_kl=0.0069 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 51260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-367583.7 mean_steps=16.0
|
|
[Episode 51270] reward=-115807815.3 actor_loss=0.2427 critic_loss=134320956652.3077 entropy=17.5502 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 51280] reward=-122240975.1 actor_loss=0.2226 critic_loss=146824009482.2400 entropy=17.5410 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 51280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-539202.1 mean_steps=14.1
|
|
[Episode 51290] reward=-122374402.7 actor_loss=0.2569 critic_loss=154804125696.0000 entropy=17.5479 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 51300] reward=-122072516.4 actor_loss=0.2333 critic_loss=156959629942.1538 entropy=17.5429 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 51300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483264.4 mean_steps=15.0
|
|
[Episode 51310] reward=-113521730.0 actor_loss=0.3885 critic_loss=133123395584.0000 entropy=17.5513 approx_kl=0.0065 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Episode 51320] reward=-117222770.6 actor_loss=0.3029 critic_loss=145869375577.0435 entropy=17.5534 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 51320] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-746693.5 mean_steps=10.8
|
|
[Episode 51330] reward=-118350836.2 actor_loss=0.2304 critic_loss=139256946688.0000 entropy=17.5520 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 51340] reward=-120165779.5 actor_loss=0.2657 critic_loss=143001771121.7778 entropy=17.5435 approx_kl=0.0073 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 51340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565269.7 mean_steps=13.0
|
|
[Episode 51350] reward=-121425762.2 actor_loss=0.2189 critic_loss=152512445899.0345 entropy=17.5372 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Episode 51360] reward=-118208441.5 actor_loss=0.2971 critic_loss=151568313782.8571 entropy=17.5504 approx_kl=0.0094 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 51360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-656750.6 mean_steps=11.9
|
|
[Episode 51370] reward=-122135267.9 actor_loss=0.3128 critic_loss=143519101513.1429 entropy=17.5500 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 51380] reward=-118201504.6 actor_loss=0.3154 critic_loss=139978672600.6154 entropy=17.5668 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 51380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-673907.1 mean_steps=13.0
|
|
[Episode 51390] reward=-121596541.3 actor_loss=0.2706 critic_loss=144316592500.3636 entropy=17.5569 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 51400] reward=-117904713.3 actor_loss=0.3024 critic_loss=144539926983.1111 entropy=17.5569 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 51400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459467.8 mean_steps=14.4
|
|
[Episode 51410] reward=-119507449.2 actor_loss=0.2773 critic_loss=138365902103.2727 entropy=17.5554 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 51420] reward=-118353175.7 actor_loss=0.3610 critic_loss=136906124219.7333 entropy=17.5501 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 51420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-633748.8 mean_steps=12.2
|
|
[Episode 51430] reward=-117017708.1 actor_loss=0.2592 critic_loss=135622213391.0588 entropy=17.5662 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 51440] reward=-114117186.8 actor_loss=0.2658 critic_loss=129956087125.3333 entropy=17.5614 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 51440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485681.5 mean_steps=13.6
|
|
[Episode 51450] reward=-121614555.7 actor_loss=0.2864 critic_loss=146946767406.5454 entropy=17.5706 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Episode 51460] reward=-120127211.9 actor_loss=0.3164 critic_loss=145675572662.8571 entropy=17.5660 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 51460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531712.6 mean_steps=13.3
|
|
[Episode 51470] reward=-114730050.3 actor_loss=0.2624 critic_loss=136013010261.3333 entropy=17.5667 approx_kl=0.0099 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 51480] reward=-117240131.1 actor_loss=0.2696 critic_loss=134235344622.9333 entropy=17.5800 approx_kl=0.0073 kl_stop=0 intervention_rate=0.1322 front_blocked=0
|
|
[Eval 51480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-444439.2 mean_steps=15.7
|
|
[Episode 51490] reward=-123019905.1 actor_loss=0.1207 critic_loss=145353002097.7778 entropy=17.5666 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1263 front_blocked=0
|
|
[Episode 51500] reward=-120585470.3 actor_loss=0.2979 critic_loss=139938952338.2857 entropy=17.5923 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 51500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-370107.5 mean_steps=15.6
|
|
[Episode 51510] reward=-117788799.6 actor_loss=0.3637 critic_loss=140629486955.3548 entropy=17.5853 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 51520] reward=-114261480.7 actor_loss=0.2790 critic_loss=130350980407.6522 entropy=17.5799 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 51520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-569989.5 mean_steps=12.2
|
|
[Episode 51530] reward=-120373744.1 actor_loss=0.3112 critic_loss=139961681596.6316 entropy=17.5786 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 51540] reward=-122328341.8 actor_loss=0.2288 critic_loss=140814622720.0000 entropy=17.5576 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Eval 51540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442444.7 mean_steps=14.7
|
|
[Episode 51550] reward=-114734961.4 actor_loss=0.3986 critic_loss=143166496768.0000 entropy=17.5605 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 51560] reward=-121775151.9 actor_loss=0.1893 critic_loss=140599591594.6667 entropy=17.5536 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 51560] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-283207.8 mean_steps=16.8
|
|
[Episode 51570] reward=-120340578.5 actor_loss=0.2413 critic_loss=161677932953.6000 entropy=17.5479 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 51580] reward=-120180497.6 actor_loss=0.2869 critic_loss=140552463018.6667 entropy=17.5552 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 51580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-429345.6 mean_steps=14.7
|
|
[Episode 51590] reward=-116045783.2 actor_loss=0.2690 critic_loss=134559549253.8182 entropy=17.5682 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 51600] reward=-110303772.6 actor_loss=0.3125 critic_loss=134765616670.1176 entropy=17.5789 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1296 front_blocked=0
|
|
[Eval 51600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515506.5 mean_steps=13.6
|
|
[Episode 51610] reward=-119413473.2 actor_loss=0.2616 critic_loss=140019448295.6190 entropy=17.5786 approx_kl=0.0093 kl_stop=1 intervention_rate=0.1328 front_blocked=0
|
|
[Episode 51620] reward=-116838735.4 actor_loss=0.3610 critic_loss=141747907840.0000 entropy=17.5782 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 51620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429705.1 mean_steps=15.1
|
|
[Episode 51630] reward=-122748571.6 actor_loss=0.3032 critic_loss=142219749376.0000 entropy=17.5772 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 51640] reward=-123352397.1 actor_loss=0.2932 critic_loss=149387038479.0588 entropy=17.5729 approx_kl=0.0067 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 51640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540028.6 mean_steps=13.8
|
|
[Episode 51650] reward=-118567429.6 actor_loss=0.3588 critic_loss=141779257088.0000 entropy=17.5822 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1439 front_blocked=0
|
|
[Episode 51660] reward=-116473884.7 actor_loss=0.2831 critic_loss=134894680576.0000 entropy=17.5860 approx_kl=0.0068 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 51660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-583231.2 mean_steps=13.1
|
|
[Episode 51670] reward=-117438771.7 actor_loss=0.3076 critic_loss=130846606429.0909 entropy=17.5812 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Episode 51680] reward=-114312716.8 actor_loss=0.2602 critic_loss=134675682099.2000 entropy=17.6025 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1276 front_blocked=0
|
|
[Eval 51680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502453.1 mean_steps=15.1
|
|
[Episode 51690] reward=-118855395.6 actor_loss=0.3876 critic_loss=135542347093.3333 entropy=17.5983 approx_kl=0.0086 kl_stop=0 intervention_rate=0.1465 front_blocked=0
|
|
[Episode 51700] reward=-115612007.9 actor_loss=0.2725 critic_loss=134130940313.6000 entropy=17.6012 approx_kl=0.0077 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 51700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534888.5 mean_steps=13.8
|
|
[Episode 51710] reward=-113201313.3 actor_loss=0.3096 critic_loss=129480346925.1765 entropy=17.5952 approx_kl=0.0060 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Episode 51720] reward=-114430781.7 actor_loss=0.2345 critic_loss=132036848739.0968 entropy=17.5909 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 51720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-498132.4 mean_steps=14.8
|
|
[Episode 51730] reward=-111169382.7 actor_loss=0.3203 critic_loss=131338548077.7143 entropy=17.5843 approx_kl=0.0078 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Episode 51740] reward=-123973270.2 actor_loss=0.2504 critic_loss=151488339577.9048 entropy=17.5856 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 51740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-387850.1 mean_steps=15.6
|
|
[Episode 51750] reward=-114560990.2 actor_loss=0.3015 critic_loss=137540702966.5185 entropy=17.5930 approx_kl=0.0083 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Episode 51760] reward=-116355692.4 actor_loss=0.1599 critic_loss=141752601320.7273 entropy=17.5853 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1204 front_blocked=0
|
|
[Eval 51760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-474334.6 mean_steps=15.2
|
|
[Episode 51770] reward=-114786494.9 actor_loss=0.3530 critic_loss=135914407582.8965 entropy=17.5760 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 51780] reward=-115463730.0 actor_loss=0.2706 critic_loss=135627945691.4286 entropy=17.6062 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1315 front_blocked=0
|
|
[Eval 51780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572611.5 mean_steps=12.8
|
|
[Episode 51790] reward=-118052221.8 actor_loss=0.3482 critic_loss=137618292736.0000 entropy=17.6012 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Episode 51800] reward=-123648793.4 actor_loss=0.2890 critic_loss=155943081301.3333 entropy=17.5896 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Eval 51800] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-568857.6 mean_steps=11.3
|
|
[Episode 51810] reward=-120408461.8 actor_loss=0.2999 critic_loss=142540647082.6667 entropy=17.5988 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 51820] reward=-120514016.6 actor_loss=0.3291 critic_loss=148833154389.3333 entropy=17.6050 approx_kl=0.0086 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Eval 51820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502301.3 mean_steps=14.3
|
|
[Episode 51830] reward=-117778417.6 actor_loss=0.2893 critic_loss=140763573248.0000 entropy=17.6025 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 51840] reward=-126800165.5 actor_loss=0.1932 critic_loss=182224042046.0606 entropy=17.6016 approx_kl=0.0106 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Eval 51840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463902.7 mean_steps=14.4
|
|
[Episode 51850] reward=-116521088.5 actor_loss=0.2790 critic_loss=134991841012.8696 entropy=17.5993 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 51860] reward=-116750624.8 actor_loss=0.3396 critic_loss=134295141888.0000 entropy=17.5908 approx_kl=0.0071 kl_stop=1 intervention_rate=0.1419 front_blocked=0
|
|
[Eval 51860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-480183.5 mean_steps=15.1
|
|
[Episode 51870] reward=-119554356.2 actor_loss=0.2876 critic_loss=139349527210.6667 entropy=17.5810 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 51880] reward=-114362166.6 actor_loss=0.3743 critic_loss=135987062411.6364 entropy=17.5816 approx_kl=0.0102 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 51880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-601690.1 mean_steps=13.8
|
|
[Episode 51890] reward=-117028936.7 actor_loss=0.3164 critic_loss=140521150464.0000 entropy=17.5726 approx_kl=0.0080 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 51900] reward=-115836936.9 actor_loss=0.3023 critic_loss=131191983405.1765 entropy=17.5646 approx_kl=0.0062 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Eval 51900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455494.5 mean_steps=14.2
|
|
[Episode 51910] reward=-117752032.6 actor_loss=0.2732 critic_loss=138984932752.6956 entropy=17.5699 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 51920] reward=-116964735.9 actor_loss=0.3474 critic_loss=140992472557.0370 entropy=17.5710 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 51920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-471254.0 mean_steps=14.8
|
|
[Episode 51930] reward=-116101574.0 actor_loss=0.2997 critic_loss=134056375543.1724 entropy=17.5766 approx_kl=0.0095 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|
|
[Episode 51940] reward=-116361767.2 actor_loss=0.3388 critic_loss=135789581548.3077 entropy=17.5865 approx_kl=0.0070 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Eval 51940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-575468.1 mean_steps=13.2
|
|
[Episode 51950] reward=-114909179.4 actor_loss=0.3411 critic_loss=139409307940.5714 entropy=17.6006 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 51960] reward=-116016113.9 actor_loss=0.2102 critic_loss=136876872681.2444 entropy=17.6002 approx_kl=0.0067 kl_stop=0 intervention_rate=0.1263 front_blocked=0
|
|
[Eval 51960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416077.6 mean_steps=15.2
|
|
[Episode 51970] reward=-118456407.6 actor_loss=0.3103 critic_loss=137869980876.8000 entropy=17.6103 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 51980] reward=-119481477.9 actor_loss=0.2611 critic_loss=137089535277.1765 entropy=17.6108 approx_kl=0.0066 kl_stop=1 intervention_rate=0.1374 front_blocked=0
|
|
[Eval 51980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-632269.0 mean_steps=12.1
|
|
[Episode 51990] reward=-117414322.1 actor_loss=0.3155 critic_loss=137220900930.0645 entropy=17.6132 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1380 front_blocked=0
|
|
[Episode 52000] reward=-112980240.4 actor_loss=0.3652 critic_loss=129200692875.6364 entropy=17.6249 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 52000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623633.2 mean_steps=11.8
|
|
[Episode 52010] reward=-120367633.6 actor_loss=0.3241 critic_loss=143335360512.0000 entropy=17.6057 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Episode 52020] reward=-120272058.0 actor_loss=0.3416 critic_loss=146020057088.0000 entropy=17.6181 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Eval 52020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-389264.6 mean_steps=15.8
|
|
[Episode 52030] reward=-118321489.1 actor_loss=0.2542 critic_loss=136602902528.0000 entropy=17.6197 approx_kl=0.0100 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 52040] reward=-118421280.9 actor_loss=0.3965 critic_loss=138782235247.3044 entropy=17.6055 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 52040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-456162.1 mean_steps=13.8
|
|
[Episode 52050] reward=-119432860.4 actor_loss=0.3674 critic_loss=145613026099.2000 entropy=17.6105 approx_kl=0.0091 kl_stop=1 intervention_rate=0.1387 front_blocked=0
|
|
[Episode 52060] reward=-109609238.3 actor_loss=0.3014 critic_loss=128962734398.5778 entropy=17.6235 approx_kl=0.0078 kl_stop=0 intervention_rate=0.1302 front_blocked=0
|
|
[Eval 52060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571718.4 mean_steps=13.4
|
|
[Episode 52070] reward=-116846762.9 actor_loss=0.2098 critic_loss=141196004104.8276 entropy=17.6141 approx_kl=0.0084 kl_stop=1 intervention_rate=0.1257 front_blocked=0
|
|
[Episode 52080] reward=-108692133.1 actor_loss=0.4520 critic_loss=129518668276.6222 entropy=17.6252 approx_kl=0.0071 kl_stop=0 intervention_rate=0.1439 front_blocked=0
|
|
[Eval 52080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471242.6 mean_steps=14.1
|
|
[Episode 52090] reward=-117188177.2 actor_loss=0.2552 critic_loss=132932759096.8889 entropy=17.6245 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1322 front_blocked=0
|
|
[Episode 52100] reward=-116987151.5 actor_loss=0.2355 critic_loss=132969124486.7368 entropy=17.6327 approx_kl=0.0089 kl_stop=1 intervention_rate=0.1348 front_blocked=0
|
|
[Eval 52100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442574.9 mean_steps=14.4
|
|
[Episode 52110] reward=-114435844.4 actor_loss=0.2636 critic_loss=135781704893.6296 entropy=17.6189 approx_kl=0.0079 kl_stop=1 intervention_rate=0.1309 front_blocked=0
|
|
[Episode 52120] reward=-121161405.0 actor_loss=0.2991 critic_loss=140295888896.0000 entropy=17.6161 approx_kl=0.0097 kl_stop=1 intervention_rate=0.1393 front_blocked=0
|
|
[Eval 52120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485397.8 mean_steps=14.6
|
|
[Episode 52130] reward=-110768241.7 actor_loss=0.2550 critic_loss=133809386057.1429 entropy=17.6086 approx_kl=0.0082 kl_stop=1 intervention_rate=0.1237 front_blocked=0
|
|
[Episode 52140] reward=-116194389.5 actor_loss=0.3844 critic_loss=136326562702.2222 entropy=17.6022 approx_kl=0.0087 kl_stop=0 intervention_rate=0.1458 front_blocked=0
|
|
[Eval 52140] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-738850.8 mean_steps=10.9
|
|
[Episode 52150] reward=-117748912.2 actor_loss=0.3431 critic_loss=138618670213.5652 entropy=17.6028 approx_kl=0.0072 kl_stop=1 intervention_rate=0.1400 front_blocked=0
|
|
[Episode 52160] reward=-115163496.1 actor_loss=0.2144 critic_loss=141121696426.6667 entropy=17.6098 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1250 front_blocked=0
|
|
[Eval 52160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481621.8 mean_steps=13.6
|
|
[Episode 52170] reward=-118175665.7 actor_loss=0.3111 critic_loss=146036490581.3333 entropy=17.6237 approx_kl=0.0058 kl_stop=1 intervention_rate=0.1302 front_blocked=0
|
|
[Episode 52180] reward=-120622991.2 actor_loss=0.2957 critic_loss=139214986353.7778 entropy=17.6245 approx_kl=0.0098 kl_stop=0 intervention_rate=0.1426 front_blocked=0
|
|
[Eval 52180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-602485.8 mean_steps=13.7
|
|
[Episode 52190] reward=-121432829.1 actor_loss=0.2855 critic_loss=146947130919.3846 entropy=17.6432 approx_kl=0.0075 kl_stop=1 intervention_rate=0.1354 front_blocked=0
|
|
[Episode 52200] reward=-123293288.4 actor_loss=0.2256 critic_loss=144524478691.5555 entropy=17.6377 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1335 front_blocked=0
|
|
[Eval 52200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415050.3 mean_steps=16.2
|
|
[Episode 52210] reward=-119398110.1 actor_loss=0.3893 critic_loss=145254124058.9474 entropy=17.6299 approx_kl=0.0051 kl_stop=1 intervention_rate=0.1432 front_blocked=0
|
|
[Episode 52220] reward=-115337571.0 actor_loss=0.2258 critic_loss=137709637416.4211 entropy=17.6379 approx_kl=0.0064 kl_stop=1 intervention_rate=0.1283 front_blocked=0
|
|
[Eval 52220] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-661668.4 mean_steps=10.3
|
|
[Episode 52230] reward=-119114709.9 actor_loss=0.2984 critic_loss=426908081872.5926 entropy=17.6403 approx_kl=0.0074 kl_stop=1 intervention_rate=0.1211 front_blocked=0
|
|
[Episode 52240] reward=-114780240.9 actor_loss=0.2865 critic_loss=134454527772.4444 entropy=17.6269 approx_kl=0.0076 kl_stop=1 intervention_rate=0.1341 front_blocked=0
|
|
[Eval 52240] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-368923.3 mean_steps=16.9
|
|
[Episode 52250] reward=-112023864.0 actor_loss=0.3557 critic_loss=133680321142.1538 entropy=17.6393 approx_kl=0.0085 kl_stop=1 intervention_rate=0.1367 front_blocked=0
|
|
[Episode 52260] reward=-116021024.5 actor_loss=0.3669 critic_loss=135600031402.6667 entropy=17.6365 approx_kl=0.0081 kl_stop=1 intervention_rate=0.1406 front_blocked=0
|
|
[Eval 52260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-455744.0 mean_steps=15.4
|
|
[Episode 52270] reward=-119999511.8 actor_loss=0.2428 critic_loss=138345820918.5185 entropy=17.6425 approx_kl=0.0087 kl_stop=1 intervention_rate=0.1361 front_blocked=0
|