diff --git a/Logs/nohup_stable_v2_20260401_185207.out b/Logs/nohup_stable_v2_20260401_185207.out index d2e1124..2841561 100644 --- a/Logs/nohup_stable_v2_20260401_185207.out +++ b/Logs/nohup_stable_v2_20260401_185207.out @@ -659,3 +659,4 @@ nohup: ignoring input [Episode 4390] reward=-51242006.3 actor_loss=0.1082 critic_loss=127266685610.6667 entropy=3.9290 ent_coef=0.001960 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0801 front_blocked=0 [Episode 4400] reward=-46770183.3 actor_loss=0.1399 critic_loss=118105165238.8571 entropy=3.9248 ent_coef=0.001960 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0781 front_blocked=0 [Eval 4400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-383491.4 mean_steps=13.9 +[Episode 4410] reward=-48016708.9 actor_loss=0.1321 critic_loss=121413126616.6154 entropy=3.9278 ent_coef=0.001960 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0859 front_blocked=0 diff --git a/Logs/train_20260330_221647_gpu1_long/events.out.tfevents.1774880214.xie-4090.3544434.0 b/Logs/train_20260330_221647_gpu1_long/events.out.tfevents.1774880214.xie-4090.3544434.0 index 380c798..ca96d43 100644 Binary files a/Logs/train_20260330_221647_gpu1_long/events.out.tfevents.1774880214.xie-4090.3544434.0 and b/Logs/train_20260330_221647_gpu1_long/events.out.tfevents.1774880214.xie-4090.3544434.0 differ diff --git a/Logs/train_20260330_221647_gpu1_long/train.out b/Logs/train_20260330_221647_gpu1_long/train.out index ee40ef1..107f0af 100644 --- a/Logs/train_20260330_221647_gpu1_long/train.out +++ b/Logs/train_20260330_221647_gpu1_long/train.out @@ -7725,3 +7725,5 @@ nohup: ignoring input [Episode 51500] reward=-120585470.3 actor_loss=0.2979 critic_loss=139938952338.2857 entropy=17.5923 approx_kl=0.0110 kl_stop=1 intervention_rate=0.1387 front_blocked=0 [Eval 51500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-370107.5 mean_steps=15.6 [Episode 51510] reward=-117788799.6 actor_loss=0.3637 critic_loss=140629486955.3548 entropy=17.5853 approx_kl=0.0090 kl_stop=1 intervention_rate=0.1361 front_blocked=0 +[Episode 51520] reward=-114261480.7 actor_loss=0.2790 critic_loss=130350980407.6522 entropy=17.5799 approx_kl=0.0088 kl_stop=1 intervention_rate=0.1341 front_blocked=0 +[Eval 51520] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-569989.5 mean_steps=12.2 diff --git a/Logs/train_20260401_185208_stable_v2/events.out.tfevents.1775040729.xie-4090.3164943.0 b/Logs/train_20260401_185208_stable_v2/events.out.tfevents.1775040729.xie-4090.3164943.0 index 3c70a62..fcf3a2e 100644 Binary files a/Logs/train_20260401_185208_stable_v2/events.out.tfevents.1775040729.xie-4090.3164943.0 and b/Logs/train_20260401_185208_stable_v2/events.out.tfevents.1775040729.xie-4090.3164943.0 differ