# Stopping criteria and rollout settings.
# Each key sits on its own line: when collapsed onto one line, everything
# after the first " #" is a comment, so only stop_thresh would be parsed.
stop_thresh: 0.25  # PPL stopping criterion
max_step: 10  # max-step stopping criterion
num_rollouts: 1  # number of initial thinking rollouts per query; not tested
# NOTE(review): the original text continues with a truncated entry that
# begins "num ..." -- restore the remaining key(s) from the full file.