From 687a14059c0bd1a8b8c10cbdf82155ba57f6c31a Mon Sep 17 00:00:00 2001 From: Niclas Dobbertin Date: Fri, 20 Sep 2024 11:08:29 +0200 Subject: reward for done retrieval to prevent loop wip --- modeling/prod_numbers.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'modeling/prod_numbers.py') diff --git a/modeling/prod_numbers.py b/modeling/prod_numbers.py index 3943403..773768d 100644 --- a/modeling/prod_numbers.py +++ b/modeling/prod_numbers.py @@ -28,6 +28,7 @@ def number(Model): number_expand_arg1_done = Model.productionstring( name="number_expand_arg1_done", + reward=100, string=""" =g> isa math_goal @@ -54,6 +55,7 @@ def number(Model): number_expand_arg1_fail = Model.productionstring( name="number_expand_arg1_fail", + # reward=-100, string=""" =g> isa math_goal @@ -97,6 +99,7 @@ def number(Model): number_expand_arg2_fail = Model.productionstring( name="number_expand_arg2_fail", + # reward=-100, string=""" =g> isa math_goal @@ -116,6 +119,7 @@ def number(Model): number_expand_arg2_done = Model.productionstring( name="number_expand_arg2_done", + reward=100, string=""" =g> isa math_goal @@ -142,6 +146,7 @@ def number(Model): number_expand_done = Model.productionstring( name="number_expand_done", + reward=100, string=""" =g> isa math_goal @@ -191,6 +196,7 @@ def number(Model): number_contract_result_done = Model.productionstring( name="number_contract_result_done", + reward=100, string=""" =g> isa math_goal -- cgit v1.2.3