ForagerRL_step3 - gama-platform/gama GitHub Wiki
By Killian Trouillet
This third step introduces the core RL components: the reward function and episode management. The forager now earns rewards for finding food and penalties for hitting obstacles. When an episode ends (food found or max steps reached), the simulation resets.
- Definition of reward values: +100 for food, -1 per step, -5 for hitting a wall/obstacle
- Definition of episode management: step counter, episode counter, reset logic
- Addition of `monitor` elements to observe values in real-time
- Addition of a `food_found` global flag
We add several global variables to track the episode state:
global {
// ... (grid parameters from before)
int max_steps_per_episode <- 200;
int step_count <- 0;
int episode <- 0;
float episode_reward <- 0.0;
float last_episode_reward <- 0.0;
bool food_found <- false;
- `max_steps_per_episode`: If the forager hasn't found food after this many steps, the episode ends (timeout).
- `episode_reward`: Accumulates the reward within the current episode.
- `food_found`: A global flag set by the forager when it reaches the food cell.
A global reflex checks whether the episode should end:
// Global reflex, run once per simulation cycle:
// counts the step and ends the episode on success or timeout.
reflex manage_episode {
step_count <- step_count + 1;
// The episode ends when the food was found or the step budget is spent.
if (food_found or step_count >= max_steps_per_episode) {
do end_episode;
}
}
- `do end_episode`: Calls the `end_episode` action defined below.
This action logs the episode result, resets all counters, restores the food, and moves the forager back to the start.
// Finishes the current episode: logs the result, resets the counters,
// restores the food cell, and moves the forager back to its start.
action end_episode {
episode <- episode + 1;
// Keep the finished episode's total visible in the monitors.
last_episode_reward <- episode_reward;
write "Episode " + episode + " | Steps: " + step_count
+ " | Reward: " + episode_reward
+ " | Food found: " + food_found;
// Reset the per-episode state.
episode_reward <- 0.0;
step_count <- 0;
food_found <- false;
// Put the food back (the forager consumed it when it arrived there).
ask world_cell grid_at {food_x, food_y} {
is_food <- true;
}
// Return the forager to the start cell {0, 0}.
ask forager[0] {
my_cell <- world_cell grid_at {0, 0};
location <- my_cell.location;
}
}
- `forager[0]`: Accesses the first (and only) forager agent.
- `ask agent { ... }`: Executes a block of code in the context of the specified agent.
We modify the forager's movement reflex to compute rewards:
reflex random_move {
// ... (direction selection as before)
float step_reward <- -1.0; // Default: small penalty for each step
if (new_x >= 0 and ...) {
world_cell target <- world_cell grid_at {new_x, new_y};
if (not target.is_obstacle) {
my_cell <- target;
location <- my_cell.location;
if (my_cell.is_food) {
my_cell.is_food <- false;
step_reward <- 100.0; // Big reward for food!
food_found <- true;
}
} else {
step_reward <- -5.0; // Penalty for hitting a wall
}
} else {
step_reward <- -5.0; // Penalty for going out of bounds
}
episode_reward <- episode_reward + step_reward;
}
We add monitor elements to the experiment output to watch values in real-time:
monitor "Episode" value: episode;
monitor "Step" value: step_count;
monitor "Current Reward" value: episode_reward;
monitor "Last Episode Reward" value: last_episode_reward;
/**
* Name: SmartForager - Step 3: Rewards and Episodes
* Author: Killian Trouillet
* Description: This third step introduces the reward system and episode management.
* The forager gets +100 for finding food, -1 per step, -5 for hitting walls/boundaries.
* An episode ends when food is found or max_steps is reached.
* Tags: reinforcement-learning, reward, episode, tutorial
*/
model SmartForager
global {
	// ---------------- World configuration ----------------
	// Number of cells per side of the square grid.
	int grid_size <- 10;
	// Food cell: the corner opposite the forager's start cell {0, 0}.
	// Derived from grid_size so that resizing the grid keeps the food
	// in the far corner (grid_size = 10 gives the original {9, 9}).
	int food_x <- grid_size - 1;
	int food_y <- grid_size - 1;
	// Fixed obstacle layout: two small walls inside the grid.
	list<point> obstacle_positions <- [{2,2}, {3,2}, {2,3}, {6,4}, {7,4}, {7,5}];

	// ---------------- Episode management ----------------
	int max_steps_per_episode <- 200;   // timeout: episode ends after this many steps
	int step_count <- 0;                // steps elapsed in the current episode
	int episode <- 0;                   // number of completed episodes
	float episode_reward <- 0.0;        // reward accumulated in the current episode
	float last_episode_reward <- 0.0;   // total reward of the previous episode
	bool food_found <- false;           // set by the forager when it reaches the food

	init {
		// Mark the food cell.
		ask world_cell grid_at {food_x, food_y} {
			is_food <- true;
		}
		// Mark the obstacle cells.
		loop pos over: obstacle_positions {
			ask world_cell grid_at pos {
				is_obstacle <- true;
			}
		}
		// Single forager starting in the top-left corner.
		create forager number: 1 {
			my_cell <- world_cell grid_at {0, 0};
			location <- my_cell.location;
		}
	}

	// Counts steps and ends the episode on success (food found) or timeout.
	reflex manage_episode {
		step_count <- step_count + 1;
		if (food_found or step_count >= max_steps_per_episode) {
			do end_episode;
		}
	}

	// Logs the finished episode, resets the per-episode state,
	// restores the food cell, and moves the forager back to its start.
	action end_episode {
		episode <- episode + 1;
		last_episode_reward <- episode_reward;
		write "Episode " + episode + " | Steps: " + step_count
			+ " | Reward: " + episode_reward
			+ " | Food found: " + food_found;
		// Reset for next episode
		episode_reward <- 0.0;
		step_count <- 0;
		food_found <- false;
		// Restore the food
		ask world_cell grid_at {food_x, food_y} {
			is_food <- true;
		}
		// Reset forager position
		ask forager[0] {
			my_cell <- world_cell grid_at {0, 0};
			location <- my_cell.location;
		}
	}
}
// Square grid of cells; dimensions follow the global grid_size so the
// displayed world and the forager's movement bounds stay consistent
// (the original hard-coded 10 would desynchronize if grid_size changed).
grid world_cell width: grid_size height: grid_size neighbors: 4 {
	bool is_food <- false;     // true only on the cell currently holding the food
	bool is_obstacle <- false; // true on wall cells (impassable)
	// Obstacles are rendered dark gray; all other cells stay white.
	rgb color <- #white update: is_obstacle ? rgb(60, 60, 60) : #white;
}
species forager {
	// The grid cell the forager currently occupies.
	world_cell my_cell;

	// Picks one of the four cardinal neighbours uniformly at random,
	// moves there when the target is inside the grid and not an obstacle,
	// and folds the resulting reward into the global episode_reward.
	reflex random_move {
		// Candidate offsets indexed 0..3: up, right, down, left
		// (same mapping from rnd(3) as the original switch).
		list<point> offsets <- [{0, -1}, {1, 0}, {0, 1}, {-1, 0}];
		point chosen <- offsets[rnd(3)];
		int target_x <- my_cell.grid_x + int(chosen.x);
		int target_y <- my_cell.grid_y + int(chosen.y);
		float step_reward <- -1.0; // default: small living penalty per step
		bool inside <- (target_x >= 0) and (target_x < grid_size) and (target_y >= 0) and (target_y < grid_size);
		if (inside) {
			world_cell target <- world_cell grid_at {target_x, target_y};
			if (target.is_obstacle) {
				step_reward <- -5.0; // bumped into a wall: stay put, pay penalty
			} else {
				my_cell <- target;
				location <- my_cell.location;
				if (my_cell.is_food) {
					my_cell.is_food <- false;
					step_reward <- 100.0; // big reward for reaching the food
					food_found <- true;
				}
			}
		} else {
			step_reward <- -5.0; // tried to leave the grid: stay put, pay penalty
		}
		episode_reward <- episode_reward + step_reward;
	}

	aspect default {
		draw circle(0.8) color: #blue;
	}
}
// GUI experiment: exposes the step budget as a parameter, shows the grid
// with the forager and a food overlay, and tracks episode progress live.
experiment smart_forager type: gui {
parameter "Max steps per episode" var: max_steps_per_episode min: 50 max: 1000 category: "Simulation";
output {
display "Grid World" {
grid world_cell border: #lightgray;
species forager;
// Overlay layer: draws a green disc on every cell that currently holds food
// (disappears once the forager consumes it, reappears after the reset).
graphics "food" {
ask world_cell where each.is_food {
draw circle(5) color: rgb(50, 180, 50);
}
}
}
// Live counters: episode index, step within the episode, reward
// accumulated so far, and the previous episode's total reward.
monitor "Episode" value: episode;
monitor "Step" value: step_count;
monitor "Current Reward" value: episode_reward;
monitor "Last Episode Reward" value: last_episode_reward;
}
}