ForagerRL_step4 - gama-platform/gama GitHub Wiki
By Killian Trouillet
This fourth step introduces the Q-Table, the memory of the agent. A Q-Table stores a "quality score" for every possible (state, action) pair. In this step, we define the data structure but the agent still moves randomly — we are setting up the infrastructure.
- Definition of the Q-Table as a `map<string, float>`
- Definition of a `get_state` action to convert the agent's position into a string key
- Definition of a `get_q` action for safe Q-value lookup
- Population of Q-Table entries during random exploration
We add the Q-Table as a map inside the forager species. The key is a string combining the state and action (e.g., "3_5::right") and the value is the Q-value (a float).
species forager {
world_cell my_cell;
// The Q-Table: stores quality scores for (state, action) pairs
map<string, float> q_table;
list<string> action_list <- ["up", "right", "down", "left"];
- `map<string, float>`: GAML's dictionary type. We use strings as keys and floats as values.
- `action_list`: A list of the four possible actions. This will be reused throughout the RL logic.
We need a way to describe "where the agent is" as a simple string. We concatenate the grid coordinates:
string get_state {
return string(my_cell.grid_x) + "_" + string(my_cell.grid_y);
}
For example, if the forager is at column 3, row 5, the state string is "3_5". This gives us 100 unique states for a 10×10 grid.
When we look up a (state, action) pair that the agent has never visited, the Q-Table has no entry. We must handle this gracefully:
float get_q (string s, string a) {
string key <- s + "::" + a;
if (q_table contains_key key) {
return float(q_table[key]);
}
return 0.0; // Default: unvisited = neutral = 0
}
- `contains_key`: A GAML operator that checks whether a key exists in a map.
- We use `"::"` as a separator between the state and action parts of the key.
In the movement reflex, we insert entries into the Q-Table as the agent explores. The values are initialized to 0.0:
// After moving:
string key <- current_state + "::" + action_taken;
if (not (q_table contains_key key)) {
q_table[key] <- 0.0;
}
We add a monitor to track the Q-Table growth:
monitor "Q-Table Size" value: length(forager[0].q_table);
/**
* Name: SmartForager - Step 4: The Q-Table
* Author: Killian Trouillet
* Description: This fourth step introduces the Q-Table, the "memory" of the agent.
* We add a map to store Q-values for each (state, action) pair.
* The agent still moves randomly but now records its state at each step.
* Tags: reinforcement-learning, q-table, map, tutorial
*/
model SmartForager
global {
	// --- World configuration ---
	int grid_size <- 10;              // Side length of the square arena
	int food_x <- 9;                  // Food spawn column
	int food_y <- 9;                  // Food spawn row
	list<point> obstacle_positions <- [{2,2}, {3,2}, {2,3}, {6,4}, {7,4}, {7,5}];
	int max_steps_per_episode <- 200; // Step budget before a forced episode reset

	// --- Episode bookkeeping ---
	int step_count <- 0;              // Steps taken in the current episode
	int episode <- 0;                 // Number of completed episodes
	float episode_reward <- 0.0;      // Reward accumulated in the current episode
	float last_episode_reward <- 0.0; // Reward of the previous episode (for monitors)
	bool food_found <- false;         // Set by the forager when it eats the food

	init {
		do place_food;
		loop pos over: obstacle_positions {
			ask world_cell grid_at pos {
				is_obstacle <- true;
			}
		}
		create forager number: 1 {
			my_cell <- world_cell grid_at {0, 0};
			location <- my_cell.location;
		}
	}

	// (Re)spawns the food on its fixed cell. Shared by init and end_episode
	// so the spawn location is defined in exactly one place.
	action place_food {
		ask world_cell grid_at {food_x, food_y} {
			is_food <- true;
		}
	}

	// Ends the episode as soon as the food is eaten or the step budget runs out.
	reflex manage_episode {
		step_count <- step_count + 1;
		if (food_found or step_count >= max_steps_per_episode) {
			do end_episode;
		}
	}

	// Logs the episode summary, resets the counters, respawns the food
	// and teleports the forager back to the start cell {0, 0}.
	action end_episode {
		episode <- episode + 1;
		last_episode_reward <- episode_reward;
		write "Episode " + episode + " | Steps: " + step_count
		+ " | Reward: " + episode_reward
		+ " | Q-Table size: " + length(forager[0].q_table);
		episode_reward <- 0.0;
		step_count <- 0;
		food_found <- false;
		do place_food;
		ask forager[0] {
			my_cell <- world_cell grid_at {0, 0};
			location <- my_cell.location;
		}
	}
}
// One cell of the arena. Width/height follow the global grid_size so the
// grid dimensions and the forager's bounds checks (which already use
// grid_size) can never drift apart.
grid world_cell width: grid_size height: grid_size neighbors: 4 {
	bool is_food <- false;     // True while the food sits on this cell
	bool is_obstacle <- false; // Impassable cell; the forager bounces off it
	rgb color <- #white update: is_obstacle ? rgb(60, 60, 60) : #white;
}
species forager {
	world_cell my_cell; // The cell the agent currently occupies

	// === Q-Table: maps "state::action" keys to quality scores ===
	map<string, float> q_table;
	list<string> action_list <- ["up", "right", "down", "left"];

	// Encodes the current grid position as a unique state key, e.g. "3_5".
	string get_state {
		return string(my_cell.grid_x) + "_" + string(my_cell.grid_y);
	}

	// Safe Q-value lookup: returns 0.0 (neutral) for (state, action) pairs
	// that have never been visited, instead of failing on a missing key.
	float get_q (string s, string a) {
		string key <- s + "::" + a;
		if (q_table contains_key key) {
			return float(q_table[key]);
		}
		return 0.0;
	}

	// Random exploration. No learning yet: the reflex only records the
	// (state, action) pairs it encounters by seeding Q-Table entries at 0.0.
	reflex random_move {
		// State BEFORE moving: the chosen action belongs to this state.
		string current_state <- get_state();
		// GAML's rnd(3) draws an integer in [0, 3] inclusive, so all four
		// directions of action_list are reachable.
		int direction <- rnd(3);
		int new_x <- my_cell.grid_x;
		int new_y <- my_cell.grid_y;
		switch direction {
			match 0 { new_y <- new_y - 1; } // up
			match 1 { new_x <- new_x + 1; } // right
			match 2 { new_y <- new_y + 1; } // down
			match 3 { new_x <- new_x - 1; } // left
		}
		float step_reward <- -1.0; // Cost of an ordinary step
		if (new_x >= 0 and new_x < grid_size and new_y >= 0 and new_y < grid_size) {
			world_cell target <- world_cell grid_at {new_x, new_y};
			if (not target.is_obstacle) {
				my_cell <- target;
				location <- my_cell.location;
				if (my_cell.is_food) {
					my_cell.is_food <- false;
					step_reward <- 100.0; // Goal reached
					food_found <- true;
				}
			} else {
				step_reward <- -5.0; // Bumped into an obstacle
			}
		} else {
			step_reward <- -5.0; // Tried to step off the grid
		}
		// Seed a Q-Table entry for the (state, action) pair just tried.
		// The value stays 0.0 for now — learning comes in the next step.
		// (The previously computed post-move state was never used here,
		// so it has been removed.)
		string action_taken <- action_list[direction];
		string key <- current_state + "::" + action_taken;
		if (not (q_table contains_key key)) {
			q_table[key] <- 0.0;
		}
		episode_reward <- episode_reward + step_reward;
	}

	aspect default {
		draw circle(0.8) color: #blue;
	}
}
// GUI experiment: grid display with a food overlay, plus episode monitors.
experiment smart_forager type: gui {
parameter "Max steps per episode" var: max_steps_per_episode min: 50 max: 1000 category: "Simulation";
output {
display "Grid World" {
grid world_cell border: #lightgray;
species forager;
// Draws the food as a green disc on top of its cell.
// NOTE(review): uses ask-with-draw inside a graphics layer; a
// `loop ... over:` would be the more common idiom — confirm rendering.
graphics "food" {
ask world_cell where each.is_food {
draw circle(5) color: rgb(50, 180, 50);
}
}
}
monitor "Episode" value: episode;
monitor "Step" value: step_count;
monitor "Current Reward" value: episode_reward;
monitor "Last Episode Reward" value: last_episode_reward;
// Tracks Q-Table growth: should plateau once every reachable
// (state, action) pair has been visited at least once.
monitor "Q-Table Size" value: length(forager[0].q_table);
}
}