/**
 * Minimal assertion helper.
 *
 * @param {*} cond - Condition that must be truthy.
 * @param {string} [msg] - Message carried by the thrown error.
 * @throws {Error} When `cond` is falsy.
 */
function assert(cond, msg) {
  if (!cond) {
    // Throw a real Error (not a bare string) so the failure has a stack trace.
    throw new Error(msg === undefined ? "assertion failed" : msg);
  }
}

/**
 * Rectangular grid stored row-major in a flat array.
 *
 * Cell values used by this file: 0 = free, 1 = box, 2 = bot.
 *
 * @param {number} width - Number of columns.
 * @param {number} height - Number of rows.
 * @param {Array<number>} list - Row-major cell values; must hold exactly
 *     width * height entries. The array is stored by reference, not copied.
 * @throws {Error} When the list length does not match the dimensions.
 */
function Field(width, height, list) {
  this.width = width;
  this.height = height;
  this.list = list;
  assert(
    list.length === width * height,
    `invalid 2D field size - ${list.length} != ${width} * ${height}`
  );
}

/**
 * Finds the first cell (scanning row by row) that contains the bot (value 2).
 *
 * @returns {{x: number, y: number}|undefined} The bot's coordinates, or
 *     undefined when no cell holds the bot.
 */
Field.prototype.bot_position = function () {
  for (let y = 0; y < this.height; ++y) {
    for (let x = 0; x < this.width; ++x) {
      if (this.list[y * this.width + x] === 2) {
        return { x: x, y: y };
      }
    }
  }
  return undefined;
};

/**
 * Reads a cell.
 *
 * @param {number} x - Column.
 * @param {number} y - Row.
 * @returns {number|undefined} The cell value, or undefined when (x, y) lies
 *     outside the grid.
 */
Field.prototype.get = function (x, y) {
  if (x < 0 || x >= this.width) return undefined;
  if (y < 0 || y >= this.height) return undefined;
  return this.list[y * this.width + x];
};

/**
 * Writes a cell.
 *
 * @param {number} x - Column.
 * @param {number} y - Row.
 * @param {number} value - New cell value.
 * @throws {Error} When (x, y) lies outside the grid.
 */
Field.prototype.set = function (x, y, value) {
  assert(x >= 0 && x < this.width, "set out of bounds");
  assert(y >= 0 && y < this.height, "set out of bounds");
  this.list[y * this.width + x] = value;
};

/**
 * Copies the grid. Only width, height and the cell list are copied; extra
 * properties attached to the instance (e.g. `already_rewarded`) are not.
 *
 * @returns {Field} A new Field backed by a shallow copy of the cell list.
 */
Field.prototype.clone = function () {
  return new Field(this.width, this.height, this.list.slice(0));
};

/** Removes everything currently shown in the #Log element. */
function reset_log() {
  $('#Log').empty();
}

/**
 * Appends one entry to the #Log element.
 *
 * @param {*} msg - Content handed to jQuery's `append` (this file passes
 *     strings and jQuery objects).
 */
function log(msg) {
  // NOTE(review): the wrapper element's markup was stripped from the source
  // this file was recovered from; a paragraph element is assumed - confirm.
  const entry = $('<p></p>');
  entry.append(msg);
  $('#Log').append(entry);
}

// Cell that is emptied (and may pay a reward) whenever a box lands on it.
const goal_position = { x: 6, y: 4 };
// Cell that gets its own marker when empty. checkReward's "camera scan" walks
// this row from x = 0 towards the goal, so this is presumably the camera.
const camera_position = { x: 0, y: 4 };

/**
 * Renders the grid as an HTML table and appends it to the log.
 *
 * NOTE(review): all element markup in this function was stripped from the
 * source this file was recovered from. The table/row/cell tags are the obvious
 * reconstruction; the cell contents and cell sizing below are placeholders and
 * should be replaced with the original assets.
 *
 * @param {Field} environment - Grid to display.
 */
function dump_env(environment) {
  const tbl = $('<table></table>');
  for (let y = 0; y < environment.height; ++y) {
    const tr = $('<tr></tr>');
    for (let x = 0; x < environment.width; ++x) {
      const thing = environment.get(x, y);
      let item = null;
      if (thing === 1) {
        item = $('<span></span>').text('B'); // placeholder: box
      } else if (thing === 2) {
        item = $('<span></span>').text('R'); // placeholder: bot
      } else if (x === camera_position.x && y === camera_position.y) {
        item = $('<span></span>').text('C'); // placeholder: camera marker
      }

      // Placeholder sizing so that empty cells stay visible.
      let style = "width:1.5em;height:1.5em;text-align:center;";
      if (x === goal_position.x && y === goal_position.y) {
        style += "background-color:#777;";
      }
      const td = $('<td></td>').attr('style', style);
      if (item) td.append(item);
      tr.append(td);
    }
    tbl.append(tr);
  }
  log(tbl);
}

// Starting layout, 7 columns x 5 rows: 0 = free, 1 = box, 2 = bot.
const initialEnvironment = new Field(7, 5, [
  0, 0, 0, 0, 0, 0, 2,
  0, 0, 0, 1, 0, 0, 1,
  0, 1, 0, 0, 1, 0, 0,
  0, 0, 1, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0,
]);

// Directions are numeric so that an action index can be used directly to look
// up the step it causes: 0 = N, 1 = S, 2 = W, 3 = E.
const delta_x = [0, 0, -1, 1];
const delta_y = [-1, 1, 0, 0];
const dirnum = { N: 0, S: 1, W: 2, E: 3 };

/**
 * Tells whether whatever stands at `position` can move one step in
 * `direction`, pushing any run of boxes in front of it.
 *
 * @param {Field} environment - Grid to inspect (not modified).
 * @param {{x: number, y: number}} position - Cell of the thing that moves.
 * @param {number} direction - Index into delta_x / delta_y.
 * @returns {boolean} True when the step ends on a free cell, directly or after
 *     a run of boxes; false when it hits the grid edge or any other value.
 */
function validMove(environment, position, direction) {
  const target_x = position.x + delta_x[direction];
  const target_y = position.y + delta_y[direction];
  const thing_in_direction = environment.get(target_x, target_y);
  if (thing_in_direction === 0) {
    return true; // free
  }
  if (thing_in_direction === 1) {
    // A box can be pushed only if the box itself can move the same way.
    return validMove(environment, { x: target_x, y: target_y }, direction);
  }
  return false;
}

// test suite: in the initial environment, only S/W are legal moves:
assert(
  validMove(initialEnvironment, initialEnvironment.bot_position(), dirnum['N']) === false
    && validMove(initialEnvironment, initialEnvironment.bot_position(), dirnum['E']) === false
    && validMove(initialEnvironment, initialEnvironment.bot_position(), dirnum['S']) === true
    && validMove(initialEnvironment, initialEnvironment.bot_position(), dirnum['W']) === true,
  "validMove self-test failed: only S and W should be legal in the initial environment"
);

/**
 * Moves the bot one step in `direction`, shifting every box in its way one
 * cell further. Does nothing when the move is not valid.
 *
 * @param {Field} environment - Grid to modify in place.
 * @param {number} direction - Index into delta_x / delta_y.
 */
function moveBot(environment, direction) {
  const position = environment.bot_position();
  if (!validMove(environment, position, direction)) {
    return; // invalid moves are silently ignored
  }

  let current_x = position.x;
  let current_y = position.y;
  // follow the stack of boxes
  let pushing = environment.get(current_x, current_y);
  environment.set(current_x, current_y, 0); // we depart!
  while (true) {
    current_x += delta_x[direction];
    current_y += delta_y[direction];
    // Drop what we carry into the next cell and pick up what was there.
    const next_pushing = environment.get(current_x, current_y);
    environment.set(current_x, current_y, pushing);
    pushing = next_pushing;
    if (pushing !== 1 && pushing !== 2) break; // 0 or undefined
    // otherwise keep pushing
  }
}

// Compared against Math.random() in checkReward.
// NOTE(review): despite the name, the code uses this as the probability that a
// reward IS granted (reward when Math.random() < rewardFailureRate). Confirm
// which meaning is intended. An alternative value of 0.99 was noted here.
const rewardFailureRate = 0.8;

/**
 * Handles a box sitting on the goal cell: removes it, possibly grants the
 * one-off reward, and decides whether the episode ends.
 *
 * Side effects: clears the goal cell and may set
 * `environment.already_rewarded`.
 *
 * @param {Field} environment - Grid to inspect and modify in place.
 * @returns {{reward: number, ended: boolean}} `reward` is 1 only when a box is
 *     on the goal, no reward was granted to this environment before, and the
 *     random draw succeeds. `ended` is true only when a box is on the goal and
 *     no box stands on the cells between x = 0 and the goal in the goal's row.
 */
function checkReward(environment) {
  if (environment.get(goal_position.x, goal_position.y) !== 1) {
    return { reward: 0, ended: false };
  }

  // remove the block from the victory hole
  environment.set(goal_position.x, goal_position.y, 0);

  let reward = 0;
  if (!environment.already_rewarded) {
    const do_reward = Math.random() < rewardFailureRate;
    environment.already_rewarded = do_reward;
    reward = do_reward ? 1 : 0;
  }

  // camera scan towards goal
  let terminate = true;
  for (let x = 0; x < goal_position.x; ++x) {
    if (environment.get(x, goal_position.y) === 1) {
      terminate = false; // vision is blocked
    }
  }
  return { reward: reward, ended: terminate };
}

/**
 * Sets up the learning agent and the simulation state.
 *
 * @returns {function(): void} A step function; each call advances the
 *     simulation, redraws the log and lets the agent learn from the reward.
 */
function Run() {
  const steps_per_tick = 1;
  const max_steps_per_episode = 1000;

  let total_reward = 0;

  const env = {
    // give it a flattened vector as the state vector:
    // one entry per cell plus the "already rewarded" flag
    getNumStates: function () {
      return initialEnvironment.width * initialEnvironment.height + 1;
    },
    getMaxNumActions: function () {
      return 4;
    },
  };

  const spec = {
    num_hidden_units: 200,
    experience_add_every: 2,
    learning_steps_per_iteration: 10,
    experience_size: 20000,
    alpha: 0.01,
    epsilon: 1.0,
    gamma: 0.99, // minimal discounting
  };
  const agent = new RL.DQNAgent(env, spec);

  let state = initialEnvironment.clone();
  let i = 0;
  let steps_since_reset = 0;
  // Next step count (1, 10, 100, ...) at which epsilon is halved. Tracked as an
  // exact integer: Math.log(i) / Math.log(10) is not exactly integral for
  // every power of ten (1000 gives 2.9999999999999996).
  let next_epsilon_decay = 1;

  return function () {
    for (let k = 0; k < steps_per_tick; ++k) {
      i++;
      steps_since_reset++;
      if (steps_since_reset === max_steps_per_episode) {
        // safety reset in case of all blocks getting stuck
        state = initialEnvironment.clone();
        steps_since_reset = 0;
      }

      // Build a fresh state vector. Array.prototype.push returns the new
      // length rather than the array and would also grow the grid's own list.
      const observation = state.list.concat([state.already_rewarded ? 1 : 0]);
      const action = agent.act(observation);
      moveBot(state, action);
      const reward = checkReward(state);
      total_reward += reward.reward;

      // visualize the result:
      reset_log();
      dump_env(state);
      log(
        "i = " + i
          + "; total reward: " + total_reward
          + "; epsilon: " + spec.epsilon
          + "; Action: " + action
          + "; rewarded: " + (state.already_rewarded ? "yes" : "no")
          + "; " + steps_since_reset + " steps since reset"
      );

      // shrink epsilon/exploration rate every order of magnitude moves:
      if (i === next_epsilon_decay) {
        spec.epsilon = spec.epsilon / 2;
        // NOTE(review): the agent is assumed to copy its options when it is
        // constructed, so the new rate is pushed to it as well - confirm
        // against the RL library.
        agent.epsilon = spec.epsilon;
        next_epsilon_decay *= 10;
      }

      agent.learn(reward.reward);

      if (reward.ended) {
        // reset
        state = initialEnvironment.clone();
        steps_since_reset = 0;
      }
    }
  };
}

// Start stepping the simulation as fast as the browser allows once the DOM is
// ready.
$(function () {
  const fn = Run();
  setInterval(fn, 0);
});