I've implemented Negamax as it can be found on wikipedia, which includes alpha/beta pruning.
However, it seems to favor a losing move, which should be an invalid result.
The game is Tic-Tac-Toe, I've abstracted most of the game play so it should be rather easy to spot an error within the algorithm.
Here is the code, nextMove, negamax or evaluate are probably the functions that contain the fault:
#include <list>
#include <climits>
#include <iostream>
//#define DEBUG 1
using namespace std;
struct Move {
int row, col;
Move(int row, int col) : row(row), col(col) { }
Move(const Move& m) { row = m.row; col = m.col; }
};
struct Board {
char player;
char opponent;
char board[3][3];
Board() { }
void read(istream& stream) {
stream >> player;
opponent = player == 'X' ? 'O' : 'X';
for(int row = 0; row < 3; row++) {
for(int col = 0; col < 3; col++) {
char playa;
stream >> playa;
board[row][col] = playa == '_' ? 0 : playa == player ? 1 : -1;
}
}
}
void print(ostream& stream) {
for(int row = 0; row < 3; row++) {
for(int col = 0; col < 3; col++) {
switch(board[row][col]) {
case -1:
stream << opponent;
break;
case 0:
stream << '_';
break;
case 1:
stream << player;
break;
}
}
stream << endl;
}
}
void do_move(const Move& move, int player) {
board[move.row][move.col] = player;
}
void undo_move(const Move& move) {
board[move.row][move.col] = 0;
}
bool isWon() {
if (board[0][0] != 0) {
if (board[0][0] == board[0][1] &&
board[0][1] == board[0][2])
return true;
if (board[0][0] == board[1][0] &&
board[1][0] == board[2][0])
return true;
}
if (board[2][2] != 0) {
if (board[2][0] == board[2][1] &&
board[2][1] == board[2][2])
return true;
if (board[0][2] == board[1][2] &&
board[1][2] == board[2][2])
return true;
}
if (board[1][1] != 0) {
if (board[0][1] == board[1][1] &&
board[1][1] == board[2][1])
return true;
if (board[1][0] == board[1][1] &&
board[1][1] == board[1][2])
return true;
if (board[0][0] == board[1][1] &&
board[1][1] == board[2][2])
return true;
if (board[0][2] == board [1][1] &&
board[1][1] == board[2][0])
return true;
}
return false;
}
list<Move> getMoves() {
list<Move> moveList;
for(int row = 0; row < 3; row++)
for(int col = 0; col < 3; col++)
if (board[row][col] == 0)
moveList.push_back(Move(row, col));
return moveList;
}
};
ostream& operator<< (ostream& stream, Board& board) {
board.print(stream);
return stream;
}
istream& operator>> (istream& stream, Board& board) {
board.read(stream);
return stream;
}
int evaluate(Board& board) {
int score = board.isWon() ? 100 : 0;
for(int row = 0; row < 3; row++)
for(int col = 0; col < 3; col++)
if (board.board[row][col] == 0)
score += 1;
return score;
}
int negamax(Board& board, int depth, int player, int alpha, int beta) {
if (board.isWon() || depth <= 0) {
#if DEBUG > 1
cout << "Found winner board at depth " << depth << endl;
cout << board << endl;
#endif
return player * evaluate(board);
}
list<Move> allMoves = board.getMoves();
if (allMoves.size() == 0)
return player * evaluate(board);
for(list<Move>::iterator it = allMoves.begin(); it != allMoves.end(); it++) {
board.do_move(*it, -player);
int val = -negamax(board, depth - 1, -player, -beta, -alpha);
board.undo_move(*it);
if (val >= beta)
return val;
if (val > alpha)
alpha = val;
}
return alpha;
}
void nextMove(Board& board) {
list<Move> allMoves = board.getMoves();
Move* bestMove = NULL;
int bestScore = INT_MIN;
for(list<Move>::iterator it = allMoves.begin(); it != allMoves.end(); it++) {
board.do_move(*it, 1);
int score = -negamax(board, 100, 1, INT_MIN + 1, INT_MAX);
board.undo_move(*it);
#if DEBUG
cout << it->row << ' ' << it->col << " = " << score << endl;
#endif
if (score > bestScore) {
bestMove = &*it;
bestScore = score;
}
}
if (!bestMove)
return;
cout << bestMove->row << ' ' << bestMove->col << endl;
#if DEBUG
board.do_move(*bestMove, 1);
cout << board;
#endif
}
int main() {
Board board;
cin >> board;
#if DEBUG
cout << "Starting board:" << endl;
cout << board;
#endif
nextMove(board);
return 0;
}
Giving this input:
O
X__
___
___
The algorithm chooses to place a piece at 0, 1, causing a guaranteed loss, do to this trap(nothing can be done to win or end in a draw):
XO_
X__
___
Perhaps it has something to do with the evaluation function? If so, how could I fix it?