2

I recently started learning C and as a learning project I tried creating a tic tac toe bot using the Minimax algorithm. Here is the code that I have written so far:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BOARDSIZE 9

const unsigned char WINNINGSTATES[8][3] = {{0, 1, 2}, {3, 4, 5}, {6, 7, 8},
                                           {0, 3, 6}, {1, 4, 7}, {2, 5, 8},
                                           {0, 4, 8}, {2, 4, 6}};

enum XOE { EMPTY, X, O };
struct Board {
  enum XOE current_player;
  enum XOE grid[BOARDSIZE];
};

struct legal_moves {
  int count;
  int moves[];
};

struct board_evaluation {
  _Bool x_won;
  _Bool o_won;
  _Bool draw;
  _Bool inplay;
};

struct Board empty_board() {

  struct Board newboard;
  newboard.current_player = X;
  memset(newboard.grid, 0, sizeof(newboard.grid));

  return newboard;
}

struct legal_moves *get_legal(struct Board board) {

  int count = 0;
  for (int i = 0; i < BOARDSIZE; i++) {
    if (board.grid[i] == EMPTY) {
      count += 1;
    }
  }

  if (count == 0) {
    return malloc(sizeof(struct legal_moves));
  }

  struct legal_moves *legal =
      malloc(sizeof(struct legal_moves) + sizeof(int) * count);
  legal->count = count;

  count = 0;
  for (int i = 0; i < BOARDSIZE; i++) {
    if (board.grid[i] == EMPTY) {
      legal->moves[count] = i;
      count += 1;
    }
  }

  return legal;
}

struct Board move(struct Board board, int index) {
  board.grid[index] = board.current_player;

  if (board.current_player == X) {
    board.current_player = O;
    return board;
  }
  board.current_player = X;
  return board;
}

struct board_evaluation evaluate_board(struct Board board) {

  enum XOE possible_winner;
  _Bool winner_found;

  for (int w = 0; w < 8; w++) {
    possible_winner = board.grid[WINNINGSTATES[w][0]];
    if (possible_winner == EMPTY) {
      continue;
    }

    winner_found = true;
    for (int s = 0; s < 3; s++) {
      if (board.grid[WINNINGSTATES[w][s]] != possible_winner) {
        winner_found = false;
        break;
      }
    }
    if (winner_found) {
      if (possible_winner == X) {
        return (struct board_evaluation){true, false, false, false};
      }
      return (struct board_evaluation){false, true, false, false};
    }
  }

  struct legal_moves *moves = get_legal(board);

  if (moves->count == 0) {
    free(moves);
    return (struct board_evaluation){false, false, true, false};
  }
  free(moves);
  return (struct board_evaluation){false, false, false, true};
}

void display_board(struct Board board) {
  for (int i = 0; i < 9; i++) {
    if (board.grid[i] == X) {
      printf("X");
    }
    if (board.grid[i] == O) {
      printf("O");
    }
    if (board.grid[i] == EMPTY) {
      printf("-");
    }
  }
  printf("\n");
}

int get_score(struct board_evaluation evaluation) {
  if (evaluation.x_won) {
    return 1;
  }
  if (evaluation.draw) {
    return 0;
  }
  if (evaluation.o_won) {
    return -1;
  }
  return 0;
}

int minimax(struct Board board, int depth) {
  struct board_evaluation test = evaluate_board(board);
  _Bool ismaximizing = board.current_player == X;

  if (!test.inplay) {
    return get_score(test);
  }
  if (depth > 3) {
    return get_score(evaluate_board(board));
  }

  struct legal_moves *possible_moves = get_legal(board);
  struct Board new_board;

  int current_score = 0;
  int best_score = ismaximizing ? -10 : 10;

  for (int i = 0; i < possible_moves->count; i++) {
    new_board = move(board, possible_moves->moves[i]);
    current_score = minimax(new_board, depth + 1);
    if (ismaximizing) {
      if (current_score >= best_score) {
        best_score = current_score;
      }
      continue;
    }

    if (current_score <= best_score) {
      best_score = current_score;
    }
  }
  free(possible_moves);
  return best_score;
}

int get_best(struct Board board) {
  struct legal_moves *possible_moves = get_legal(board);
  struct Board new_board;
  _Bool ismaximizing = board.current_player == X;

  int current_score = 0;
  int best_score = ismaximizing ? -10 : 10;
  int best_move = possible_moves->moves[0];

  for (int i = 0; i < possible_moves->count; i++) {
    new_board = move(board, possible_moves->moves[i]);
    current_score = minimax(new_board, 0);
    printf("%d -> %d\n", possible_moves->moves[i], current_score);
    if (ismaximizing) {
      if (current_score > best_score) {
        best_score = current_score;
        best_move = possible_moves->moves[i];
        display_board(new_board);
      }
      continue;
    }

    if (current_score < best_score) {
      best_score = current_score;
      best_move = possible_moves->moves[i];
    }
  }
  free(possible_moves);
  return best_move;
}

int main() {
  struct Board new_board = {
      X, {EMPTY, EMPTY, EMPTY, EMPTY, X, X, EMPTY, EMPTY, EMPTY}};
  printf("%d\n", get_best(new_board));
}

I am inputing the board position:

[ ][ ][ ]
[ ][X][X]
[ ][ ][ ]

with X to play

Obviously the output should be 3 as it results in a instant win for player X, but instead the code outputs 0, this is due to the minimax function returning 1 for each of the root moves, and I cannot figure out why. Your help is highly appreciated!

2
  • 2
    You might want to read "How to Debug Small Programs" and edit your question to add what you tried so far to investigate the issue. Commented yesterday
  • Your test position isn't actually a valid game position for OXO. There should be at least two O if there are already two X with X to move. Commented yesterday

2 Answers 2

5

The algorithm may suggest moves that do not lead to the fastest win, just to a certain win.

In your example, X will still win when it plays on spot 0, as O has no way to block all threats. When your algorithm finds multiple moves that evaluate to the best value, it will choose the first one, which is why in your example it suggests spot 0, even though it had determined that a move at spot 3 would be "just as good".

If you want the algorithm to suggest a move that leads to the fastest win, then one possible approach is to diminish the absolute value you get back from a recursive minimax call, which makes a win/loss less interesting the deeper it was found in the recursion tree. To implement that idea, make the following changes to your code:

  • In get_score return -9 and 9 instead of -1 and 1 respectively, so that you have some room to diminish the magnitude of that value later on.

  • In minimax, right after the recursive call, add this statement:

    current_score += (current_score < 0) - (current_score > 0);
    

So this turns a -9 into -8, and 9 into 8, and -8 into -7, ...etc.

Now the example will give you 3 as output.

Other Remarks

To get correct play, you will need to increase the search depth from 3 to 4. For example, with depth 3 the second player will not find the correct move when the first starts the game at spot 0.

To avoid undefined behaviour, remove the following code:

if (count == 0) {
  return malloc(sizeof(struct legal_moves));
}

...as this case is also treated by the code the follows below it, and which properly initialises the count member.

Sign up to request clarification or add additional context in comments.

Comments

5

Code has at least these problems:

Uninitialized data

Code returns a pointer to uninitialized data.

if (count == 0) {
    return malloc(sizeof(struct legal_moves));
  }

Yet calling code use that pointer to uninitialized data.

struct legal_moves *moves = get_legal(board);
if (moves->count == 0) {  // `moves->count` may be garbage.

Initialize the data (member .count) before returning.

It appears code will perform OK with if (count == 0) { return malloc(sizeof(struct legal_moves)); } eliminated.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.