diff --git a/best_genome b/best_genome index 2b7a09a..c4ce4d5 100644 Binary files a/best_genome and b/best_genome differ diff --git a/config.txt b/config.txt index a1fc871..0295912 100644 --- a/config.txt +++ b/config.txt @@ -1,14 +1,14 @@ [NEAT] -fitness_criterion = mean -fitness_threshold = 400 -pop_size = 1000 +fitness_criterion = max +fitness_threshold = 32768 +pop_size = 5000 reset_on_extinction = False [DefaultGenome] # node activation options -activation_default = relu -activation_mutate_rate = 1.0 -activation_options = relu +activation_default = sigmoid +activation_mutate_rate = 0.0 +activation_options = sigmoid # node aggregation options aggregation_default = sum @@ -44,8 +44,8 @@ node_add_prob = 0.2 node_delete_prob = 0.2 # network parameters -num_hidden = 2 -num_inputs = 17 +num_hidden = 4 +num_inputs = 16 num_outputs = 4 # node response options diff --git a/src/ai/evaluation.py b/src/ai/evaluation.py index e128ad7..858ac22 100644 --- a/src/ai/evaluation.py +++ b/src/ai/evaluation.py @@ -1,9 +1,12 @@ +import random import time import neat from loguru import logger from py2048 import Menu +from .fitness import calculate_fitness + def eval_genomes(genomes, config: neat.Config): app = Menu() @@ -12,22 +15,17 @@ def eval_genomes(genomes, config: neat.Config): for genome_id, genome in genomes: genome.fitness = 0 net = neat.nn.FeedForwardNetwork.create(genome, config) - start_time = time.perf_counter() + start_time = time.perf_counter() while True: - output = net.activate( - ( - *app.game.board.matrix(), - app.game.board.score, - ) - ) + output = net.activate((*app.game.board.matrix(),)) decision = output.index(max(output)) decisions = { - 0: app.game.move_up, + 0: app.game.move_left, 1: app.game.move_down, - 2: app.game.move_left, + 2: app.game.move_up, 3: app.game.move_right, } @@ -35,23 +33,15 @@ def eval_genomes(genomes, config: neat.Config): app._hande_events() app.game.draw(app._surface) - max_val = app.game.board.max_val() time_passed = time.perf_counter() - start_time - score = app.game.board.score - if max_val >= 32: - calculate_fitness(genome, max_val) - logger.info(f"{max_val=}\t{score=:_}\t{genome_id=}") + + if app.game.board.is_game_over(): + max_tile, score = calculate_fitness(genome, app) + + logger.info(f"{max_tile=}\t{score=:_}\t{genome_id=}") app.game.restart() break - elif app.game.board.is_game_over() or ( - app.game.board._is_full() and time_passed >= 0.1 - ): - calculate_fitness(genome, -max_val) - logger.info(f"{max_val=}\t{score=:_}\t{genome_id=}") - app.game.restart() - break - - -def calculate_fitness(genome: neat.DefaultGenome, score: int): - genome.fitness += score + elif app.game.board._is_full() and time_passed >= 0.1: + decisions[random.choice((0, 1, 2, 3))]() + max_tile, score = calculate_fitness(genome, app) diff --git a/src/ai/fitness.py b/src/ai/fitness.py new file mode 100644 index 0000000..e6bd0ba --- /dev/null +++ b/src/ai/fitness.py @@ -0,0 +1,64 @@ +import neat +from py2048 import Menu +from py2048.utils import Position + + +def calculate_fitness(genome: neat.DefaultGenome, app: Menu) -> tuple[int, int]: + board = app.game.board + score = board.score + max_tile = board.max_val() + empty_cells = 16 - len(board.sprites()) + smoothness = calc_smoothness(app) + monotonicity = calc_monotonicity(app) + + genome.fitness = score + max_tile**3 + smoothness + monotonicity + + return max_tile, score + + +def calc_smoothness(app: Menu) -> int: + smoothness = 0 + + for row in range(4): + for col in range(4): + current_value = app.game.board.get_tile_value(Position(row, col)) + if current_value: + right_value = app.game.board.get_tile_value(Position(row, col + 1)) + if right_value: + smoothness -= abs(current_value - right_value) + left_value = app.game.board.get_tile_value(Position(row, col - 1)) + if left_value: + smoothness -= abs(current_value - left_value) + + for col in range(4): + for row in range(4): + current_value = app.game.board.get_tile_value(Position(row, col)) + if current_value: + up_value = app.game.board.get_tile_value(Position(row - 1, col)) + if up_value: + smoothness -= abs(current_value - up_value) + + down_value = app.game.board.get_tile_value(Position(row + 1, col)) + if down_value: + smoothness -= abs(current_value - down_value) + + return smoothness + + +def calc_monotonicity(app: Menu): + monotonicity = 0 + for row in range(4): + row_values = [ + app.game.board.get_tile_value(Position(row, col)) for col in range(4) + ] + + monotonicity += sum(sorted(row_values)) + + for col in range(4): + col_values = [ + app.game.board.get_tile_value(Position(row, col)) for row in range(4) + ] + + monotonicity += sum(sorted(col_values)) + + return monotonicity diff --git a/src/ai/training.py b/src/ai/training.py index 47ee69b..83384ad 100644 --- a/src/ai/training.py +++ b/src/ai/training.py @@ -11,12 +11,10 @@ def train(generations: int) -> None: """Train the AI for a given number of generations.""" config = get_config() population = neat.Population(config) - population.add_reporter(neat.StdOutReporter(True)) - stats = neat.StatisticsReporter() - population.add_reporter(stats) - population.add_reporter(neat.Checkpointer(1)) + + population.add_reporter(neat.Checkpointer(None)) winner = population.run(eval_genomes, generations) logger.info(winner) - save_genome(winner, BASE_PATH / "best_genome") + save_genome(winner) diff --git a/src/py2048/objects/board.py b/src/py2048/objects/board.py index e136dd3..92fd2e0 100644 --- a/src/py2048/objects/board.py +++ b/src/py2048/objects/board.py @@ -129,6 +129,13 @@ class Board(pygame.sprite.Group): return tile return None + def get_tile_value(self, position: Position) -> int: + """Return the value of the tile at the specified position.""" + tile = self.get_tile(position) + if tile: + return tile.value + return 0 + def matrix(self) -> list[int]: """Return a 1d matrix of values of the tiles.""" matrix: list[int] = [] diff --git a/src/py2048/screens/game.py b/src/py2048/screens/game.py index 54d0e9b..7ec346e 100644 --- a/src/py2048/screens/game.py +++ b/src/py2048/screens/game.py @@ -46,9 +46,9 @@ class Game: """Moved the board in the given direction and updates the score.""" self.board.move(direction) self.update_score(self.board.score) - if self.board.is_game_over(): - logger.info("Game over!") - self.restart() + # if self.board.is_game_over(): + # logger.info(f"Game over! Score was {self.board.score}.") + # self.restart() def move_up(self) -> None: self.move(Direction.UP) diff --git a/src/py2048/screens/menu.py b/src/py2048/screens/menu.py index 5fd664c..0801590 100644 --- a/src/py2048/screens/menu.py +++ b/src/py2048/screens/menu.py @@ -91,7 +91,7 @@ class Menu: elif event.type == pygame.KEYDOWN: if event.key == pygame.K_q: self.exit() - if self._game_active: + if self._game_active or self._ai_active: self.game.handle_events(event) def play(self) -> None: @@ -114,12 +114,7 @@ class Menu: 3: self.game.move_right, } - output = self.network.activate( - ( - *self.game.board.matrix(), - self.game.board.score, - ) - ) + output = self.network.activate((*self.game.board.matrix(),)) decision = output.index(max(output)) decisions[decision]()