Python实现图形化井字棋——人机对战
井字棋,英文名叫TicQ-Tac-Toe,是一种在3*3格子上进行的连珠游戏,和五子棋类似,由于棋盘一般不画边框,格线排成井字故得名。游戏需要的工具仅为纸和笔,然后由分别代表O和X的两个游戏者轮流在格子里留下标记(一般来说先手者为X),任意三个标记形成一条直线,则为获胜。
最近学习了人工智能的博弈算法,同时老师布置了相关算法的井字棋代码实现作业,为了简便明了,我将使用python进行程序编写,算法使用了Minmax,Alpha-beta剪枝以及蒙特卡洛三种代码实现,欢迎大家批评指正与交流学习。下面分部分进行代码讲解:
首先是算法部分的结构:
class JinZi: def __init__(self): #用于初始化井字棋数据 def full(self): #判断棋盘棋子是否满 def empty_pos(self): #判断位置为空 def winner(self,pos,qizi): #判断是否获胜 def moving(self,pos,turn): #通用落子函数 def minmax(self,depth,is_ai): #核心算法(此处为最小最大算法) def ai_move(self): #ai计算落子 def human_move(self,pos): #玩家落子 def valid_move(self,pos): #判断落子位置是否合理 def reset(self): #重开接下来是UI部分结构:
class GameGUI: def __init__(self): #棋盘UI初始化 def draw_board(self): #绘制棋盘 def draw_status(self): #绘制棋子 def shift_mousepos(self,mouse_pos): #鼠标位置->棋盘位置转换 def reset_game(self): #重开 def run(self): #主循环下面是三个核心算发部分:
①最小最大算法:
AI同时站在自己和对手的角度计算,自己要取得最大分,而对手要失去最少的分,因此在进行回溯时,会一层max,一层min进行比对
def minmax(self,depth,is_ai): #到游戏结束的层就走 if (self.current_winner == self.ai): return {'position':None,'score':10-depth} elif (self.current_winner == self.human): return {'position':None,'score':depth-10} elif (self.full()): return {'position':None,'score':0} #初始化叶子值 if(is_ai): best = {'position':None,'score':-math.inf} else: best = {'position':None,'score':math.inf} #遍历落子 for pos in self.empty_pos(): turn = self.ai if(is_ai) else self.human self.board[pos] = turn temp_winner = self.current_winner if(self.winner(pos,turn)): self.current_winner = turn #递归求取最大胜利 result = self.minmax(depth+1,not is_ai) #取消模拟 self.board[pos] = ' ' self.current_winner = temp_winner #保留模拟记录 result['position'] = pos #实时更新最佳分数 if(is_ai): if(result['score'] > best['score']): best = result else: if(result['score'] < best['score']): best = result return best②alpha-beta剪枝算法:
这个算法算是最小最大算法的优化算法,新增了alpha和beta两个参数,并且在分数更新的同时更新alpha和beta的值,并进行剪枝操作,剪枝的条件是α>=β,即对相邻两层的节点进行比较,如果满足条件则说明该节点及其后续节点没有扩展价值,可以直接剪掉
def minmax(self, depth, is_ai, alpha=-math.inf, beta=math.inf): # 到游戏结束的层就走 if (self.current_winner == self.ai): return {'position': None, 'score': 10 - depth} elif (self.current_winner == self.human): return {'position': None, 'score': depth - 10} elif (self.full()): return {'position': None, 'score': 0} # 初始化叶子值 if (is_ai): best = {'position': None, 'score': -math.inf} else: best = {'position': None, 'score': math.inf} # 遍历落子 for pos in self.empty_pos(): turn = self.ai if (is_ai) else self.human self.board[pos] = turn temp_winner = self.current_winner if (self.winner(pos, turn)): self.current_winner = turn # 递归求取最大胜利,传递alpha和beta参数 result = self.minmax(depth + 1, not is_ai, alpha, beta) # 取消模拟 self.board[pos] = ' ' self.current_winner = temp_winner # 保留模拟记录 result['position'] = pos # 实时更新最佳分数和alpha/beta值 if (is_ai): if (result['score'] > best['score']): best = result # 更新alpha值 alpha = max(alpha, best['score']) # Alpha-Beta剪枝:如果alpha >= beta,剪掉剩余分支 if alpha >= beta: break else: if (result['score'] < best['score']): best = result # 更新beta值 beta = min(beta, best['score']) # Alpha-Beta剪枝:如果beta <= alpha,剪掉剩余分支 if beta <= alpha: break return best③蒙特卡洛搜索树算法:
蒙特卡洛算法和前两者有本质区别,它其实在井字棋上性能并不如前两种,但对于更加复杂的难以计算得到的博弈情况,蒙特卡洛算法则更具有优势。蒙特卡洛算法本质上是对概率的计算,即进行大量实验,以求得最有可能获胜的落子方式。可以使用UCT的蒙特卡洛决策算法:(公式:)。下面展示的是蒙特卡洛搜索树算法:
class MCTSNode: def __init__(self, board, parent=None, pos=None, turn='X'): self.board = board.copy() # 棋盘状态 self.parent = parent self.pos = pos # 导致此节点的落子位置 self.turn = turn # 当前轮到谁下棋 self.children = [] self.visits = 0 self.wins = 0 self.untried_moves = self.get_possible_moves() def get_possible_moves(self): return [i for i, value in enumerate(self.board) if value == ' '] def is_terminal(self): return self.get_winner() is not None or len(self.untried_moves) == 0 def get_winner(self): # 检查赢家 for i in range(0, 9, 3): if self.board[i] == self.board[i+1] == self.board[i+2] != ' ': return self.board[i] for i in range(3): if self.board[i] == self.board[i+3] == self.board[i+6] != ' ': return self.board[i] if self.board[0] == self.board[4] == self.board[8] != ' ': return self.board[0] if self.board[2] == self.board[4] == self.board[6] != ' ': return self.board[2] return None def expand(self): if self.untried_moves: pos = random.choice(self.untried_moves) self.untried_moves.remove(pos) new_board = self.board.copy() new_board[pos] = self.turn next_turn = 'O' if self.turn == 'X' else 'X' child_node = MCTSNode(new_board, self, pos, next_turn) self.children.append(child_node) return child_node return None def simulate(self): board = self.board.copy() current_turn = self.turn while True: # 检查是否结束 winner = self.get_winner_from_board(board) if winner is not None: return 1 if winner == 'X' else 0 # X是AI if ' ' not in board: return 0.5 # 平局 # 随机落子 empty = [i for i, v in enumerate(board) if v == ' '] pos = random.choice(empty) board[pos] = current_turn current_turn = 'O' if current_turn == 'X' else 'X' def get_winner_from_board(self, board): for i in range(0, 9, 3): if board[i] == board[i+1] == board[i+2] != ' ': return board[i] for i in range(3): if board[i] == board[i+3] == board[i+6] != ' ': return board[i] if board[0] == board[4] == board[8] != ' ': return board[0] if board[2] == board[4] == board[6] != ' ': return board[2] return None def backpropagate(self, result): self.visits += 1 self.wins += result if self.parent: self.parent.backpropagate(result) def is_fully_expanded(self): return len(self.untried_moves) == 0 def best_child(self, exploration_weight=1.41): best_score = -float('inf') best_child = None for child in self.children: if child.visits == 0: uct_score = float('inf') else: exploitation = child.wins / child.visits exploration = exploration_weight * math.sqrt(math.log(self.visits) / child.visits) uct_score = exploitation + exploration if uct_score > best_score: best_score = uct_score best_child = child return best_child --------------------------------------------------------------------------------- def mcts_search(self, iterations=1000): root = MCTSNode(self.board, turn=self.ai) for _ in range(iterations): node = root # Selection while node.is_fully_expanded() and not node.is_terminal(): node = node.best_child() # Expansion if not node.is_terminal(): node = node.expand() # Simulation result = node.simulate() # Backpropagation node.backpropagate(result)下面是MinMax的完整代码示例:
import math import pygame import sys import time sun_time = 0 class JinZi: def __init__(self): self.board=[' ' for _ in range(3*3)] self.ai='X' self.human='O' self.current_winner=None self.think_time = 0 def full(self): return ' ' not in self.board def empty_pos(self): return [i for i,value in enumerate(self.board) if(value == ' ')] def winner(self,pos,qizi): row=pos // 3 col=pos % 3 #行 win_r=all(qizi == temp_row for temp_row in self.board[row*3:row*3+3]) #列 win_c=all(qizi == self.board[col+i*3] for i in range(3)) if(win_r or win_c): return True #斜 if(pos % 2 == 0): return all(qizi == self.board[i] for i in [0,4,8]) or all(qizi == self.board[i] for i in [2,4,6]) return False def moving(self,pos,turn): if(self.board[pos] == ' '): self.board[pos] = turn if(self.winner(pos,turn)): self.current_winner = turn return True return False def minmax(self,depth,is_ai): #到游戏结束的层就走 if (self.current_winner == self.ai): return {'position':None,'score':10-depth} elif (self.current_winner == self.human): return {'position':None,'score':depth-10} elif (self.full()): return {'position':None,'score':0} #初始化叶子值 if(is_ai): best = {'position':None,'score':-math.inf} else: best = {'position':None,'score':math.inf} #遍历落子 for pos in self.empty_pos(): turn = self.ai if(is_ai) else self.human self.board[pos] = turn temp_winner = self.current_winner if(self.winner(pos,turn)): self.current_winner = turn #递归求取最大胜利 result = self.minmax(depth+1,not is_ai) #取消模拟 self.board[pos] = ' ' self.current_winner = temp_winner #保留模拟记录 result['position'] = pos #实时更新最佳分数 if(is_ai): if(result['score'] > best['score']): best = result else: if(result['score'] < best['score']): best = result return best def ai_move(self): start_time = time.time() move = self.minmax(0,True) end_time = time.time() self.think_time = end_time - start_time global sun_time sun_time += self.think_time if(move['position'] != None): self.moving(move['position'],self.ai) print(f"本次思考时间:{self.think_time}") return True return False def human_move(self,pos): if(self.current_winner is None): if(not self.full()): return self.moving(pos,self.human) return False def valid_move(self,pos): return self.board[pos] == ' ' def reset(self): global sun_time print(f"总耗时:{sun_time}") sun_time = 0 self.board = [' ' for _ in range(9)] self.current_winner = None class GameGUI: def __init__(self): pygame.init() self.width = 600 self.height = 700 self.screen = pygame.display.set_mode((self.width,self.height)) pygame.display.set_caption("井字棋") #time self.ai_delay = 200 self.ai_timer = 0 #/time #color #/color self.game = JinZi() self.gameover = False self.turn = self.game.human #front self.font = pygame.font.SysFont('Arial',48) #/front self.cell_size = self.width // 3 self.line_width = 5 self.button_rect = pygame.Rect(self.width//2 - 100,self.height-80,200,50) def draw_board(self): self.screen.fill([28, 170, 156]) for i in range(1,3): pygame.draw.line(self.screen,[255,255,255],(i*self.cell_size,0),(i*self.cell_size,self.width),self.line_width) pygame.draw.line(self.screen,[255,255,255],(0,i*self.cell_size),(self.width,i*self.cell_size),self.line_width) for i in range(9): x = (i%3) * self.cell_size + self.cell_size // 2 y = (i//3) * self.cell_size + self.cell_size // 2 x_line = x - self.cell_size // 4 y_line = y - self.cell_size // 4 if(self.game.board[i] == 'O'): pygame.draw.circle(self.screen,[239, 231, 200],(x,y),self.cell_size//4,self.line_width+5) elif(self.game.board[i] == 'X'): pygame.draw.line(self.screen,[84,84,84],(x_line,y_line),(x+self.cell_size // 4,y+self.cell_size // 4),self.line_width+5) pygame.draw.line(self.screen,[84,84,84],(x+self.cell_size // 4,y_line),(x_line,y+self.cell_size // 4),self.line_width+5) def draw_status(self): y = self.height - 30 if(self.game.current_winner): text = (f"Winner:{self.game.current_winner}") elif(self.game.full()): text = (f"Draw!") else: text = (f"Turn:{self.turn}") #显示对局情况 text_status = self.font.render(text,True,(255,255,255)) self.screen.blit(text_status,(0,0)) #绘制重来按钮 pygame.draw.rect(self.screen,(200,200,200), self.button_rect) pygame.draw.rect(self.screen, (255,255,255), self.button_rect, 2) button_text = self.font.render("New Game", True, (255,255,255)) button_rect = button_text.get_rect(center=self.button_rect.center) self.screen.blit(button_text, button_rect) def shift_mousepos(self,mouse_pos): x,y=mouse_pos if(y>self.width): return None row = y // self.cell_size col = x // self.cell_size return row*3+col def reset_game(self): self.game.reset() self.gameover = False self.turn = self.game.human def run(self): clock = pygame.time.Clock() ai_turn = False while(True): dt = clock.tick(60) for event in pygame.event.get(): if(event.type == pygame.QUIT): pygame.quit() sys.exit() if(event.type == pygame.MOUSEBUTTONDOWN and not self.gameover): #人回合 if(self.turn == self.game.human and not ai_turn): pos = self.shift_mousepos(event.pos) if(pos is not None and self.game.valid_move(pos)): if(self.game.human_move(pos)): if(self.game.current_winner or self.game.full()): self.gameover = True else: self.turn = self.game.ai ai_turn = True self.ai_timer = self.ai_delay if(event.type == pygame.MOUSEBUTTONDOWN): if(self.button_rect.collidepoint(event.pos)): self.reset_game() self.ai_timer = 0 if(self.turn == self.game.ai and not self.gameover and not self.game.current_winner and not self.game.full()): self.ai_timer -= dt if(self.ai_timer <= 0): if(self.game.ai_move()): self.turn = self.game.human if(self.game.current_winner or self.game.full()): self.gameover = True ai_turn = False #更新界面 self.draw_board() self.draw_status() pygame.display.flip() clock.tick(60) if __name__ == "__main__": gui = GameGUI() gui.run() print(f"总耗时:{sun_time}")以上便是本次全部内容,感谢各位鉴赏。
