マネジメントゲームを焼きなまし法などを使って最適化

# マネジメントゲームを焼きなまし法などを使って最適化学校の国際経営論という授業でマネジメントゲームというとても本格的な経営ボードゲーム(?)をみんなでしているのでそこで勝つために作ったソースコードです。やらないといけないことめっちゃあるのに半日これに使ってしまいました。(反省) ## ソースコード https://github.com/Nully00/ManagementGameSimulatedAnnealing ## マネジメントゲームのルールルールは通常のボードゲームだとあり得ないくらい複雑なのでここでは簡単にかきます。まず、このゲームの目標は決算(約35ターン後)の時点でできるだけ多くの資金を持っていることです。 1ゲーム6人で行い、1ターンの間に(買う、売る、雇う)という3つのアクションを選択できます。 (行動権を得ることができないケースもあります。) 買うアクションでは、ABCの3つの商品から選択ができます。 A→ハイリスクハイリターン B→安定 C→薄利多売売るアクションでは、現在販売ゾーンにおいている商品を売ることができます。雇うアクションでは、パートと正社員を選択して雇うことができます。雇った数に応じて一回のアクションでできる量が増えます。 (1ターンに売買できる量が増えるなど) 大体こんな感じで進んで行きます。その他にも、地域の選択や売れる条件などとても複雑な状況や制約が絡んできます。 ## 今回の進め方今回はC商品(薄利多売)でできる戦略の最適化をしていこうと思います。また、今回は相手はいないもの(正確には最初に売買リスクを0にして敵をなくす)として考えます。 ※ゲーム中に研究やチラシといった仕組みを使用するとライバルの動きをある程度制御できます。流れとしては、パラメーターを変えた模擬焼きなましを複数回行って答えを出します。(アンサンブル学習のような) ## 入力から最終的な答えまずは、いったんゲームの仕組みを作成し、入力から答えを出せるようにします。 ```csharp= namespace ManagementGame { public enum Action { Buy, Sell, Hire } } ``` ```csharp= namespace ManagementGame { public readonly struct TradingAction { public TradingAction(Action action, int amount) { this.action = action; this.amount = amount; } public readonly Action action { get; } public readonly int amount { get; } } } ``` ```csharp= namespace ManagementGame { public interface IRule { int GetInitialFunds(); int MaxPurchasableItems(int money, int currentEmployeeCapacity); int Purchase(int funds, int quantity); int MaxSellableItems(int currentStock, int currentEmployeeCapacity); int Sell(int funds, int quantity); int HirePartTime(int funds, int quantity); int HireFullTime(int funds, int quantity); int MaxHireablePartTimeEmployees(int funds, int currentPartTimeEmployees); int MaxHireableFullTimeEmployees(int funds, int currentFullTimeEmployees); int GetCurrentEmployeePower(int partTimeCount, int fullTimeCount); int FinalFullTimeEmployeeCost(int fullTimeCount); } } ``` ```csharp= namespace ManagementGame { public class Rule : IRule { public int InitialFunds { get; set; } = 300; public int TurnCount { get; set; } = 30; public int PurchasePrice { get; set; } = 10; public int SalePrice { get; set; } = 16; public int PartTimeHireCost { get; set; } = 10; public int FullTimeHireCost { get; set; } = 5; public int EndOfGameFullTimeHireCost { get; set; } = 25; public int MaxPartTimeEmployees { get; set; } = 6; public int MaxFullTimeEmployees { get; set; } = 9; public int MaxPartTimeEmployeesAtOnce { get; set; } = 3; public int PartTimeEmployeePower { get; set; } = 1; public int FullTimeEmployeePower { get; set; } = 2; public int GetInitialFunds() { return InitialFunds; } public int MaxPurchasableItems(int money, int currentEmployeeCapacity) { int value = money / PurchasePrice; return (value <= currentEmployeeCapacity) ? value : currentEmployeeCapacity; } public int Purchase(int funds, int quantity) { return funds - (PurchasePrice * quantity); } public int MaxSellableItems(int currentStock, int currentEmployeeCapacity) { return (currentStock <= currentEmployeeCapacity) ? currentStock : currentEmployeeCapacity; } public int Sell(int funds, int quantity) { return funds + (SalePrice * quantity); } public int HirePartTime(int funds, int quantity) { return funds - (PartTimeHireCost * quantity); } public int HireFullTime(int funds, int quantity) { return funds - (FullTimeHireCost * quantity); } public int MaxHireablePartTimeEmployees(int funds, int currentPartTimeEmployees) { int maxHirable = funds / PartTimeHireCost; int remainingHiringCapacity = MaxPartTimeEmployees - currentPartTimeEmployees; return Math.Min(Math.Min(maxHirable, remainingHiringCapacity), MaxPartTimeEmployeesAtOnce); } public int MaxHireableFullTimeEmployees(int funds, int currentFullTimeEmployees) { int maxHirable = funds / FullTimeHireCost; int remainingHiringCapacity = MaxFullTimeEmployees - currentFullTimeEmployees; return Math.Min(maxHirable, remainingHiringCapacity); } public int GetCurrentEmployeePower(int partTimeCount, int fullTimeCount) { return (partTimeCount * PartTimeEmployeePower) + (fullTimeCount * FullTimeEmployeePower); } public int FinalFullTimeEmployeeCost(int fullTimeCount) { return EndOfGameFullTimeHireCost * fullTimeCount; } } } ``` ```csharp= namespace ManagementGame { public class SimulationResultAnalyzer { private readonly IRule Rule; private readonly TradingAction[] TradingActions; public bool IsLoggingEnabled { get; set; } public SimulationResultAnalyzer(TradingAction[] tradingActions, IRule rule) { Rule = rule ?? throw new ArgumentNullException(nameof(rule)); TradingActions = tradingActions ?? throw new ArgumentNullException(nameof(tradingActions)); IsLoggingEnabled = false; } private void Log(string message) { if (IsLoggingEnabled) { Console.WriteLine(message); } } public int Result() { int funds = Rule.GetInitialFunds(); int partTimeCount = 0; int fullTimeCount = 0; int itemCount = 0; Log($"Initial funds: {funds} \n"); foreach (var tradingAction in TradingActions) { Log($"Processing action: {tradingAction.action}, amount: {tradingAction.amount}"); switch (tradingAction.action) { case Action.Buy: ProcessBuying(tradingAction.amount); break; case Action.Sell: ProcessSelling(tradingAction.amount); break; case Action.Hire: ProcessHiring(tradingAction.amount); break; default: throw new InvalidOperationException("Invalid action type."); } Log($"Funds after action: {funds}, Item count: {itemCount}, Part-time: {partTimeCount}, Full-time: {fullTimeCount}\n"); } funds -= Rule.FinalFullTimeEmployeeCost(fullTimeCount); Log($"Final funds after settlement: {funds}"); return funds; void ProcessBuying(int requestedPurchaseCount) { int employeePower = Rule.GetCurrentEmployeePower(partTimeCount, fullTimeCount); int maxPurchaseCount = Rule.MaxPurchasableItems(funds, employeePower); int actualPurchaseCount = Math.Min(maxPurchaseCount, requestedPurchaseCount); funds = Rule.Purchase(funds, actualPurchaseCount); itemCount += actualPurchaseCount; Log($"Bought {actualPurchaseCount} items"); } void ProcessSelling(int requestedSellCount) { int employeePower = Rule.GetCurrentEmployeePower(partTimeCount, fullTimeCount); int maxSellCount = Rule.MaxSellableItems(itemCount, employeePower); int actualSellCount = Math.Min(maxSellCount, requestedSellCount); funds = Rule.Sell(funds, actualSellCount); itemCount -= actualSellCount; Log($"Sold {actualSellCount} items"); } void ProcessHiring(int requestedHireCount) { int maxPartTimeHireCount = Rule.MaxHireablePartTimeEmployees(funds, partTimeCount); int actualPartTimeHireCount = Math.Min(maxPartTimeHireCount, requestedHireCount); HirePartTimeEmployees(actualPartTimeHireCount); int remainingHireCount = requestedHireCount - actualPartTimeHireCount; HireFullTimeEmployees(remainingHireCount); } void HirePartTimeEmployees(int count) { if (count <= 0) return; funds = Rule.HirePartTime(funds, count); partTimeCount += count; Log($"Hired {count} part-time employees"); } void HireFullTimeEmployees(int count) { if (count <= 0) return; int maxFullTimeHireCount = Rule.MaxHireableFullTimeEmployees(funds,fullTimeCount); int actualFullTimeHireCount = Math.Min(maxFullTimeHireCount, count); funds = Rule.HireFullTime(funds, actualFullTimeHireCount); fullTimeCount += actualFullTimeHireCount; Log($"Hired {actualFullTimeHireCount} full-time employees"); } } } } ``` これで疑似的に答えを出せるようになりました。ただの足し引きなので解説はありません。 ## 焼きなましの実装続いて焼きなまし法を実装します。 ```csharp= namespace SimulatedAnnealing { public interface IProblemState { double GetEnergy(); IProblemState GetNextState(); } public class SimulatedAnnealingSolver { private readonly Random Random = new Random(); public IProblemState Solve(IProblemState initialState, double initialTemperature, double coolingRate, int numberOfIterations, double logOutputPercent = 0) { IProblemState currentState = initialState; double temperature = initialTemperature; int logInterval = (int)(numberOfIterations * logOutputPercent); for (int i = 0; i < numberOfIterations; i++) { IProblemState nextState = currentState.GetNextState(); double energyDelta = nextState.GetEnergy() - currentState.GetEnergy(); if (energyDelta < 0 || Random.NextDouble() < Math.Exp(-energyDelta / temperature)) { currentState = nextState; } temperature *= coolingRate; if (logOutputPercent != 0 && i % logInterval == 0) { Console.WriteLine($"Iteration {i}: Energy = {currentState.GetEnergy()}, Temperature = {temperature}"); } } return currentState; } } } ``` 何も変哲もない焼きなまし法です。一定間隔でログを出せるようにしています。 IProblemStateインターフェースを実際に最適化する問題に継承させます。 ```scharp= using SimulatedAnnealing; namespace ManagementGame { public class OptimizationProblemState : IProblemState { public TradingAction[] TradingActions { get; } private static readonly Random Random = new Random(); private readonly IRule Rule; private readonly SimulationResultAnalyzer Analyzer; public int minCount { get; set; } = 0; public int maxCount { get; set; } = 50; public OptimizationProblemState(TradingAction[] tradingActions, IRule rule) { TradingActions = tradingActions; Rule = rule; Analyzer = new SimulationResultAnalyzer(TradingActions, rule); } public double GetEnergy() { return -1 * Analyzer.Result(); } public IProblemState GetNextState() { TradingAction[] newTradingActions = (TradingAction[])TradingActions.Clone(); int indexToChange = Random.Next(TradingActions.Length); Action newAction; int newAmount; if(indexToChange == 0) { newAction = Action.Hire; newAmount = Random.Next(minCount, maxCount + 1); } else if(indexToChange == TradingActions.Length - 1) { newAction = Action.Sell; newAmount = Random.Next(minCount, maxCount + 1); } else if (TradingActions[indexToChange - 1].action == Action.Buy) { newAction = Action.Sell; newAmount = maxCount; } else { do { newAction = (Action)Random.Next(Enum.GetNames(typeof(Action)).Length); } while (newAction == TradingActions[indexToChange - 1].action || newAction == TradingActions[indexToChange + 1].action); newAmount = Random.Next(minCount, maxCount + 1); } newTradingActions[indexToChange] = new TradingAction(newAction, newAmount); return new OptimizationProblemState(newTradingActions, Rule); } public TradingAction[] GetActions() { return TradingActions; } } } ``` ここが一番苦労しました。最適化問題はある程度人間の力が必要なのでここで前後で被らないようにしたり、最初と最後のアクションを固定したりといくつか制約を入れています。 ## 実行最後に実行をします。今回は繰り返して一番良いスコアのものを取得する方式をとっています。 ```csharp= using SimulatedAnnealing; namespace ManagementGame { internal class Program { static void Main(string[] args) { var numberOfAttempts = 1000; var tempRange = (min: 1000.0, max: 10000.0); var coolingRateRange = (min: 0.0001, max: 0.05); var iterationsRange = (min: 500, max: 10000); Random random = new Random(); TradingAction[] initialActions = new TradingAction[35]; for (int i = 0; i < initialActions.Length; i++) { initialActions[i] = new TradingAction(Action.Hire, 1); } IRule rule = new Rule(); OptimizationProblemState initialState = new OptimizationProblemState(initialActions, rule); SimulatedAnnealingSolver simulatedAnnealingSolver = new SimulatedAnnealingSolver(); IProblemState? bestState = null; double bestEnergy = double.MaxValue; for (int attempt = 0; attempt < numberOfAttempts; attempt++) { double initialTemperature = random.NextDouble() * (tempRange.max - tempRange.min) + tempRange.min; double coolingRate = random.NextDouble() * (coolingRateRange.max - coolingRateRange.min) + coolingRateRange.min; int numberOfIterations = random.Next(iterationsRange.min, iterationsRange.max + 1); IProblemState optimizedState = simulatedAnnealingSolver.Solve(initialState, initialTemperature, coolingRate, numberOfIterations, 0); double energy = optimizedState.GetEnergy(); Console.WriteLine($"Attempt {attempt + 1}: Current Energy = {energy}, Best Energy so far = {bestEnergy}"); if (energy < bestEnergy) { bestEnergy = energy; bestState = optimizedState; Console.WriteLine($"New best energy found: {bestEnergy}"); } } Console.WriteLine("-----Result-----"); TradingAction[] optimizedActions = ((OptimizationProblemState)bestState).GetActions(); SimulationResultAnalyzer simulationResultAnalyzer = new SimulationResultAnalyzer(optimizedActions, rule); simulationResultAnalyzer.IsLoggingEnabled = true; simulationResultAnalyzer.Result(); } } } ``` ## 結果よし、準備万端だ。