Contempt v2 (#27)

rosenthj · web-flow · commit 9e1af2584f7c · 2020-10-05T00:54:17.000-04:00
Added contempt and armageddon settings back after they were temporarily removed earlier in the development cycle for v0.9. Binaries and the official release of 0.9 will follow shortly.

* Added a version of contempt and armageddon which should be similar to the v0.8 versions
* Added UCI_ShowWDL setting and updated version number
Bench 811188
diff --git a/src/general/settings.h b/src/general/settings.h
@@ -33,7 +33,7 @@
 namespace settings {
 
 const std::string engine_name = "Winter";
-const std::string engine_version = "0.8.16";
+const std::string engine_version = "0.9";
 const std::string engine_author = "Jonathan Rosenthal";
 
 #if defined(__BMI2__)
diff --git a/src/general/types.h b/src/general/types.h
@@ -184,37 +184,9 @@ inline float score_to_wpct(WDLScore score) {
   return score.to_wpct();
 }
 
-//// TODO remove this abomination once rest of code is ready.
-//inline WDLScore score_to_wdl_estimate(Score score) {
-//  return WDLScore::from_score(score);
-////  return WDLScore((score + kRescale) / 2);
-//  //float wpct = score_to_wpct(score);
-//  //return WDLScore(wpct, wpct);
-//}
-
-//inline NScore wpct_to_cp(float wpct) {
-//  constexpr float kEpsilon = 0.000001;
-//  wpct = std::max(std::min(wpct, 1-kEpsilon), kEpsilon);
-//  return std::round(std::log(wpct / (1-wpct)) * 1024);
-//}
-
-// Rounds score to next valid score
-//inline Score get_valid_score(Score score) {
-//  if (!is_valid_score(score) || score == kNoScore) {
-//    if (score < kMinScore) {
-//      return kMinScore;
-//    }
-//    if (score < kMinStaticEval) {
-//      return kMinStaticEval;
-//    }
-//    if (score > kMaxScore) {
-//      return kMaxScore;
-//    }
-//    assert(score > kMaxStaticEval);
-//    return kMaxStaticEval;
-//  }
-//  return score;
-//}
+inline Color other_color(Color color) {  // Returns `color` with its lowest bit flipped.
+  return color ^ 0x1;  // presumably swaps kWhite <-> kBlack; assumes the two colors are encoded as 0 and 1 -- TODO confirm
+}
 
 constexpr int kLowerBound = 1;
 constexpr int kUpperBound = 2;
diff --git a/src/net_evaluation.cc b/src/net_evaluation.cc
@@ -31,7 +31,7 @@ using Filter = Array2d<NetLayerType, 3, 3>;
 using DeconvFilter = Array2d<NetLayerType, 3, 3>;
 
 //using CReLULayerType = Vec<float, act_block_size>;
-std::array<float, 2> contempt = { 0.5, 0.5 };
+std::array<int32_t, 2> contempt = { 0, 0 };
 
 struct CNNHelper {
   std::vector<Square> our_p;
@@ -803,7 +803,7 @@ NetLayerType NetForward(CNNLayerType &cnn_layer_one, const CNNHelper &helper) {
   return out;
 }
 
-Score NetForward(NetLayerType &layer_one, float c = 0.5) {
+Score NetForward(NetLayerType &layer_one) {
   layer_one += bias_layer_one;
   layer_one.relu();
 
@@ -845,7 +845,10 @@ Score ScoreBoard(const Board &board) {
     layer_one = ScoreBoard<NetLayerType, kBlack>(board, ec);
   }
   layer_one += cnn_out;
-  return NetForward(layer_one, contempt[board.get_turn()]);
+  if (contempt[board.get_turn()] != 0) {
+    return AddContempt(NetForward(layer_one), board.get_turn());
+  }
+  return NetForward(layer_one);
 }
 
 template<size_t size>
@@ -1325,4 +1328,47 @@ void EstimateFeatureImpact() {
 }
 #endif
 
+void SetContempt(Color color, int32_t value) {  // Stores `value` as the contempt of `color` and the negated value for the other color; AddContempt treats the value as a percentage.
+  contempt[color] = value;
+  contempt[other_color(color)] = -value;
+  if (value == 0) {  // NOTE(review): -0 == 0 for int32_t, so this only re-stores the zero already written on the previous line
+    contempt[other_color(color)] = 0;
+  }
+}
+
+std::array<Score, 2> GetDrawArray() {  // Returns the pair of draw scores: plain kDrawScore without contempt, contempt-adjusted otherwise.
+  if (contempt[kWhite] == 0) {  // only white's entry is tested; SetContempt writes the pair as value / -value, so both are zero together (assuming nothing else writes `contempt`)
+    return std::array<Score, 2> { kDrawScore, kDrawScore };
+  }
+  return std::array<Score, 2> { AddContempt(kDrawScore, kWhite), AddContempt(kDrawScore, kBlack) };  // white's entry first, black's second; callers index by side to move -- assumes kWhite == 0, TODO confirm
+}
+
+Score AddContempt(Score score, Color color) {  // Returns `score` with contempt[color] percent of its (win_draw - win) gap moved out of win_draw or into win; asserts that `score` is a static eval.
+  assert(score.is_static_eval());
+  int32_t diff = score.win_draw - score.win;  // gap between the two fields; presumably the draw share of the score -- TODO confirm against WDLScore
+  if (contempt[color] > 0) { // Contempt is positive, draws are counted as losses
+    diff = (diff * contempt[color]) / 100;  // contempt percent of the gap; integer division truncates
+    return WDLScore { score.win, score.win_draw - diff };  // win is kept, win_draw is lowered by that portion
+  }
+  // contempt is negative, draws are counted as wins
+  diff = -(diff * contempt[color]) / 100;  // contempt[color] <= 0 on this path, so the negation cancels its sign; zero contempt leaves diff at 0
+  return WDLScore { score.win + diff, score.win_draw};  // win is raised by that portion, win_draw is kept
+}
+
+Score RemoveContempt(Score score, Color color) {  // Approximately undoes AddContempt for `color`; returns `score` unchanged when no adjustment applies or it cannot be inverted.
+  if (!score.is_static_eval() || contempt[color] == 0
+      || contempt[color] >= 100 || contempt[color] <= -100) {  // at |contempt| >= 100 the divisors used below would be zero or negative
+    return score;
+  }
+  int32_t diff = score.win_draw - score.win;  // gap as it stands after AddContempt
+  if (contempt[color] >= 0) {  // zero was already returned above, so this is the positive-contempt case
+    int32_t orig_diff = (diff * 100) / (100 - contempt[color]);  // AddContempt shrank the gap by the factor (100 - c) / 100; scale it back (integer division, so not exact)
+    diff = orig_diff - diff;  // amount AddContempt subtracted from win_draw
+    return WDLScore { score.win, score.win_draw + diff };
+  }
+  int32_t orig_diff = (diff * 100) / (100 + contempt[color]);  // negative contempt c shrank the gap by the factor (100 + c) / 100
+  diff = orig_diff - diff;  // amount AddContempt added to win
+  return WDLScore { score.win - diff, score.win_draw };
+}
+
 }
diff --git a/src/net_evaluation.h b/src/net_evaluation.h
@@ -52,6 +52,11 @@ void GenerateDatasetFromUCIGames(std::string filename, std::string out_name = "e
                                  size_t reroll_pct = 0);
 #endif
 
+void SetContempt(Color color, int32_t value);
+std::array<Score, 2> GetDrawArray();
+
+Score AddContempt(Score score, Color color);
+Score RemoveContempt(Score score, Color color);
 }
 
 // TODO: Move to external file
@@ -112,5 +117,4 @@ constexpr size_t kChannelsPerSide = kChanKingsIdx + 1;
 constexpr size_t kNumChannels = 2 * kChannelsPerSide;
 }
 
-
 #endif /* NET_EVALUATION_H_ */
diff --git a/src/search.cc b/src/search.cc
@@ -62,8 +62,9 @@ int kNodeCountSampleAt = 1000;
 const int kMaxDepthSampled = 32;
 #endif
 
-int contempt = 0;
+int32_t contempt = 0;
 bool armageddon = false;
+std::array<Score, 2> draw_score { kDrawScore, kDrawScore };
 
 int rsearch_mode;
 Milliseconds rsearch_duration;
@@ -142,6 +143,7 @@ const Depth kLMPBaseNW = 3, kLMPBasePV = 5;
 const int32_t kLMPScalar = 12, kLMPQuad = 4;
 const Array2d<Depth, 2, 6> kLMP = init_lmp_breakpoints(kLMPBaseNW, kLMPBasePV, kLMPScalar, kLMPQuad);
 #endif
+bool uci_show_wdl = true;
 
 // Parameters used to initialize the LMR reduction table
 LMRInitializer lmr_initializer {
@@ -599,7 +601,7 @@ Score QuiescentSearch(Thread &t, Score alpha, const Score beta) {
 
   //End search immediately if trivial draw is reached
   if (t.board.IsTriviallyDrawnEnding()) {
-    return kDrawScore;
+    return draw_score[t.board.get_turn()];
   }
 
   //TT probe
@@ -771,12 +773,14 @@ void update_counter_move_history(Thread &t, const std::vector<Move> &quiets, con
 }
 
 inline const Score get_singular_beta(Score beta, Depth depth) {
-  //return beta - 4*depth;
   WDLScore result = WDLScore{beta.win - 2*depth, beta.win_draw - 2*depth};
   if (result.win < 0) {
     result.win_draw += result.win;
     result.win = 0;
   }
+  if (result.win_draw < 0) {
+    result.win_draw = 0;
+  }
   return result;
 }
 
@@ -789,15 +793,14 @@ Score AlphaBeta(Thread &t, Score alpha, const Score beta, Depth depth, bool expe
   assert(node_type != NodeType::kPV || !expected_cut_node);
 
   const Score original_alpha = alpha;
-//  Score lower_bound_score = kMinScore+t.board.get_num_made_moves();
 
   //Immediately return 0 if we detect a draw.
   if (t.board.IsDraw() || (settings::kRepsForDraw == 3 && t.board.CountRepetitions(min_ply) >= 2)) {
     t.nodes++;
     if (t.board.IsFiftyMoveDraw() && t.board.InCheck() && t.board.GetMoves<kNonQuiescent>().empty()) {
       return GetMatedOnMoveScore(t.board.get_num_made_moves());
     }
-    return kDrawScore;
+    return draw_score[t.board.get_turn()];
   }
 
   //We drop to QSearch if we run out of depth.
@@ -807,10 +810,6 @@ Score AlphaBeta(Thread &t, Score alpha, const Score beta, Depth depth, bool expe
       return net_evaluation::ScoreBoard(t.board);
     }
     return QuiescentSearch<Mode>(t, alpha, beta);
-//    t.board.Print();
-//    Score score = QuiescentSearch<Mode>(t, alpha, beta);
-//    std::cout << "AB QSearch return: (w:" << score.win << ", wd:" << score.win_draw << ")" << std::endl;
-//    return score;
   }
 
   // To avoid counting nodes twice if all we do is fall through to QSearch,
@@ -890,7 +889,7 @@ Score AlphaBeta(Thread &t, Score alpha, const Score beta, Depth depth, bool expe
     if (in_check) {
       return GetMatedOnMoveScore(t.board.get_num_made_moves());
     }
-    return kDrawScore;
+    return draw_score[t.board.get_turn()];
   }
 
 //  if (Mode == kSamplingSearchMode && node_type == NodeType::kNW && depth <= kMaxDepthSampled) {
@@ -1096,11 +1095,11 @@ Score RootSearchLoop(Thread &t, Score original_alpha, const Score beta,
   Score alpha = original_alpha;
   Score lower_bound_score = kMinScore;
   //const bool in_check = board.InCheck();
-  if (settings::kRepsForDraw == 3 && alpha < kDrawScore.get_previous_score() && t.board.MoveInListCanRepeat(moves)) {
-    if (beta <= kDrawScore) {
-      return kDrawScore;
+  if (settings::kRepsForDraw == 3 && alpha < draw_score[t.board.get_turn()].get_previous_score() && t.board.MoveInListCanRepeat(moves)) {
+    if (beta <= draw_score[t.board.get_turn()]) {
+      return draw_score[t.board.get_turn()];
     }
-    alpha = kDrawScore.get_previous_score();
+    alpha = draw_score[t.board.get_turn()].get_previous_score();
   }
   const bool in_check = t.board.InCheck();
   for (size_t i = 0; i < moves.size(); ++i) {
@@ -1109,8 +1108,8 @@ Score RootSearchLoop(Thread &t, Score original_alpha, const Score beta,
     if (i == 0) {
       Score score = -AlphaBeta<NodeType::kPV, Mode>(t, -beta, -alpha, current_depth - 1);
       assert(score.is_valid());
-      if (settings::kRepsForDraw == 3 && score < kDrawScore && t.board.CountRepetitions() >= 2) {
-        score = kDrawScore;
+      if (settings::kRepsForDraw == 3 && score < draw_score[t.board.get_turn()] && t.board.CountRepetitions() >= 2) {
+        score = draw_score[t.board.get_turn()];
       }
       t.board.UnMake();
       if (score >= beta) {
@@ -1133,8 +1132,8 @@ Score RootSearchLoop(Thread &t, Score original_alpha, const Score beta,
       if (score > alpha) {
         score = -AlphaBeta<NodeType::kPV, Mode>(t, -beta, -alpha, current_depth - 1);
       }
-      if (settings::kRepsForDraw == 3 && score < kDrawScore && t.board.CountRepetitions() >= 2) {
-        score = kDrawScore;
+      if (settings::kRepsForDraw == 3 && score < draw_score[t.board.get_turn()] && t.board.CountRepetitions() >= 2) {
+        score = draw_score[t.board.get_turn()];
       }
       lower_bound_score = std::max(score, lower_bound_score);
       t.board.UnMake();
@@ -1227,15 +1226,10 @@ void PrintUCIInfoString(Thread &t, const Depth depth, const Time &begin,
 
     if (!score.is_mate_score()) {
       std::cout << " score cp ";
-      if (armageddon) {
-        // TODO change this to reflect armageddon odds.
-        std::cout << (score.to_cp() / 8);
-      }
-      else {
-        std::cout << (score.to_cp() / 8);
+      std::cout << (net_evaluation::RemoveContempt(score, t.board.get_turn()).to_cp() / 8);
+      if (uci_show_wdl) {
+        std::cout << " " << net_evaluation::RemoveContempt(score, t.board.get_turn()).get_uci_string();
       }
-      std::cout << " " << score.get_uci_string();
-
     }
     else {
       if (score.is_disadvantage()) {
@@ -1338,14 +1332,14 @@ void Thread::search() {
 template<int Mode>
 Move RootSearch(Board &board, Depth depth, Milliseconds duration = Milliseconds(24 * 60 * 60 * 1000)) {
   table::UpdateGeneration();
-  // TODO fix contempt and armageddon.
-//  if (armageddon) {
-//    net_evaluation::SetContempt(60, kWhite);
-//  }
-//  else {
-//    net_evaluation::SetContempt(contempt, board.get_turn());
-//  }
-//  draw_score = net_evaluation::GetDrawArray();
+  if (armageddon) {
+    net_evaluation::SetContempt(kWhite, 60);
+  }
+  else {
+    net_evaluation::SetContempt(board.get_turn(), contempt);
+  }
+  draw_score = net_evaluation::GetDrawArray();
+  assert(armageddon || contempt != 0 || draw_score[kWhite] == kDrawScore);
   min_ply = board.get_num_made_moves();
   Threads.reset_node_count();
   Threads.reset_depths();
@@ -2129,14 +2123,18 @@ std::vector<Board> GenerateEvalSampleSet(std::string filename) {
   return boards;
 }
 
-void SetContempt(int contempt_) {
-  contempt = (contempt_ + 100) / 2;
+void SetContempt(int32_t contempt_) {
+  contempt = contempt_;
 }
 
 void SetArmageddon(bool armageddon_) {
   armageddon = armageddon_;
 }
 
+void SetUCIShowWDL(bool show_wdl) {  // Sets the flag PrintUCIInfoString checks before appending the score's UCI string after the cp value.
+  uci_show_wdl = show_wdl;
+}
+
 #ifdef TUNE
 void SetInitialAspirationDelta(int32_t delta) {
   kInitialAspirationDelta = delta;
diff --git a/src/search.h b/src/search.h
@@ -68,8 +68,9 @@ void LoadSearchVariablesHardCoded();
 void EvaluateCaptureMoveValue(int n);
 void EvaluateScoreDistributions(const int focus);
 
-void SetContempt(int contempt);
 void SetArmageddon(bool armageddon);
+void SetContempt(int32_t contempt);
+void SetUCIShowWDL(bool show_wdl);
 
 #ifdef TUNE
 void SetInitialAspirationDelta(int32_t delta);
diff --git a/src/uci.cc b/src/uci.cc

Original file line number	Diff line number	Diff line change
`@@ -52,6 +52,11 @@ void GenerateDatasetFromUCIGames(std::string filename, std::string out_name = "e`
`52`	`52`	`size_t reroll_pct = 0);`
`53`	`53`	`#endif`
`54`	`54`
	`55`	`+void SetContempt(Color color, int32_t value);`
	`56`	`+std::array<Score, 2> GetDrawArray();`
	`57`	`+`
	`58`	`+Score AddContempt(Score score, Color color);`
	`59`	`+Score RemoveContempt(Score score, Color color);`
`55`	`60`	`}`
`56`	`61`
`57`	`62`	`// TODO: Move to external file`
`@@ -112,5 +117,4 @@ constexpr size_t kChannelsPerSide = kChanKingsIdx + 1;`
`112`	`117`	`constexpr size_t kNumChannels = 2 * kChannelsPerSide;`
`113`	`118`	`}`
`114`	`119`
`115`		`-`
`116`	`120`	`#endif /* NET_EVALUATION_H_ */`