diff --git a/CMakeLists.txt b/CMakeLists.txt index 49c6a6b..cc6f0c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,7 +92,7 @@ SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) set(CMAKE_EXPORT_COMPILE_COMMANDS 1) # static compilation -option(SYNTH "Also build synthesis" ON) +option(EXTRA_SYNTH "Also build synthesis" ON) option(BUILD_SHARED_LIBS "Build the shared library" ON) option(STATICCOMPILE "Compile to static executable" OFF) if (STATICCOMPILE) @@ -117,8 +117,8 @@ find_package (sbva REQUIRED) find_package(treedecomp CONFIG REQUIRED) message(STATUS "Found SBVA includes at: ${SBVA_INCLUDE_DIRS}") find_package(GMP REQUIRED) -if (SYNTH) - add_definitions(-DSYNTH) +if (EXTRA_SYNTH) + add_definitions(-DEXTRA_SYNTH) find_package(MLPACK REQUIRED) find_library(cadical PATHS ${CMAKE_CURRENT_SOURCE_DIR}/../cadical/build/ diff --git a/html/index.html b/html/index.html new file mode 100644 index 0000000..28f59fe --- /dev/null +++ b/html/index.html @@ -0,0 +1,281 @@ + + + + + + + + + + Arjun in your browser + + + +
+

Arjun Functional Synthesis in Your Browser

+
Copy your CNF formula in the input box below and click "Run + arjun.js" to run the Functional Synthesis engine in your browser. + The output will be displayed below the input box. Format + description is available here
+ You need to have "c p show ... 0" for all the variables that are inputs +
+ + +
+ +
+ + +
+ +
+ + + Loading WebAssembly module... +
+
+
+
+ +
+ +
Ready - enter your input and click "Run arjun.js"
+
+
+ + + + diff --git a/scripts/build_emscripten.sh b/scripts/build_emscripten.sh index e0cb1c7..ced1d75 100755 --- a/scripts/build_emscripten.sh +++ b/scripts/build_emscripten.sh @@ -2,6 +2,8 @@ set -e rm -rf lib* Test* tests* include tests CM* cmake* arjun Makefile rjun-src -emcmake cmake -DCMAKE_INSTALL_PREFIX=$EMINSTALL -DSYNTH=OFF .. +emcmake cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=$EMINSTALL -DEXTRA_SYNTH=OFF .. emmake make -j26 emmake make install +cp arjun.wasm ../html +cp $EMINSTALL/bin/arjun.js ../html diff --git a/scripts/build_no_extra_synth.sh b/scripts/build_no_extra_synth.sh new file mode 100755 index 0000000..b5e0e66 --- /dev/null +++ b/scripts/build_no_extra_synth.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -e + +rm -rf lib* Test* tests* include tests CM* cmake* arjun Makefile rjun-src +cmake -DEXTRA_SYNTH=OFF -DENABLE_TESTING=OFF .. +make -j26 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fba8fee..027076b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -23,7 +23,7 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}) include_directories(${CRYPTOMINISAT5_INCLUDE_DIRS}) include_directories(${SBVA_INCLUDE_DIRS}) include_directories( ${GMP_INCLUDE_DIR} ) -if (SYNTH) +if (EXTRA_SYNTH) include_directories(${EVALMAXSAT_INCLUDE_DIRS}) include_directories( ${MLPACK_INCLUDE_DIR} ) endif() @@ -58,12 +58,12 @@ set(libfiles autarky.cpp ccnr/ccnr.cpp ${CMAKE_CURRENT_BINARY_DIR}/GitSHA1.cpp + manthan.cpp + interpolant.cpp ) -if (SYNTH) - set(libfiles ${libfiles} - manthan.cpp - interpolant.cpp) +if (EXTRA_SYNTH) + list(APPEND libfiles manthan_learn.cpp) endif() add_library(arjun ${libfiles}) @@ -78,7 +78,7 @@ target_link_libraries(arjun LINK_PUBLIC ${TREEDECOMP_LIBRARIES} ) -add_executable(arjun-bin main.cpp) +add_executable(arjun-bin main.cpp synth.cpp) add_executable(arjun-example example.cpp) add_executable(test-synth test-synth.cpp) @@ -91,10 +91,20 @@ set_target_properties(arjun PROPERTIES VERSION 
${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR} SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR} ) -set_target_properties(arjun-bin PROPERTIES - OUTPUT_NAME arjun - RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR} - INSTALL_RPATH_USE_LINK_PATH TRUE) + +if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + set_target_properties(arjun-bin PROPERTIES + OUTPUT_NAME arjun + RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR} + INSTALL_RPATH_USE_LINK_PATH TRUE + LINK_FLAGS "-s WASM=1 -s ALLOW_MEMORY_GROWTH=1 -s EXPORTED_RUNTIME_METHODS='[\"callMain\", \"ccall\", \"cwrap\", \"FS\", \"print\"]' -s FORCE_FILESYSTEM=1" + ) +else() + set_target_properties(arjun-bin PROPERTIES + OUTPUT_NAME arjun + RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR} + INSTALL_RPATH_USE_LINK_PATH TRUE) + endif() target_link_libraries(arjun-bin ${arjun_bin_exec_link_libs} arjun) set_target_properties(arjun-example PROPERTIES diff --git a/src/arjun.cpp b/src/arjun.cpp index 8156d77..1f7f099 100644 --- a/src/arjun.cpp +++ b/src/arjun.cpp @@ -39,9 +39,7 @@ #include "constants.h" #include "autarky.h" #include "unate_def.h" -#ifdef SYNTH #include "manthan.h" -#endif using namespace ArjunInt; @@ -125,12 +123,10 @@ DLL_PUBLIC void Arjun::standalone_autarky(SimplifiedCNF& cnf) { autarky.find_autarkies(cnf); } -#ifdef SYNTH DLL_PUBLIC void Arjun::standalone_backward_round_synth(SimplifiedCNF& cnf, const ManthanConf& mconf) { Minimize common(arjdata->conf); common.backward_round_synth(cnf, mconf); } -#endif DLL_PUBLIC void Arjun::standalone_unsat_define(SimplifiedCNF& cnf) { Extend extend(arjdata->conf); @@ -149,13 +145,12 @@ DLL_PUBLIC SimplifiedCNF Arjun::standalone_get_simplified_cnf( Puura puura(arjdata->conf); return puura.get_fully_simplified_renumbered_cnf(cnf, simp_conf); } -#ifdef SYNTH -DLL_PUBLIC SimplifiedCNF Arjun::standalone_manthan(const SimplifiedCNF& cnf, const ManthanConf& mconf, uint32_t max_repairs) + +DLL_PUBLIC SimplifiedCNF Arjun::standalone_manthan(const SimplifiedCNF& cnf, const 
ManthanConf& mconf) { Manthan manthan(arjdata->conf, mconf, cnf); - return manthan.do_manthan(max_repairs); + return manthan.do_manthan(); } -#endif DLL_PUBLIC void Arjun::standalone_rev_bce(SimplifiedCNF& cnf) { diff --git a/src/arjun.h b/src/arjun.h index b498ffe..4c18bda 100644 --- a/src/arjun.h +++ b/src/arjun.h @@ -1355,28 +1355,28 @@ class Arjun struct ManthanConf { ManthanConf() = default; ManthanConf(const ManthanConf& other) = default; - int do_filter_samples = 1; - int do_biased_sampling = 0; + int filter_samples = 1; + int biased_sampling = 0; /// Also to try: - uint32_t num_samples = 5000; - uint32_t num_samples_ccnr = 0; - uint32_t minimumLeafSize = 10; + uint32_t samples = 5000; + uint32_t samples_ccnr = 0; + uint32_t min_leaf_size = 10; // TODO experiment with 0.003 - double minGainSplit = 0.001; - uint32_t maximumDepth = 0; + double min_gain_split = 0.001; + uint32_t max_depth = 0; uint32_t sampler_fixed_conflicts = 100; - int do_minimize_conflict = 1; + int minimize_conflict = 1; uint32_t simplify_every = 1000; std::string write_manthan_cnf; - int do_maxsat_better_ctx = 0; + int maxsat_better_ctx = 0; int maxsat_order = 1; int do_unique_input_samples = 1; - int do_use_all_variables_as_features = 1; + int use_all_vars_as_feats = 1; int ctx_solver_type = 1; int repair_solver_type = 1; int repair_cache_size = 1000; int backward_synth_order = 0; - int manthan_bve = 0; + int manthan_base = 0; int manthan_order = 0; int manthan_on_the_fly_order = 0; int one_repair_per_loop = 0; @@ -1386,6 +1386,8 @@ class Arjun int force_bw_equal = 1; int bva_xor_vars = 0; int silent_var_update = 1; + int inv_learnt = 0; + uint32_t max_repairs = std::numeric_limits::max(); }; /// Standalone functions @@ -1404,7 +1406,7 @@ class Arjun void standalone_sbva(SimplifiedCNF& orig, int64_t sbva_steps = 200, uint32_t sbva_cls_cutoff = 2, uint32_t sbva_lits_cutoff = 2, int sbva_tiebreak = 1); - SimplifiedCNF standalone_manthan(const SimplifiedCNF& cnf, const ManthanConf& 
manthan_conf, uint32_t max_repairs); + SimplifiedCNF standalone_manthan(const SimplifiedCNF& cnf, const ManthanConf& manthan_conf); void standalone_autarky(SimplifiedCNF& cnf); //Set config diff --git a/src/autarky.cpp b/src/autarky.cpp index 5bf2058..b1efed0 100644 --- a/src/autarky.cpp +++ b/src/autarky.cpp @@ -29,6 +29,7 @@ #include "constants.h" using namespace ArjunNS; +using namespace ArjunInt; using namespace CMSat; using std::vector; using std::fixed; diff --git a/src/autarky.h b/src/autarky.h index b8ac10b..fe9bbe0 100644 --- a/src/autarky.h +++ b/src/autarky.h @@ -34,11 +34,12 @@ #include "src/formula.h" using namespace CMSat; -using namespace ArjunInt; using namespace ArjunNS; using std::vector; using std::set; +namespace ArjunInt { + class Autarky { public: Autarky(const Config& _conf); @@ -59,3 +60,4 @@ class Autarky { const Config conf; }; +} diff --git a/src/formula.h b/src/formula.h index 03f90be..352a613 100644 --- a/src/formula.h +++ b/src/formula.h @@ -24,12 +24,7 @@ #pragma once -#ifdef CMS_LOCAL_BUILD -#include "cryptominisat.h" -#else #include -#endif - #include #include #include @@ -46,7 +41,7 @@ using CMSat::lit_Error; using CMSat::SATSolver; using std::map; -namespace ArjunNS { +namespace ArjunInt { struct CL { constexpr CL(const vector& _lits) : lits(_lits) {} @@ -68,12 +63,12 @@ class FHolder { // solver_train vector clauses; Lit out = lit_Error; - aig_ptr aig = nullptr; + ArjunNS::aig_ptr aig = nullptr; }; set get_dependent_vars(const Formula& f, uint32_t v) const { set ret; - AIG::get_dependent_vars(f.aig, ret, v); + ArjunNS::AIG::get_dependent_vars(f.aig, ret, v); return ret; } @@ -88,14 +83,14 @@ class FHolder { Formula ret; ret = compose_ite(fleft, fright, branch.out, helpers); ret.clauses.insert(ret.clauses.end(), branch.clauses.begin(), branch.clauses.end()); - ret.aig = AIG::new_ite(fleft.aig, fright.aig, branch.aig); + ret.aig = ArjunNS::AIG::new_ite(fleft.aig, fright.aig, branch.aig); return ret; } Formula neg(const Formula& f) 
{ Formula ret = f; ret.out = ~f.out; - ret.aig = AIG::new_not(f.aig); + ret.aig = ArjunNS::AIG::new_not(f.aig); return ret; } @@ -119,7 +114,7 @@ class FHolder { assert(fleft.aig != nullptr); assert(fright.aig != nullptr); - ret.aig = AIG::new_or(fleft.aig, fright.aig); + ret.aig = ArjunNS::AIG::new_or(fleft.aig, fright.aig); return ret; } @@ -150,7 +145,7 @@ class FHolder { ret.clauses.push_back(CL({b, fresh, ~r})); ret.clauses.push_back(CL({b, ~fresh, r})); ret.out = Lit(fresh_v, false); - ret.aig = AIG::new_ite(fleft.aig, fright.aig, branch); + ret.aig = ArjunNS::AIG::new_ite(fleft.aig, fright.aig, branch); return ret; } @@ -160,15 +155,12 @@ class FHolder { } private: - AIGManager aig_mng; + ArjunNS::AIGManager aig_mng; T* solver = nullptr; Lit my_true_lit = lit_Error; }; -} - - -inline std::ostream& operator<<(std::ostream& os, const ArjunNS::FHolder::Formula& f) { +inline std::ostream& operator<<(std::ostream& os, const FHolder::Formula& f) { os << " === Formula out: " << f.out << " === " << endl; for (const auto& cl : f.clauses) { for (const auto& l : cl.lits) os << std::setw(6) << l; @@ -178,3 +170,5 @@ inline std::ostream& operator<<(std::ostream& os, const ArjunNS::FHolder & clause, const std::vector & oantec) { diff --git a/src/interpolant.h b/src/interpolant.h index 4bec03e..d36a7ac 100644 --- a/src/interpolant.h +++ b/src/interpolant.h @@ -42,6 +42,8 @@ using std::vector; using std::map; using namespace ArjunNS; +namespace ArjunInt { + struct MyTracer : public CaDiCaL::Tracer { MyTracer(const uint32_t _orig_num_vars, const set& input_vars, const ArjunInt::Config& _conf, map& _lit_to_aig, const AIGManager& _aig_mng) : @@ -139,4 +141,4 @@ class Interpolant { vector var_to_indic; //maps an ORIG VAR to an INDICATOR VAR vector defs; //definition of variables in terms of AIG. 
ORIGINAL number space }; - +} diff --git a/src/main.cpp b/src/main.cpp index 3277087..ce059bc 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -28,6 +28,7 @@ #include #endif +#include #include #include #include @@ -39,23 +40,7 @@ #include "arjun.h" #include "config.h" #include "helper.h" - -#define myopt(name, var, fun, hhelp) \ - program.add_argument(name) \ - .action([&](const auto& a) {var = std::fun(a.c_str());}) \ - .default_value(var) \ - .help(hhelp) -#define myopt2(name1, name2, var, fun, hhelp) \ - program.add_argument(name1, name2) \ - .action([&](const auto& a) {var = std::fun(a.c_str());}) \ - .default_value(var) \ - .help(hhelp) -#define myflag(name, var, hhelp) \ - program.add_argument(name) \ - .action([&](const auto&) {var = 1;}) \ - .default_value(var) \ - .flag() \ - .help(hhelp) +#include "synth.h" using std::cout; using std::endl; @@ -81,8 +66,7 @@ int simptofile = true; int sampl_start_at_zero = false; int do_synth_bve = true; int do_pre_backbone = 0; -int manthan_rep_mult = 4; -int manthan_strategy = 0; +string mstrategy = "const(max_repairs=400),const(max_repairs=400,inv_learnt=1),bve"; int synthesis = false; int do_unate = false; @@ -105,82 +89,133 @@ string print_version() { return ss.str(); } +static int fc_int(const std::string& s) { + int val = 0; + std::from_chars(s.data(), s.data() + s.size(), val); + return val; +} +static double fc_double(const std::string& s) { + size_t pos; + double val = std::stod(s, &pos); + if (pos != s.size()) throw std::invalid_argument("trailing characters in double: " + s); + return val; +} +static const std::string& fc_string(const std::string& s) { return s; } + +template +void myopt(const char* name, T& var, F fun, const char* hhelp) { + using r = std::decay_t>; + static_assert(std::is_floating_point_v == std::is_floating_point_v, + "Floating-point mismatch: use fc_double for floating-point vars, fc_int for integral vars"); + static_assert(std::is_integral_v == std::is_integral_v, + "Integral/string 
mismatch: use fc_int for integral vars, fc_string for string vars"); + program.add_argument(name) + .action([&var, fun](const auto& a) { var = fun(a); }) + .default_value(var) + .help(hhelp); +} +template +void myopt2(const char* name1, const char* name2, T& var, F fun, const char* hhelp) { + using r = std::decay_t>; + static_assert(std::is_floating_point_v == std::is_floating_point_v, + "Floating-point mismatch: use fc_double for floating-point vars, fc_int for integral vars"); + static_assert(std::is_integral_v == std::is_integral_v, + "Integral/string mismatch: use fc_int for integral vars, fc_string for string vars"); + program.add_argument(name1, name2) + .action([&var, fun](const auto& a) { var = fun(a); }) + .default_value(var) + .help(hhelp); +} +template +void myflag(const char* name, T& var, const char* hhelp) { + static_assert(std::is_same_v, "myflag var must be int"); + program.add_argument(name) + .action([&var](const auto&) { var = 1; }) + .default_value(var) + .flag() + .help(hhelp); +} + void add_arjun_options() { - myopt2("-v", "--verb", conf.verb, atoi, "Verbosity"); + myopt2("-v", "--verb", conf.verb, fc_int, "Verbosity"); program.add_argument("-v", "--version") \ .action([&](const auto&) {print_version(); exit(0);}) \ .flag() .help("Print version and exit"); - myopt("--mode", mode , atoi, "0=counting, 1=weightd counting"); - myopt("--allindep", etof_conf.all_indep , atoi, "All variables can be made part of the indepedent support. Indep support is given ONLY to help the solver."); - myopt("--maxc", conf.backw_max_confl, atoi,"Maximum conflicts per variable in backward mode"); - myopt("--revbce", do_revbce, atoi,"Perform reverse BCE"); - myopt("--sbva", etof_conf.num_sbva_steps, atoi,"SBVA timeout. 
0 = no sbva"); - myopt("--prebackbone", do_pre_backbone, atoi,"Perform backbone before other things"); - myopt("--seed", conf.seed, atoi, "Random seed"); + myopt("--mode", mode , fc_int, "0=counting, 1=weightd counting"); + myopt("--allindep", etof_conf.all_indep , fc_int, "All variables can be made part of the indepedent support. Indep support is given ONLY to help the solver."); + myopt("--maxc", conf.backw_max_confl, fc_int,"Maximum conflicts per variable in backward mode"); + myopt("--revbce", do_revbce, fc_int,"Perform reverse BCE"); + myopt("--sbva", etof_conf.num_sbva_steps, fc_int,"SBVA timeout. 0 = no sbva"); + myopt("--prebackbone", do_pre_backbone, fc_int,"Perform backbone before other things"); + myopt("--seed", conf.seed, fc_int, "Random seed"); // synth main myflag("--synth", synthesis, "Run synthesis"); myflag("--synthmore", synthesis, "Run synthesis, with more aggressive BVE options"); - myopt("--maxsat", mconf.do_maxsat_better_ctx, atoi, "Use maxsat to find better counterexamples during Manthan"); - myopt("--synthbve", do_synth_bve, atoi,"Perform BVE for synthesis"); - myopt("--extend", etof_conf.do_extend_indep, atoi,"Extend independent set just before CNF dumping"); - myopt("--minimconfl", mconf.do_minimize_conflict, atoi,"Minimize conflict size when repairing"); - myopt("--simpevery", mconf.simplify_every, atoi,"Simplify solvers inside Manthan every K loops"); - myopt("--unate", do_unate, atoi,"Perform unate analysis"); - myopt("--unatedef", do_unate_def, atoi,"Perform definition-aware unate analysis"); - myopt("--autarky", etof_conf.do_autarky, atoi,"Perform unate analysis"); - myopt("--mbve", mconf.manthan_bve, atoi,"Use BVE with constants instead of training"); - myopt("--monflyorder", mconf.manthan_on_the_fly_order, atoi,"Use on-the-fly training order and post-training topological order"); - myopt("--moneperloop", mconf.one_repair_per_loop, atoi,"One repair per CEX loop"); + myopt("--maxsat", mconf.maxsat_better_ctx, fc_int, "Use maxsat to 
find better counterexamples during Manthan"); + myopt("--synthbve", do_synth_bve, fc_int,"Perform BVE for synthesis"); + myopt("--extend", etof_conf.do_extend_indep, fc_int,"Extend independent set just before CNF dumping"); + myopt("--minimconfl", mconf.minimize_conflict, fc_int,"Minimize conflict size when repairing"); + myopt("--simpevery", mconf.simplify_every, fc_int,"Simplify solvers inside Manthan every K loops"); + myopt("--unate", do_unate, fc_int,"Perform unate analysis"); + myopt("--unatedef", do_unate_def, fc_int,"Perform definition-aware unate analysis"); + myopt("--autarky", etof_conf.do_autarky, fc_int,"Perform unate analysis"); + myopt("--monflyorder", mconf.manthan_on_the_fly_order, fc_int,"Use on-the-fly training order and post-training topological order"); + myopt("--moneperloop", mconf.one_repair_per_loop, fc_int,"One repair per CEX loop"); + myopt("--minvertlearn", mconf.inv_learnt, fc_int,"Invert learnt functions"); // repairing on vars - myopt("--bwequal", mconf.force_bw_equal, atoi,"Force BW vars' indicators to be TRUE -- prevents repairing with them, but faster to repair"); - myopt("--bvaxor", mconf.bva_xor_vars, atoi,"Add XOR over input vars as BVA vars, so we can repair with them"); - myopt("--silentupdate", mconf.silent_var_update, atoi,"Silently update variables while repairing"); + myopt("--bwequal", mconf.force_bw_equal, fc_int,"Force BW vars' indicators to be TRUE -- prevents repairing with them, but faster to repair"); + myopt("--bvaxor", mconf.bva_xor_vars, fc_int,"Add XOR over input vars as BVA vars, so we can repair with them"); + myopt("--silentupdate", mconf.silent_var_update, fc_int,"Silently update variables while repairing"); // Strategy - myopt("--mtryrepmult", manthan_rep_mult, atoi,"Repair tries will be multiplied by this"); - myopt("--mstrategy", manthan_strategy, atoi,"Go directly to strategy N"); + myopt("--mstrategy", mstrategy, fc_string, + "Comma-separated synthesis strategy list, e.g. 
" + "\"learn(samples=1,max_repairs=100),learn(max_repairs=800),bve\". " + "Each non-last strategy runs for 20*max_repairs tries; the last runs unlimited. " + "Params: manthan_bve, samples, samples_ccnr, minGainSplit, " + "maximumDepth, sampler_fixed_conflicts, and other ManthanConf fields."); // Order - myopt("--morder", mconf.manthan_order, atoi,"Order vars: indicence (0), cluster-incidence (1), BVE (2)"); - myopt("--maxsatorder", mconf.maxsat_order, atoi,"Which order to use to try to fix vars? 0 = norm, 1 = rev"); - myopt("--mbackwsynthorder", mconf.backward_synth_order, atoi,"Which order to use to try to do backward? 0 = normal, 1 = reverse"); + myopt("--morder", mconf.manthan_order, fc_int,"Order vars: indicence (0), cluster-incidence (1), BVE (2)"); + myopt("--maxsatorder", mconf.maxsat_order, fc_int,"Which order to use to try to fix vars? 0 = norm, 1 = rev"); + myopt("--mbackwsynthorder", mconf.backward_synth_order, fc_int,"Which order to use to try to do backward? 0 = normal, 1 = reverse"); // solver config - myopt("--ctxsolver", mconf.ctx_solver_type, atoi,"Context solver type. 0 = CryptoMiniSat, 1 = CaDiCaL"); - myopt("--repairsolver", mconf.repair_solver_type, atoi,"Context solver type. 0 = CryptoMiniSat, 1 = CaDiCaL"); - myopt("--repaircache", mconf.repair_cache_size, atoi,"Repair cache size. 0 = no cache"); + myopt("--ctxsolver", mconf.ctx_solver_type, fc_int,"Context solver type. 0 = CryptoMiniSat, 1 = CaDiCaL"); + myopt("--repairsolver", mconf.repair_solver_type, fc_int,"Context solver type. 0 = CryptoMiniSat, 1 = CaDiCaL"); + myopt("--repaircache", mconf.repair_cache_size, fc_int,"Repair cache size. 
0 = no cache"); // synth -- sampling - myopt("--samples", mconf.num_samples, atoi,"Number of samples"); - myopt("--samplesccnr", mconf.num_samples_ccnr, atoi,"Number of samples from CCNR"); - myopt("--uniqsamp", mconf.do_unique_input_samples, atoi, "Unique samples on input vars"); - myopt("--filtersamples", mconf.do_filter_samples, atoi,"Filter samples from useless ones"); - myopt("--biasedsampling", mconf.do_biased_sampling, atoi,"Biased sampling"); - myopt("--fixedconf", mconf.sampler_fixed_conflicts, atoi,"Restart conflict limit in CMSGen"); + myopt("--samples", mconf.samples, fc_int,"Number of samples"); + myopt("--samplesccnr", mconf.samples_ccnr, fc_int,"Number of samples from CCNR"); + myopt("--uniqsamp", mconf.do_unique_input_samples, fc_int, "Unique samples on input vars"); + myopt("--filtersamples", mconf.filter_samples, fc_int,"Filter samples from useless ones"); + myopt("--biasedsampling", mconf.biased_sampling, fc_int,"Biased sampling"); + myopt("--fixedconf", mconf.sampler_fixed_conflicts, fc_int,"Restart conflict limit in CMSGen"); // synth -- decision tree - myopt("--maxdepth", mconf.maximumDepth, atoi,"Maximum depth of decision tree"); - myopt("--minleaf", mconf.minimumLeafSize, atoi,"Minimum leaf size in decision tree"); - myopt("--mingainsplit", mconf.minGainSplit, atof,"Minimum gain for a split in decision tree"); - myopt("--learnuseall", mconf.do_use_all_variables_as_features, atoi,"Use all variables as features in decision tree learning. 0 = only inputs"); + myopt("--maxdepth", mconf.max_depth, fc_int,"Maximum depth of decision tree"); + myopt("--minleaf", mconf.min_leaf_size, fc_int,"Minimum leaf size in decision tree"); + myopt("--mingainsplit", mconf.min_gain_split, fc_double,"Minimum gain for a split in decision tree"); + myopt("--learnuseall", mconf.use_all_vars_as_feats, fc_int,"Use all variables as features in decision tree learning. 
0 = only inputs"); // synth -- debug - myopt("--manthancnf", mconf.write_manthan_cnf, string, "Write Manthan CNF to this file"); - myopt("--debugsynth", conf.debug_synth, string,"Debug synthesis, prefix with this fname"); + myopt("--manthancnf", mconf.write_manthan_cnf, fc_string, "Write Manthan CNF to this file"); + myopt("--debugsynth", conf.debug_synth, fc_string,"Debug synthesis, prefix with this fname"); // Simplification options for minim - myopt("--probe", conf.probe_based, atoi,"Do probing during orignal Arjun"); - myopt("--bvepresimp", conf.bve_pre_simplify, atoi,"simplify"); - myopt("--simp", conf.simp, atoi,"Do ~ sort of simplification during indep minimixation"); - myopt("--probe", conf.probe_based, atoi,"Use simple probing to set (and define) some variables"); - myopt("--intree", conf.intree, atoi,"intree"); + myopt("--probe", conf.probe_based, fc_int,"Do probing during orignal Arjun"); + myopt("--bvepresimp", conf.bve_pre_simplify, fc_int,"simplify"); + myopt("--simp", conf.simp, fc_int,"Do ~ sort of simplification during indep minimixation"); + myopt("--probe", conf.probe_based, fc_int,"Use simple probing to set (and define) some variables"); + myopt("--intree", conf.intree, fc_int,"intree"); // Gate options - myopt("--gates", do_gates, atoi,"Turn on/off all gate-based definability"); - myopt("--nogatebelow", conf.no_gates_below, atof,"Don't use gates below this incidence relative position (1.0-0.0) to minimize the independent set. Gates are not very accurate, but can save a LOT of time. We use them to get rid of most of the uppert part of the sampling set only. Default is 99% is free-for-all, the last 1% we test. 
At 1.0 we test everything, at 0.0 we try using gates for everything."); - myopt("--orgate", conf.or_gate_based, atoi,"Use 3-long gate detection in SAT solver to define variables"); - myopt("--irreggate", conf.irreg_gate_based, atoi,"Use irregular gate-based removal of vars from indep set"); - myopt("--itegate", conf.ite_gate_based, atoi,"Use ITE gate detection in SAT solver to define some variables"); - myopt("--xorgate", conf.xor_gates_based, atoi,"Use XOR detection in SAT solver to define some variables"); + myopt("--gates", do_gates, fc_int,"Turn on/off all gate-based definability"); + myopt("--nogatebelow", conf.no_gates_below, fc_double,"Don't use gates below this incidence relative position (1.0-0.0) to minimize the independent set. Gates are not very accurate, but can save a LOT of time. We use them to get rid of most of the uppert part of the sampling set only. Default is 99% is free-for-all, the last 1% we test. At 1.0 we test everything, at 0.0 we try using gates for everything."); + myopt("--orgate", conf.or_gate_based, fc_int,"Use 3-long gate detection in SAT solver to define variables"); + myopt("--irreggate", conf.irreg_gate_based, fc_int,"Use irregular gate-based removal of vars from indep set"); + myopt("--itegate", conf.ite_gate_based, fc_int,"Use ITE gate detection in SAT solver to define some variables"); + myopt("--xorgate", conf.xor_gates_based, fc_int,"Use XOR detection in SAT solver to define some variables"); // AppMC program.add_argument("--appmc") @@ -189,32 +224,32 @@ void add_arjun_options() { .help("Set CNF simplification options for appmc"); // Detailed Configuration - myopt("--sbvaclcut", etof_conf.sbva_cls_cutoff, atoi,"SBVA heuristic cutoff. Higher -> only appied to more clauses"); - myopt("--sbvalitcut", etof_conf.sbva_lits_cutoff, atoi,"SBVA heuristic cutoff. 
Higher -> only appied to larger clauses"); - myopt("--findbins", conf.oracle_find_bins, atoi,"How aggressively find binaries via oracle"); - myopt("--sbvabreak", etof_conf.sbva_tiebreak, atoi,"SBVA tie break: 1=sbva or 0=bva"); - myopt("--gaussj", conf.gauss_jordan, atoi,"Use XOR finding and Gauss-Jordan elimination"); - myopt("--bve", simp_conf.do_bve, atoi,"Perform BVE during CNF simplification"); - myopt("--iter1", simp_conf.iter1, atoi,"Puura iterations before oracle"); - myopt("--iter1grow", simp_conf.bve_grow_iter1, atoi,"Puura BVE grow rate allowed before Oracle"); - myopt("--iter2", simp_conf.iter2, atoi,"Puura iterations after oracle"); - myopt("--iter2grow", simp_conf.bve_grow_iter2, atoi,"Puura BVE grow rate allowed after Oracle"); - myopt("--bvegrownonstop", simp_conf.bve_grow_nonstop, atoi,"Do not stop BVE if nothing got eliminated, keep going until grow factor limit"); - myopt("--bveresolvmaxsz", simp_conf.bve_too_large_resolvent, atoi,"Puura BVE max resolvent size in literals. -1 == no limit"); - myopt("--oraclesparsify", simp_conf.oracle_sparsify, atoi,"Use Oracle to sparsify"); - myopt("--oraclevivif", simp_conf.oracle_vivify, atoi,"Use oracle to vivify"); - myopt("--oraclevivifgetl", simp_conf.oracle_vivify_get_learnts, atoi,"Use oracle to vivify get learnts"); - myopt("--distill", conf.distill, atoi, "Distill clauses before minimization of indep"); - myopt("--weakenlim", simp_conf.weaken_limit, atoi, "Limit to weaken BVE resolvents"); - myopt("--bce", etof_conf.do_bce, atoi, "Use blocked clause elimination (BCE) statically"); - myopt("--red", redundant_cls, atoi,"Also dump redundant clauses"); + myopt("--sbvaclcut", etof_conf.sbva_cls_cutoff, fc_int,"SBVA heuristic cutoff. Higher -> only appied to more clauses"); + myopt("--sbvalitcut", etof_conf.sbva_lits_cutoff, fc_int,"SBVA heuristic cutoff. 
Higher -> only appied to larger clauses"); + myopt("--findbins", conf.oracle_find_bins, fc_int,"How aggressively find binaries via oracle"); + myopt("--sbvabreak", etof_conf.sbva_tiebreak, fc_int,"SBVA tie break: 1=sbva or 0=bva"); + myopt("--gaussj", conf.gauss_jordan, fc_int,"Use XOR finding and Gauss-Jordan elimination"); + myopt("--bve", simp_conf.do_bve, fc_int,"Perform BVE during CNF simplification"); + myopt("--iter1", simp_conf.iter1, fc_int,"Puura iterations before oracle"); + myopt("--iter1grow", simp_conf.bve_grow_iter1, fc_int,"Puura BVE grow rate allowed before Oracle"); + myopt("--iter2", simp_conf.iter2, fc_int,"Puura iterations after oracle"); + myopt("--iter2grow", simp_conf.bve_grow_iter2, fc_int,"Puura BVE grow rate allowed after Oracle"); + myopt("--bvegrownonstop", simp_conf.bve_grow_nonstop, fc_int,"Do not stop BVE if nothing got eliminated, keep going until grow factor limit"); + myopt("--bveresolvmaxsz", simp_conf.bve_too_large_resolvent, fc_int,"Puura BVE max resolvent size in literals. -1 == no limit"); + myopt("--oraclesparsify", simp_conf.oracle_sparsify, fc_int,"Use Oracle to sparsify"); + myopt("--oraclevivif", simp_conf.oracle_vivify, fc_int,"Use oracle to vivify"); + myopt("--oraclevivifgetl", simp_conf.oracle_vivify_get_learnts, fc_int,"Use oracle to vivify get learnts"); + myopt("--distill", conf.distill, fc_int, "Distill clauses before minimization of indep"); + myopt("--weakenlim", simp_conf.weaken_limit, fc_int, "Limit to weaken BVE resolvents"); + myopt("--bce", etof_conf.do_bce, fc_int, "Use blocked clause elimination (BCE) statically"); + myopt("--red", redundant_cls, fc_int,"Also dump redundant clauses"); // Debug - myopt("--renumber", etof_conf.do_renumber, atoi,"Renumber variables to start from 1...N in CNF."); - myopt("--specifiedorder", conf.specified_order_fname, string, "Try to remove variables from the independent set in this order. File must contain a variable on each line. Variables start at ZERO. 
Variable from the BOTTOM will be removed FIRST. This is for DEBUG ONLY"); - myopt("--minimize", do_minim_indep, atoi,"Minimize indep set"); - myopt("--debugminim", debug_minim, string,"Create this file that is the CNF after indep set minimization"); - myopt("--cmsmult", conf.cms_glob_mult, atof,"Multiply timeouts in CMS by this. Default is -1, which means no change. Useful for debugging"); + myopt("--renumber", etof_conf.do_renumber, fc_int,"Renumber variables to start from 1...N in CNF."); + myopt("--specifiedorder", conf.specified_order_fname, fc_string, "Try to remove variables from the independent set in this order. File must contain a variable on each line. Variables start at ZERO. Variable from the BOTTOM will be removed FIRST. This is for DEBUG ONLY"); + myopt("--minimize", do_minim_indep, fc_int,"Minimize indep set"); + myopt("--debugminim", debug_minim, fc_string,"Create this file that is the CNF after indep set minimization"); + myopt("--cmsmult", conf.cms_glob_mult, fc_double,"Multiply timeouts in CMS by this. Default is -1, which means no change. 
Useful for debugging"); program.add_argument("files").remaining().help("input file and output file"); } @@ -278,7 +313,6 @@ void check_cnf_sat(const ArjunNS::SimplifiedCNF& cnf) { } } -#ifdef SYNTH void do_synthesis() { if (etof_conf.all_indep) { cout << "ERROR: synthesis with --allindep makes no sense" << endl; @@ -298,74 +332,37 @@ void do_synthesis() { check_cnf_sat(cnf); cout << "c o ignoring --backbone option, doing backbone for synth no matter what" << endl; cnf.get_var_types(conf.verb | verbose_debug_enabled, "start do_synthesis"); + if (do_synth_bve && !cnf.synth_done()) { - /* simp_conf.bve_too_large_resolvent = -1; */ cnf = arjun->standalone_get_simplified_cnf(cnf, simp_conf); if (!conf.debug_synth.empty()) cnf.write_aig_defs_to_file(conf.debug_synth + "-simplified_cnf.aig"); } - if (etof_conf.do_autarky && !cnf.synth_done()) { arjun->standalone_autarky(cnf); if (!conf.debug_synth.empty()) cnf.write_aig_defs_to_file(conf.debug_synth + "-autarky.aig"); } - if (etof_conf.do_extend_indep && !cnf.synth_done()) { arjun->standalone_unsat_define(cnf); if (!conf.debug_synth.empty()) cnf.write_aig_defs_to_file(conf.debug_synth + "-extend_synth.aig"); cnf.simplify_aigs(conf.verb); } - - if (do_minim_indep && !cnf.synth_done()) { + if (do_minim_indep && !cnf.synth_done()) { arjun->standalone_backward_round_synth(cnf, mconf); if (!conf.debug_synth.empty()) cnf.write_aig_defs_to_file(conf.debug_synth + "-minim_idep_synt.aig"); cnf.simplify_aigs(conf.verb); } - if (do_unate && !cnf.synth_done()) { arjun->standalone_unate(cnf); if (!conf.debug_synth.empty()) cnf.write_aig_defs_to_file(conf.debug_synth + "-unsat_unate.aig"); } - if (do_unate_def && !cnf.synth_done()) { arjun->standalone_unate_def(cnf); if (!conf.debug_synth.empty()) cnf.write_aig_defs_to_file(conf.debug_synth + "-unsat_unate_def.aig"); } - auto mconf_orig = mconf; - uint32_t tries; - if (manthan_strategy > 4) { - cout << "ERROR: unknown strategy " << manthan_strategy << endl; - exit(EXIT_FAILURE); - } - 
if (!cnf.synth_done() && (manthan_strategy == 0 || manthan_strategy == 1)) { - mconf = mconf_orig; - // Learning with no samples - mconf.manthan_bve = 0; - mconf.num_samples = 1; - mconf.num_samples_ccnr = 0; - mconf.manthan_bve = 0; - tries = 20*manthan_rep_mult; - if (manthan_strategy == 1) tries = std::numeric_limits::max(); - cnf = arjun->standalone_manthan(cnf, mconf, tries); - if (cnf.synth_done()) verb_print(1, "Manthan finished with strategy 1"); - } - if (!cnf.synth_done() && (manthan_strategy == 0 || manthan_strategy == 2)) { - // Learning with (larger) samples size - mconf = mconf_orig; - mconf.manthan_bve = 0; - tries = 100*manthan_rep_mult; - if (manthan_strategy == 2) tries = std::numeric_limits::max(); - cnf = arjun->standalone_manthan(cnf, mconf, tries); - if (cnf.synth_done()) verb_print(1, "Manthan finished with strategy 2"); - } - if (!cnf.synth_done() && (manthan_strategy == 0 || manthan_strategy == 3)) { - // BVE strategy - mconf = mconf_orig; - mconf.manthan_bve = 1; - tries = std::numeric_limits::max(); - cnf = arjun->standalone_manthan(cnf, mconf, tries); - if (cnf.synth_done()) verb_print(1, "Manthan finished with strategy 3"); - } + SynthRunner synth_runner(conf, arjun); + auto strategies = synth_runner.parse_mstrategy(mstrategy); + synth_runner.run_manthan_strategies(cnf, mconf, strategies); release_assert(cnf.synth_done() && "Synthesis should be done by now, but it is not!"); if (!conf.debug_synth.empty()) cnf.write_aig_defs_to_file(conf.debug_synth + "-5-manthan.aig"); if (!conf.debug_synth.empty()) { @@ -379,7 +376,6 @@ void do_synthesis() { if (conf.verb >= 3) final_cnf.write_aig_defs_to_file_txt(conf.debug_synth + "-final.txt"); } } -#endif void do_minimize() { ArjunNS::SimplifiedCNF cnf(fg); @@ -506,12 +502,7 @@ int main(int argc, char** argv) { if (!elimtofile.empty()) cout << "c o [arjun] Output file: " << elimtofile << endl; if (synthesis) { -#ifdef SYNTH do_synthesis(); -#else - cout << "c o [arjun] ERROR: synthesis not enabled 
in this build" << endl; - exit(EXIT_FAILURE); -#endif } else { do_minimize(); } diff --git a/src/manthan.cpp b/src/manthan.cpp index 6785141..efac31a 100644 --- a/src/manthan.cpp +++ b/src/manthan.cpp @@ -3,8 +3,7 @@ Copyright (c) 2020, Mate Soos and Kuldeep S. Meel. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is @@ -34,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -46,12 +44,11 @@ #include "time_mem.h" #include "ccnr/ccnr.h" -// These ask mlpack to give more info & warnings -//#define MLPACK_PRINT_INFO -//#define MLPACK_PRINT_WARN -#include - +#ifdef EXTRA_SYNTH +#include #include "EvalMaxSAT.h" +#include "manthan_learn.h" +#endif using std::min; using std::sort; @@ -75,13 +72,6 @@ using namespace CMSat; // no repair, learning/mbve does it: benchmarks-qdimacs/rankfunc57_unsigned_64.qdimacs.cnf // interesting, does not finish, but fast: benchmarks-qdimacs/query48_exquery_1344n.qdimacs.cnf -vector lits_to_ints(const vector& lits) { - vector ret; - ret.reserve(lits.size()); - for(const auto& l: lits) ret.push_back(lit_to_pl(l)); - return ret; -} - template void Manthan::inject_cnf(S& s) const { s.new_vars(cnf.nVars()); @@ -98,7 +88,7 @@ vector Manthan::get_cmsgen_samples(const uint32_t num) { inject_cnf(solver_samp); solver_samp.set_up_for_sample_counter(mconf.sampler_fixed_conflicts); - if (mconf.do_biased_sampling) { + if (mconf.biased_sampling) { array,2> dist; dist[0].resize(cnf.nVars(), 0.0); dist[1].resize(cnf.nVars(), 0.0); @@ 
-162,7 +152,7 @@ vector Manthan::get_cmsgen_samples(const uint32_t num) { assert(solver_samp.get_model().size() == cnf.nVars()); samples.push_back(solver_samp.get_model()); } - verb_print(1, "[manthan] CMSGen got " << samples.size() << " samples. Biased: " << (bool)mconf.do_biased_sampling + verb_print(1, "[manthan] CMSGen got " << samples.size() << " samples. Biased: " << (bool)mconf.biased_sampling << " T: " << setprecision(2) << std::fixed << (cpuTime() - my_time)); return samples; } @@ -357,7 +347,7 @@ void Manthan::print_y_order_occur() const { for(const auto& y: y_order) { const uint32_t pos = occur_lit[Lit(y, false).toInt()]; const uint32_t neg = occur_lit[Lit(y, true).toInt()]; - verb_print(1, "[manthan] y-order var " << setw(4) << y+1 + verb_print(2, "[manthan] y-order var " << setw(4) << y+1 << " BW: " << backward_defined.count(y) << " td_score " << setw(6) << fixed << setprecision(2) << td_score.at(y) << " pos occur " << setw(6) << pos @@ -688,41 +678,6 @@ std::unique_ptr Manthan::build_primal_graph() { return primal; } -void Manthan::full_train() { - // Sampling - verb_print(1, "[manthan] Starting training. Manthan Config. " - << "do_filter_samples=" << mconf.do_filter_samples - << ", num_samples=" << mconf.num_samples - << ", minimumLeafSize=" << mconf.minimumLeafSize - << ", minGainSplit=" << setprecision(6) << mconf.minGainSplit << setprecision(2) - << ", maximumDepth=" << mconf.maximumDepth); - double samp_start_time = cpuTime(); - vector samples = get_cmsgen_samples(mconf.num_samples); - { - vector samples2 = get_samples_ccnr(mconf.num_samples_ccnr); - samples.insert(samples.end(), samples2.begin(), samples2.end()); - } - sampl_time = cpuTime() - samp_start_time; - verb_print(1, COLYEL "[manthan] Got " << setw(8) << samples.size() << " samples." 
- << " samp/var: " << setw(8) << setprecision(2) << std::fixed << sampl_time/(double)to_define.size() - << " T: " << setprecision(2) << std::fixed << sampl_time); - sort_all_samples(samples); - - // Training -- updates depenndency_mat - const double train_start_time = cpuTime(); - for(const auto& v: y_order) { - if (backward_defined.count(v)) continue; - train(samples, v); - } - train_time = cpuTime() - train_start_time; - verb_print(1, COLYEL "[manthan] Training done." - << " funs: " << setw(6) << to_define.size() - << " fun/s: " << setw(6) << setprecision(2) << std::fixed << safe_div(to_define.size(), cpuTime() - train_start_time) - << " T: " << setw(6) << setprecision(2) << std::fixed << train_time - << " mem: " << memUsedTotal()/(1024.0*1024.0) << " MB"); - assert(check_map_dependency_cycles()); -} - void Manthan::print_repair_stats([[maybe_unused]] const string& txt, const string& color, [[maybe_unused]] const string& extra) const { vector rep(cnf.nVars()); for(uint32_t i = 0; i < cnf.nVars(); i++) rep[i] = i; @@ -789,7 +744,26 @@ void Manthan::add_xor_var() { verb_print(1, "[manthan] Added " << sampl_vars.size() - 1 << " XOR vars as BVA input vars"); } -SimplifiedCNF Manthan::do_manthan(const uint32_t max_repairs) { +void Manthan::const_functions() { + vector samples = get_cmsgen_samples(1); + for(const auto& y: Manthan::y_order) { + if (!to_define.count(y)) continue; + + vector filt_s = filter_samples(y, samples); + assert(var_to_formula.count(y) == 0); + bool val; + if (filt_s.empty()) { + val = true; + } else { + val = samples[0][y] == l_True; + } + if (mconf.inv_learnt) val = !val; + verb_print(3, "[manthan] const function for var " << y+1 << " is " << val); + var_to_formula[y] = fh->constant_formula(samples[0][y] == l_True); + } +} + +SimplifiedCNF Manthan::do_manthan() { assert(cnf.get_need_aig() && cnf.defs_invariant()); assert(mconf.simplify_every > 0 && "Can't give simplify_every=0"); const double my_time = cpuTime(); @@ -840,8 +814,19 @@ 
SimplifiedCNF Manthan::do_manthan(const uint32_t max_repairs) { fill_var_to_formula_with(backward_defined); fill_var_to_formula_with(helper_functions); - if (mconf.manthan_bve) bve_and_substitute(); - else full_train(); + if (mconf.manthan_base == 0) { +#ifdef EXTRA_SYNTH + ManthanLearn learn(*this, conf, mconf); + learn.full_train(); +#else + cout << "ERROR: manthan_base is set to 2 but we are not in EXTRA_SYNTH mode!" << endl; + exit(EXIT_FAILURE); +#endif + } if (mconf.manthan_base == 1) { + const_functions(); + } else if (mconf.manthan_base == 2) { + bve_and_substitute(); + } post_order_vars(); // Counterexample-guided repair @@ -862,7 +847,7 @@ SimplifiedCNF Manthan::do_manthan(const uint32_t max_repairs) { sample ctx; const bool finished = get_counterexample(ctx); if (finished) break; - if (tot_repaired > max_repairs) { + if (tot_repaired > mconf.max_repairs) { print_stats("", COLRED, " Reached max repairs"); return cnf; } @@ -872,10 +857,15 @@ SimplifiedCNF Manthan::do_manthan(const uint32_t max_repairs) { SLOW_DEBUG_DO(assert(ctx_y_hat_correct(ctx))); const uint32_t old_needs_repair_size = needs_repair.size(); - if (mconf.do_maxsat_better_ctx == -1) { + if (mconf.maxsat_better_ctx == -1) { // Nothing to do - } else if (mconf.do_maxsat_better_ctx == 1) { + } else if (mconf.maxsat_better_ctx == 1) { + #ifdef EXTRA_SYNTH find_better_ctx_maxsat(ctx); + #else + cout << "ERROR: maxsat_better_ctx is set to 1 but we are not in EXTRA_SYNTH mode!" 
<< endl; + exit(EXIT_FAILURE); + #endif } else { find_better_ctx_normal(ctx); } @@ -1050,7 +1040,7 @@ bool Manthan::find_conflict(const uint32_t y_rep, sample& ctx, vector& conf verb_print(2, "find_conflict sz: " << setw(5) << conflict.size() << " conflict: " << conflict); uint32_t orig_size = conflict.size(); const double minimize_start_time = cpuTime(); - if (conflict.size() > 1 && mconf.do_minimize_conflict) { + if (conflict.size() > 1 && mconf.minimize_conflict) { minimize_conflict(conflict, assumps, to_repair); assert(std::find(conflict.begin(), conflict.end(), to_repair) != conflict.end() && "to_repair literal must be in conflict"); @@ -1491,7 +1481,7 @@ void Manthan::pre_order_vars() { assert(order_val.empty()); assert(y_order.empty()); const double my_time = cpuTime(); - verb_print(2, "[manthan] Fixing order " << (mconf.manthan_bve ? "[BVE]" : "[LEARN]") << "..."); + verb_print(2, "[manthan] Fixing order " << (mconf.manthan_base ? "[BVE]" : "[LEARN]") << "..."); switch(mconf.manthan_order) { case 0: learn_order(); break; @@ -1590,6 +1580,7 @@ void Manthan::bve_order() { assert(y_order.size() == to_define_full.size()); } +#ifdef EXTRA_SYNTH void Manthan::find_better_ctx_maxsat(sample& ctx) { verb_print(2, "Finding better ctx via maxsat."); EvalMaxSAT s_ctx; @@ -1638,6 +1629,7 @@ void Manthan::find_better_ctx_maxsat(sample& ctx) { assert(s_ctx.getCost() > 0); for(const auto& v: to_define_full) ctx[v] = s_ctx.getValue(v+1) ? l_True : l_False; } +#endif // Fills needs_repair with vars from y (i.e. 
output) using normal SAT solver with assumptions void Manthan::find_better_ctx_normal(sample& ctx) { @@ -1780,7 +1772,7 @@ void Manthan::inject_formulas_into_solver() { vector cl2; for(const auto& l: cl.lits) { auto v = l.var(); - if (to_define_full.count(v)) { cl2.push_back(Lit(y_to_y_hat[v], l.sign()));} + if (to_define_full.count(v)) { cl2.push_back(Lit(y_to_y_hat.at(v), l.sign()));} else cl2.push_back(l); } cex_solver.add_clause(cl2); @@ -1798,7 +1790,7 @@ void Manthan::inject_formulas_into_solver() { assert(var_to_formula.count(y)); for(const auto& cl: var_to_formula[y].clauses) assert(cl.inserted && "All clauses must have been inserted"); const auto& form_out = var_to_formula[y].out; - const auto& y_hat = y_to_y_hat[y]; + const auto& y_hat = y_to_y_hat.at(y); y_hat_to_indic[y_hat] = ind; indic_to_y_hat[ind] = y_hat; @@ -1867,6 +1859,7 @@ bool Manthan::get_counterexample(sample& ctx) { verb_print(2, COLYEL "[manthan] *** Counterexample found ***"); ctx = cex_solver.get_model(); compute_needs_repair(ctx); + assert(!needs_repair.empty() && "If we found a counterexample, there must be something to repair!"); return false; } else { assert(ret == l_False); @@ -1875,51 +1868,6 @@ bool Manthan::get_counterexample(sample& ctx) { } } -FHolder::Formula Manthan::recur(mlpack::tree::DecisionTree<>* node, - const uint32_t learned_v, const vector& used_vars, uint32_t depth, uint32_t& max_depth) { - max_depth = std::max(max_depth, depth); - /* for(uint32_t i = 0; i < depth; i++) cout << " "; */ - if (node->NumChildren() == 0) { - const bool val = node->ClassProbabilities()[1] > node->ClassProbabilities()[0]; - /* cout << "Leaf: "; */ - /* for(uint32_t i = 0; i < node->NumClasses(); i++) { */ - /* cout << "class "<< i << " prob: " << node->ClassProbabilities()[i] << " --- "; */ - /* } */ - /* cout << endl; */ - return fh->constant_formula(val); - } else { - uint32_t v = node->SplitDimension(); - assert(v < used_vars.size()); - v = used_vars[v]; - /* cout << "(learning " << 
learned_v+1<< ") Node. v: " << v+1 << std::flush; */ - if (to_define_full.count(v)) { - // v does not depend on learned_v! - assert(dependency_mat[v][learned_v] == 0); - for(uint32_t i = 0; i < cnf.nVars(); i++) { - if (dependency_mat[v][i] == 1) { - if (input.count(i)) continue; - // nothing that v depends on can depend on learned_v - assert(dependency_mat[i][learned_v] == 0); - } - } - set_depends_on(learned_v, v); - v = y_to_y_hat.at(v); - } else { - // it's input, so no need to update dependency matrix - assert(input.count(v) && "If not to_define_full, must be input"); - } - - /* cout << " -- all-0 goes -> " << node->CalculateDirection(point_0); */ - /* cout << " -- all-1 goes -> " << node->CalculateDirection(point_1) << endl; */ - assert(node->NumChildren() == 2); - auto form_0 = recur(&node->Child(0), learned_v, used_vars, depth+1, max_depth); - auto form_1 = recur(&node->Child(1), learned_v, used_vars, depth+1, max_depth); - bool val_going_right = node->CalculateDirection(point_1); - return fh->compose_ite(form_0, form_1, Lit(v, val_going_right), helpers); - } - assert(false); -} - // Checks if flipping variable v in sample s satisfies all clauses vector Manthan::filter_samples(const uint32_t v, const vector& samples) { auto check_satisfied_all_cls_with_flip = [](const sample& s, const uint32_t v2, const vector*>& clause_ptrs) -> bool { @@ -1998,147 +1946,6 @@ void Manthan::sort_all_samples(vector& samples) { } } -double Manthan::train(const vector& orig_samples, const uint32_t v) { - verb_print(2, "training variable: " << v+1); - - vector used_vars(input.begin(), input.end()); - if (mconf.do_use_all_variables_as_features) { - if (!mconf.manthan_on_the_fly_order) { - for(const auto& y: y_order) { - if (y == v) break; - assert(dependency_mat[y][v] != 1); - used_vars.push_back(y); - } - } else { - auto reaches = [&](const uint32_t from, const uint32_t to) -> bool { - if (from == to) return true; - vector seen(cnf.nVars(), 0); - vector st; - st.push_back(from); 
- seen[from] = 1; - while(!st.empty()) { - const uint32_t cur = st.back(); - st.pop_back(); - for(uint32_t nxt = 0; nxt < cnf.nVars(); nxt++) { - if (dependency_mat[cur][nxt] == 0) continue; - if (nxt == to) return true; - if (seen[nxt]) continue; - seen[nxt] = 1; - st.push_back(nxt); - } - } - return false; - }; - for(const auto& y: to_define_full) { - if (y == v) continue; - if (reaches(y, v)) continue; - used_vars.push_back(y); - } - } - } - /* assert(!orig_samples.empty()); */ - vector samples; - if (mconf.do_filter_samples) samples = filter_samples(v, orig_samples); - else { - for(const auto& s: orig_samples) - samples.push_back(&s); - } - assert(v < cnf.nVars()); - point_0.zeros(cnf.nVars()); - point_1.zeros(cnf.nVars()); - arma::Mat dataset; - arma::Row labels; - - dataset.resize(used_vars.size(), samples.size()); - verb_print(2, "Dataset size: " << dataset.n_rows << " x " << dataset.n_cols); - - for(uint32_t i = 0; i < samples.size(); i++) { - assert(samples[i]->size() == cnf.nVars()); - for(uint32_t k = 0; k < used_vars.size(); k++) { - const uint32_t dep_v = used_vars[k]; - dataset(k, i) = lbool_to_bool((*samples[i])[dep_v]); - } - } - labels.resize(samples.size()); - for(uint32_t i = 0; i < samples.size(); i++) labels[i] = lbool_to_bool((*samples[i])[v]); - const auto num_ones = arma::accu(labels); - verb_print(2, "Labels distribution for v " << setw(5) << v+1 << ": " << setw(6) << num_ones << " ones and " - << setw(6) << (samples.size() - num_ones) << " zeros"); - double train_error; - if (samples.empty()) { - var_to_formula[v] = fh->constant_formula(true); - train_error = 0.0; - } else { - // Create the RandomForest object and train it on the training data. - // - // All Available Parameters to Reduce Overfitting: - /* 1. minimumLeafSize (default: 10) */ - /* - Minimum number of points in each leaf node */ - /* - Increase to reduce overfitting (e.g., 20, 50, 100) */ - /* 2. 
minimumGainSplit (default: 1e-7) */ - /* - Minimum gain required for a node to split */ - /* - Increase to reduce overfitting (e.g., 0.001, 0.01, 0.05) */ - /* - Must be in range (0, 1) */ - /* 3. maximumDepth (default: 0 = unlimited) */ - /* - Maximum depth of the tree */ - /* - Set a limit to reduce overfitting (e.g., 5, 10, 15) */ - /* 4. dimensionSelector (optional) */ - /* - Advanced: Controls which features to consider for splitting */ - /* - Can use custom strategies (usually leave as default) */ - - /* DecisionTree<> r(dataset, labels, 2); */ - // More conservative (less overfitting) - /* DecisionTree::DecisionTree( */ - /* MatType data, */ - /* LabelsType labels, */ - /* const size_t numClasses, */ - /* const size_t minimumLeafSize, */ - /* const double minimumGainSplit, */ - /* const size_t maximumDepth, */ - /* DimensionSelectionType dimensionSelector) */ - mlpack::DecisionTree<> r(dataset, labels, 2, - mconf.minimumLeafSize, // minimumLeafSize: require 20+ samples per leaf (default 10) - mconf.minGainSplit, // minimumGainSplit: require k ratio gain to split - mconf.maximumDepth); // maximumDepth: max k levels deep (0 = unlimited) - - // Compute and print the training error. - arma::Row predictions; - r.Classify(dataset, predictions); - train_error = arma::accu(predictions != labels) * 100.0 / (double)labels.n_elem; - /* r.serialize(cout, 1); */ - - verb_print(2,"[DEBUG] About to call recur for v " << v+1 << " num children: " << r.NumChildren()); - assert(var_to_formula.count(v) == 0); - uint32_t max_depth = 0; - var_to_formula[v] = recur(&r, v, used_vars, 0, max_depth); - verb_print(1, "Training error: " << setprecision(2) << setw(6) << train_error << "%." 
- << " depth: " << setw(6) << max_depth - << " ones: " << setprecision(0) << fixed << setw(5) << (double)num_ones/samples.size()*100.0 << "%" - << " on v: " << setprecision(2) << setw(4) << v+1); - } - - // Forward dependency update - for(uint32_t i = 0; i < cnf.nVars(); i++) { - if (input.count(i)) continue; - if (dependency_mat[i][v]) { - for(uint32_t j = 0; j < cnf.nVars(); j++) { - if (input.count(j)) continue; - dependency_mat[i][j] |= dependency_mat[v][j]; - } - SLOW_DEBUG_DO(assert(check_map_dependency_cycles())); - } - } - verb_print(2, "Trained formula for y " << v+1 << ":" << endl << var_to_formula[v]); - verb_print(2, "Done training variable: " << v+1); - verb_print(2, "------------------------------"); - - return train_error; -} - bool Manthan::has_dependency_cycle_dfs(const uint32_t node, vector& color, vector& path) const { color[node] = 1; // Mark as being processed (gray) path.push_back(node); diff --git a/src/manthan.h b/src/manthan.h index c889193..e99ea66 100644 --- a/src/manthan.h +++ b/src/manthan.h @@ -22,6 +22,8 @@ THE SOFTWARE. */ +#pragma once + #include "arjun.h" #include "config.h" #include "metasolver.h" @@ -35,12 +37,7 @@ #include #include #include "formula.h" -#include - - -/* #define MLPACK_PRINT_INFO */ -/* #define MLPACK_PRINT_WARN */ -#include +#include "treedecomp/TreeDecomposition.hpp" using namespace CMSat; @@ -54,6 +51,9 @@ using sample = vector; using namespace ArjunInt; using namespace ArjunNS; +namespace ArjunInt { +class ManthanLearn; + class Manthan { public: Manthan(const Config& _conf, const Arjun::ManthanConf& _mconf, const SimplifiedCNF& _cnf) : @@ -64,7 +64,8 @@ class Manthan { { mtrand.seed(42); } - SimplifiedCNF do_manthan(const uint32_t max_repairs); + SimplifiedCNF do_manthan(); + friend class ManthanLearn; private: // y is original output var, i.e. 
to_define @@ -96,14 +97,11 @@ class Manthan { set y_hats; // the potential y_hats (due to ITE chains, some are "old" and unused) std::unique_ptr build_primal_graph(); - void full_train(); + void const_functions(); void bve_and_substitute(); aig_ptr one_level_substitute(const Lit l, const uint32_t v, map& transformed); - arma::vec point_0; - arma::vec point_1; void create_vars_for_y_hats(); - FHolder::Formula recur(mlpack::tree::DecisionTree<>* node, const uint32_t learned_v, const vector& var_remap, uint32_t depth, uint32_t& max_depth); vector incidence; vector td_score; @@ -155,10 +153,8 @@ class Manthan { set_depends_on(a, b.var()); } - - bool verify_final_cnf(const SimplifiedCNF& fcnf) const; - vector get_cmsgen_samples(const uint32_t num_samples); - vector get_samples_ccnr(const uint32_t num_samples); + vector get_cmsgen_samples(const uint32_t samples); + vector get_samples_ccnr(const uint32_t samples); void sort_all_samples(vector& samples); double train(const vector& samples, const uint32_t v); // returns training error vector> dependency_mat; // dependency_mat[a][b] = 1 if a depends on b @@ -174,8 +170,15 @@ class Manthan { assert(val != l_Undef); return val == l_True; } + vector lits_to_ints(const vector& lits) { + vector ret; + ret.reserve(lits.size()); + for(const auto& l: lits) ret.push_back(lit_to_pl(l)); + return ret; + } // debug + bool verify_final_cnf(const SimplifiedCNF& fcnf) const; bool is_unsat(const vector& conflict, uint32_t y_rep, const sample& ctx) const; bool ctx_y_hat_correct(const sample& ctx) const; bool ctx_is_sat(const sample& ctx) const; @@ -204,3 +207,4 @@ class Manthan { SimplifiedCNF cnf; AIGManager aig_mng; }; +} diff --git a/src/manthan_learn.cpp b/src/manthan_learn.cpp new file mode 100644 index 0000000..5837d7a --- /dev/null +++ b/src/manthan_learn.cpp @@ -0,0 +1,225 @@ +/* + Arjun + + Copyright (c) 2020, Mate Soos and Kuldeep S. Meel. All rights reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ + +#include "manthan_learn.h" +#include "constants.h" +#include +#include +#include "time_mem.h" + +using std::setprecision; +using std::fixed; +using std::setw; +using namespace ArjunInt; + +void ManthanLearn::full_train() { + // Sampling + verb_print(1, "[manthan] Starting training. Manthan Config. " + << "do_filter_samples=" << mconf.filter_samples + << ", samples=" << mconf.samples + << ", minimumLeafSize=" << mconf.min_leaf_size + << ", minGainSplit=" << setprecision(6) << mconf.min_gain_split << setprecision(2) + << ", maximumDepth=" << mconf.max_depth); + double samp_start_time = cpuTime(); + vector samples = m.get_cmsgen_samples(mconf.samples); + { + vector samples2 = m.get_samples_ccnr(mconf.samples_ccnr); + samples.insert(samples.end(), samples2.begin(), samples2.end()); + } + m.sampl_time = cpuTime() - samp_start_time; + verb_print(1, COLYEL "[manthan] Got " << setw(8) << samples.size() << " samples." 
+ << " samp/var: " << setw(8) << setprecision(2) << std::fixed << m.sampl_time/(double)m.to_define.size() + << " T: " << setprecision(2) << std::fixed << m.sampl_time); + m.sort_all_samples(samples); + + // Training -- updates depenndency_mat + const double train_start_time = cpuTime(); + for(const auto& v: m.y_order) { + if (m.backward_defined.count(v)) continue; + train(samples, v); + } + m.train_time = cpuTime() - train_start_time; + verb_print(1, COLYEL "[manthan] Training done." + << " funs: " << setw(6) << m.to_define.size() + << " fun/s: " << setw(6) << setprecision(2) << std::fixed << safe_div(m.to_define.size(), cpuTime() - train_start_time) + << " T: " << setw(6) << setprecision(2) << std::fixed << m.train_time + << " mem: " << memUsedTotal()/(1024.0*1024.0) << " MB"); + assert(m.check_map_dependency_cycles()); +} + +double ManthanLearn::train(const vector& orig_samples, const uint32_t v) { + verb_print(2, "training variable: " << v+1); + + vector used_vars(m.input.begin(), m.input.end()); + if (mconf.use_all_vars_as_feats) { + if (!mconf.manthan_on_the_fly_order) { + for(const auto& y: m.y_order) { + if (y == v) break; + assert(m.dependency_mat[y][v] != 1); + used_vars.push_back(y); + } + } else { + auto reaches = [&](const uint32_t from, const uint32_t to) -> bool { + if (from == to) return true; + vector seen(m.cnf.nVars(), 0); + vector st; + st.push_back(from); + seen[from] = 1; + while(!st.empty()) { + const uint32_t cur = st.back(); + st.pop_back(); + for(uint32_t nxt = 0; nxt < m.cnf.nVars(); nxt++) { + if (m.dependency_mat[cur][nxt] == 0) continue; + if (nxt == to) return true; + if (seen[nxt]) continue; + seen[nxt] = 1; + st.push_back(nxt); + } + } + return false; + }; + for(const auto& y: m.to_define_full) { + if (y == v) continue; + if (reaches(y, v)) continue; + used_vars.push_back(y); + } + } + } + /* assert(!orig_samples.empty()); */ + vector samples; + if (mconf.filter_samples) samples = m.filter_samples(v, orig_samples); + else { + 
for(const auto& s: orig_samples) + samples.push_back(&s); + } + assert(v < m.cnf.nVars()); + arma::Mat dataset; + arma::Row labels; + + dataset.resize(used_vars.size(), samples.size()); + verb_print(2, "Dataset size: " << dataset.n_rows << " x " << dataset.n_cols); + + for(uint32_t i = 0; i < samples.size(); i++) { + assert(samples[i]->size() == m.cnf.nVars()); + for(uint32_t k = 0; k < used_vars.size(); k++) { + const uint32_t dep_v = used_vars[k]; + dataset(k, i) = m.lbool_to_bool((*samples[i])[dep_v]); + } + } + labels.resize(samples.size()); + for(uint32_t i = 0; i < samples.size(); i++) labels[i] = m.lbool_to_bool((*samples[i])[v]); + const auto num_ones = arma::accu(labels); + verb_print(2, "Labels distribution for v " << setw(5) << v+1 << ": " << setw(6) << num_ones << " ones and " + << setw(6) << (samples.size() - num_ones) << " zeros"); + double train_error; + if (samples.empty()) { + m.var_to_formula[v] = m.fh->constant_formula(!mconf.inv_learnt); + train_error = 0.0; + } else { + // Create the RandomForest object and train it on the training data. + mlpack::DecisionTree<> r(dataset, labels, 2, + mconf.min_leaf_size, // minimumLeafSize: require 20+ samples per leaf (default 10) + mconf.min_gain_split, // minimumGainSplit: require k ratio gain to split + mconf.max_depth); // maximumDepth: max k levels deep (0 = unlimited) + + // Compute and print the training error. + arma::Row predictions; + r.Classify(dataset, predictions); + train_error = arma::accu(predictions != labels) * 100.0 / (double)labels.n_elem; + /* r.serialize(cout, 1); */ + + verb_print(2,"[DEBUG] About to call recur for v " << v+1 << " num children: " << r.NumChildren()); + assert(m.var_to_formula.count(v) == 0); + uint32_t max_depth = 0; + m.var_to_formula[v] = recur(&r, v, used_vars, 0, max_depth); + if (mconf.inv_learnt) + m.var_to_formula[v] = m.fh->neg(m.var_to_formula[v]); + verb_print(1, "Training error: " << setprecision(2) << setw(6) << train_error << "%." 
+ << " depth: " << setw(6) << max_depth + << " ones: " << setprecision(0) << fixed << setw(5) << (double)num_ones/samples.size()*100.0 << "%" + << " on v: " << setprecision(2) << setw(4) << v+1); + } + + // Forward dependency update + for(uint32_t i = 0; i < m.cnf.nVars(); i++) { + if (m.input.count(i)) continue; + if (m.dependency_mat[i][v]) { + for(uint32_t j = 0; j < m.cnf.nVars(); j++) { + if (m.input.count(j)) continue; + m.dependency_mat[i][j] |= m.dependency_mat[v][j]; + } + SLOW_DEBUG_DO(assert(check_map_dependency_cycles())); + } + } + verb_print(2, "Trained formula for y " << v+1 << ":" << endl << m.var_to_formula[v]); + verb_print(2, "Done training variable: " << v+1); + verb_print(2, "------------------------------"); + + return train_error; +} + +FHolder::Formula ManthanLearn::recur(mlpack::tree::DecisionTree<>* node, + const uint32_t learned_v, const vector& used_vars, uint32_t depth, uint32_t& max_depth) { + max_depth = std::max(max_depth, depth); + /* for(uint32_t i = 0; i < depth; i++) cout << " "; */ + if (node->NumChildren() == 0) { + const bool val = node->ClassProbabilities()[1] > node->ClassProbabilities()[0]; + /* cout << "Leaf: "; */ + /* for(uint32_t i = 0; i < node->NumClasses(); i++) { */ + /* cout << "class "<< i << " prob: " << node->ClassProbabilities()[i] << " --- "; */ + /* } */ + /* cout << endl; */ + return m.fh->constant_formula(val); + } else { + uint32_t v = node->SplitDimension(); + assert(v < used_vars.size()); + v = used_vars[v]; + /* cout << "(learning " << learned_v+1<< ") Node. v: " << v+1 << std::flush; */ + if (m.to_define_full.count(v)) { + // v does not depend on learned_v! 
+ assert(m.dependency_mat[v][learned_v] == 0); + for(uint32_t i = 0; i < m.cnf.nVars(); i++) { + if (m.dependency_mat[v][i] == 1) { + if (m.input.count(i)) continue; + // nothing that v depends on can depend on learned_v + assert(m.dependency_mat[i][learned_v] == 0); + } + } + m.set_depends_on(learned_v, v); + v = m.y_to_y_hat.at(v); + } else { + // it's input, so no need to update dependency matrix + assert(m.input.count(v) && "If not to_define_full, must be input"); + } + + /* cout << " -- all-0 goes -> " << node->CalculateDirection(point_0); */ + /* cout << " -- all-1 goes -> " << node->CalculateDirection(point_1) << endl; */ + assert(node->NumChildren() == 2); + auto form_0 = recur(&node->Child(0), learned_v, used_vars, depth+1, max_depth); + auto form_1 = recur(&node->Child(1), learned_v, used_vars, depth+1, max_depth); + bool val_going_right = node->CalculateDirection(point_1); + return m.fh->compose_ite(form_0, form_1, Lit(v, val_going_right), m.helpers); + } + assert(false); +} diff --git a/src/manthan_learn.h b/src/manthan_learn.h new file mode 100644 index 0000000..bd64c57 --- /dev/null +++ b/src/manthan_learn.h @@ -0,0 +1,63 @@ +/* + Arjun + + Copyright (c) 2020, Mate Soos and Kuldeep S. Meel. All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ + +#pragma once + +#include "manthan.h" +#include +#include +#include + +using std::vector; +using std::uint32_t; +using std::string; +using sample = vector; + +// These ask mlpack to give more info & warnings +//#define MLPACK_PRINT_INFO +//#define MLPACK_PRINT_WARN +#include + +namespace ArjunInt { + +class ManthanLearn { +public: + ManthanLearn(Manthan& _manthan, const Config& _conf, const ArjunNS::Arjun::ManthanConf& _mconf) : + m(_manthan), conf(_conf), mconf(_mconf) { + point_0.zeros(m.cnf.nVars()); + point_1.zeros(m.cnf.nVars()); + } + void full_train(); +private: + double train(const vector& orig_samples, const uint32_t v); + FHolder::Formula recur( + mlpack::tree::DecisionTree<>* node, const uint32_t learned_v, const vector& var_remap, uint32_t depth, uint32_t& max_depth); + arma::vec point_0; + arma::vec point_1; + + Manthan& m; + const Config& conf; + const ArjunNS::Arjun::ManthanConf& mconf; +}; +} diff --git a/src/metasolver.h b/src/metasolver.h index 920b8ac..f01fc4d 100644 --- a/src/metasolver.h +++ b/src/metasolver.h @@ -48,10 +48,12 @@ class MetaSolver { if (solver_type == SolverType::cms) { cms = std::make_unique(); cms->set_prefix("c o "); - } else { + } else if (solver_type == SolverType::cadical) { cadical = std::make_unique(); cadical_nvars = 0; cadical->prefix("c o "); + } else { + throw std::invalid_argument("Unsupported solver type"); } } diff --git a/src/puura.h b/src/puura.h index 2d12ec6..2e32d80 100644 --- a/src/puura.h +++ b/src/puura.h @@ -40,6 +40,8 @@ using 
namespace ArjunNS; using std::vector; using std::set; +namespace ArjunInt { + class Puura { public: Puura(const Config& _conf); @@ -75,3 +77,4 @@ class Puura { set sampl_set; vector dont_elim; }; +} diff --git a/src/synth.cpp b/src/synth.cpp new file mode 100644 index 0000000..fe2b6b8 --- /dev/null +++ b/src/synth.cpp @@ -0,0 +1,267 @@ +/* + Arjun + + Copyright (c) 2019-2020, Mate Soos and Kuldeep S. Meel. All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+*/ + +#include "synth.h" + +#include +#include +#include +#include +#include +#include "constants.h" + +using std::string; +using std::vector; +using std::cout; +using std::endl; + +namespace { +using MC = ArjunNS::Arjun::ManthanConf; + +template T parse_val(const string& s) { + T v{}; std::from_chars(s.data(), s.data() + s.size(), v); return v; +} +template<> double parse_val(const string& s) { + return std::stod(s); +} + +template bool validate_val(const string& v) { + T x; auto [p,e] = std::from_chars(v.data(), v.data()+v.size(), x); + return e == std::errc{} && p == v.data()+v.size(); +} +template<> bool validate_val(const string& v) { + try { size_t pos; std::stod(v, &pos); return pos == v.size(); } + catch (...) { return false; } +} + +using PT = SynthRunner::ParamType; +struct ParamDef { PT type; std::function setter; }; + +const std::map param_table = { + {"max_repairs", {PT::UInt, [](MC& c, const string& v) { c.max_repairs = parse_val(v); }}}, + {"samples", {PT::UInt, [](MC& c, const string& v) { c.samples = parse_val(v); }}}, + {"samples_ccnr", {PT::UInt, [](MC& c, const string& v) { c.samples_ccnr = parse_val(v); }}}, + {"min_gain_split", {PT::Double, [](MC& c, const string& v) { c.min_gain_split = parse_val(v); }}}, + {"max_depth", {PT::UInt, [](MC& c, const string& v) { c.max_depth = parse_val(v); }}}, + {"sampler_fixed_conflicts", {PT::UInt, [](MC& c, const string& v) { c.sampler_fixed_conflicts = parse_val(v); }}}, + {"min_leaf_size", {PT::UInt, [](MC& c, const string& v) { c.min_leaf_size = parse_val(v); }}}, + {"filter_samples", {PT::Int, [](MC& c, const string& v) { c.filter_samples = parse_val(v); }}}, + {"biased_sampling", {PT::Int, [](MC& c, const string& v) { c.biased_sampling = parse_val(v); }}}, + {"minimize_conflict", {PT::Int, [](MC& c, const string& v) { c.minimize_conflict = parse_val(v); }}}, + {"simplify_every", {PT::UInt, [](MC& c, const string& v) { c.simplify_every = parse_val(v); }}}, + {"maxsat_better_ctx", {PT::Int, [](MC& c, 
const string& v) { c.maxsat_better_ctx = parse_val(v); }}}, + {"maxsat_order", {PT::Int, [](MC& c, const string& v) { c.maxsat_order = parse_val(v); }}}, + {"use_all_vars_as_feats", {PT::Int, [](MC& c, const string& v) { c.use_all_vars_as_feats = parse_val(v); }}}, + {"ctx_solver_type", {PT::Int, [](MC& c, const string& v) { c.ctx_solver_type = parse_val(v); }}}, + {"repair_solver_type", {PT::Int, [](MC& c, const string& v) { c.repair_solver_type = parse_val(v); }}}, + {"repair_cache_size", {PT::Int, [](MC& c, const string& v) { c.repair_cache_size = parse_val(v); }}}, + {"backward_synth_order", {PT::Int, [](MC& c, const string& v) { c.backward_synth_order = parse_val(v); }}}, + {"manthan_order", {PT::Int, [](MC& c, const string& v) { c.manthan_order = parse_val(v); }}}, + {"manthan_on_the_fly_order", {PT::Int, [](MC& c, const string& v) { c.manthan_on_the_fly_order = parse_val(v); }}}, + {"one_repair_per_loop", {PT::Int, [](MC& c, const string& v) { c.one_repair_per_loop = parse_val(v); }}}, + {"force_bw_equal", {PT::Int, [](MC& c, const string& v) { c.force_bw_equal = parse_val(v); }}}, + {"bva_xor_vars", {PT::Int, [](MC& c, const string& v) { c.bva_xor_vars = parse_val(v); }}}, + {"silent_var_update", {PT::Int, [](MC& c, const string& v) { c.silent_var_update = parse_val(v); }}}, + {"inv_learnt", {PT::Int, [](MC& c, const string& v) { c.inv_learnt = parse_val(v); }}}, +}; +} // namespace + +SynthRunner::SynthRunner(const ArjunInt::Config& _conf, std::unique_ptr& _arjun) : + conf(_conf), arjun(_arjun) {} + +string SynthRunner::trim(const string& s) { + string result; + for (char c : s) { + if (isalnum(c) || c == '.' 
|| c == ',' || c == '=' || c == '(' || c == ')' || c == ' ' || c == '_') + result += c; + else { + cout << "ERROR: invalid character '" << c << "' in strategy string: " << s << endl; + exit(EXIT_FAILURE); + } + } + auto start = result.find_first_not_of(' '); + if (start == string::npos) return ""; + auto end = result.find_last_not_of(' '); + return result.substr(start, end - start + 1); +} + +// Split s by commas at parenthesis +vector SynthRunner::split_top_level(const string& s) { + vector parts; + int depth = 0; + string current; + for (char c : s) { + if (c == '(') { + depth++; + if (depth > 1) { + cout << "ERROR: nested parentheses are not allowed in strategy string: " << s << endl; + exit(EXIT_FAILURE); + } + } else if (c == ')') { + depth--; + if (depth < 0) { + cout << "ERROR: unmatched ')' in strategy string: " << s << endl; + exit(EXIT_FAILURE); + } + } + if (c == ',' && depth == 0) { + parts.push_back(trim(current)); + current.clear(); + } else { + current += c; + } + } + if (!trim(current).empty()) parts.push_back(trim(current)); + return parts; +} + +bool SynthRunner::validate_param_value(ParamType type, const string& v) { + switch (type) { + case ParamType::UInt: return validate_val(v); + case ParamType::Int: return validate_val(v); + case ParamType::Double: return validate_val(v); + } + return false; +} + +SynthStrategy SynthRunner::parse_one_strategy(const string& raw) { + SynthStrategy strat; + string s = trim(raw); + strat.raw = s; + + auto paren_pos = s.find('('); + if (paren_pos == string::npos) { + strat.type = s; + } else { + strat.type = trim(s.substr(0, paren_pos)); + auto rparen = s.rfind(')'); + if (rparen == string::npos) { + cout << "ERROR: missing closing ')' in strategy: " << raw << endl; + exit(EXIT_FAILURE); + } + string params_str = s.substr(paren_pos + 1, rparen - paren_pos - 1); + for (const auto& param : split_top_level(params_str)) { + if (param.empty()) continue; + auto eq = param.find('='); + if (eq == string::npos) { + cout 
<< "ERROR: missing '=' in strategy param: " << param << endl; + exit(EXIT_FAILURE); + } + string k = trim(param.substr(0, eq)); + string v = trim(param.substr(eq + 1)); + + auto it = param_table.find(k); + if (it == param_table.end()) { + cout << "ERROR: unknown strategy parameter '" << k << "'" << endl; + exit(EXIT_FAILURE); + } + + if (!validate_param_value(it->second.type, v)) { + cout << "ERROR: cannot parse value '" << v + << "' for parameter '" << k << "'" << endl; + exit(EXIT_FAILURE); + } + + strat.overrides[k] = v; + } + } + + if (strat.type != "learn" && strat.type != "bve" && strat.type != "const") { + cout << "ERROR: unknown strategy type '" << strat.type << "'. Use 'learn', 'const' or 'bve'." << endl; + exit(EXIT_FAILURE); + } + return strat; +} + +vector SynthRunner::parse_mstrategy(const string& s) { + vector strategies; + for (const auto& tok : split_top_level(s)) { + if (!tok.empty()) strategies.push_back(parse_one_strategy(tok)); + } + if (strategies.empty()) { + cout << "ERROR: --mstrategy string is empty" << endl; + exit(EXIT_FAILURE); + } + return strategies; +} + +ArjunNS::Arjun::ManthanConf SynthRunner::apply_strategy(const ArjunNS::Arjun::ManthanConf& base, + const SynthStrategy& strat) { + auto mconf = base; + if (strat.type == "learn") { +#ifndef EXTRA_SYNTH + cout << "ERROR: strategy type 'learn' is only supported in EXTRA_SYNTH mode!"
<< endl; + exit(EXIT_FAILURE); +#endif + mconf.manthan_base = 0; + } else if (strat.type == "const") { + mconf.manthan_base = 1; + } else if (strat.type == "bve") { + mconf.manthan_base = 2; + } else { + cout << "ERROR: unknown strategy type '" << strat.type << "'" << endl; + exit(EXIT_FAILURE); + } + + for (const auto& [k, v] : strat.overrides) + param_table.at(k).setter(mconf, v); + return mconf; +} + +void SynthRunner::run_manthan_strategies( + ArjunNS::SimplifiedCNF& cnf, + const ArjunNS::Arjun::ManthanConf& mconf_orig, + const vector& strategies) +{ + if (strategies.empty()) { + cout << "ERROR: no strategies to run" << endl; + exit(EXIT_FAILURE); + } + for (size_t i = 0; i + 1 < strategies.size(); i++) { + if (strategies[i].overrides.count("max_repairs") == 0) { + cout << "ERROR: strategy " << i+1 << " (" << strategies[i].raw + << ") must set max_repairs (only the last strategy may omit it)" << endl; + exit(EXIT_FAILURE); + } + } + if (cnf.synth_done()) return; + for (size_t i = 0; i < strategies.size(); i++) { + const auto& strat = strategies[i]; + const bool is_last = (i == strategies.size() - 1); + + auto mconf = apply_strategy(mconf_orig, strat); + if (is_last && strat.overrides.count("max_repairs") == 0) + mconf.max_repairs = std::numeric_limits::max(); + + verb_print(1, "Running Manthan strategy " << i+1 << "/" << strategies.size() + << " -- " << strat.raw << " with max_repairs=" + << (mconf.max_repairs == std::numeric_limits::max() ? std::string("unlimited") : std::to_string(mconf.max_repairs))); + cnf = arjun->standalone_manthan(cnf, mconf); + if (cnf.synth_done()) { + verb_print(1,"Manthan finished with strategy " << i+1 << "/" << strategies.size() + << " -- " << strat.raw); + break; + } + } +} diff --git a/src/synth.h b/src/synth.h new file mode 100644 index 0000000..e537bb6 --- /dev/null +++ b/src/synth.h @@ -0,0 +1,69 @@ +/* + Arjun + + Copyright (c) 2019-2020, Mate Soos and Kuldeep S. Meel. All rights reserved.
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include +#include +#include "arjun.h" +#include "config.h" + +struct SynthStrategy { + std::string type; // "learn", "const" or "bve" + std::string raw; // original strategy string as parsed + std::map overrides; // param name -> string value +}; + +class SynthRunner { +public: + enum class ParamType { UInt, Int, Double }; + + SynthRunner(const ArjunInt::Config& conf, std::unique_ptr& arjun); + + // Parse a comma-separated strategy string, e.g.: + // "learn(samples=1,max_repairs=4), learn(max_repairs=20), bve" + // The last strategy runs with unlimited tries unless it sets max_repairs. + std::vector parse_mstrategy(const std::string& s); + + // Run strategies in sequence on cnf. Returns immediately if cnf.synth_done() already holds. + // Each non-last strategy must set max_repairs; the last runs unlimited unless it overrides it.
+ void run_manthan_strategies( + ArjunNS::SimplifiedCNF& cnf, + const ArjunNS::Arjun::ManthanConf& mconf_orig, + const std::vector& strategies); + +private: + const ArjunInt::Config& conf; + std::unique_ptr& arjun; + + static std::string trim(const std::string& s); + static std::vector split_top_level(const std::string& s); + static bool validate_param_value(ParamType type, const std::string& v); + SynthStrategy parse_one_strategy(const std::string& raw); + ArjunNS::Arjun::ManthanConf apply_strategy( + const ArjunNS::Arjun::ManthanConf& base, + const SynthStrategy& strat); +}; diff --git a/src/test-synth.cpp b/src/test-synth.cpp index 7c93987..68a4ffd 100644 --- a/src/test-synth.cpp +++ b/src/test-synth.cpp @@ -37,6 +37,8 @@ #include #include +using namespace ArjunInt; + #define COLDEF "\033[0m" #define verb_print(a, x) \