Skip to content

Commit 7aa69a2

Browse files
authored
Specialized function dispatchers for very simple functions (#944)
Nanobind previously distinguished between a "complex" and a "simple" function dispatcher. This PR adds variants of the simple dispatcher that further specialize to 0- and 1-argument functions without overloads. One common class of functions that benefits is property getters. The speedup is pretty small (~2%), but we will take it :-).
1 parent c4a10ea commit 7aa69a2

File tree

1 file changed

+116
-2
lines changed

1 file changed

+116
-2
lines changed

src/nb_func.cpp

Lines changed: 116 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ NAMESPACE_BEGIN(detail)
2929
// Forward/external declarations
3030
extern Buffer buf;
3131

32+
static PyObject *nb_func_vectorcall_simple_0(PyObject *, PyObject *const *,
33+
size_t, PyObject *) noexcept;
34+
static PyObject *nb_func_vectorcall_simple_1(PyObject *, PyObject *const *,
35+
size_t, PyObject *) noexcept;
3236
static PyObject *nb_func_vectorcall_simple(PyObject *, PyObject *const *,
3337
size_t, PyObject *) noexcept;
3438
static PyObject *nb_func_vectorcall_complex(PyObject *, PyObject *const *,
@@ -335,8 +339,20 @@ PyObject *nb_func_new(const void *in_) noexcept {
335339

336340
func->max_nargs = max_nargs;
337341
func->complex_call = complex_call;
338-
func->vectorcall = complex_call ? nb_func_vectorcall_complex
339-
: nb_func_vectorcall_simple;
342+
343+
344+
PyObject* (*vectorcall)(PyObject *, PyObject * const*, size_t, PyObject *);
345+
if (complex_call) {
346+
vectorcall = nb_func_vectorcall_complex;
347+
} else {
348+
if (f->nargs == 0 && !prev_overloads)
349+
vectorcall = nb_func_vectorcall_simple_0;
350+
else if (f->nargs == 1 && !prev_overloads)
351+
vectorcall = nb_func_vectorcall_simple_1;
352+
else
353+
vectorcall = nb_func_vectorcall_simple;
354+
}
355+
func->vectorcall = vectorcall;
340356

341357
#if !defined(NB_FREE_THREADED)
342358
// Register the function
@@ -954,6 +970,104 @@ static PyObject *nb_func_vectorcall_simple(PyObject *self,
954970
return result;
955971
}
956972

973+
/// Simplified nb_func_vectorcall variant for non-overloaded functions with 0 args
974+
static PyObject *nb_func_vectorcall_simple_0(PyObject *self,
975+
PyObject *const *args_in,
976+
size_t nargsf,
977+
PyObject *kwargs_in) noexcept {
978+
func_data *fr = nb_func_data(self);
979+
const size_t nargs_in = (size_t) NB_VECTORCALL_NARGS(nargsf);
980+
981+
// Handler routine that will be invoked in case of an error condition
982+
PyObject *(*error_handler)(PyObject *, PyObject *const *, size_t,
983+
PyObject *) noexcept = nullptr;
984+
985+
PyObject *result = nullptr;
986+
987+
if (kwargs_in == nullptr && nargs_in == 0) {
988+
try {
989+
result = fr->impl((void *) fr->capture, (PyObject **) args_in,
990+
nullptr, (rv_policy) (fr->flags & 0b111), nullptr);
991+
if (result == NB_NEXT_OVERLOAD)
992+
error_handler = nb_func_error_overload;
993+
else if (!result)
994+
error_handler = nb_func_error_noconvert;
995+
} catch (builtin_exception &e) {
996+
if (!set_builtin_exception_status(e))
997+
error_handler = nb_func_error_overload;
998+
} catch (python_error &e) {
999+
e.restore();
1000+
} catch (...) {
1001+
nb_func_convert_cpp_exception();
1002+
}
1003+
} else {
1004+
error_handler = nb_func_error_overload;
1005+
}
1006+
1007+
if (NB_UNLIKELY(error_handler))
1008+
result = error_handler(self, args_in, nargs_in, kwargs_in);
1009+
1010+
return result;
1011+
}
1012+
1013+
/// Simplified nb_func_vectorcall variant for non-overloaded functions with 1 arg
1014+
static PyObject *nb_func_vectorcall_simple_1(PyObject *self,
1015+
PyObject *const *args_in,
1016+
size_t nargsf,
1017+
PyObject *kwargs_in) noexcept {
1018+
func_data *fr = nb_func_data(self);
1019+
const size_t nargs_in = (size_t) NB_VECTORCALL_NARGS(nargsf);
1020+
bool is_constructor = fr->flags & (uint32_t) func_flags::is_constructor;
1021+
1022+
// Handler routine that will be invoked in case of an error condition
1023+
PyObject *(*error_handler)(PyObject *, PyObject *const *, size_t,
1024+
PyObject *) noexcept = nullptr;
1025+
1026+
PyObject *result = nullptr;
1027+
1028+
if (kwargs_in == nullptr && nargs_in == 1 && args_in[0] != Py_None) {
1029+
PyObject *arg = args_in[0];
1030+
cleanup_list cleanup(arg);
1031+
uint8_t args_flags[1] = {
1032+
(uint8_t) (is_constructor ? (1 | (uint8_t) cast_flags::construct) : 1)
1033+
};
1034+
1035+
try {
1036+
result = fr->impl((void *) fr->capture, (PyObject **) args_in,
1037+
args_flags, (rv_policy) (fr->flags & 0b111), &cleanup);
1038+
if (result == NB_NEXT_OVERLOAD) {
1039+
error_handler = nb_func_error_overload;
1040+
} else if (!result) {
1041+
error_handler = nb_func_error_noconvert;
1042+
} else if (is_constructor) {
1043+
nb_inst *arg_nb = (nb_inst *) arg;
1044+
arg_nb->destruct = true;
1045+
arg_nb->state = nb_inst::state_ready;
1046+
if (NB_UNLIKELY(arg_nb->intrusive))
1047+
nb_type_data(Py_TYPE(arg))
1048+
->set_self_py(inst_ptr(arg_nb), arg);
1049+
}
1050+
} catch (builtin_exception &e) {
1051+
if (!set_builtin_exception_status(e))
1052+
error_handler = nb_func_error_overload;
1053+
} catch (python_error &e) {
1054+
e.restore();
1055+
} catch (...) {
1056+
nb_func_convert_cpp_exception();
1057+
}
1058+
1059+
if (NB_UNLIKELY(cleanup.used()))
1060+
cleanup.release();
1061+
} else {
1062+
error_handler = nb_func_error_overload;
1063+
}
1064+
1065+
if (NB_UNLIKELY(error_handler))
1066+
result = error_handler(self, args_in, nargs_in, kwargs_in);
1067+
1068+
return result;
1069+
}
1070+
9571071
static PyObject *nb_bound_method_vectorcall(PyObject *self,
9581072
PyObject *const *args_in,
9591073
size_t nargsf,

0 commit comments

Comments
 (0)