@@ -728,6 +728,33 @@ void scx_idle_disable(void)
728
728
/********************************************************************************
729
729
* Helpers that can be called from the BPF scheduler.
730
730
*/
731
+
732
/*
 * Sanity-check a NUMA node id passed in from a BPF scheduler.
 *
 * Returns the validated node id on success, or a negative errno:
 *  -EOPNOTSUPP if per-node idle tracking (%SCX_OPS_BUILTIN_IDLE_PER_NODE)
 *              is not enabled (also reported via scx_ops_error()),
 *  -ENOENT     for NUMA_NO_NODE (deliberately not an scx error),
 *  -EINVAL     for out-of-range or impossible node ids (reported as an
 *              scx error).
 *
 * The check order matters: the NUMA_NO_NODE short-circuit must run before
 * the range check so it does not get reported as an invalid node.
 */
static int validate_node(int node)
{
	if (!static_branch_likely(&scx_builtin_idle_per_node)) {
		scx_ops_error("per-node idle tracking is disabled");
		return -EOPNOTSUPP;
	}

	/* Return no entry for NUMA_NO_NODE (not a critical scx error) */
	if (node == NUMA_NO_NODE)
		return -ENOENT;

	/* Make sure node is in a valid range */
	if (node < 0 || node >= nr_node_ids) {
		scx_ops_error("invalid node %d", node);
		return -EINVAL;
	}

	/* Make sure the node is part of the set of possible nodes */
	if (!node_possible(node)) {
		scx_ops_error("unavailable node %d", node);
		return -EINVAL;
	}

	return node;
}
757
+
731
758
__bpf_kfunc_start_defs ();
732
759
733
760
static bool check_builtin_idle_enabled (void )
@@ -739,6 +766,23 @@ static bool check_builtin_idle_enabled(void)
739
766
return false;
740
767
}
741
768
769
+ /**
770
+ * scx_bpf_cpu_node - Return the NUMA node the given @cpu belongs to, or
771
+ * trigger an error if @cpu is invalid
772
+ * @cpu: target CPU
773
+ */
774
+ __bpf_kfunc int scx_bpf_cpu_node (s32 cpu )
775
+ {
776
+ #ifdef CONFIG_NUMA
777
+ if (!ops_cpu_valid (cpu , NULL ))
778
+ return NUMA_NO_NODE ;
779
+
780
+ return cpu_to_node (cpu );
781
+ #else
782
+ return 0 ;
783
+ #endif
784
+ }
785
+
742
786
/**
743
787
* scx_bpf_select_cpu_dfl - The default implementation of ops.select_cpu()
744
788
* @p: task_struct to select a CPU for
@@ -771,6 +815,28 @@ __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
771
815
return prev_cpu ;
772
816
}
773
817
818
/**
 * scx_bpf_get_idle_cpumask_node - Get a referenced kptr to the
 * idle-tracking per-CPU cpumask of a target NUMA node.
 * @node: target NUMA node
 *
 * Returns an empty cpumask if idle tracking is not enabled, if @node is
 * not valid, or running on a UP kernel. In this case the actual error will
 * be reported to the BPF scheduler via scx_ops_error().
 */
__bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask_node(int node)
{
	/* validate_node() reports the scx error on failure */
	node = validate_node(node);
	if (node < 0)
		return cpu_none_mask;

#ifdef CONFIG_SMP
	return idle_cpumask(node)->cpu;
#else
	/* No idle tracking on UP kernels: hand back an empty mask */
	return cpu_none_mask;
#endif
}
839
+
774
840
/**
775
841
* scx_bpf_get_idle_cpumask - Get a referenced kptr to the idle-tracking
776
842
* per-CPU cpumask.
@@ -795,6 +861,32 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask(void)
795
861
#endif
796
862
}
797
863
864
+ /**
865
+ * scx_bpf_get_idle_smtmask_node - Get a referenced kptr to the
866
+ * idle-tracking, per-physical-core cpumask of a target NUMA node. Can be
867
+ * used to determine if an entire physical core is free.
868
+ * @node: target NUMA node
869
+ *
870
+ * Returns an empty cpumask if idle tracking is not enabled, if @node is
871
+ * not valid, or running on a UP kernel. In this case the actual error will
872
+ * be reported to the BPF scheduler via scx_ops_error().
873
+ */
874
+ __bpf_kfunc const struct cpumask * scx_bpf_get_idle_smtmask_node (int node )
875
+ {
876
+ node = validate_node (node );
877
+ if (node < 0 )
878
+ return cpu_none_mask ;
879
+
880
+ #ifdef CONFIG_SMP
881
+ if (sched_smt_active ())
882
+ return idle_cpumask (node )-> smt ;
883
+ else
884
+ return idle_cpumask (node )-> cpu ;
885
+ #else
886
+ return cpu_none_mask ;
887
+ #endif
888
+ }
889
+
798
890
/**
799
891
* scx_bpf_get_idle_smtmask - Get a referenced kptr to the idle-tracking,
800
892
* per-physical-core cpumask. Can be used to determine if an entire physical
@@ -859,6 +951,35 @@ __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu)
859
951
return false;
860
952
}
861
953
954
+ /**
955
+ * scx_bpf_pick_idle_cpu_node - Pick and claim an idle cpu from @node
956
+ * @cpus_allowed: Allowed cpumask
957
+ * @node: target NUMA node
958
+ * @flags: %SCX_PICK_IDLE_* flags
959
+ *
960
+ * Pick and claim an idle cpu in @cpus_allowed from the NUMA node @node.
961
+ *
962
+ * Returns the picked idle cpu number on success, or -%EBUSY if no matching
963
+ * cpu was found.
964
+ *
965
+ * The search starts from @node and proceeds to other online NUMA nodes in
966
+ * order of increasing distance (unless SCX_PICK_IDLE_IN_NODE is specified,
967
+ * in which case the search is limited to the target @node).
968
+ *
969
+ * Always returns an error if ops.update_idle() is implemented and
970
+ * %SCX_OPS_KEEP_BUILTIN_IDLE is not set, or if
971
+ * %SCX_OPS_BUILTIN_IDLE_PER_NODE is not set.
972
+ */
973
+ __bpf_kfunc s32 scx_bpf_pick_idle_cpu_node (const struct cpumask * cpus_allowed ,
974
+ int node , u64 flags )
975
+ {
976
+ node = validate_node (node );
977
+ if (node < 0 )
978
+ return node ;
979
+
980
+ return scx_pick_idle_cpu (cpus_allowed , node , flags );
981
+ }
982
+
862
983
/**
863
984
* scx_bpf_pick_idle_cpu - Pick and claim an idle cpu
864
985
* @cpus_allowed: Allowed cpumask
@@ -877,16 +998,64 @@ __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu)
877
998
*
878
999
* Unavailable if ops.update_idle() is implemented and
879
1000
* %SCX_OPS_KEEP_BUILTIN_IDLE is not set.
1001
+ *
1002
+ * Always returns an error if %SCX_OPS_BUILTIN_IDLE_PER_NODE is set, use
1003
+ * scx_bpf_pick_idle_cpu_node() instead.
880
1004
*/
881
1005
__bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed,
				      u64 flags)
{
	/*
	 * The global idle masks are invalid when per-node tracking is on;
	 * reject the call so users switch to scx_bpf_pick_idle_cpu_node().
	 * This check must come first so callers get the specific error.
	 */
	if (static_branch_maybe(CONFIG_NUMA, &scx_builtin_idle_per_node)) {
		scx_ops_error("per-node idle tracking is enabled");
		return -EBUSY;
	}

	/* check_builtin_idle_enabled() reports the scx error itself */
	if (!check_builtin_idle_enabled())
		return -EBUSY;

	/* NUMA_NO_NODE selects the flat, node-agnostic idle masks */
	return scx_pick_idle_cpu(cpus_allowed, NUMA_NO_NODE, flags);
}
889
1018
1019
+ /**
1020
+ * scx_bpf_pick_any_cpu_node - Pick and claim an idle cpu if available
1021
+ * or pick any CPU from @node
1022
+ * @cpus_allowed: Allowed cpumask
1023
+ * @node: target NUMA node
1024
+ * @flags: %SCX_PICK_IDLE_CPU_* flags
1025
+ *
1026
+ * Pick and claim an idle cpu in @cpus_allowed. If none is available, pick any
1027
+ * CPU in @cpus_allowed. Guaranteed to succeed and returns the picked idle cpu
1028
+ * number if @cpus_allowed is not empty. -%EBUSY is returned if @cpus_allowed is
1029
+ * empty.
1030
+ *
1031
+ * The search starts from @node and proceeds to other online NUMA nodes in
1032
+ * order of increasing distance (unless SCX_PICK_IDLE_IN_NODE is specified,
1033
+ * in which case the search is limited to the target @node).
1034
+ *
1035
+ * If ops.update_idle() is implemented and %SCX_OPS_KEEP_BUILTIN_IDLE is not
1036
+ * set, this function can't tell which CPUs are idle and will always pick any
1037
+ * CPU.
1038
+ */
1039
+ __bpf_kfunc s32 scx_bpf_pick_any_cpu_node (const struct cpumask * cpus_allowed ,
1040
+ int node , u64 flags )
1041
+ {
1042
+ s32 cpu ;
1043
+
1044
+ node = validate_node (node );
1045
+ if (node < 0 )
1046
+ return node ;
1047
+
1048
+ cpu = scx_pick_idle_cpu (cpus_allowed , node , flags );
1049
+ if (cpu >= 0 )
1050
+ return cpu ;
1051
+
1052
+ cpu = cpumask_any_distribute (cpus_allowed );
1053
+ if (cpu < nr_cpu_ids )
1054
+ return cpu ;
1055
+ else
1056
+ return - EBUSY ;
1057
+ }
1058
+
890
1059
/**
891
1060
* scx_bpf_pick_any_cpu - Pick and claim an idle cpu if available or pick any CPU
892
1061
* @cpus_allowed: Allowed cpumask
@@ -900,12 +1069,20 @@ __bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed,
900
1069
* If ops.update_idle() is implemented and %SCX_OPS_KEEP_BUILTIN_IDLE is not
901
1070
* set, this function can't tell which CPUs are idle and will always pick any
902
1071
* CPU.
1072
+ *
1073
+ * Always returns an error if %SCX_OPS_BUILTIN_IDLE_PER_NODE is set, use
1074
+ * scx_bpf_pick_any_cpu_node() instead.
903
1075
*/
904
1076
__bpf_kfunc s32 scx_bpf_pick_any_cpu (const struct cpumask * cpus_allowed ,
905
1077
u64 flags )
906
1078
{
907
1079
s32 cpu ;
908
1080
1081
+ if (static_branch_maybe (CONFIG_NUMA , & scx_builtin_idle_per_node )) {
1082
+ scx_ops_error ("per-node idle tracking is enabled" );
1083
+ return - EBUSY ;
1084
+ }
1085
+
909
1086
if (static_branch_likely (& scx_builtin_idle_enabled )) {
910
1087
cpu = scx_pick_idle_cpu (cpus_allowed , NUMA_NO_NODE , flags );
911
1088
if (cpu >= 0 )
@@ -922,11 +1099,16 @@ __bpf_kfunc s32 scx_bpf_pick_any_cpu(const struct cpumask *cpus_allowed,
922
1099
__bpf_kfunc_end_defs ();
923
1100
924
1101
/*
 * Idle-tracking kfuncs exposed to BPF schedulers. The *_node variants
 * require %SCX_OPS_BUILTIN_IDLE_PER_NODE; the cpumask getters are
 * KF_ACQUIRE (paired with scx_bpf_put_idle_cpumask) and the pickers are
 * KF_RCU since they walk RCU-protected cpumasks.
 */
BTF_KFUNCS_START(scx_kfunc_ids_idle)
BTF_ID_FLAGS(func, scx_bpf_cpu_node)
BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask_node, KF_ACQUIRE)
BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_ACQUIRE)
BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask_node, KF_ACQUIRE)
BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_ACQUIRE)
BTF_ID_FLAGS(func, scx_bpf_put_idle_cpumask, KF_RELEASE)
BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle)
BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu_node, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu_node, KF_RCU)
BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU)
BTF_KFUNCS_END(scx_kfunc_ids_idle)
932
1114
0 commit comments