Skip to content

Commit 95ad4cf

Browse files
authored
Projected views in katana.local [KAT-2411] (#918)
* Adds iterators to DynamicBitset because I'm sick of not having them. * Cleans up the PropertyGraph::MakeProjectedGraph method to accept type IDs and allow null selection. * Add Graph.project to python to create a projected graph view.
1 parent 77b2655 commit 95ad4cf

File tree

27 files changed

+370
-210
lines changed

27 files changed

+370
-210
lines changed

libgalois/include/katana/DynamicBitset.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include <boost/mpl/has_xxx.hpp>
3535

3636
#include "katana/AtomicWrapper.h"
37+
#include "katana/DynamicBitsetSlow.h"
3738
#include "katana/Galois.h"
3839
#include "katana/PODVector.h"
3940
#include "katana/config.h"
@@ -45,6 +46,7 @@ namespace katana {
4546
class KATANA_EXPORT DynamicBitset {
4647
public: // types
4748
using TItem = katana::CopyableAtomic<uint64_t>;
49+
using iterator = DynamicBitsetIterator<DynamicBitset>;
4850

4951
private: // variables
5052
katana::PODVector<TItem> bitvec_;
@@ -92,6 +94,18 @@ class KATANA_EXPORT DynamicBitset {
9294
*/
9395
auto& get_vec() { return bitvec_; }
9496

97+
/// Returns an iterator positioned on the lowest set bit of this bitset, or
/// an iterator equal to end() when there is no set bit to visit.
iterator begin() const {
  // An empty bitset has no bit 0 to test, so there is nothing to iterate.
  if (size() == 0) {
    return end();
  }
  iterator first{this, 0, 0};
  if (!test(0)) {
    // Bit 0 is clear: advance to the first set bit (or to the end position).
    ++first;
  }
  return first;
}
106+
107+
/// Returns the past-the-end iterator: its word index equals the number of
/// elements in bitvec_ and its bit offset is 0.
iterator end() const { return {this, bitvec_.size(), 0}; }
108+
95109
/**
96110
* Resizes the bitset.
97111
*

libgraph/include/katana/PropertyGraph.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -279,12 +279,23 @@ class KATANA_EXPORT PropertyGraph {
279279
const katana::RDGManifest& rdg_manifest,
280280
const katana::RDGLoadOptions& opts, katana::TxnContext* txn_ctx);
281281

282-
/// Make a projected graph from a property graph. Shares state with
283-
/// the original graph.
284-
static std::unique_ptr<PropertyGraph> MakeProjectedGraph(
282+
[[deprecated]] static std::unique_ptr<PropertyGraph> MakeProjectedGraph(
285283
const PropertyGraph& pg, const std::vector<std::string>& node_types,
286284
const std::vector<std::string>& edge_types);
287285

286+
/// Make a projected graph from a property graph. Shares state with
287+
/// the original graph.
288+
static Result<std::unique_ptr<PropertyGraph>> MakeProjectedGraph(
289+
const PropertyGraph& pg,
290+
std::optional<std::vector<std::string>> node_types,
291+
std::optional<std::vector<std::string>> edge_types);
292+
293+
/// Make a projected graph from a property graph. Shares state with
294+
/// the original graph.
295+
static Result<std::unique_ptr<PropertyGraph>> MakeProjectedGraph(
296+
const PropertyGraph& pg, std::optional<SetOfEntityTypeIDs> node_types,
297+
std::optional<SetOfEntityTypeIDs> edge_types);
298+
288299
/// \return A copy of this with the same set of properties. The copy shares no
289300
/// state with this.
290301
Result<std::unique_ptr<PropertyGraph>> Copy(

libgraph/src/PropertyGraph.cpp

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,36 @@ std::unique_ptr<katana::PropertyGraph>
351351
katana::PropertyGraph::MakeProjectedGraph(
352352
const PropertyGraph& pg, const std::vector<std::string>& node_types,
353353
const std::vector<std::string>& edge_types) {
354+
auto ret = MakeProjectedGraph(
355+
pg, node_types.empty() ? std::nullopt : std::make_optional(node_types),
356+
edge_types.empty() ? std::nullopt : std::make_optional(edge_types));
357+
KATANA_LOG_VASSERT(ret.has_value(), "{}", ret.error());
358+
return std::move(ret.value());
359+
}
360+
361+
/// Make a projected graph from a property graph, selecting node and edge
/// types by name.
///
/// \param pg the graph to project.
/// \param node_types names of the node types to select, or std::nullopt to
///     pass no node-type selection to the ID-based overload.
/// \param edge_types names of the edge types to select, or std::nullopt to
///     pass no edge-type selection to the ID-based overload.
/// \return the result of the ID-based MakeProjectedGraph overload.
///     NOTE(review): KATANA_CHECKED presumably returns early with the error
///     when a type-name lookup fails -- confirm against its definition.
katana::Result<std::unique_ptr<katana::PropertyGraph>>
katana::PropertyGraph::MakeProjectedGraph(
    const PropertyGraph& pg, std::optional<std::vector<std::string>> node_types,
    std::optional<std::vector<std::string>> edge_types) {
  // Translate the node type names into type IDs, if a selection was given.
  std::optional<SetOfEntityTypeIDs> node_type_ids;
  if (node_types) {
    node_type_ids = KATANA_CHECKED(
        pg.GetNodeTypeManager().GetEntityTypeIDs(node_types.value()));
  }

  // Same translation for the edge type names.
  std::optional<SetOfEntityTypeIDs> edge_type_ids;
  if (edge_types) {
    edge_type_ids = KATANA_CHECKED(
        pg.GetEdgeTypeManager().GetEntityTypeIDs(edge_types.value()));
  }

  // The ID sets are not used again here, so hand them over by move instead
  // of copying them into the by-value parameters of the ID-based overload.
  return MakeProjectedGraph(
      pg, std::move(node_type_ids), std::move(edge_type_ids));
}
377+
378+
/// Make a projected graph from a property graph. Shares state with
379+
/// the original graph.
380+
katana::Result<std::unique_ptr<katana::PropertyGraph>>
381+
katana::PropertyGraph::MakeProjectedGraph(
382+
const PropertyGraph& pg, std::optional<SetOfEntityTypeIDs> node_types,
383+
std::optional<SetOfEntityTypeIDs> edge_types) {
354384
const auto& topology = pg.topology();
355385
if (topology.empty()) {
356386
return std::make_unique<PropertyGraph>();
@@ -366,7 +396,7 @@ katana::PropertyGraph::MakeProjectedGraph(
366396
NUMAArray<Node> original_to_projected_nodes_mapping;
367397
original_to_projected_nodes_mapping.allocateInterleaved(topology.NumNodes());
368398

369-
if (node_types.empty()) {
399+
if (!node_types) {
370400
num_new_nodes = topology.NumNodes();
371401
// set all nodes
372402
katana::do_all(katana::iterate(topology.Nodes()), [&](auto src) {
@@ -378,21 +408,14 @@ katana::PropertyGraph::MakeProjectedGraph(
378408
original_to_projected_nodes_mapping.begin(),
379409
original_to_projected_nodes_mapping.end(), Node{0});
380410

381-
std::set<katana::EntityTypeID> node_entity_type_ids;
382-
383-
for (auto node_type : node_types) {
384-
auto entity_type_id = pg.GetNodeEntityTypeID(node_type);
385-
node_entity_type_ids.insert(entity_type_id);
386-
}
387-
388411
katana::GAccumulator<uint32_t> accum_num_new_nodes;
389412

390413
katana::do_all(katana::iterate(topology.Nodes()), [&](auto src) {
391-
for (auto type : node_entity_type_ids) {
414+
for (auto type : node_types.value()) {
392415
if (pg.DoesNodeHaveType(src, type)) {
393416
accum_num_new_nodes += 1;
394417
bitset_nodes.set(src);
395-
// this sets the correspondign entry in the array to 1
418+
// this sets the corresponding entry in the array to 1
396419
// will perform a prefix sum on this array later on
397420
original_to_projected_nodes_mapping[src] = 1;
398421
return;
@@ -444,7 +467,7 @@ katana::PropertyGraph::MakeProjectedGraph(
444467
// initializes the edge-index array to all zeros
445468
katana::ParallelSTL::fill(out_indices.begin(), out_indices.end(), Edge{0});
446469

447-
if (edge_types.empty()) {
470+
if (!edge_types) {
448471
katana::GAccumulator<uint32_t> accum_num_new_edges;
449472
// set all edges incident to projected nodes
450473
katana::do_all(
@@ -464,13 +487,6 @@ katana::PropertyGraph::MakeProjectedGraph(
464487

465488
num_new_edges = accum_num_new_edges.reduce();
466489
} else {
467-
std::set<katana::EntityTypeID> edge_entity_type_ids;
468-
469-
for (auto edge_type : edge_types) {
470-
auto entity_type_id = pg.GetEdgeEntityTypeID(edge_type);
471-
edge_entity_type_ids.insert(entity_type_id);
472-
}
473-
474490
katana::GAccumulator<uint32_t> accum_num_new_edges;
475491

476492
katana::do_all(
@@ -481,7 +497,7 @@ katana::PropertyGraph::MakeProjectedGraph(
481497
for (Edge e : topology.OutEdges(old_src)) {
482498
auto dest = topology.OutEdgeDst(e);
483499
if (bitset_nodes.test(dest)) {
484-
for (auto type : edge_entity_type_ids) {
500+
for (auto type : edge_types.value()) {
485501
if (pg.DoesEdgeHaveTypeFromTopoIndex(e, type)) {
486502
accum_num_new_edges += 1;
487503
bitset_edges.set(e);

libgraph/test/projection.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,14 @@ main(int argc, char** argv) {
6868
std::vector<std::string> edge_types;
6969
SplitString(edgeTypes, &edge_types);
7070

71-
auto pg_view = katana::PropertyGraph::MakeProjectedGraph(
72-
full_graph, node_types, edge_types);
71+
auto pg_view_res = katana::PropertyGraph::MakeProjectedGraph(
72+
full_graph,
73+
node_types.empty() ? std::nullopt : std::make_optional(node_types),
74+
edge_types.empty() ? std::nullopt : std::make_optional(edge_types));
75+
if (!pg_view_res) {
76+
KATANA_LOG_FATAL("Failed to construct projection: {}", pg_view_res.error());
77+
}
78+
auto pg_view = std::move(pg_view_res.value());
7379

7480
katana::analytics::TemporaryPropertyGuard temp_node_property{
7581
full_graph.NodeMutablePropertyView()};

libgraph/test/transformation-view-optional-topology.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,13 @@ main(int argc, char** argv) {
8383
std::vector<std::string> edge_types;
8484
SplitString(edgeTypes, &edge_types);
8585

86-
auto pg_view =
87-
katana::PropertyGraph::MakeProjectedGraph(pg, node_types, edge_types);
86+
auto pg_view_res = katana::PropertyGraph::MakeProjectedGraph(
87+
pg, node_types.empty() ? std::nullopt : std::make_optional(node_types),
88+
edge_types.empty() ? std::nullopt : std::make_optional(edge_types));
89+
if (!pg_view_res) {
90+
KATANA_LOG_FATAL("Failed to construct projection: {}", pg_view_res.error());
91+
}
92+
auto pg_view = std::move(pg_view_res.value());
8893

8994
TestOptionalTopologyGenerationEdgeShuffleTopology(*pg_view);
9095
TestOptionalTopologyGenerationShuffleTopology(*pg_view);

libkatana_python_native/src/PropertyGraph.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,46 @@ DefPropertyGraph(py::module& m) {
205205
katana::DefWithNumba<&PropertyGraph::NumNodes>(cls, "num_nodes");
206206
katana::DefWithNumba<&PropertyGraph::NumEdges>(cls, "num_edges");
207207

208+
cls.def(
209+
"project",
210+
[](PropertyGraph& self, py::object node_types,
211+
py::object edge_types) -> Result<std::shared_ptr<PropertyGraph>> {
212+
std::optional<katana::SetOfEntityTypeIDs> node_type_ids;
213+
if (!node_types.is_none()) {
214+
node_type_ids = katana::SetOfEntityTypeIDs();
215+
node_type_ids->resize(self.GetNodeTypeManager().GetNumEntityTypes());
216+
for (auto& t : node_types) {
217+
node_type_ids->set(py::cast<EntityType>(t).type_id);
218+
}
219+
}
220+
std::optional<katana::SetOfEntityTypeIDs> edge_type_ids;
221+
if (!edge_types.is_none()) {
222+
edge_type_ids = katana::SetOfEntityTypeIDs();
223+
edge_type_ids->resize(self.GetEdgeTypeManager().GetNumEntityTypes());
224+
for (auto& t : edge_types) {
225+
edge_type_ids->set(py::cast<EntityType>(t).type_id);
226+
}
227+
}
228+
229+
py::gil_scoped_release
230+
guard; // graph projection may copy or load data.
231+
// is_none is safe without the GIL because it is just a pointer compare.
232+
return KATANA_CHECKED(PropertyGraph::MakeProjectedGraph(
233+
self, node_type_ids, edge_type_ids));
234+
},
235+
py::arg("node_types") = py::none(), py::arg("edge_types") = py::none(),
236+
R"""(
237+
Get a projected view of the graph which only contains nodes or edges of
238+
specific types.
239+
240+
:type node_types: Optional[Iterable[EntityType]]
241+
:param node_types: A set of node types to include in the projected graph,
242+
or ``None`` to keep all nodes.
243+
:type edge_types: Optional[Iterable[EntityType]]
244+
:param edge_types: A set of edge types to include in the projected graph,
245+
or ``None`` to keep all edges on the selected nodes.
246+
)""");
247+
208248
// GetLocalNodeID(NodeHandle) -> LocalNodeID - local node ID
209249
cls.def(
210250
"get_local_node_id",

libsupport/include/katana/DynamicBitsetSlow.h

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,97 @@
1414

1515
namespace katana {
1616

17+
/// An iterator over dynamic bitsets.
18+
///
19+
/// Increment is an optimized linear search over the bitset, so iteration is
20+
/// O(N) where N is the size of the bitset, not O(|S|) where |S| is the number
21+
/// of set bits in the bit set.
22+
23+
// TODO(amp): This is a template so it can be used for both DynamicBitsetSlow
24+
// and DynamicBitset. This can be made a simple class once DynamicBitsetSlow
25+
// is removed.
26+
template <typename DynamicBitsetType>
27+
class KATANA_EXPORT DynamicBitsetIterator
28+
: public std::iterator<
29+
std::forward_iterator_tag, uint64_t, int64_t, const uint64_t*,
30+
uint64_t> {
31+
const DynamicBitsetType* underlying_;
32+
uint64_t array_index_;
33+
uint8_t bit_offset_;
34+
35+
public:
36+
DynamicBitsetIterator(
37+
const DynamicBitsetType* underlying, uint64_t array_index,
38+
uint8_t bit_offset)
39+
: underlying_(underlying),
40+
array_index_(array_index),
41+
bit_offset_(bit_offset) {}
42+
43+
DynamicBitsetIterator& operator++() {
44+
// Step forward one to the bit we want to examine first.
45+
bit_offset_++;
46+
if (bit_offset_ > DynamicBitsetType::kNumBitsInUint64) {
47+
bit_offset_ = 0;
48+
array_index_++;
49+
}
50+
51+
const auto& bitvec = underlying_->get_vec();
52+
const size_t size = underlying_->size();
53+
54+
// Used only to make sure we stop on the last real used bit in cases where
55+
// the number of bits is not a multiple of kNumBitsInUint64.
56+
uint64_t current_bit_index = **this;
57+
58+
// The following code is optimized to make the search process fast for
59+
// sparse bitsets. It's performance for densely filled bitsets should be
60+
// good too.
61+
62+
// Iterate forward word by word
63+
for (; array_index_ < bitvec.size(); array_index_++) {
64+
uint64_t word = bitvec[array_index_].load(std::memory_order_relaxed);
65+
// For each word we check if it is non-zero (that is it contains a 1 bit)
66+
if (word != 0) {
67+
// Iterate over the bits in the work
68+
uint64_t bit_mask = uint64_t{1} << bit_offset_;
69+
for (; bit_offset_ < DynamicBitsetType::kNumBitsInUint64 &&
70+
current_bit_index < size;
71+
bit_offset_++, current_bit_index++, bit_mask <<= 1) {
72+
// Check if the bit is set. If it is we have reached where we need to be
73+
if ((word & bit_mask) != 0) {
74+
return *this;
75+
}
76+
}
77+
// Reset bit_offset_ here so that we start from our last bit_offset_ and
78+
// only reset when we roll over to the next word.
79+
bit_offset_ = 0;
80+
}
81+
}
82+
bit_offset_ = 0;
83+
array_index_ = bitvec.size();
84+
return *this;
85+
}
86+
87+
DynamicBitsetIterator operator++(int) {
88+
auto r = *this;
89+
++(*this);
90+
return r;
91+
}
92+
93+
reference operator*() const {
94+
return array_index_ * DynamicBitsetType::kNumBitsInUint64 + bit_offset_;
95+
}
96+
97+
bool operator==(const DynamicBitsetIterator& other) {
98+
return underlying_ == other.underlying_ &&
99+
array_index_ == other.array_index_ &&
100+
bit_offset_ == other.bit_offset_;
101+
}
102+
103+
bool operator!=(const DynamicBitsetIterator& other) {
104+
return !(*this == other);
105+
}
106+
};
107+
17108
//TODO(emcginnis): Remove this class entirely when DynamicBitset is available to libsupport
18109
/**
19110
* Concurrent, thread safe, serial implementation of a dynamically allocated bitset
@@ -24,6 +115,8 @@ class KATANA_EXPORT DynamicBitsetSlow {
24115
size_t num_bits_{0};
25116

26117
public:
118+
using iterator = DynamicBitsetIterator<DynamicBitsetSlow>;
119+
27120
static constexpr uint32_t kNumBitsInUint64 = sizeof(uint64_t) * CHAR_BIT;
28121

29122
explicit DynamicBitsetSlow(
@@ -75,6 +168,18 @@ class KATANA_EXPORT DynamicBitsetSlow {
75168
*/
76169
auto& get_vec() { return bitvec_; }
77170

171+
/// Returns an iterator positioned on the lowest set bit of this bitset, or
/// an iterator equal to end() when there is no set bit to visit.
iterator begin() const {
  // An empty bitset has no bit 0 to test, so there is nothing to iterate.
  if (size() == 0) {
    return end();
  }
  iterator first{this, 0, 0};
  if (!test(0)) {
    // Bit 0 is clear: advance to the first set bit (or to the end position).
    ++first;
  }
  return first;
}
180+
181+
/// Returns the past-the-end iterator: its word index equals the number of
/// elements in bitvec_ and its bit offset is 0.
iterator end() const { return {this, bitvec_.size(), 0}; }
182+
78183
/**
79184
* Resizes the bitset.
80185
*

libsupport/test/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@ function(add_unit_test name)
1616
)
1717
endfunction()
1818

19-
add_unit_test(arrow)
2019
add_unit_test(array-from-scalars)
20+
add_unit_test(arrow)
2121
add_unit_test(bitmath)
2222
add_unit_test(cache)
2323
add_unit_test(disjoint_range_iterator)
24+
add_unit_test(dynamic-bitset)
2425
add_unit_test(env)
2526
add_unit_test(experimental)
2627
add_unit_test(logging)

0 commit comments

Comments
 (0)