From db40c9ef89cf617d6ddf67541311290553b9c39d Mon Sep 17 00:00:00 2001 From: tanmoyio Date: Tue, 13 Jun 2023 18:59:22 +0530 Subject: [PATCH 1/4] fix: Umap duplicate index --- graphistry/umap_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/graphistry/umap_utils.py b/graphistry/umap_utils.py index 8ed1dd347a..7e9c2ffac8 100644 --- a/graphistry/umap_utils.py +++ b/graphistry/umap_utils.py @@ -589,10 +589,10 @@ def umap( index = res._nodes.index if res._node is None: logger.debug("-Writing new node name") + res._nodes[config.IMPLICIT_NODE_ID] = range(len(res._nodes)) + res = res.nodes( # type: ignore - res._nodes.reset_index(drop=True) - .reset_index() - .rename(columns={"index": config.IMPLICIT_NODE_ID}), + res._nodes, config.IMPLICIT_NODE_ID, ) res._nodes.index = index From e2f2b6cf1d6abeb22954b495c681e27629c69408 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar Date: Wed, 2 Aug 2023 21:22:02 +0530 Subject: [PATCH 2/4] added umap_learn and cuml tests --- graphistry/tests/test_umap_utils.py | 36 +++++++++++++++++++++++++++++ graphistry/umap_utils.py | 13 +++++++++++ 2 files changed, 49 insertions(+) diff --git a/graphistry/tests/test_umap_utils.py b/graphistry/tests/test_umap_utils.py index a16fdddcc0..5fa3f78ea3 100644 --- a/graphistry/tests/test_umap_utils.py +++ b/graphistry/tests/test_umap_utils.py @@ -74,6 +74,24 @@ node_numeric = node_ints + node_floats node_target = triangleNodes[["y"]] +node_graph_with_index = pd.DataFrame( + { + "index": range(1, 13), + "a": ["a", "b", "c", "d"] * 3, + "b": ["w", "x", "y", "z"] * 3, + } +) + +edge_graph_with_index = pd.DataFrame( + { + "index": range(1, 13), + "a": ["a", "b", "c", "d"] * 3, + "b": ["w", "x", "y", "z"] * 3, + "src": [1, 2, 3, 4] * 3, + "dst": [4, 3, 1, 2] * 3, + } +) + def _eq(df1, df2): try: df1 = df1.to_pandas() @@ -150,6 +168,15 @@ def setUp(self): ) self.g2e = g2 + # graph with index + self.g_index_nodes = graphistry.nodes(node_graph_with_index) + self.g_index_nodes_umaped = self.g_index_nodes.umap(engine="umap_learn") + assert "_n" == self.g_index_nodes_umaped._node + + self.g_index_edges = graphistry.nodes(edge_graph_with_index) + self.g_index_edges_umaped = self.g_index_edges.umap(engine="umap_learn") + assert "_n" == self.g_index_edges_umaped._node + @pytest.mark.skipif(not has_umap, reason="requires umap feature dependencies") def test_columns_match(self): @@ -810,6 +837,15 @@ def test_base(self): graphistry.nodes(self.df).umap('auto')._node_embedding.shape == (self.samples, 2) graphistry.nodes(self.df).umap('engine')._node_embedding.shape == (self.samples, 2) + # graph with index + self.g_index_nodes = graphistry.nodes(node_graph_with_index) + self.g_index_nodes_umaped = self.g_index_nodes.umap(engine="cuml") + assert "_n" == self.g_index_nodes_umaped._node + + self.g_index_edges = graphistry.nodes(edge_graph_with_index) + self.g_index_edges_umaped = self.g_index_edges.umap(engine="cuml") + assert "_n" == self.g_index_edges_umaped._node + if __name__ == "__main__": unittest.main() diff --git a/graphistry/umap_utils.py b/graphistry/umap_utils.py index 7e9c2ffac8..69c375a897 100644 --- a/graphistry/umap_utils.py +++ b/graphistry/umap_utils.py @@ -587,6 +587,18 @@ def umap( if kind == "nodes": index = res._nodes.index + + if res._node is None: + logger.debug("-Writing new node name") + res = res.nodes( # type: ignore + res._nodes.reset_index(drop=True) + .reset_index() + .rename(columns={"index": config.IMPLICIT_NODE_ID}), + config.IMPLICIT_NODE_ID, + ) + res._nodes.index = index + + ''' if res._node is None: logger.debug("-Writing new node name") res._nodes[config.IMPLICIT_NODE_ID] = range(len(res._nodes)) @@ -596,6 +608,7 @@ def umap( config.IMPLICIT_NODE_ID, ) res._nodes.index = index + ''' nodes = res._nodes[res._node].values From 6141cd32902c2a6153eb11702a05e723b42833f8 Mon Sep 17 00:00:00 2001 From: Tanmoy Sarkar Date: Wed, 2 Aug 2023 21:45:10 +0530 Subject: [PATCH 3/4] removed old index --- graphistry/umap_utils.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/graphistry/umap_utils.py b/graphistry/umap_utils.py index 69c375a897..9b73c6c01b 100644 --- a/graphistry/umap_utils.py +++ b/graphistry/umap_utils.py @@ -588,17 +588,6 @@ def umap( if kind == "nodes": index = res._nodes.index - if res._node is None: - logger.debug("-Writing new node name") - res = res.nodes( # type: ignore - res._nodes.reset_index(drop=True) - .reset_index() - .rename(columns={"index": config.IMPLICIT_NODE_ID}), - config.IMPLICIT_NODE_ID, - ) - res._nodes.index = index - - ''' if res._node is None: logger.debug("-Writing new node name") res._nodes[config.IMPLICIT_NODE_ID] = range(len(res._nodes)) @@ -608,7 +597,6 @@ def umap( config.IMPLICIT_NODE_ID, ) res._nodes.index = index - ''' nodes = res._nodes[res._node].values From aca1c0cbdda9852efeacba9b643232f51355c817 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 5 Oct 2023 12:03:13 +0200 Subject: [PATCH 4/4] lint --- graphistry/embed_utils.py | 2 +- graphistry/nodexlistry.py | 6 +++--- graphistry/tests/test_tigergraph.py | 4 ++-- graphistry/umap_utils.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/graphistry/embed_utils.py b/graphistry/embed_utils.py index 9e64fdfa10..4155ed6e9f 100644 --- a/graphistry/embed_utils.py +++ b/graphistry/embed_utils.py @@ -542,7 +542,7 @@ def fetch_triplets_for_inference(x_r): def _score(self, triplets: Union[np.ndarray, TT]) -> TT: # type: ignore _, torch, _, _, _, _, _, _ = lazy_embed_import_dep() emb = self._kg_embeddings.clone().detach() - if type(triplets) != torch.Tensor: + if type(triplets) is not torch.Tensor: triplets = torch.tensor(triplets) score = self._embed_model.score(emb, triplets) prob = torch.sigmoid(score) diff --git a/graphistry/nodexlistry.py b/graphistry/nodexlistry.py index 24ce7985de..eb005c56a5 100644 --- a/graphistry/nodexlistry.py +++ b/graphistry/nodexlistry.py @@ -132,13 +132,13 @@ def xls(self, xls_or_url, source='default', verbose=None): p = print if verbose else (lambda x: 1) # source is either undefined, a string, or a (partial) bindings object - if type(source) == str and source not in self.source_to_mappings: + if type(source) is str and source not in self.source_to_mappings: p('Unknown source type', source) raise Exception('Unknown nodexl source type %s' % str(source)) - bindings = self.source_to_mappings[source] if type(source) == str else source + bindings = self.source_to_mappings[source] if type(source) is str else source p('Fetching...') - xls = pd.ExcelFile(xls_or_url) if type(xls_or_url) == str else xls_or_url + xls = pd.ExcelFile(xls_or_url) if type(xls_or_url) is str else xls_or_url p('Formatting edges') edges_df = self.xls_to_edges_df(xls, bindings['edges_df_transformer']) diff --git a/graphistry/tests/test_tigergraph.py b/graphistry/tests/test_tigergraph.py index 71a7ddf950..fffd53b0cf 100644 --- a/graphistry/tests/test_tigergraph.py +++ b/graphistry/tests/test_tigergraph.py @@ -7,7 +7,7 @@ class TestTiger(NoAuthTestCase): def test_tg_init_plain(self): tg = graphistry.tigergraph() - self.assertTrue(type(tg) == graphistry.plotter.Plotter) + self.assertTrue(type(tg) is graphistry.plotter.Plotter) def test_tg_init_many(self): tg = graphistry.tigergraph( @@ -20,7 +20,7 @@ def test_tg_init_many(self): pwd="tigergraph2", verbose=False, ) - self.assertTrue(type(tg) == graphistry.plotter.Plotter) + self.assertTrue(type(tg) is graphistry.plotter.Plotter) def test_tg_endpoint_url_simple(self): tg = graphistry.tigergraph( diff --git a/graphistry/umap_utils.py b/graphistry/umap_utils.py index 9b73c6c01b..0dc0784991 100644 --- a/graphistry/umap_utils.py +++ b/graphistry/umap_utils.py @@ -720,7 +720,7 @@ def _bind_xy_from_umap( else: emb = res._edge_embedding - if type(df) == type(emb): + if type(df) is type(emb): df[x_name] = emb.values.T[0] df[y_name] = emb.values.T[1] elif isinstance(df, pd.DataFrame) and 'cudf.core.dataframe' in str(getmodule(emb)):