Pull request #48 · base: master · Changes from 1 commit
The first file changed is the LINE model implementation:

```diff
@@ -30,11 +30,11 @@
 from ..utils import preprocess_nxgraph


-def line_loss(y_true, y_pred):
+def line_loss(y_true, y_pred):  # no problem
     return -K.mean(K.log(K.sigmoid(y_true*y_pred)))


-def create_model(numNodes, embedding_size, order='second'):
+def create_model(numNodes, embedding_size, order='second'):  # no problem

     v_i = Input(shape=(1,))
     v_j = Input(shape=(1,))
@@ -48,12 +48,18 @@ def create_model(numNodes, embedding_size, order='second'):

     v_i_emb_second = second_emb(v_i)
     v_j_context_emb = context_emb(v_j)

-    first = Lambda(lambda x: tf.reduce_sum(
-        x[0]*x[1], axis=-1, keep_dims=False), name='first_order')([v_i_emb, v_j_emb])
-    second = Lambda(lambda x: tf.reduce_sum(
-        x[0]*x[1], axis=-1, keep_dims=False), name='second_order')([v_i_emb_second, v_j_context_emb])
+    try:
+        first = Lambda(lambda x: tf.reduce_sum(
+            x[0]*x[1], axis=-1, keepdims=False), name='first_order')([v_i_emb, v_j_emb])
+    except TypeError:
+        first = Lambda(lambda x: tf.reduce_sum(
+            x[0]*x[1], axis=-1, keep_dims=False), name='first_order')([v_i_emb, v_j_emb])
+    try:
+        second = Lambda(lambda x: tf.reduce_sum(
+            x[0]*x[1], axis=-1, keepdims=False), name='second_order')([v_i_emb_second, v_j_context_emb])
+    except TypeError:
+        second = Lambda(lambda x: tf.reduce_sum(
+            x[0]*x[1], axis=-1, keep_dims=False), name='second_order')([v_i_emb_second, v_j_context_emb])
     if order == 'first':
         output_list = [first]
     elif order == 'second':
```
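The two near-identical `try/except` blocks could be factored into one helper. This is a minimal sketch, not part of the PR; `reduce_sum_compat` is a hypothetical name, and it assumes the only incompatibility being papered over is the `keep_dims` → `keepdims` rename (TensorFlow 1.5 introduced `keepdims`):

```python
import tensorflow as tf

def reduce_sum_compat(x, axis=-1):
    # Hypothetical helper, not in the PR: sum over `axis`, tolerating the
    # keep_dims -> keepdims kwarg rename across TensorFlow versions.
    try:
        return tf.reduce_sum(x, axis=axis, keepdims=False)   # TF >= 1.5
    except TypeError:                                         # older TF only knows keep_dims
        return tf.reduce_sum(x, axis=axis, keep_dims=False)

# first = Lambda(lambda x: reduce_sum_compat(x[0] * x[1]),
#                name='first_order')([v_i_emb, v_j_emb])
```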
```diff
@@ -205,9 +211,13 @@ def get_embeddings(self,):

         return self._embeddings

-    def train(self, batch_size=1024, epochs=1, initial_epoch=0, verbose=1, times=1):
+    def train(self, batch_size=1024, epochs=1, initial_epoch=0, verbose=1, times=1,
+              workers=tf.data.experimental.AUTOTUNE, use_multiprocessing=True):

         self.reset_training_config(batch_size, times)
-        hist = self.model.fit_generator(self.batch_it, epochs=epochs, initial_epoch=initial_epoch,
-                                        steps_per_epoch=self.steps_per_epoch, verbose=verbose)
+        try:
+            hist = self.model.fit_generator(self.batch_it, epochs=epochs, initial_epoch=initial_epoch,
+                                            steps_per_epoch=self.steps_per_epoch, verbose=verbose,
+                                            workers=workers, use_multiprocessing=use_multiprocessing)
+        except AttributeError:  # fit_generator was removed from newer Keras/TF
+            hist = self.model.fit(self.batch_it, epochs=epochs, initial_epoch=initial_epoch,
+                                  steps_per_epoch=self.steps_per_epoch, verbose=verbose,
+                                  workers=workers, use_multiprocessing=use_multiprocessing)

         return hist
```
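Catching the exception works, but probing for the method first avoids swallowing unrelated training errors (the PR originally used a bare `except:`). A sketch of the same fallback as a drop-in replacement for the `try/except` in `train()` above, assuming `self.model` is a Keras `Model` and `self.batch_it` is the training generator as in the diff:

```python
# Hypothetical refactor, not part of the PR: prefer fit_generator when it
# still exists, otherwise fall back to Model.fit, without a broad except.
fit_fn = getattr(self.model, "fit_generator", self.model.fit)
hist = fit_fn(self.batch_it, epochs=epochs, initial_epoch=initial_epoch,
              steps_per_epoch=self.steps_per_epoch, verbose=verbose,
              workers=workers, use_multiprocessing=use_multiprocessing)
```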
The second file changed is the node2vec implementation:

```diff
@@ -1,70 +1,82 @@
 # -*- coding:utf-8 -*-

-"""
-Author:
-    Weichen Shen,wcshen1994@163.com
-Reference:
-    [1] Grover A, Leskovec J. node2vec: Scalable feature learning for networks[C]//Proceedings of the 22nd ACM SIGKDD international conference on Knowledge discovery and data mining. ACM, 2016: 855-864. (https://www.kdd.org/kdd2016/papers/files/rfp0218-groverA.pdf)
-"""
```
> **Owner:** Why was this whole block deleted?

> **Collaborator (Author):** When I made the changes I pasted my code straight in, and it got replaced by accident...
```diff
-from gensim.models import Word2Vec
 import pandas as pd
 import networkx as nx
+import csrgraph as cg
+import gc
+import numba
+import time
+import numpy as np
+from gensim.models import word2vec

-from ..walker import RandomWalker
-

 class Node2Vec:

-    def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=0):
-
-        self.graph = graph
-        self._embeddings = {}
-        self.walker = RandomWalker(
-            graph, p=p, q=q, use_rejection_sampling=use_rejection_sampling)
-
-        print("Preprocess transition probs...")
-        self.walker.preprocess_transition_probs()
-
-        self.sentences = self.walker.simulate_walks(
-            num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1)
+    def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, threads=1):
+
+        if type(threads) is not int:
+            raise ValueError('threads must be int!')
+        if walk_length < 1:
+            raise ValueError('walk_length must be >= 1')
+        if num_walks < 1:
+            raise ValueError('num_walks must be >= 1')
+        if type(walk_length) is not int or type(num_walks) is not int:
+            raise ValueError('walk_length and num_walks must be int')
+
+        self.walk_length = walk_length
+        self.num_walks = num_walks
+        self.p = p
+        self.q = q
+        self.threads = threads
+        # TODO: numba-based use_rejection_sampling
+
+        if not isinstance(graph, cg.csrgraph):
+            self.graph = cg.csrgraph(graph, threads=self.threads)
+        else:
+            self.graph = graph  # without this branch self.graph would stay unset for csrgraph inputs
+        if self.graph.threads != self.threads:
+            self.graph.set_threads(self.threads)
+        self.node_names = self.graph.names
+        if type(self.node_names[0]) not in [int, str, np.int32, np.uint32,
+                                            np.int64, np.uint64]:
+            raise ValueError("Graph node names must be int or str!")
```
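For context, csrgraph stores the graph as a CSR matrix and runs its walk generation on multiple threads. A minimal sketch of the constructor path the `__init__` above relies on, assuming csrgraph's documented behavior that `cg.csrgraph` accepts a networkx graph and preserves node labels in `.names`:

```python
import networkx as nx
import csrgraph as cg

G_nx = nx.karate_club_graph()        # toy graph; any networkx graph works
G = cg.csrgraph(G_nx, threads=4)     # CSR-backed copy; walks run on 4 threads
print(G.names[:5])                   # node labels carried over from networkx
```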
```diff
-    def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs):
+    def train(self, embed_size=128, window_size=5, workers=3, iters=5, **kwargs):
+        print('Start making random walks...')
+        start = time.time()
+        # gensim's Word2Vec appears to accept only lists of strings
+        self.sentences = self.graph.random_walks(walklen=self.walk_length, epochs=self.num_walks,
+                                                 return_weight=self.p, neighbor_weight=self.q).astype(str).tolist()
+        end = time.time()
+        print('Random walks took ' + str(end - start) + ' seconds')
```
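csrgraph returns the walks as a dense integer matrix, which is why the diff stringifies it before handing it to gensim. A sketch of that intermediate, assuming `random_walks` returns one row per walk with `walklen` node indices per row and `epochs` walks started from each node:

```python
import networkx as nx
import csrgraph as cg

G = cg.csrgraph(nx.karate_club_graph())
walks = G.random_walks(walklen=10, epochs=2, return_weight=1.0, neighbor_weight=1.0)
print(walks.shape)                       # e.g. (n_nodes * epochs, 10): integer node indices
sentences = walks.astype(str).tolist()   # gensim Word2Vec expects iterables of string tokens
```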
```diff
-        kwargs["sentences"] = self.sentences
         kwargs["min_count"] = kwargs.get("min_count", 0)
         kwargs["size"] = embed_size
         kwargs["sg"] = 1
-        kwargs["hs"] = 0  # node2vec not use Hierarchical Softmax
+        kwargs["hs"] = 0  # node2vec does not use hierarchical softmax
         kwargs["workers"] = workers
         kwargs["window"] = window_size
-        kwargs["iter"] = iter
+        kwargs["iter"] = iters

         print("Learning embedding vectors...")
-        model = Word2Vec(**kwargs)
+        # pass sentences directly rather than through kwargs, to avoid copying self.sentences and save memory
+        model = word2vec.Word2Vec(sentences=self.sentences, **kwargs)
         print("Learning embedding vectors done!")

         self.w2v_model = model
+        # map stringified walk indices back to the original node names
+        self.node_dict = dict(zip(np.arange(len(self.node_names)).astype(str), self.node_names))

         return model
```
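Note that these kwargs target the gensim 3.x API. Under gensim 4.x, `size` and `iter` were renamed (`vector_size` and `epochs`, a documented 4.0 change); a hedged sketch of a version guard that could sit just before the `Word2Vec(...)` call, not part of the PR:

```python
import gensim

# Hypothetical compatibility shim: remap gensim 3.x parameter names for 4.x.
if int(gensim.__version__.split(".")[0]) >= 4:
    kwargs["vector_size"] = kwargs.pop("size")  # gensim 4.x renamed size -> vector_size
    kwargs["epochs"] = kwargs.pop("iter")       # ... and iter -> epochs
```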
```diff
     def get_embeddings(self,):
         if self.w2v_model is None:
             print("model not train")
             return {}

         self._embeddings = {}
-        for word in self.graph.nodes():
-            self._embeddings[word] = self.w2v_model.wv[word]
+        for word in self.node_dict.keys():
+            # look up by the walk token (stringified index); store under the original node name
+            self._embeddings[self.node_dict[word]] = self.w2v_model.wv[word]

         return self._embeddings
```
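For completeness, a sketch of how the revised class would be driven end to end. Hypothetical usage, assuming `Node2Vec` is importable as defined in this diff and the input is a networkx graph:

```python
import networkx as nx

# Small random graph as stand-in input.
G = nx.fast_gnp_random_graph(n=1000, p=0.01)

model = Node2Vec(G, walk_length=10, num_walks=80, p=0.25, q=4.0, threads=4)
model.train(embed_size=128, window_size=5, workers=4, iters=5)
embeddings = model.get_embeddings()   # dict: original node name -> 128-d vector
```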