MaartenGr
diff --git a/.flake8 b/.flake8
Lines changed: 2 additions & 0 deletions
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/bertopic/__init__.py b/bertopic/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/bertopic/_bertopic.py b/bertopic/_bertopic.py
Lines changed: 86 additions & 52 deletions
diff --git a/bertopic/_utils.py b/bertopic/_utils.py
Lines changed: 3 additions & 0 deletions
diff --git a/bertopic/backend/_gensim.py b/bertopic/backend/_gensim.py
Lines changed: 3 additions & 3 deletions
diff --git a/bertopic/plotting/_barchart.py b/bertopic/plotting/_barchart.py
Lines changed: 4 additions & 2 deletions
diff --git a/bertopic/plotting/_heatmap.py b/bertopic/plotting/_heatmap.py
Lines changed: 4 additions & 2 deletions
diff --git a/bertopic/plotting/_hierarchy.py b/bertopic/plotting/_hierarchy.py
Lines changed: 5 additions & 3 deletions
diff --git a/bertopic/plotting/_topics.py b/bertopic/plotting/_topics.py
Lines changed: 4 additions & 2 deletions
@@ -0,0 +1,2 @@
+[flake8]
+max-line-length = 160
@@ -75,3 +75,4 @@ venv.bak/
 
 .idea
 .idea/
+.vscode
@@ -1,6 +1,6 @@
 from bertopic._bertopic import BERTopic
 
-__version__ = "0.9.4"
+__version__ = "0.10.0"
 
 __all__ = [
     "BERTopic",
 
@@ -53,10 +53,13 @@ def check_embeddings_shape(embeddings, docs):
 
 def check_is_fitted(model):
     """ Checks if the model was fitted by verifying the presence of self.matches
+
     Arguments:
         model: BERTopic instance for which the check is performed.
+
     Returns:
         None
+
     Raises:
         ValueError: If the matches were not found.
     """
 
@@ -48,8 +48,8 @@ def embed(self,
             Document/words embeddings with shape (n, m) with `n` documents/words
             that each have an embeddings size of `m`
         """
-        vector_shape = self.embedding_model.word_vec(list(self.embedding_model.vocab.keys())[0]).shape
-        empty_vector = np.zeros(vector_shape[0])
+        vector_shape = self.embedding_model.get_vector(list(self.embedding_model.index_to_key)[0]).shape[0]
+        empty_vector = np.zeros(vector_shape)
 
         embeddings = []
         for doc in tqdm(documents, disable=not verbose, position=0, leave=True):
@@ -58,7 +58,7 @@ def embed(self,
             # Extract word embeddings
             for word in doc.split(" "):
                 try:
-                    word_embedding = self.embedding_model.word_vec(word)
+                    word_embedding = self.embedding_model.get_vector(word)
                     doc_embedding.append(word_embedding)
                 except KeyError:
                     doc_embedding.append(empty_vector)
 
@@ -46,12 +46,14 @@ def visualize_barchart(topic_model,
     colors = itertools.cycle(["#D55E00", "#0072B2", "#CC79A7", "#E69F00", "#56B4E9", "#009E73", "#F0E442"])
 
     # Select topics based on top_n and topics args
+    freq_df = topic_model.get_topic_freq()
+    freq_df = freq_df.loc[freq_df.Topic != -1, :]
     if topics is not None:
         topics = list(topics)
     elif top_n_topics is not None:
-        topics = topic_model.get_topic_freq().Topic.to_list()[1:top_n_topics + 1]
+        topics = sorted(freq_df.Topic.to_list()[:top_n_topics])
     else:
-        topics = topic_model.get_topic_freq().Topic.to_list()[1:7]
+        topics = sorted(freq_df.Topic.to_list()[0:6])
 
     # Initialize figure
     subplot_titles = [f"Topic {topic}" for topic in topics]
 
@@ -56,12 +56,14 @@ def visualize_heatmap(topic_model,
         embeddings = topic_model.c_tf_idf
 
     # Select topics based on top_n and topics args
+    freq_df = topic_model.get_topic_freq()
+    freq_df = freq_df.loc[freq_df.Topic != -1, :]
     if topics is not None:
         topics = list(topics)
     elif top_n_topics is not None:
-        topics = sorted(topic_model.get_topic_freq().Topic.to_list()[1:top_n_topics + 1])
+        topics = sorted(freq_df.Topic.to_list()[:top_n_topics])
     else:
-        topics = sorted(list(topic_model.get_topics().keys()))
+        topics = sorted(freq_df.Topic.to_list())
 
     # Order heatmap by similar clusters of topics
     if n_clusters:
 
@@ -57,12 +57,14 @@ def visualize_hierarchy(topic_model,
         embeddings = topic_model.c_tf_idf
 
     # Select topics based on top_n and topics args
+    freq_df = topic_model.get_topic_freq()
+    freq_df = freq_df.loc[freq_df.Topic != -1, :]
     if topics is not None:
-        topics = sorted(list(topics))
+        topics = list(topics)
     elif top_n_topics is not None:
-        topics = sorted(topic_model.get_topic_freq().Topic.to_list()[1:top_n_topics + 1])
+        topics = sorted(freq_df.Topic.to_list()[:top_n_topics])
     else:
-        topics = sorted(list(topic_model.get_topics().keys()))
+        topics = sorted(freq_df.Topic.to_list())
 
     # Select embeddings
     all_topics = sorted(list(topic_model.get_topics().keys()))
 
@@ -43,12 +43,14 @@ def visualize_topics(topic_model,
     style="width:1000px; height: 680px; border: 0px;"></iframe>
     """
     # Select topics based on top_n and topics args
+    freq_df = topic_model.get_topic_freq()
+    freq_df = freq_df.loc[freq_df.Topic != -1, :]
     if topics is not None:
         topics = list(topics)
     elif top_n_topics is not None:
-        topics = sorted(topic_model.get_topic_freq().Topic.to_list()[1:top_n_topics + 1])
+        topics = sorted(freq_df.Topic.to_list()[:top_n_topics])
     else:
-        topics = sorted(list(topic_model.get_topics().keys()))
+        topics = sorted(freq_df.Topic.to_list())
 
     # Extract topic words and their frequencies
     topic_list = sorted(topics)
Original file line number	Diff line number	Diff line change
`@@ -75,3 +75,4 @@ venv.bak/`
`75`	`75`
`76`	`76`	`.idea`
`77`	`77`	`.idea/`
	`78`	`+.vscode`