diff --git a/src/topicgpt/TopicGPT.py b/src/topicgpt/TopicGPT.py index 331017a..2e30856 100644 --- a/src/topicgpt/TopicGPT.py +++ b/src/topicgpt/TopicGPT.py @@ -23,12 +23,12 @@ def __init__(self, api_key: str = "", azure_endpoint: dict = {}, n_topics: int = None, - openai_prompting_model: str = "gpt-3.5-turbo-16k", + openai_prompting_model: str = "gpt-4o-mini", max_number_of_tokens: int = 16384, corpus_instruction: str = "", document_embeddings: np.ndarray = None, vocab_embeddings: dict[str, np.ndarray] = None, - embedding_model: str = "text-embedding-ada-002", + embedding_model: str = "text-embedding-3-small", max_number_of_tokens_embedding: int = 8191, use_saved_embeddings: bool = True, path_saved_embeddings: str = embeddings_path, diff --git a/src/topicgpt/TopicRepresentation.py b/src/topicgpt/TopicRepresentation.py index 89b152a..7cad155 100644 --- a/src/topicgpt/TopicRepresentation.py +++ b/src/topicgpt/TopicRepresentation.py @@ -298,7 +298,7 @@ def extract_topics_no_new_vocab_computation(corpus: list[str], vocab: list[str], centroid_arr = np.array(list(centroid_dict.values())) if centroid_arr.ndim == 1: centroid_arr = centroid_arr.reshape(-1, 1) - dim_red_centroids = umap_mapper.transform(np.array(list(centroid_dict.values()))) # map the centroids to low dimensional space + dim_red_centroids = umap_mapper.transform(centroid_arr) # map the centroids to low dimensional space dim_red_centroid_dict = {label: centroid for label, centroid in zip(centroid_dict.keys(), dim_red_centroids)}