Skip to content

Commit 6bab09e

Browse files
authored
Leave input data of Modality unchanged (#205)
1 parent d51e116 commit 6bab09e

File tree

3 files changed

+21
-19
lines changed

3 files changed

+21
-19
lines changed

cornac/data/modality.py

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -49,19 +49,13 @@ class FeatureModality(Modality):
4949
ids: List, default = None
5050
List of user/item ids whose indices are aligned with `features`.
5151
If None, the indices of provided `features` will be used as `ids`.
52-
53-
copy: bool, default = False
54-
Whether or not to make a copy of the input features array and leave it unchanged during manipulation.
55-
If `False`, rows of the input feature array will be swapped if needed when building the modality.
5652
"""
5753

58-
def __init__(self, features=None, ids=None, copy=False, normalized=False, **kwargs):
54+
def __init__(self, features=None, ids=None, normalized=False, **kwargs):
5955
super().__init__(**kwargs)
6056
self.features = features
61-
self._ids = ids
62-
self._normalized = normalized
63-
if copy and features is not None:
64-
self.features = np.copy(features)
57+
self.ids = ids
58+
self.normalized = normalized
6559

6660
@property
6761
def features(self):
@@ -82,13 +76,17 @@ def feature_dim(self):
8276
return self.features.shape[1]
8377

8478
def _swap_feature(self, id_map):
85-
for old_idx, raw_id in enumerate(self._ids.copy()):
79+
new_feats = np.copy(self.features)
80+
new_ids = self.ids.copy()
81+
for old_idx, raw_id in enumerate(self.ids):
8682
new_idx = id_map.get(raw_id, None)
8783
if new_idx is None:
8884
continue
89-
assert new_idx < self.features.shape[0]
90-
self.features[[new_idx, old_idx]] = self.features[[old_idx, new_idx]]
91-
self._ids[old_idx], self._ids[new_idx] = self._ids[new_idx], self._ids[old_idx]
85+
assert new_idx < new_feats.shape[0]
86+
new_feats[new_idx] = self.features[old_idx]
87+
new_ids[new_idx] = raw_id
88+
self.features = new_feats
89+
self.ids = new_ids
9290

9391
def build(self, id_map=None):
9492
"""Build the feature matrix.
@@ -97,10 +95,10 @@ def build(self, id_map=None):
9795
if self.features is None:
9896
return
9997

100-
if (self._ids is not None) and (id_map is not None):
98+
if (self.ids is not None) and (id_map is not None):
10199
self._swap_feature(id_map)
102100

103-
if self._normalized:
101+
if self.normalized:
104102
self.features = self.features - np.min(self.features)
105103
self.features = self.features / (np.max(self.features) + 1e-10)
106104

cornac/data/text.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -599,21 +599,25 @@ def __init__(self,
599599
self.count_matrix = None
600600

601601
def _swap_text(self, id_map: Dict):
602-
for old_idx, raw_id in enumerate(self._ids.copy()):
602+
new_corpus = self.corpus.copy()
603+
new_ids = self.ids.copy()
604+
for old_idx, raw_id in enumerate(self.ids):
603605
new_idx = id_map.get(raw_id, None)
604606
if new_idx is None:
605607
continue
606608
assert new_idx < len(self.corpus)
607-
self.corpus[old_idx], self.corpus[new_idx] = self.corpus[new_idx], self.corpus[old_idx]
608-
self._ids[old_idx], self._ids[new_idx] = self._ids[new_idx], self._ids[old_idx]
609+
new_corpus[new_idx] = self.corpus[old_idx]
610+
new_ids[new_idx] = raw_id
611+
self.corpus = new_corpus
612+
self.ids = new_ids
609613

610614
def _build_text(self, id_map: Dict):
611615
"""Build the text based on provided global id map
612616
"""
613617
if self.corpus is None:
614618
return
615619

616-
if (self._ids is not None) and (id_map is not None):
620+
if (self.ids is not None) and (id_map is not None):
617621
self._swap_text(id_map)
618622

619623
vectorizer = CountVectorizer(tokenizer=self.tokenizer, vocab=self.vocab,

0 commit comments

Comments
 (0)