diff --git a/.travis.yml b/.travis.yml index a502794..dd7aa64 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,7 @@ language: python python: - "2.7" + - "3.4" sudo: false @@ -24,6 +25,8 @@ before_install: - pip install cython - pip install numpy - travis_wait pip install scipy + # setup.py import mrec and install + - pip install "six>=1.9" # This is a library, not an application. # So we do not have a requirements.txt diff --git a/doc/conf.py b/doc/conf.py index e2b3948..bb45d0f 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -11,6 +11,8 @@ # All configuration values have a default; values that are commented out # serve to show the default. +from __future__ import print_function + import sys, os # If extensions (or modules to document with autodoc) are in another directory, @@ -51,10 +53,10 @@ try: release = pkg_resources.get_distribution('mrec').version except pkg_resources.DistributionNotFound: - print 'To build the documentation, The distribution information of mrec' - print 'has to be available. Either install the package into your' - print 'development environment or run "python setup.py develop" to setup' - print 'the metadata.' + print('To build the documentation, The distribution information of mrec') + print('has to be available. Either install the package into your') + print('development environment or run "python setup.py develop" to setup') + print('the metadata.') sys.exit(1) del pkg_resources version = '.'.join(release.split('.')[:2]) diff --git a/doc/hybrid.rst b/doc/hybrid.rst index 8cc7509..c3f1773 100644 --- a/doc/hybrid.rst +++ b/doc/hybrid.rst @@ -39,7 +39,7 @@ The resulting features are simply `tf-idf counts >> for i in xrange(3): + >>> for i in range(3): ... for tfidf,word in sorted(zip(features[i].data,features[i].indices),reverse=True)[:3]: ... print '{0}\t{1}\t{2:.3f}'.format(i,word,tfidf) ... 
diff --git a/mrec/__init__.py b/mrec/__init__.py index dece6e3..22b48cb 100644 --- a/mrec/__init__.py +++ b/mrec/__init__.py @@ -1,4 +1,8 @@ -from itertools import izip +from __future__ import absolute_import, print_function +try: + from itertools import izip as zip +except ImportError: + pass import numpy as np from scipy.sparse import coo_matrix, csr_matrix from scipy.io import mmread, mmwrite @@ -7,8 +11,8 @@ except ImportError: import pickle -from sparse import fast_sparse_matrix, loadtxt, loadz, savez -from base_recommender import BaseRecommender +from .sparse import fast_sparse_matrix, loadtxt, loadz, savez +from .base_recommender import BaseRecommender __version__ = '0.3.1' @@ -89,13 +93,13 @@ def save_sparse_matrix(data,fmt,filepath): if fmt == 'tsv': m = data.tocoo() with open(filepath,'w') as out: - for u,i,v in izip(m.row,m.col,m.data): - print >>out,'{0}\t{1}\t{2}'.format(u+1,i+1,v) + for u,i,v in zip(m.row,m.col,m.data): + print('{0}\t{1}\t{2}'.format(u+1,i+1,v), file=out) elif fmt == 'csv': m = data.tocoo() with open(filepath,'w') as out: - for u,i,v in izip(m.row,m.col,m.data): - print >>out,'{0},{1},{2}'.format(u+1,i+1,v) + for u,i,v in zip(m.row,m.col,m.data): + print('{0},{1},{2}'.format(u+1,i+1,v), file=out) elif fmt == 'mm': mmwrite(filepath,data) elif fmt == 'npz': diff --git a/mrec/base_recommender.py b/mrec/base_recommender.py index ef5333f..3f843fb 100644 --- a/mrec/base_recommender.py +++ b/mrec/base_recommender.py @@ -1,7 +1,9 @@ +from __future__ import print_function try: import cPickle as pickle except ImportError: import pickle +from six.moves import xrange import numpy as np from scipy.sparse import csr_matrix @@ -86,7 +88,7 @@ def save(self,filepath): if archive: np.savez(filepath,**archive) else: - pickle.dump(self,open(filepath,'w')) + pickle.dump(self,open(filepath,'wb')) def _create_archive(self): """ @@ -117,7 +119,7 @@ def load(filepath): if isinstance(r,BaseRecommender): model = r else: - model = np.loads(str(r['model'])) + 
model = np.loads(r['model']) model._load_archive(r) # restore any fields serialized separately return model @@ -148,7 +150,7 @@ def read_recommender_description(filepath): if isinstance(r,BaseRecommender): model = r else: - model = np.loads(str(r['model'])) + model = np.loads(r['model']) return str(model) def __str__(self): @@ -192,10 +194,10 @@ def batch_recommend_items(self, recs = [] for u in xrange(self.num_users): if show_progress and u%1000 == 0: - print u,'..', + print(u, '..', end=' ') recs.append(self.recommend_items(dataset,u,max_items,return_scores)) if show_progress: - print + print() return recs def range_recommend_items(self, diff --git a/mrec/evaluation/metrics.py b/mrec/evaluation/metrics.py index ec5a787..d6c76d9 100644 --- a/mrec/evaluation/metrics.py +++ b/mrec/evaluation/metrics.py @@ -3,7 +3,8 @@ * with hit rate, following e.g. Karypis lab SLIM and FISM papers * with prec@k and MRR """ - +from __future__ import print_function +from six.moves import xrange import numpy as np from scipy import stats from collections import defaultdict @@ -62,8 +63,8 @@ def run_evaluation(models,retrain,get_split,num_runs,evaluation_func): for i,model in enumerate(models): retrain(model,train) run_metrics = evaluation_func(model,train,users,test) - for m,val in run_metrics.iteritems(): - print m,val + for m,val in run_metrics.items(): + print(m, val) metrics[i][m].append(val) return metrics @@ -83,10 +84,10 @@ def sort_metrics_by_name(names): prefix2val[name].append(val) else: prefix2val[name] = [] - for name,vals in prefix2val.iteritems(): + for name,vals in prefix2val.items(): prefix2val[name] = sorted(vals) ret = [] - for name,vals in sorted(prefix2val.iteritems()): + for name,vals in sorted(prefix2val.items()): if vals: for val in vals: ret.append('{0}@{1}'.format(name,val)) @@ -99,15 +100,15 @@ def print_report(models,metrics): Call this to print out the metrics returned by run_evaluation(). 
""" for model,results in zip(models,metrics): - print model + print(model) if hasattr(model,'similarity_matrix'): nnz = model.similarity_matrix.nnz num_items = model.similarity_matrix.shape[0] density = float(model.similarity_matrix.nnz)/num_items**2 - print 'similarity matrix nnz = {0} (density {1:.3f})'.format(nnz,density) + print('similarity matrix nnz = {0} (density {1:.3f})'.format(nnz,density)) for m in sort_metrics_by_name(results.keys()): vals = results[m] - print '{0}{1:.4f} +/- {2:.4f}'.format(m.ljust(15),np.mean(vals),stats.sem(vals,ddof=0)) + print('{0}{1:.4f} +/- {2:.4f}'.format(m.ljust(15),np.mean(vals),stats.sem(vals,ddof=0))) def evaluate(model,train,users,get_known_items,compute_metrics): avg_metrics = defaultdict(float) @@ -116,7 +117,7 @@ def evaluate(model,train,users,get_known_items,compute_metrics): recommended = [r for r,_ in model.recommend_items(train,u,max_items=20)] metrics = compute_metrics(recommended,get_known_items(u)) if metrics: - for m,val in metrics.iteritems(): + for m,val in metrics.items(): avg_metrics[m] += val count += 1 for m in avg_metrics: diff --git a/mrec/evaluation/tests/test_metrics.py b/mrec/evaluation/tests/test_metrics.py index d0b9bab..bc71854 100644 --- a/mrec/evaluation/tests/test_metrics.py +++ b/mrec/evaluation/tests/test_metrics.py @@ -1,3 +1,4 @@ +from six.moves import xrange from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises diff --git a/mrec/examples/convert.py b/mrec/examples/convert.py index 53442f5..9993452 100644 --- a/mrec/examples/convert.py +++ b/mrec/examples/convert.py @@ -1,6 +1,7 @@ """ Convert sparse matrix from one file format to another. 
""" +from __future__ import print_function import os import subprocess @@ -18,8 +19,8 @@ def tsv2mtx(infile,outfile): nnz += 1 headerfile = outfile+'.header' with open(headerfile,'w') as header: - print >>header,'%%MatrixMarket matrix coordinate real general' - print >>header,'{0} {1} {2}'.format(num_users,num_items,nnz) + print('%%MatrixMarket matrix coordinate real general', file=header) + print('{0} {1} {2}'.format(num_users,num_items,nnz), file=header) subprocess.check_call(['cat',headerfile,infile],stdout=open(outfile,'w')) subprocess.check_call(['rm',headerfile]) diff --git a/mrec/examples/predict.py b/mrec/examples/predict.py index 6b8ab1d..2b55d9b 100644 --- a/mrec/examples/predict.py +++ b/mrec/examples/predict.py @@ -19,6 +19,7 @@ from shutil import rmtree import logging from collections import defaultdict +from six.moves import xrange from mrec import load_sparse_matrix, read_recommender_description, load_recommender from mrec.parallel import predict @@ -82,7 +83,7 @@ def process(view,opts,modelfile,trainfile,testfile,featurefile,outdir,evaluator) tot_count = 0 for results in processed: for cum_metrics,count in results: - for m,val in cum_metrics.iteritems(): + for m,val in cum_metrics.items(): avg_metrics[m] += val tot_count += count for m in avg_metrics: diff --git a/mrec/examples/prepare.py b/mrec/examples/prepare.py index a7fe6e0..3158927 100644 --- a/mrec/examples/prepare.py +++ b/mrec/examples/prepare.py @@ -1,3 +1,6 @@ +from __future__ import print_function +from six.moves import xrange + class Processor(object): def __init__(self,splitter,parser,min_items_per_user,preprocess=None): @@ -8,7 +11,7 @@ def __init__(self,splitter,parser,min_items_per_user,preprocess=None): def output(self,user,vals,outfile): for v,c in vals: - print >>outfile,'{0}\t{1}\t{2}'.format(user,v,c) + print('{0}\t{1}\t{2}'.format(user,v,c), file=outfile) def handle(self,user,vals): if len(vals) >= self.min_items_per_user: diff --git a/mrec/examples/tune_slim.py 
b/mrec/examples/tune_slim.py index 45a9762..aaa2eaf 100644 --- a/mrec/examples/tune_slim.py +++ b/mrec/examples/tune_slim.py @@ -2,7 +2,9 @@ Try to find a sensible range for regularization constants for SLIM by looking at model sparsity. """ +from __future__ import print_function +from six.moves import xrange import random from math import log10 import logging @@ -91,12 +93,12 @@ def main(): if candidates: best = min(candidates,key=itemgetter(1)) - print 'best parameter setting: {0}'.format(best[0]) - print 'mean # positive similarity weights per item = {0:.3}'.format(best[1]) - print 'proportion of items with fewer than {0} positive similarity weights = {1:.3}'.format(opts.min_sims,best[2]) - print 'mean # negative similarity weights per item = {0:.3}'.format(best[3]) + print('best parameter setting: {0}'.format(best[0])) + print('mean # positive similarity weights per item = {0:.3}'.format(best[1])) + print('proportion of items with fewer than {0} positive similarity weights = {1:.3}'.format(opts.min_sims,best[2])) + print('mean # negative similarity weights per item = {0:.3}'.format(best[3])) else: - print 'no parameter settings satisfied the conditions, try increasing --min_sims, --max_sims or --max_sparse' + print('no parameter settings satisfied the conditions, try increasing --min_sims, --max_sims or --max_sparse') if __name__ == '__main__': main() diff --git a/mrec/item_similarity/knn.py b/mrec/item_similarity/knn.py index 542dbda..eae5674 100644 --- a/mrec/item_similarity/knn.py +++ b/mrec/item_similarity/knn.py @@ -2,10 +2,12 @@ Brute-force k-nearest neighbour recommenders intended to provide evaluation baselines. 
""" +from __future__ import absolute_import, print_function +from six.moves import xrange import numpy as np from sklearn.metrics.pairwise import cosine_similarity -from recommender import ItemSimilarityRecommender +from mrec.item_similarity.recommender import ItemSimilarityRecommender class KNNRecommender(ItemSimilarityRecommender): """ @@ -79,12 +81,12 @@ def __str__(self): # use knn models like this: import random - import StringIO + from io import BytesIO from mrec import load_fast_sparse_matrix random.seed(0) - print 'loading test data...' + print('loading test data...') data = """\ %%MatrixMarket matrix coordinate real general 3 5 9 @@ -98,8 +100,8 @@ def __str__(self): 3 3 1 3 4 1 """ - print data - dataset = load_fast_sparse_matrix('mm',StringIO.StringIO(data)) + print(data) + dataset = load_fast_sparse_matrix('mm', BytesIO(data.encode('ascii'))) num_users,num_items = dataset.shape model = CosineKNNRecommender(k=2) @@ -108,32 +110,32 @@ def __str__(self): def output(i,j,val): # convert back to 1-indexed - print '{0}\t{1}\t{2:.3f}'.format(i+1,j+1,val) + print('{0}\t{1}\t{2:.3f}'.format(i+1,j+1,val)) - print 'computing some item similarities...' - print 'item\tsim\tweight' + print('computing some item similarities...') + print('item\tsim\tweight') # if we want we can compute these individually without calling fit() for i in random.sample(xrange(num_items),num_samples): for j,weight in model.get_similar_items(i,max_similar_items=2,dataset=dataset): output(i,j,weight) - print 'learning entire similarity matrix...' + print('learning entire similarity matrix...') # more usually we just call train() on the entire dataset model = CosineKNNRecommender(k=2) model.fit(dataset) - print 'making some recommendations...' 
- print 'user\trec\tscore' + print('making some recommendations...') + print('user\trec\tscore') for u in random.sample(xrange(num_users),num_samples): for i,score in model.recommend_items(dataset.X,u,max_items=10): output(u,i,score) - print 'making batch recommendations...' + print('making batch recommendations...') recs = model.batch_recommend_items(dataset.X) for u in xrange(num_users): for i,score in recs[u]: output(u,i,score) - print 'making range recommendations...' + print('making range recommendations...') for start,end in [(0,2),(2,3)]: recs = model.range_recommend_items(dataset.X,start,end) for u in xrange(start,end): diff --git a/mrec/item_similarity/precomputed.py b/mrec/item_similarity/precomputed.py index f083434..d59e869 100644 --- a/mrec/item_similarity/precomputed.py +++ b/mrec/item_similarity/precomputed.py @@ -1,8 +1,8 @@ """ Make recommendations from a precomputed item similarity matrix. """ - -from recommender import ItemSimilarityRecommender +from __future__ import absolute_import +from .recommender import ItemSimilarityRecommender class PrecomputedItemSimilarityRecommender(ItemSimilarityRecommender): """ diff --git a/mrec/item_similarity/recommender.py b/mrec/item_similarity/recommender.py index 4199b5d..4c6f729 100644 --- a/mrec/item_similarity/recommender.py +++ b/mrec/item_similarity/recommender.py @@ -1,13 +1,17 @@ """ Base class for item similarity recommenders. 
""" - +from __future__ import print_function +from six.moves import xrange try: import cPickle as pickle except ImportError: import pickle import numpy as np -from itertools import izip +try: + from itertools import izip as zip +except ImportError: + pass from operator import itemgetter from scipy.sparse import csr_matrix, coo_matrix @@ -304,12 +308,12 @@ def _get_recommendations_from_predictions(self,r,dataset,user_start,user_end,max for u in xrange(user_start,user_end): ux = u - user_start if show_progress and ux%1000 == 0: - print ux,'..', + print(ux, '..',) ru = r[ux,:] if return_scores: - recs[ux] = [(i,v) for v,i in sorted(izip(ru.data,ru.indices),reverse=True) if v > 0][:max_items] + recs[ux] = [(i,v) for v,i in sorted(zip(ru.data,ru.indices),reverse=True) if v > 0][:max_items] else: - recs[ux] = [i for v,i in sorted(izip(ru.data,ru.indices),reverse=True) if v > 0][:max_items] + recs[ux] = [i for v,i in sorted(zip(ru.data,ru.indices),reverse=True) if v > 0][:max_items] if show_progress: - print + print() return recs diff --git a/mrec/item_similarity/slim.py b/mrec/item_similarity/slim.py index 2cf698a..8f13c9c 100644 --- a/mrec/item_similarity/slim.py +++ b/mrec/item_similarity/slim.py @@ -10,19 +10,15 @@ X. Ning and G. Karypis, ICDM 2011. 
http://glaros.dtc.umn.edu/gkhome/fetch/papers/SLIM2011icdm.pdf """ - +from __future__ import print_function, absolute_import +from distutils.version import LooseVersion +from six.moves import xrange from sklearn.linear_model import SGDRegressor, ElasticNet from sklearn.preprocessing import binarize import sklearn import numpy as np -from recommender import ItemSimilarityRecommender - - -def parse_version(version_string): - if '-' in version_string: - version_string = version_string.split('-', 1)[0] - return tuple(map(int, version_string.split('.'))) +from mrec.item_similarity.recommender import ItemSimilarityRecommender class NNFeatureSelectingSGDRegressor(object): @@ -77,7 +73,7 @@ def __init__(self, model='sgd'): alpha = l1_reg+l2_reg l1_ratio = l1_reg/alpha - if parse_version(sklearn.__version__) <= (0, 14, 1): + if LooseVersion(sklearn.__version__) <= LooseVersion('0.14.1'): # Backward compat: in old versions of scikit-learn l1_ratio had # the opposite sign... l1_ratio = (1 - l1_ratio) @@ -121,12 +117,12 @@ def __str__(self): # use SLIM like this: import random - import StringIO + from io import BytesIO from mrec import load_fast_sparse_matrix random.seed(0) - print 'loading test data...' + print('loading test data...') data = """\ %%MatrixMarket matrix coordinate real general 3 5 9 @@ -140,8 +136,8 @@ def __str__(self): 3 3 1 3 4 1 """ - print data - dataset = load_fast_sparse_matrix('mm',StringIO.StringIO(data)) + print(data) + dataset = load_fast_sparse_matrix('mm', BytesIO(data.encode('ascii'))) num_users,num_items = dataset.shape model = SLIM() @@ -150,32 +146,32 @@ def __str__(self): def output(i,j,val): # convert back to 1-indexed - print '{0}\t{1}\t{2:.3f}'.format(i+1,j+1,val) + print('{0}\t{1}\t{2:.3f}'.format(i+1,j+1,val)) - print 'computing some item similarities...' 
- print 'item\tsim\tweight' + print('computing some item similarities...') + print('item\tsim\tweight') # if we want we can compute these individually without calling fit() for i in random.sample(xrange(num_items),num_samples): for j,weight in model.get_similar_items(i,max_similar_items=10,dataset=dataset): output(i,j,weight) - print 'learning entire similarity matrix...' + print('learning entire similarity matrix...') # usually we'll call train() on the entire dataset model = SLIM() model.fit(dataset) - print 'making some recommendations...' - print 'user\trec\tscore' + print('making some recommendations...') + print('user\trec\tscore') for u in random.sample(xrange(num_users),num_samples): for i,score in model.recommend_items(dataset.X,u,max_items=10): output(u,i,score) - print 'making batch recommendations...' + print('making batch recommendations...') recs = model.batch_recommend_items(dataset.X) for u in xrange(num_users): for i,score in recs[u]: output(u,i,score) - print 'making range recommendations...' 
+ print('making range recommendations...') for start,end in [(0,2),(2,3)]: recs = model.range_recommend_items(dataset.X,start,end) for u in xrange(start,end): diff --git a/mrec/mf/climf.py b/mrec/mf/climf.py index 61ba395..9552762 100644 --- a/mrec/mf/climf.py +++ b/mrec/mf/climf.py @@ -9,7 +9,8 @@ Yue Shi, Martha Larson, Alexandros Karatzoglou, Nuria Oliver, Linas Baltrunas, Alan Hanjalic ACM RecSys 2012 """ - +from __future__ import print_function +from six.moves import xrange from math import exp, log import random import numpy as np @@ -42,8 +43,8 @@ def fit(self,data): # TODO: create a validation set for iter in xrange(self.max_iters): - print 'iteration {0}:'.format(iter+1) - print 'objective = {0:.4f}'.format(self.objective(data)) + print('iteration {0}:'.format(iter+1)) + print('objective = {0:.4f}'.format(self.objective(data))) self.update(data) # TODO: compute MRR on validation set, terminate if appropriate @@ -137,8 +138,8 @@ def compute_mrr(self,data,test_users=None): found = True break if not found: - print 'fail, no relevant items predicted for test user {0}'.format(i+1) - print 'known items: {0}'.format(items) + print('fail, no relevant items predicted for test user {0}'.format(i+1)) + print('known items: {0}'.format(items)) assert(len(mrr) == len(test_users)) return np.mean(mrr) diff --git a/mrec/mf/evaluate.py b/mrec/mf/evaluate.py index 02c0794..2d331d6 100644 --- a/mrec/mf/evaluate.py +++ b/mrec/mf/evaluate.py @@ -1,3 +1,5 @@ +from __future__ import print_function + def retrain_recommender(model,dataset): model.fit(dataset.X) @@ -8,7 +10,7 @@ def retrain_recommender(model,dataset): except ImportError: from sklearn.grid_search import IterGrid as ParameterGrid from optparse import OptionParser - from warp import WARPMFRecommender + from mrec.mf.warp import WARPMFRecommender from mrec.evaluation.metrics import * @@ -22,7 +24,7 @@ def retrain_recommender(model,dataset): parser.print_help() raise SystemExit - print 'doing a grid search for regularization 
parameters...' + print('doing a grid search for regularization parameters...') params = {'d':[100],'gamma':[0.01],'C':[100],'max_iter':[100000],'validation_iters':[500]} models = [WARPMFRecommender(**a) for a in ParameterGrid(params)] @@ -31,6 +33,7 @@ def retrain_recommender(model,dataset): # load em both up # put them into something that returns train,test.keys(),test in a generator() # test is a dict id->[id,id,...] + pass if opts.main_split_dir: generate_main_metrics = generate_metrics(get_known_items_from_dict,compute_main_metrics) diff --git a/mrec/mf/model/warp.py b/mrec/mf/model/warp.py index 0465343..a59b35b 100644 --- a/mrec/mf/model/warp.py +++ b/mrec/mf/model/warp.py @@ -1,6 +1,11 @@ +from __future__ import print_function import numpy as np import random -from itertools import izip +from six.moves import xrange +try: + from itertools import izip as zip +except ImportError: + pass from mrec.evaluation import metrics @@ -192,13 +197,13 @@ def _fit(self,decomposition,updates,train,validation): tot_trials = 0 for it in xrange(self.max_iters): if it % self.validation_iters == 0: - print 'tot_trials',tot_trials + print('tot_trials',tot_trials) tot_trials = 0 prec = self.estimate_precision(decomposition,train,validation) precs.append(prec) - print '{0}: validation precision = {1:.3f}'.format(it,precs[-1]) + print('{0}: validation precision = {1:.3f}'.format(it,precs[-1])) if len(precs) > 3 and precs[-1] < precs[-2] and precs[-2] < precs[-3]: - print 'validation precision got worse twice, terminating' + print('validation precision got worse twice, terminating') break tot_trials += self.compute_updates(train,decomposition,updates) decomposition.apply_updates(updates,self.gamma,self.C) @@ -280,7 +285,7 @@ def estimate_precision(self,decomposition,train,validation,k=30): r = decomposition.reconstruct(rows) prec = 0 - for u,ru in izip(rows,r): + for u,ru in zip(rows,r): predicted = ru.argsort()[::-1][:k] if have_validation_set: actual = validation[u] diff --git 
a/mrec/mf/model/warp2.py b/mrec/mf/model/warp2.py index 66a5925..fba4407 100644 --- a/mrec/mf/model/warp2.py +++ b/mrec/mf/model/warp2.py @@ -2,8 +2,8 @@ import scipy import random -from warp import WARPBatchUpdate, WARPDecomposition, WARP -from warp_fast import warp2_sample +from .warp import WARPBatchUpdate, WARPDecomposition, WARP +from .warp_fast import warp2_sample class WARP2BatchUpdate(WARPBatchUpdate): """Collection of arrays to hold a batch of sgd updates.""" diff --git a/mrec/mf/model/warp_fast.pyx b/mrec/mf/model/warp_fast.pyx index e4b8417..8e1f7cf 100644 --- a/mrec/mf/model/warp_fast.pyx +++ b/mrec/mf/model/warp_fast.pyx @@ -121,7 +121,7 @@ cdef sample_violating_negative_example(np.ndarray[np.float_t,ndim=2] U, num_items = V.shape[0] r = U[u].dot(V[i]) - for N in xrange(1,max_trials): + for N in range(1,max_trials): # find j!=i s.t. data[u,j] < data[u,i] j = sample_negative_example(num_items,vals,indices,begin,end,ix) if r - U[u].dot(V[j]) < 1: @@ -166,7 +166,7 @@ cdef sample_negative_example(num_items, # sample item uniformly with replacement j = rand() % num_items found = 0 - for jx in xrange(begin,end): + for jx in range(begin,end): if indices[jx] == j: found = 1 break @@ -235,14 +235,14 @@ def apply_updates(np.ndarray[np.float_t,ndim=2] F, C : float The regularization constant. """ - + cdef unsigned int i, num cdef float p assert(rows.shape[0] == deltas.shape[0]) num = rows.shape[0] - for i in xrange(num): + for i in range(num): row = rows[i] delta = deltas[i] F[row] += gamma*delta @@ -379,7 +379,7 @@ cdef sample_violating_negative_example2(np.ndarray[np.float_t,ndim=2] U, XW = sparse_sdot(xbuf,W,X,i,is_sparse) r = U[u].dot(V[i] + XW) - for N in xrange(1,max_trials): + for N in range(1,max_trials): # find j!=i s.t. 
data[u,j] < data[u,i] j = sample_negative_example(num_items,vals,indices,begin,end,ix) XW = sparse_sdot(xbuf,W,X,j,is_sparse) @@ -399,10 +399,10 @@ cdef sparse_sdot(np.ndarray[np.float_t,ndim=1] xbuf, if is_sparse: # TODO: surely there's something built in to do this... - for ix in xrange(X.indptr[i],X.indptr[i+1]): + for ix in range(X.indptr[i],X.indptr[i+1]): xbuf[X.indices[ix]] = X.data[ix] XW = xbuf.dot(W) - for ix in xrange(X.indptr[i],X.indptr[i+1]): + for ix in range(X.indptr[i],X.indptr[i+1]): xbuf[X.indices[ix]] = 0 else: XW = X[i].dot(W) diff --git a/mrec/mf/recommender.py b/mrec/mf/recommender.py index f7e422c..2a029a2 100644 --- a/mrec/mf/recommender.py +++ b/mrec/mf/recommender.py @@ -2,13 +2,14 @@ Base class for recommenders that work by matrix factorization. """ +from __future__ import print_function +from six.moves import xrange try: import cPickle as pickle except ImportError: import pickle import numpy as np -from itertools import izip from scipy.sparse import csr_matrix from mrec.base_recommender import BaseRecommender @@ -252,12 +253,12 @@ def _get_recommendations_from_predictions(self, for u in xrange(user_start,user_end): ux = u - user_start if show_progress and ux%1000 == 0: - print ux,'..', + print(ux,'..',end=' ') ru = r[ux] if return_scores: recs[ux] = [(i,ru[i]) for i in ru.argsort()[::-1] if ru[i] > 0][:max_items] else: recs[ux] = [i for i in ru.argsort()[::-1] if ru[i] > 0][:max_items] if show_progress: - print + print() return recs diff --git a/mrec/mf/warp.py b/mrec/mf/warp.py index 94b0346..4f69c7a 100644 --- a/mrec/mf/warp.py +++ b/mrec/mf/warp.py @@ -1,10 +1,12 @@ +from __future__ import print_function import numpy as np import random +from six.moves import xrange from mrec.evaluation import metrics -from recommender import MatrixFactorizationRecommender -from model.warp import WARP +from .recommender import MatrixFactorizationRecommender +from .model.warp import WARP class WARPMFRecommender(MatrixFactorizationRecommender): """ @@ -81,9 
+83,9 @@ def create_validation_set(self,train): # and reasonable number of validation cycles max_iters = 30*validation_iters - print num_validation_users,'validation users' - print validation_iters,'validation iters' - print max_iters,'max_iters' + print(num_validation_users,'validation users') + print(validation_iters,'validation iters') + print(max_iters,'max_iters') validation = dict() for u in xrange(num_validation_users): diff --git a/mrec/mf/warp2.py b/mrec/mf/warp2.py index 3e4be69..f177638 100644 --- a/mrec/mf/warp2.py +++ b/mrec/mf/warp2.py @@ -1,7 +1,8 @@ +from __future__ import absolute_import import numpy as np -from warp import WARPMFRecommender -from model.warp2 import WARP2 +from .warp import WARPMFRecommender +from .model.warp2 import WARP2 class WARP2MFRecommender(WARPMFRecommender): """ diff --git a/mrec/mf/wrmf.py b/mrec/mf/wrmf.py index 725b05c..1e40d85 100644 --- a/mrec/mf/wrmf.py +++ b/mrec/mf/wrmf.py @@ -7,6 +7,8 @@ R. Pan et al., One-class collaborative filtering, ICDM 2008. 
http://www.hpl.hp.com/techreports/2008/HPL-2008-48R1.pdf """ +from __future__ import print_function +from six.moves import xrange import numpy as np from scipy.sparse import csr_matrix @@ -62,7 +64,7 @@ def fit(self,train,item_features=None): self.U = self.init_factors(num_users,False) # don't need values, will compute them self.V = self.init_factors(num_items) for it in xrange(self.num_iters): - print 'iteration',it + print('iteration',it) # fit user factors VV = self.V.T.dot(self.V) for u in xrange(num_users): diff --git a/mrec/parallel/item_similarity.py b/mrec/parallel/item_similarity.py index 239912e..4670d6c 100644 --- a/mrec/parallel/item_similarity.py +++ b/mrec/parallel/item_similarity.py @@ -1,3 +1,4 @@ +from __future__ import print_function import math import glob import re @@ -5,6 +6,7 @@ import subprocess from shutil import rmtree import logging +from six.moves import xrange from mrec import load_sparse_matrix, save_recommender @@ -110,7 +112,7 @@ def process(task): for j in xrange(start,end): w = model.get_similar_items(j,max_similar_items=max_similar_items,dataset=dataset) for k,v in w: - print >>out,'{0}\t{1}\t{2}'.format(j+1,k+1,v) # write as 1-indexed + print('{0}\t{1}\t{2}'.format(j+1,k+1,v), file=out) # write as 1-indexed out.close() # record success diff --git a/mrec/parallel/predict.py b/mrec/parallel/predict.py index e9d5b40..0f3acb7 100644 --- a/mrec/parallel/predict.py +++ b/mrec/parallel/predict.py @@ -1,6 +1,8 @@ """ Prediction task to run on an ipython engine. 
""" +from __future__ import print_function +from six.moves import xrange def run(task): @@ -35,7 +37,7 @@ def run(task): recs = model.range_recommend_items(dataset,start,end,max_items=20,return_scores=True) for u,items in zip(xrange(start,end),recs): for i,w in items: - print >>out,'{0}\t{1}\t{2}'.format(u+1,i+1,w) # write as 1-indexed + print('{0}\t{1}\t{2}'.format(u+1,i+1,w), file=out) # write as 1-indexed out.close() # record success diff --git a/mrec/parallel/warp.py b/mrec/parallel/warp.py index 840ff56..fae407a 100644 --- a/mrec/parallel/warp.py +++ b/mrec/parallel/warp.py @@ -5,6 +5,7 @@ from shutil import rmtree import logging import numpy as np +from six.moves import xrange from mrec import save_recommender, load_recommender diff --git a/mrec/parallel/wrmf.py b/mrec/parallel/wrmf.py index e2d0fc5..60a0f63 100644 --- a/mrec/parallel/wrmf.py +++ b/mrec/parallel/wrmf.py @@ -5,6 +5,7 @@ from shutil import rmtree import math import numpy as np +from six.moves import xrange from mrec import load_sparse_matrix, save_recommender diff --git a/mrec/popularity.py b/mrec/popularity.py index 9c04ee2..bb5f30d 100644 --- a/mrec/popularity.py +++ b/mrec/popularity.py @@ -2,11 +2,12 @@ Trivial unpersonalized item popularity recommender intended to provide a baseline for evaluations. """ - +from __future__ import absolute_import +from six.moves import xrange import numpy as np -from base_recommender import BaseRecommender -from sparse import fast_sparse_matrix +from .base_recommender import BaseRecommender +from .sparse import fast_sparse_matrix class ItemPopularityRecommender(BaseRecommender): """ diff --git a/mrec/reranking_recommender.py b/mrec/reranking_recommender.py index ade5912..cf9e0fd 100644 --- a/mrec/reranking_recommender.py +++ b/mrec/reranking_recommender.py @@ -2,14 +2,14 @@ Recommender that gets candidates using an item similarity model and then reranks them using a matrix factorization model. 
""" - +from __future__ import absolute_import try: import cPickle as pickle except ImportError: import pickle import numpy as np -from base_recommender import BaseRecommender +from .base_recommender import BaseRecommender class RerankingRecommender(BaseRecommender): """ diff --git a/mrec/sparse.py b/mrec/sparse.py index b08541e..8cfa2cb 100644 --- a/mrec/sparse.py +++ b/mrec/sparse.py @@ -1,7 +1,7 @@ """ Sparse data structures and convenience methods to load sparse matrices from file. """ - +from six.moves import xrange import random import numpy as np from scipy.sparse import csr_matrix, coo_matrix diff --git a/mrec/testing.py b/mrec/testing.py index 75c5945..e438f8f 100644 --- a/mrec/testing.py +++ b/mrec/testing.py @@ -1,3 +1,4 @@ +from six.moves import xrange import random import numpy as np from scipy.sparse import coo_matrix diff --git a/mrec/tests/test_base_recommender.py b/mrec/tests/test_base_recommender.py index a75dea9..3ccff83 100644 --- a/mrec/tests/test_base_recommender.py +++ b/mrec/tests/test_base_recommender.py @@ -1,3 +1,4 @@ +from six.moves import xrange try: import cPickle as pickle except ImportError: diff --git a/mrec/tests/test_sparse.py b/mrec/tests/test_sparse.py index 1b5f931..3203cb1 100644 --- a/mrec/tests/test_sparse.py +++ b/mrec/tests/test_sparse.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from six.moves import xrange import tempfile import os from sklearn.utils.testing import assert_equal @@ -16,7 +18,7 @@ def test_loadtxt(): f,path = tempfile.mkstemp(suffix='.npz') with open(path,'w') as f: for i,j,v in zip(X.row,X.col,X.data): - print >>f,'{0}\t{1}\t{2}'.format(i+1,j+1,v) + print('{0}\t{1}\t{2}'.format(i+1,j+1,v), file=f) Y = loadtxt(path) os.remove(path) assert_sparse_matrix_equal(X,Y) diff --git a/setup.py b/setup.py index 2447a0f..ecf2616 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,8 @@ 'scikit-learn', 'ipython <= 4.0.0', 'cython', - 'psutil'], + 'psutil', + 'six>=1.9'], entry_points={ 'console_scripts':[ 
'mrec_prepare = mrec.examples.prepare:main',