Skip to content

Commit d8f30da

Browse files
author
yuyang18
committed
Add PyDataProvider2 DataConverter to swig api.
* fix recommendation prediction also. ISSUE=4561941 git-svn-id: https://svn.baidu.com/idl/trunk/paddle@1449 1ad973e4-5ce8-4261-8a94-b56d1f490c56
1 parent 8fe4a33 commit d8f30da

File tree

7 files changed: +60 -27 lines changed

demo/recommendation/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,3 +7,4 @@ data/train.list
77
data/test.list
88
dataprovider_copy_1.py
99
*.pyc
10+
output

demo/recommendation/prediction.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16-
from py_paddle import swig_paddle, DataProviderWrapperConverter
16+
from py_paddle import swig_paddle, DataProviderConverter
1717

1818
from common_utils import *
1919
from paddle.trainer.config_parser import parse_config
@@ -31,11 +31,11 @@
3131
network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
3232
assert isinstance(network, swig_paddle.GradientMachine)
3333
network.loadParameters(model_path)
34-
with open('meta.bin', 'rb') as f:
34+
with open('./data/meta.bin', 'rb') as f:
3535
meta = pickle.load(f)
3636
headers = list(meta_to_header(meta, 'movie'))
3737
headers.extend(list(meta_to_header(meta, 'user')))
38-
cvt = DataProviderWrapperConverter(True, map(lambda x: x[1], headers))
38+
cvt = DataProviderConverter(headers)
3939
while True:
4040
movie_id = int(raw_input("Input movie_id: "))
4141
user_id = int(raw_input("Input user_id: "))
@@ -45,7 +45,5 @@
4545
data.extend(movie_meta)
4646
data.append(user_id - 1)
4747
data.extend(user_meta)
48-
data = map(lambda (header, val): val if header[0] else [val],
49-
zip(headers, data))
50-
print "Prediction Score is %.2f" % ((network.forwardTest(cvt([
51-
data]))[0]['value'][0][0] + 5) / 2)
48+
print "Prediction Score is %.2f" % ((network.forwardTest(
49+
cvt.convert([data]))[0]['value'][0][0] + 5) / 2)

demo/recommendation/trainer_config.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -61,9 +61,7 @@ def construct_feature(name):
6161
slot_dim = each_meta['max']
6262
embedding = embedding_layer(input=data_layer(slot_name,
6363
size=slot_dim),
64-
size=256,
65-
param_attr=ParamAttr(
66-
sparse_update=True))
64+
size=256)
6765
fusion.append(fc_layer(input=embedding,
6866
size=256))
6967
elif type_name == 'embedding':

paddle/api/Arguments.cpp

Lines changed: 24 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -102,8 +102,23 @@ static inline void doCopyFromSafely(std::shared_ptr<T1>& dest,
102102
IVector* Arguments::getSlotSequenceStartPositions(size_t idx) const
103103
throw(RangeError) {
104104
auto& a = m->getArg(idx);
105-
return IVector::createByPaddleVectorPtr(
106-
&a.sequenceStartPositions->getMutableVector(false));
105+
if (a.sequenceStartPositions) {
106+
return IVector::createByPaddleVectorPtr(
107+
&a.sequenceStartPositions->getMutableVector(false));
108+
} else {
109+
return nullptr;
110+
}
111+
}
112+
113+
IVector*Arguments::getSlotSubSequenceStartPositions(size_t idx) const
114+
throw (RangeError){
115+
auto& a = m->getArg(idx);
116+
if (a.subSequenceStartPositions) {
117+
return IVector::createByPaddleVectorPtr(
118+
&a.subSequenceStartPositions->getMutableVector(false));
119+
} else {
120+
return nullptr;
121+
}
107122
}
108123

109124
void Arguments::setSlotSequenceStartPositions(size_t idx,
@@ -113,6 +128,13 @@ void Arguments::setSlotSequenceStartPositions(size_t idx,
113128
a.sequenceStartPositions = std::make_shared<paddle::ICpuGpuVector>(v);
114129
}
115130

131+
void Arguments::setSlotSubSequenceStartPositions(
132+
size_t idx, IVector *vec) throw (RangeError) {
133+
auto& a = m->getArg(idx);
134+
auto& v = m->cast<paddle::IVector>(vec->getSharedPtr());
135+
a.subSequenceStartPositions = std::make_shared<paddle::ICpuGpuVector>(v);
136+
}
137+
116138
IVector* Arguments::getSlotSequenceDim(size_t idx) const throw(RangeError) {
117139
auto& a = m->getArg(idx);
118140
return IVector::createByPaddleVectorPtr(&a.cpuSequenceDims);

paddle/api/PaddleAPI.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -374,6 +374,7 @@ class Arguments {
374374
IVector* getSlotIds(size_t idx) const throw(RangeError);
375375
Matrix* getSlotIn(size_t idx) const throw(RangeError);
376376
IVector* getSlotSequenceStartPositions(size_t idx) const throw(RangeError);
377+
IVector* getSlotSubSequenceStartPositions(size_t idx) const throw(RangeError);
377378
IVector* getSlotSequenceDim(size_t idx) const throw(RangeError);
378379
// End Of get functions of Arguments
379380

@@ -390,6 +391,8 @@ class Arguments {
390391
void setSlotIds(size_t idx, IVector* vec) throw(RangeError);
391392
void setSlotSequenceStartPositions(size_t idx,
392393
IVector* vec) throw(RangeError);
394+
void setSlotSubSequenceStartPositions(size_t idx,
395+
IVector* vec) throw (RangeError);
393396
void setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError);
394397

395398
private:

paddle/py_paddle/__init__.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,12 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import swig_paddle
16-
import util
1715
from util import DataProviderWrapperConverter
16+
from dataprovider_converter import DataProviderConverter
1817

19-
__all__ = ['paddle', 'DataProviderWrapperConverter', 'loadParameterFile']
20-
18+
__all__ = ['paddle',
19+
'DataProviderConverter',
20+
'DataProviderWrapperConverter', # for deprecated usage.
21+
'loadParameterFile']
2122
util.monkeypatches()
23+

paddle/py_paddle/util.py

Lines changed: 20 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -209,6 +209,7 @@ def getLayerOutputs(self, layerNames):
209209

210210
swig_paddle.GradientMachine.getLayerOutputs = getLayerOutputs
211211

212+
212213
def loadGradientMachine(config_filename, model_dir=None):
213214
"""
214215
Load a gradient machine from config file name/path.
@@ -229,6 +230,7 @@ def loadGradientMachine(config_filename, model_dir=None):
229230
network.loadParameters(model_dir)
230231
return network
231232

233+
232234
def loadParameterFile(fn):
233235
"""
234236
Load Paddle Parameter file to numpy.ndarray
@@ -257,6 +259,7 @@ def loadParameterFile(fn):
257259
else:
258260
raise swig_paddle.UnsupportError()
259261

262+
260263
class DataProviderWrapperConverter(object):
261264
"""
262265
A class convert DataFormat from PyDataProvider Wrapper to
@@ -278,8 +281,8 @@ def append(self, other):
278281

279282
def __call__(self, slot_idx, arg):
280283
mat = swig_paddle.Matrix.createDense(self.buf,
281-
len(self.buf) / self.__dim__,
282-
self.__dim__)
284+
len(self.buf) / self.__dim__,
285+
self.__dim__)
283286
arg.setSlotValue(slot_idx, mat)
284287

285288
class IdValueConverter(object):
@@ -312,8 +315,9 @@ def append(self, other):
312315
self.cols += other
313316

314317
def __call__(self, slot_idx, arg):
315-
mat = swig_paddle.Matrix.createSparse(len(self.indices) - 1, self.dim,
316-
len(self.cols), True)
318+
mat = swig_paddle.Matrix.createSparse(len(self.indices) - 1,
319+
self.dim,
320+
len(self.cols), True)
317321
assert isinstance(mat, swig_paddle.Matrix)
318322
mat.sparseCopyFrom(self.indices, self.cols)
319323
self.putIntoArg(slot_idx, arg, mat)
@@ -337,8 +341,9 @@ def append(self, other):
337341
self.values += map(lambda x: x[1], other)
338342

339343
def __call__(self, slot_idx, arg):
340-
mat = swig_paddle.Matrix.createSparse(len(self.indices) - 1, self.dim,
341-
len(self.cols), False)
344+
mat = swig_paddle.Matrix.createSparse(len(self.indices) - 1,
345+
self.dim,
346+
len(self.cols), False)
342347
assert isinstance(mat, swig_paddle.Matrix)
343348
mat.sparseCopyFrom(self.indices, self.cols, self.values)
344349
self.putIntoArg(slot_idx, arg, mat)
@@ -373,7 +378,7 @@ def convert(self, wrapper_data, argument=None):
373378
"""
374379
if argument is None:
375380
argument = swig_paddle.Arguments.createArguments(0)
376-
assert isinstance(argument,swig_paddle.Arguments)
381+
assert isinstance(argument, swig_paddle.Arguments)
377382
argument.resize(len(self.__header__))
378383

379384
values = map(lambda x:
@@ -394,10 +399,12 @@ def convert(self, wrapper_data, argument=None):
394399
seq_dim[slot_idx].append(len(sequence))
395400

396401
for slot_idx in xrange(len(self.__header__)):
397-
argument.setSlotSequenceDim(slot_idx, swig_paddle.IVector.create(
398-
seq_dim[slot_idx]))
402+
argument.setSlotSequenceDim(slot_idx,
403+
swig_paddle.IVector.create(
404+
seq_dim[slot_idx]))
399405
argument.setSlotSequenceStartPositions(
400-
slot_idx, swig_paddle.IVector.create(seq_start_pos[slot_idx]))
406+
slot_idx,
407+
swig_paddle.IVector.create(seq_start_pos[slot_idx]))
401408
else:
402409
for each_sample in wrapper_data:
403410
for raw_data, value in zip(each_sample, values):
@@ -415,6 +422,7 @@ def __call__(self, wrapper_data, argument=None):
415422
return self.convert(wrapper_data, argument)
416423

417424

425+
418426
def __monkey_patch_protobuf_objects__():
419427
def ParameterConfig_toProto(self):
420428
"""
@@ -451,7 +459,8 @@ def OptimizationConfig_createFromProto(protoObj):
451459
:return: paddle.OptimizationConfig
452460
"""
453461

454-
assert isinstance(protoObj, paddle.proto.TrainerConfig_pb2.OptimizationConfig)
462+
assert isinstance(protoObj,
463+
paddle.proto.TrainerConfig_pb2.OptimizationConfig)
455464
return swig_paddle.OptimizationConfig.createFromProtoString(
456465
protoObj.SerializeToString())
457466

0 commit comments

Comments
 (0)