Skip to content

Commit f6348e7

Browse files
author
Mark Hall
committed
A number of bug fixes/improvements. 1. Moved to CE weka-stable 3.8.5-snapshot. 2. Fixed a bug in getFields() that could affect the ordering of class label-specific evaluation metrics. 3. Fixed a bug in PMI scoring that would cause an NPE when there were no incoming rows to score. 4. Fixed a bug, inadvertently introduced in the 1.5 release, that prevented scheme configuration sub-dialogs from displaying correctly. 5. Fixed a bug that prevented the clearing of options for a given scheme when moving from the implementation in one engine (with options) to an implementation in another engine (with no user options) - mainly affected switching from non-Weka naive Bayes multinomial to Weka's naive Bayes multinomial. 6. Fixed a bug that resulted in the Keras engine being an option for support vector classifier, logistic regression and linear regression.
1 parent a421b64 commit f6348e7

File tree

9 files changed

+104
-86
lines changed

9 files changed

+104
-86
lines changed

assembly.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
<outputDirectory>lib</outputDirectory>
3030
<includes>
3131
<include>kfKettle-${weka.kfkettle.version}.jar</include>
32-
<include>pdm-ce-${weka.version}.jar</include>
32+
<include>weka-stable-${weka.version}.jar</include>
3333
<include>timeseriesForecasting-${weka.timeseries.version}.jar</include>
3434
<include>xpp3_min-${xpp-min.version}.jar</include>
3535
</includes>

lib/weka-stable-3.8.5.jar

12 MB
Binary file not shown.

pom.xml

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
<maven.compiler.target>1.8</maven.compiler.target>
1313
<maven.compiler.source>1.8</maven.compiler.source>
1414
<kettle.version>8.1.0.0-SNAPSHOT</kettle.version>
15-
<weka.version>3.8.3.1</weka.version>
15+
<!-- <weka.version>3.8.3.1</weka.version> -->
16+
<weka.version>3.8.5</weka.version>
1617
<weka.timeseries.version>1.0.25</weka.timeseries.version>
1718
<weka.kfkettle.version>1.0.5</weka.kfkettle.version>
1819
<xpp-min.version>1.1.3.4.O</xpp-min.version>
@@ -74,15 +75,21 @@
7475
<version>${weka.version}</version>
7576
</dependency> -->
7677

78+
<!-- <dependency>
79+
<groupId>nz.ac.waikato.cms.weka</groupId>
80+
<artifactId>weka-stable</artifactId>
81+
<version>${weka.version}</version>
82+
</dependency> -->
83+
7784
<!-- Bit of a hack here (through including a local version of this).
7885
Unfortunately, changes to 3.8.1.1 in svn are not being picked up
7986
in Pentaho builds of Weka/PDM 3.8.1.1 for some reason -->
8087
<dependency>
81-
<groupId>pdm-ce</groupId>
82-
<artifactId>pdm-ce</artifactId>
88+
<groupId>nz.ac.waikato.cms.weka</groupId>
89+
<artifactId>weka-stable</artifactId>
8390
<version>${weka.version}</version>
8491
<scope>system</scope>
85-
<systemPath>${basedir}/lib/pdm-ce-${weka.version}.jar</systemPath>
92+
<systemPath>${basedir}/lib/weka-stable-${weka.version}.jar</systemPath>
8693
</dependency>
8794

8895
<dependency>

src/main/java/org/pentaho/di/trans/steps/pmi/BaseSupervisedPMIStepData.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,9 +1237,10 @@ protected static void establishOutputRowMeta( RowMetaInterface outRowMeta, Varia
12371237
if ( stepMeta.getOutputIRMetrics() ) {
12381238
String classLabels = classArffMeta.getNominalVals();
12391239
if ( !Const.isEmpty( classLabels ) ) {
1240-
TreeSet<String> ts = new TreeSet<>( ArffMeta.stringToVals( classLabels ) );
1240+
// TreeSet<String> ts = new TreeSet<>( ArffMeta.stringToVals( classLabels ) );
1241+
ArrayList<String> preOrdered = new ArrayList<>( ArffMeta.stringToVals( classLabels ) );
12411242
//String[] labels = classLabels.split( "," );
1242-
for ( String label : ts ) {
1243+
for ( String label : preOrdered ) {
12431244
label = label.trim();
12441245
vm =
12451246
ValueMetaFactory
@@ -1283,9 +1284,10 @@ protected static void establishOutputRowMeta( RowMetaInterface outRowMeta, Varia
12831284
if ( stepMeta.getOutputAUCMetrics() ) {
12841285
String classLabels = classArffMeta.getNominalVals();
12851286
if ( !Const.isEmpty( classLabels ) ) {
1286-
TreeSet<String> ts = new TreeSet<>( ArffMeta.stringToVals( classLabels ) );
1287+
//TreeSet<String> ts = new TreeSet<>( ArffMeta.stringToVals( classLabels ) );
12871288
// String[] labels = classLabels.split( "," );
1288-
for ( String label : ts ) {
1289+
ArrayList<String> preOrdered = new ArrayList<>( ArffMeta.stringToVals( classLabels ) );
1290+
for ( String label : preOrdered ) {
12891291
label = label.trim();
12901292

12911293
vm =

src/main/java/org/pentaho/di/trans/steps/pmi/PMILifecycleListener.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ public class PMILifecycleListener implements KettleLifecycleListener {
4242

4343
// TODO replace this by some code that somehow locates the pdm jar file in plugins/steps/pmi/lib
4444
// This allows the Spark engine to locate the main weka.jar file for use in the Spark execution environment
45-
System.setProperty( "weka.jar.filename", "pdm-ce-3.8.3.1.jar" );
45+
//System.setProperty( "weka.jar.filename", "pdm-ce-3.8.3.1.jar" );
46+
System.setProperty( "weka.jar.filename", "weka-stable-3.8.5.jar" );
4647

4748
// check that the required packages are installed (and possibly install if not)
4849
try {

src/main/java/org/pentaho/di/trans/steps/pmi/PMIScoring.java

Lines changed: 79 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -167,84 +167,15 @@ private PMIScoringModel setModel( String modelFileName ) throws KettleException
167167

168168
Object[] r = getRow();
169169

170-
if ( r == null ) {
171-
if ( !m_meta.getEvaluateRatherThanScore() && m_data.getModel().isBatchPredictor() && !m_meta
172-
.getFileNameFromField() && m_batch.size() > 0 ) {
173-
try {
174-
outputBatchRows( true );
175-
} catch ( Exception ex ) {
176-
throw new KettleException(
177-
BaseMessages.getString( PMIScoringMeta.PKG, "PMIScoring.Error.ProblemWhileGettingPredictionsForBatch" ),
178-
ex ); //$NON-NLS-1$
179-
}
180-
}
181-
182-
if ( m_meta.getEvaluateRatherThanScore() && m_data.getModel().isSupervisedLearningModel() ) {
183-
// generate the output row
184-
try {
185-
if ( m_data.getModel().isBatchPredictor() ) {
186-
outputBatchRows( true );
187-
} else {
188-
Object[] outputRow = m_data.evaluateForRow( getInputRowMeta(), m_data.getOutputRowMeta(), null, m_meta );
189-
putRow( m_data.getOutputRowMeta(), outputRow );
190-
}
191-
} catch ( Exception ex ) {
192-
throw new KettleException(
193-
BaseMessages.getString( PMIScoringMeta.PKG, "PMIScoring.Error.ProblemWhileGettingPredictionsForBatch" ),
194-
ex ); //$NON-NLS-1$
195-
}
196-
}
197-
198-
// see if we have an incremental model that is to be saved somewhere.
199-
if ( !m_meta.getFileNameFromField() && m_meta.getUpdateIncrementalModel() ) {
200-
if ( !Const.isEmpty( m_meta.getSavedModelFileName() ) ) {
201-
// try and save that sucker...
202-
try {
203-
String modName = environmentSubstitute( m_meta.getSavedModelFileName() );
204-
File updatedModelFile = null;
205-
if ( modName.startsWith( "file:" ) ) {
206-
try {
207-
modName = modName.replace( " ", "%20" );
208-
updatedModelFile = new File( new java.net.URI( modName ) );
209-
} catch ( Exception ex ) {
210-
throw new KettleException(
211-
BaseMessages.getString( PMIScoringMeta.PKG, "PMIScoring.Error.MalformedURIForUpdatedModelFile" ),
212-
ex );
213-
}
214-
} else {
215-
updatedModelFile = new File( modName );
216-
}
217-
PMIScoringData.saveSerializedModel( m_data.getModel(), updatedModelFile );
218-
} catch ( Exception ex ) {
219-
throw new KettleException(
220-
BaseMessages.getString( PMIScoringMeta.PKG, "PMIScoring.Error.ProblemSavingUpdatedModelToFile" ),
221-
ex ); //$NON-NLS-1$
222-
}
223-
}
224-
}
225-
226-
if ( m_meta.getFileNameFromField() ) {
227-
// clear the main model
228-
m_data.getModel().done();
229-
m_data.setModel( null );
230-
m_data.setDefaultModel( null );
231-
if ( m_modelCache != null ) {
232-
m_modelCache.clear();
233-
}
234-
} else {
235-
m_data.getModel().done();
236-
m_data.setModel( null );
237-
m_data.setDefaultModel( null );
238-
}
239-
240-
setOutputDone();
241-
return false;
242-
}
243-
244170
// Handle the first row
245171
if ( first ) {
246172
first = false;
247173

174+
if (r == null) {
175+
setOutputDone();
176+
return false;
177+
}
178+
248179
m_data.setOutputRowMeta( getInputRowMeta().clone() );
249180
if ( m_meta.getFileNameFromField() ) {
250181
RowMetaInterface inputRowMeta = getInputRowMeta();
@@ -378,6 +309,80 @@ private PMIScoringModel setModel( String modelFileName ) throws KettleException
378309
}
379310
} // end (if first)
380311

312+
if ( r == null ) {
313+
if ( !m_meta.getEvaluateRatherThanScore() && m_data.getModel().isBatchPredictor() && !m_meta
314+
.getFileNameFromField() && m_batch.size() > 0 ) {
315+
try {
316+
outputBatchRows( true );
317+
} catch ( Exception ex ) {
318+
throw new KettleException(
319+
BaseMessages.getString( PMIScoringMeta.PKG, "PMIScoring.Error.ProblemWhileGettingPredictionsForBatch" ),
320+
ex ); //$NON-NLS-1$
321+
}
322+
}
323+
324+
if ( m_meta.getEvaluateRatherThanScore() && m_data.getModel().isSupervisedLearningModel() ) {
325+
// generate the output row
326+
try {
327+
if ( m_data.getModel().isBatchPredictor() ) {
328+
outputBatchRows( true );
329+
} else {
330+
Object[] outputRow = m_data.evaluateForRow( getInputRowMeta(), m_data.getOutputRowMeta(), null, m_meta );
331+
putRow( m_data.getOutputRowMeta(), outputRow );
332+
}
333+
} catch ( Exception ex ) {
334+
throw new KettleException(
335+
BaseMessages.getString( PMIScoringMeta.PKG, "PMIScoring.Error.ProblemWhileGettingPredictionsForBatch" ),
336+
ex ); //$NON-NLS-1$
337+
}
338+
}
339+
340+
// see if we have an incremental model that is to be saved somewhere.
341+
if ( !m_meta.getFileNameFromField() && m_meta.getUpdateIncrementalModel() ) {
342+
if ( !Const.isEmpty( m_meta.getSavedModelFileName() ) ) {
343+
// try and save that sucker...
344+
try {
345+
String modName = environmentSubstitute( m_meta.getSavedModelFileName() );
346+
File updatedModelFile = null;
347+
if ( modName.startsWith( "file:" ) ) {
348+
try {
349+
modName = modName.replace( " ", "%20" );
350+
updatedModelFile = new File( new java.net.URI( modName ) );
351+
} catch ( Exception ex ) {
352+
throw new KettleException(
353+
BaseMessages.getString( PMIScoringMeta.PKG, "PMIScoring.Error.MalformedURIForUpdatedModelFile" ),
354+
ex );
355+
}
356+
} else {
357+
updatedModelFile = new File( modName );
358+
}
359+
PMIScoringData.saveSerializedModel( m_data.getModel(), updatedModelFile );
360+
} catch ( Exception ex ) {
361+
throw new KettleException(
362+
BaseMessages.getString( PMIScoringMeta.PKG, "PMIScoring.Error.ProblemSavingUpdatedModelToFile" ),
363+
ex ); //$NON-NLS-1$
364+
}
365+
}
366+
}
367+
368+
if ( m_meta.getFileNameFromField() ) {
369+
// clear the main model
370+
m_data.getModel().done();
371+
m_data.setModel( null );
372+
m_data.setDefaultModel( null );
373+
if ( m_modelCache != null ) {
374+
m_modelCache.clear();
375+
}
376+
} else {
377+
m_data.getModel().done();
378+
m_data.setModel( null );
379+
m_data.setDefaultModel( null );
380+
}
381+
382+
setOutputDone();
383+
return false;
384+
}
385+
381386
// Make prediction for row using model
382387
try {
383388
if ( m_meta.getFileNameFromField() ) {

src/main/java/org/pentaho/di/ui/trans/steps/pmi/BaseSupervisedPMIStepDialog.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,8 @@ protected void setData( BaseSupervisedPMIStepMeta meta ) {
540540
String[] schemeOpts = m_scheme.getSchemeOptions();
541541
if ( schemeOpts != null && schemeOpts.length > 0 ) {
542542
meta.setSchemeCommandLineOptions( Utils.joinOptions( schemeOpts ) );
543+
} else {
544+
meta.setSchemeCommandLineOptions( "" );
543545
}
544546

545547
if ( m_incrementalRowCacheField != null ) {

src/main/java/org/pentaho/di/ui/trans/steps/pmi/GOEDialog.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ protected static void setValuesOnObject( Object objectToEdit, Map<String, Map<St
316316
final Object value = propDetails.get( "value" );
317317
String category = (String) propDetails.get( "category" );
318318

319-
if ( m_propertyGroupingCategory != null ) {
319+
if ( m_propertyGroupingCategory.length() > 0 ) {
320320
if ( category == null || category.length() == 0 || !category.equalsIgnoreCase( m_propertyGroupingCategory ) ) {
321321
continue;
322322
}

src/main/java/org/pentaho/pmi/engines/KerasScheme.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ public abstract class KerasScheme {
4545
s_excludedSchemes =
4646
Arrays.asList( "Naive Bayes", "Naive Bayes incremental", "Naive Bayes multinomial", "Decision tree classifier",
4747
"Decision tree regressor", "Random forest classifier", "Random forest regressor", "Gradient boosted trees",
48-
"Support vector regressor", "Multi-layer perceptron classifier", "Multi-layer perceptron regressor",
48+
"Support vector regressor", "Support vector classifier", "Logistic regression", "Linear regression",
49+
"Multi-layer perceptron classifier", "Multi-layer perceptron regressor",
4950
"Extreme gradient boosting classifier", "Extreme gradient boosting regressor",
5051
"Multi-layer perceptron classifier", "Multi-layer perceptron regressor" );
5152

0 commit comments

Comments
 (0)