Skip to content

Commit 97396a4

Browse files
committed
Add support for wait_guess_complete for clustering tasks
1 parent 41d52f9 commit 97396a4

File tree

3 files changed

+33
-11
lines changed

3 files changed

+33
-11
lines changed

dataikuapi/dss/analysis.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,9 @@ def create_prediction_ml_task(self,
188188
return mltask
189189

190190
def create_clustering_ml_task(self,
191-
ml_backend_type = "PY_MEMORY",
192-
guess_policy = "KMEANS"):
191+
ml_backend_type="PY_MEMORY",
192+
guess_policy="KMEANS",
193+
wait_guess_complete=True):
193194

194195

195196
"""Creates a new clustering task in a new visual analysis lab
@@ -205,6 +206,10 @@ def create_clustering_ml_task(self,
205206
206207
:param string ml_backend_type: ML backend to use, one of PY_MEMORY, MLLIB or H2O
207208
:param string guess_policy: Policy to use for setting the default parameters. Valid values are: KMEANS and ANOMALY_DETECTION
209+
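:param boolean wait_guess_complete: if True (the default), ``wait_guess_complete`` is called on the new ML task before it is returned. If False, the returned ML task will be in the 'guessing' state, i.e. analyzing the input dataset to determine feature handling and algorithms.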
210+
You should wait for the guessing to be completed by calling
211+
``wait_guess_complete`` on the returned object before doing anything
212+
else (in particular calling ``train`` or ``get_settings``)
208213
"""
209214

210215
obj = {
@@ -214,7 +219,11 @@ def create_clustering_ml_task(self,
214219
}
215220

216221
ref = self.client._perform_json("POST", "/projects/%s/lab/%s/models/" % (self.project_key, self.analysis_id), body=obj)
217-
return DSSMLTask(self.client, self.project_key, self.analysis_id, ref["mlTaskId"])
222+
mltask = DSSMLTask(self.client, self.project_key, self.analysis_id, ref["mlTaskId"])
223+
224+
if wait_guess_complete:
225+
mltask.wait_guess_complete()
226+
return mltask
218227

219228
def list_ml_tasks(self):
220229
"""

dataikuapi/dss/dataset.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -385,8 +385,9 @@ def create_prediction_ml_task(self, target_variable,
385385
guess_policy = guess_policy, prediction_type = prediction_type, wait_guess_complete = wait_guess_complete)
386386

387387
def create_clustering_ml_task(self, input_dataset,
388-
ml_backend_type = "PY_MEMORY",
389-
guess_policy = "KMEANS"):
388+
ml_backend_type="PY_MEMORY",
389+
guess_policy="KMEANS",
390+
wait_guess_complete=True):
390391
"""Creates a new clustering task in a new visual analysis lab
391392
for a dataset.
392393
@@ -400,9 +401,13 @@ def create_clustering_ml_task(self, input_dataset,
400401
401402
:param string ml_backend_type: ML backend to use, one of PY_MEMORY, MLLIB or H2O
402403
:param string guess_policy: Policy to use for setting the default parameters. Valid values are: KMEANS and ANOMALY_DETECTION
404+
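:param boolean wait_guess_complete: if True (the default), ``wait_guess_complete`` is called on the new ML task before it is returned. If False, the returned ML task will be in the 'guessing' state, i.e. analyzing the input dataset to determine feature handling and algorithms.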
405+
You should wait for the guessing to be completed by calling
406+
``wait_guess_complete`` on the returned object before doing anything
407+
else (in particular calling ``train`` or ``get_settings``)
403408
"""
404-
return self.project.create_clustering_ml_task(self.dataset_name,
405-
ml_backend_type = ml_backend_type, guess_policy = guess_policy)
409+
return self.project.create_clustering_ml_task(self.dataset_name, ml_backend_type=ml_backend_type, guess_policy=guess_policy,
410+
wait_guess_complete=wait_guess_complete)
406411

407412
def create_analysis(self):
408413
"""

dataikuapi/dss/project.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -532,9 +532,9 @@ def create_prediction_ml_task(self, input_dataset, target_variable,
532532
return ret
533533

534534
def create_clustering_ml_task(self, input_dataset,
535-
ml_backend_type = "PY_MEMORY",
536-
guess_policy = "KMEANS"):
537-
535+
ml_backend_type = "PY_MEMORY",
536+
guess_policy = "KMEANS",
537+
wait_guess_complete=True):
538538

539539
"""Creates a new clustering task in a new visual analysis lab
540540
for a dataset.
@@ -549,6 +549,10 @@ def create_clustering_ml_task(self, input_dataset,
549549
550550
:param string ml_backend_type: ML backend to use, one of PY_MEMORY, MLLIB or H2O
551551
:param string guess_policy: Policy to use for setting the default parameters. Valid values are: KMEANS and ANOMALY_DETECTION
552+
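:param boolean wait_guess_complete: if True (the default), ``wait_guess_complete`` is called on the new ML task before it is returned. If False, the returned ML task will be in the 'guessing' state, i.e. analyzing the input dataset to determine feature handling and algorithms.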
553+
You should wait for the guessing to be completed by calling
554+
``wait_guess_complete`` on the returned object before doing anything
555+
else (in particular calling ``train`` or ``get_settings``)
552556
"""
553557

554558
obj = {
@@ -559,7 +563,11 @@ def create_clustering_ml_task(self, input_dataset,
559563
}
560564

561565
ref = self.client._perform_json("POST", "/projects/%s/models/lab/" % self.project_key, body=obj)
562-
return DSSMLTask(self.client, self.project_key, ref["analysisId"], ref["mlTaskId"])
566+
mltask = DSSMLTask(self.client, self.project_key, ref["analysisId"], ref["mlTaskId"])
567+
568+
if wait_guess_complete:
569+
mltask.wait_guess_complete()
570+
return mltask
563571

564572
def list_ml_tasks(self):
565573
"""

0 commit comments

Comments
 (0)