Merge pull request #797 from CitrineInformatics/feature/PLA-10839-simplify-branch-update

genfx999 · web-flow · commit 510dd6471f58 · 2022-12-21T15:18:09.000-05:00
Add methods on Branch to simplify data updates/next version ops
diff --git a/src/citrine/__version__.py b/src/citrine/__version__.py
@@ -1 +1 @@
-__version__ = '1.50.1'
+__version__ = '1.51.0'
diff --git a/src/citrine/resources/branch.py b/src/citrine/resources/branch.py
@@ -173,6 +173,54 @@ def restore(self, uid: Union[UUID, str] = None):
         data = self.session.put_resource(url, {}, version=self._api_version)
         return self.build(data)
 
+    def update_data(self,
+                    branch: Union[UUID, str, Branch],
+                    *,
+                    use_existing: bool = True,
+                    retrain_models: bool = False) -> Optional[Branch]:
+        """
+        Automatically advance the branch to the next version.
+
+        If there are no newer versions of the data sources used by this branch, this method
+        returns None without doing anything.
+
+        Parameters
+        ----------
+        branch: Union[UUID, str, Branch]
+            Branch Identifier or Branch object
+
+        use_existing: bool
+            If true, the workflows in this branch will use existing predictors that are using
+            the latest versions of the data sources and are ready to use.
+
+        retrain_models: bool
+            If true, when new versions of models are created, they are automatically
+            scheduled for training.
+
+        Returns
+        -------
+        Optional[Branch]
+            The new branch record after the version update, or None if there was no update.
+
+        """
+        if not isinstance(branch, Branch):
+            branch = self.get(branch)
+        version_updates = self.data_updates(branch.uid)
+        # If no new data sources, then exit, nothing to do
+        if len(version_updates.data_updates) == 0:
+            return None
+
+        use_predictors = []
+        if use_existing:
+            use_predictors = version_updates.predictors
+
+        branch_instructions = NextBranchVersionRequest(data_updates=version_updates.data_updates,
+                                                       use_predictors=use_predictors)
+        branch = self.next_version(branch.root_id,
+                                   branch_instructions=branch_instructions,
+                                   retrain_models=retrain_models)
+        return branch
+
     def data_updates(self, uid: Union[UUID, str]) -> BranchDataUpdate:
         """
         Get data updates for a branch.
diff --git a/tests/resources/test_branch.py b/tests/resources/test_branch.py
@@ -215,7 +215,7 @@ def test_branch_list_archived(session, collection, branch_path):
 
 
 # Needed for coverage checks
-def test_brach_data_update_inits():
+def test_branch_data_update_inits():
     data_updates = [DataVersionUpdate(current="gemd::16f91e7e-0214-4866-8d7f-a4d5c2125d2b::1",
                                       latest="gemd::16f91e7e-0214-4866-8d7f-a4d5c2125d2b::2")]
     predictors = [PredictorRef("aa971886-d17c-43b4-b602-5af7b44fcd5a", 2)]
@@ -281,6 +281,95 @@ def test_branch_next_version(session, collection, branch_path):
     assert str(branchv2.root_id) == root_branch_id
 
 
+def test_branch_data_updates_normal(session, collection, branch_path):
+    # Given
+    branch_data = BranchDataFactory()
+    root_branch_id = branch_data['metadata']['root_id']
+    session.set_response(branch_data)
+
+    branch = collection.get(branch_data['id'])
+
+    data_updates = BranchDataUpdateFactory()
+    v2branch_data = BranchDataFactory(metadata=BranchMetadataFieldFactory(root_id=root_branch_id))
+    session.set_responses(data_updates, v2branch_data)
+    v2branch = collection.update_data(branch)
+
+    # Then
+    expected_path = f'{branch_path}/next-version-predictor'
+    expected_call = FakeCall(method='POST',
+                             path=expected_path,
+                             params={'root': str(root_branch_id),
+                                     'retrain_models': False},
+                             json={
+                                 'data_updates': [
+                                     {
+                                         'current': data_updates['data_updates'][0]['current'],
+                                         'latest': data_updates['data_updates'][0]['latest'],
+                                         'type': 'DataVersionUpdate'
+                                     }
+                                 ],
+                                 'use_predictors': [
+                                     {
+                                         'predictor_id': data_updates['predictors'][0]['predictor_id'],
+                                         'predictor_version': data_updates['predictors'][0]['predictor_version']
+                                     }
+                                 ]
+                             },
+                             version='v2')
+    assert session.last_call == expected_call
+    assert str(v2branch.root_id) == root_branch_id
+
+
+def test_branch_data_updates_latest(session, collection, branch_path):
+    # Given
+    branch_data = BranchDataFactory()
+    root_branch_id = branch_data['metadata']['root_id']
+    session.set_response(branch_data)
+
+    branch = collection.get(branch_data['id'])
+    print(branch)
+
+    data_updates = BranchDataUpdateFactory()
+    v2branch_data = BranchDataFactory(metadata=BranchMetadataFieldFactory(root_id=root_branch_id))
+    session.set_responses(data_updates, v2branch_data)
+    v2branch = collection.update_data(branch, use_existing=False, retrain_models=True)
+
+    # Then
+    expected_path = f'{branch_path}/next-version-predictor'
+    expected_call = FakeCall(method='POST',
+                             path=expected_path,
+                             params={'root': str(root_branch_id),
+                                     'retrain_models': True},
+                             json={
+                                 'data_updates': [
+                                     {
+                                         'current': data_updates['data_updates'][0]['current'],
+                                         'latest': data_updates['data_updates'][0]['latest'],
+                                         'type': 'DataVersionUpdate'
+                                     }
+                                 ],
+                                 'use_predictors': []
+                             },
+                             version='v2')
+    assert session.last_call == expected_call
+    assert str(v2branch.root_id) == root_branch_id
+
+
+def test_branch_data_updates_nochange(session, collection, branch_path):
+    # Given
+    branch_data = BranchDataFactory()
+    session.set_response(branch_data)
+
+    branch = collection.get(branch_data['id'])
+    print(branch)
+
+    data_updates = BranchDataUpdateFactory(data_updates=[], predictors=[])
+    session.set_responses(branch_data, data_updates)
+    v2branch = collection.update_data(branch.uid)
+
+    assert v2branch == None
+
+
 def test_experiment_datasource(session, collection):
     # Given
     erds_path = f'projects/{collection.project_id}/candidate-experiment-datasources'
diff --git a/tests/utils/factories.py b/tests/utils/factories.py
@@ -43,7 +43,7 @@ class DataVersionUpdateFactory(factory.DictFactory):
 
 class PredictorRefFactory(factory.DictFactory):
     predictor_id = factory.Faker('uuid4')
-    version = randrange(10)
+    predictor_version = randrange(10)
 
 
 class BranchDataUpdateFactory(factory.DictFactory):

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1 +1 @@` |
| `1` |  | `-__version__ = '1.50.1'` |
|  | `1` | `+__version__ = '1.51.0'` |