From 4deca0fa94fe71fbd2a2167305351bba7aa3166a Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Wed, 22 Oct 2025 11:40:14 -0400
Subject: [PATCH 01/18] temp: initial commit

---
 kobo/apps/subsequences/tests/test_models.py   |  1 +
 .../subsequences/tests/test_versioning.py     | 33 +++++++++++++++++++
 kobo/apps/subsequences/utils/versioning.py    | 22 +++++++++++++
 kobo/settings/base.py                         |  2 +-
 4 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 kobo/apps/subsequences/tests/test_versioning.py

diff --git a/kobo/apps/subsequences/tests/test_models.py b/kobo/apps/subsequences/tests/test_models.py
index 6e49e1c530..81bf26f267 100644
--- a/kobo/apps/subsequences/tests/test_models.py
+++ b/kobo/apps/subsequences/tests/test_models.py
@@ -337,3 +337,4 @@ def test_revise_data_raise_error_wrong_question_name(self):
                     },
                 },
             )
+
diff --git a/kobo/apps/subsequences/tests/test_versioning.py b/kobo/apps/subsequences/tests/test_versioning.py
new file mode 100644
index 0000000000..3166bbcf7a
--- /dev/null
+++ b/kobo/apps/subsequences/tests/test_versioning.py
@@ -0,0 +1,33 @@
+from django.test import TestCase
+
+class TestVersioning(TestCase):
+    def test_migrate_submission_extra_to_supplemental(self):
+        old_version = {'Audio_question': {'googlets': {'languageCode': 'en',
+                                 'regionCode': None,
+                                 'status': 'complete',
+                                 'value': 'This is audio that I am trying to '
+                                          'transcribe.'},
+                    'googletx': {'languageCode': 'es',
+                                 'source': 'en',
+                                 'status': 'complete',
+                                 'value': 'Este es un audio que estoy '
+                                          'intentando transcribir.'},
+                    'qual': [{'type': 'qual_text',
+                              'uuid': 'b8188424-6249-4168-8137-7d9fab62ae3c',
+                              'val': 'Trying to transcribe audio'}],
+                    'transcript': {'dateCreated': '2025-10-22 14:30:24',
+                                   'dateModified': '2025-10-22 14:30:24',
+                                   'languageCode': 'en',
+                                   'revisions': [{}],
+                                   'value': 'This is audio that I am trying to '
+                                            'transcribe.'},
+                    'translation': {'es': {'dateCreated': '2025-10-22T14:30:38Z',
+                                           'dateModified': '2025-10-22T14:30:38Z',
+                                           'languageCode': 'es',
+                                           'revisions': [],
+                                           'value': 'Este es un audio que '
+                                                    'estoy intentando '
+                                                    'transcribir.'}}}}
+
+        self.assertEqual(True, False)  # add assertion here
+
diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index aba7b21852..8276e6ae38 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -13,6 +13,7 @@ def migrate_advanced_features(advanced_features: dict) -> dict | None:
 
     actionConfigs = migrated_advanced_features['_actionConfigs']
     for key, value in advanced_features.items():
+        print(f'{key=}, {value=}')
         if (
             key == 'transcript'
             and value
@@ -34,6 +35,7 @@ def migrate_advanced_features(advanced_features: dict) -> dict | None:
             ]
 
         if key == 'qual':
+            survey_qs = value['qual_survey']
             raise NotImplementedError
 
     return migrated_advanced_features
@@ -42,3 +44,23 @@ def migrate_advanced_features(advanced_features: dict) -> dict | None:
 def set_version(schema: dict) -> dict:
     schema['_version'] = SCHEMA_VERSIONS[0]
     return schema
+
+def migrate_submission_supplementals(supplemental_data:dict) -> dict | None:
+    if supplemental_data.get('_version', None) == SCHEMA_VERSIONS[0]:
+        return  # already at the current schema version; nothing to migrate
+    supplemental = {
+        '_version': SCHEMA_VERSIONS[0],
+    }
+    for question_xpath, action_results in supplemental_data:
+        question_results_by_action = {}
+        for action, results in action_results:
+            if action == 'googlets':
+                pass
+            if action == 'googletx':
+                pass
+            if action == 'qual':
+                pass
+            if action == 'transcript':
+                pass
+            if action == 'translation':
+                pass
diff --git a/kobo/settings/base.py b/kobo/settings/base.py
index 03c4d13887..e64acfefe5 100644
--- a/kobo/settings/base.py
+++ b/kobo/settings/base.py
@@ -1587,7 +1587,7 @@ def dj_stripe_request_callback_method():
 
 # Google Cloud Storage
 # Not fully supported as a generic storage backend
-GS_BUCKET_NAME = env.str('GS_BUCKET_NAME', None)
+GS_BUCKET_NAME = env.str('GS_BUCKET_NAME', default=None)
 
 
 """ Django error logging configuration """

From 87ba5a47d508b45ab06f958c79765575c0fb1d48 Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Wed, 22 Oct 2025 15:27:01 -0400
Subject: [PATCH 02/18] fixup!: stuff

---
 .../subsequences/tests/test_versioning.py     | 101 ++++++++++++++++--
 kobo/apps/subsequences/utils/versioning.py    |   3 +
 2 files changed, 94 insertions(+), 10 deletions(-)

diff --git a/kobo/apps/subsequences/tests/test_versioning.py b/kobo/apps/subsequences/tests/test_versioning.py
index 3166bbcf7a..6372b033c7 100644
--- a/kobo/apps/subsequences/tests/test_versioning.py
+++ b/kobo/apps/subsequences/tests/test_versioning.py
@@ -12,22 +12,103 @@ def test_migrate_submission_extra_to_supplemental(self):
                                  'status': 'complete',
                                  'value': 'Este es un audio que estoy '
                                           'intentando transcribir.'},
-                    'qual': [{'type': 'qual_text',
-                              'uuid': 'b8188424-6249-4168-8137-7d9fab62ae3c',
-                              'val': 'Trying to transcribe audio'}],
-                    'transcript': {'dateCreated': '2025-10-22 14:30:24',
-                                   'dateModified': '2025-10-22 14:30:24',
+                    'transcript': {'dateCreated': None,
+                                   'dateModified': '2025-10-22 17:09:38',
                                    'languageCode': 'en',
-                                   'revisions': [{}],
+                                   'revisions': [{'dateModified': '2025-10-22 '
+                                                                  '14:30:24',
+                                                  'languageCode': 'en',
+                                                  'value': 'This is audio that '
+                                                           'I am trying to '
+                                                           'transcribe.'},
+                                                 {}],
                                    'value': 'This is audio that I am trying to '
-                                            'transcribe.'},
+                                            'transcribe but i edited it.'},
                     'translation': {'es': {'dateCreated': '2025-10-22T14:30:38Z',
-                                           'dateModified': '2025-10-22T14:30:38Z',
+                                           'dateModified': '2025-10-22T17:10:23Z',
                                            'languageCode': 'es',
-                                           'revisions': [],
+                                           'revisions': [{'dateModified': '2025-10-22T14:30:38Z',
+                                                          'languageCode': 'es',
+                                                          'value': 'Este es un '
+                                                                   'audio que '
+                                                                   'estoy '
+                                                                   'intentando '
+                                                                   'transcribir.'}],
                                            'value': 'Este es un audio que '
                                                     'estoy intentando '
-                                                    'transcribir.'}}}}
+                                                    'transcribir pero yo lo edité'}}}}
+
+        new_version = {
+            '_version': '20250820',
+            'Audio_question': {
+                'automatic_transcription': {
+                    '_dateCreated': '',
+                    '_dateModified': '',
+                    '_versions': [
+                        {
+                            '_dateCreated': '',
+                            '_dateAccepted': '',
+                            '_uuid':'',
+                            'language': 'en',
+                            'value': 'This is audio that I am trying to '
+                                          'transcribe.',
+                            'status': 'complete',
+                        }
+                    ]
+                },
+                'automatic_translation': {
+                    'es': {
+                        '_dateCreated': '',
+                        '_dateModified': '',
+                        '_versions': [
+                            {
+                                '_dateCreated': '',
+                                '_dateAccepted': '',
+                                '_dependency': {'_actionId': 'manual_transcription',
+                                                '_uuid': 'a0030a86-d207-4249-8335-9a767fbd77eb'},
+                                '_uuid':'',
+                                'language': 'es',
+                                'value': 'Esto es un audio que estoy intendando a transcribir',
+                                'status': 'complete'
+                            }
+                        ]
+                    }
+                },
+                'manual_transcription': {
+                    '_dateCreated': '',
+                    '_dateModified': '',
+                    '_versions': [
+                        {
+                            '_dateCreated': '',
+                            '_dateAccepted': '',
+                            '_uuid':'',
+                            'language': 'en',
+                            'value': 'This is audio that I am trying to '
+                                     'transcribe but i edited it.',
+                        }
+                    ]
+                },
+                'manual_translation': {
+                    'es': {
+                        '_dateCreated': '',
+                        '_dateModified': '',
+                        '_versions': [
+                            {
+                                '_dateCreated': '',
+                                '_dateAccepted': '',
+                                '_dependency': {'_actionId': 'automatic_transcription',
+                                                '_uuid': 'a0030a86-d207-4249-8335-9a767fbd77eb'},
+                                '_uuid':'',
+                                'language': 'es',
+                                'value': 'Esto es un audio que estoy intendando a transcribir pero yo lo edité',
+                                'status': 'complete'
+                            }
+                        ]
+                    }
+                },
+            }
+        }
+
 
         self.assertEqual(True, False)  # add assertion here
 
diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index 8276e6ae38..d3f17cbff6 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -64,3 +64,6 @@ def migrate_submission_supplementals(supplemental_data:dict) -> dict:
                 pass
             if action == 'translation':
                 pass
+
+def get_automated_transcriptions_by_language(action_results:dict) -> dict:
+    pass

From 8e82e61a8c42b6656ebffc9d9787c51086e41e92 Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Thu, 23 Oct 2025 12:39:34 -0400
Subject: [PATCH 03/18] fixup!: stuff

---
 .../subsequences/tests/test_versioning.py     | 132 ++++++++++++++++
 kobo/apps/subsequences/utils/versioning.py    | 142 +++++++++++++++---
 2 files changed, 257 insertions(+), 17 deletions(-)

diff --git a/kobo/apps/subsequences/tests/test_versioning.py b/kobo/apps/subsequences/tests/test_versioning.py
index 6372b033c7..90aa65957d 100644
--- a/kobo/apps/subsequences/tests/test_versioning.py
+++ b/kobo/apps/subsequences/tests/test_versioning.py
@@ -1,6 +1,138 @@
+from datetime import timedelta
+
+import pytest
+from mock import patch
+from ddt import data, ddt
 from django.test import TestCase
+from django.utils import timezone
+from freezegun import freeze_time
+
+from kobo.apps.subsequences.utils.versioning import (
+    new_transcript_revision_from_old,
+    separate_transcriptions, migrate_advanced_features, migrate_submission_supplementals,
+)
 
+
+@ddt
 class TestVersioning(TestCase):
+    def test_new_transcript_revision_from_old(self):
+        now = timezone.now()
+        old = {
+            'dateCreated': None,
+            'dateModified': '2025-10-22 17:09:38',
+            'languageCode': 'en',
+            'value': 'Transcribed new',
+        }
+        with freeze_time(now):
+            result = new_transcript_revision_from_old(old)
+        assert result['value'] == old['value']
+        assert result['language'] == old['languageCode']
+        assert result['_dateCreated'] == old['dateModified']
+        assert result['_uuid'] is not None
+        assert result['_dateAccepted'] is None
+
+    def test_new_transcript_revision_from_old_returns_none_for_bad_data(self):
+        old = {'badly': 'formatted'}
+        assert new_transcript_revision_from_old(old) is None
+
+    @data(True, False)
+    def test_separate_automated_and_manual_transcriptions(self, latest_is_automated):
+        now = timezone.now()
+        yesterday = timezone.now() - timedelta(days=1)
+        transcript_dict = {
+            'dateCreated': None,
+            'dateModified': now,
+            'languageCode': 'en',
+            'revisions': [
+                {
+                    'dateModified': yesterday,
+                    'languageCode': 'en',
+                    'value': 'Old transcript',
+                }
+            ],
+            'value': 'Latest transcript',
+        }
+        automated_transcription_value = (
+            'Latest transcript' if latest_is_automated else 'Old transcript'
+        )
+        manual, automated = separate_transcriptions(
+            transcript_dict, 'en', automated_transcription_value
+        )
+        new_automated_transcript = automated[0]
+        new_manual_transcript = manual[0]
+        expected_most_recent_transcript = (
+            new_automated_transcript if latest_is_automated else new_manual_transcript
+        )
+        expected_old_transcript = (
+            new_manual_transcript if latest_is_automated else new_automated_transcript
+        )
+
+        assert expected_most_recent_transcript['_dateCreated'] == now
+        assert expected_most_recent_transcript['value'] == 'Latest transcript'
+        assert expected_old_transcript['_dateCreated'] == yesterday
+        assert expected_old_transcript['value'] == 'Old transcript'
+
+    def test_migrate_transcriptions(self):
+        now = timezone.now()
+        one_year_ago = now - timedelta(days=365)
+        old_version = {'Audio_question': {'googlets': {'languageCode': 'en',
+                                 'regionCode': None,
+                                 'status': 'complete',
+                                 'value': 'This is audio that I am trying to '
+                                          'transcribe.'},
+                    'transcript': {'dateCreated': one_year_ago,
+                                   'dateModified': now,
+                                   'languageCode': 'en',
+                                   'revisions': [{'dateModified': one_year_ago,
+                                                  'languageCode': 'en',
+                                                  'value': 'This is audio that '
+                                                           'I am trying to '
+                                                           'transcribe.'},
+                                                 {}],
+                                   'value': 'This is audio that I am trying to '
+                                            'transcribe but i edited it.'},
+                                          }
+                       }
+        with patch('kobo.apps.subsequences.utils.versioning.generate_uuid_for_form', side_effect=['uuid1', 'uuid2']):
+            with freeze_time(now):
+                migrated = migrate_submission_supplementals(old_version)
+        expected = {
+            '_version': '20250820',
+            'Audio_question': {
+                                  'automatic_transcription': {
+                                      '_dateCreated': one_year_ago,
+                                      '_dateModified': one_year_ago,
+                                      '_versions': [
+                                          {
+                                              '_dateCreated': one_year_ago,
+                                              '_dateAccepted': now,
+                                              '_uuid':'uuid2',
+                                              'language': 'en',
+                                              'value': 'This is audio that I am trying to '
+                                                       'transcribe.',
+                                              'status': 'complete',
+                                          }
+                                      ]
+                                  },
+                                  'manual_transcription': {
+                                      '_dateCreated': now,
+                                      '_dateModified': now,
+                                      '_versions': [
+                                          {
+                                              '_dateCreated': now,
+                                              '_dateAccepted': None,
+                                              '_uuid':'uuid1',
+                                              'language': 'en',
+                                              'value': 'This is audio that I am trying to '
+                                                       'transcribe but i edited it.',
+                                          }
+                                      ]
+                                  },
+            }
+        }
+        assert migrated == expected
+
+    @pytest.mark.skip()
     def test_migrate_submission_extra_to_supplemental(self):
         old_version = {'Audio_question': {'googlets': {'languageCode': 'en',
                                  'regionCode': None,
diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index d3f17cbff6..931d2b38c9 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -1,5 +1,13 @@
+from django.utils import timezone
+
+from ..actions import ManualTranscriptionAction
+from ...openrosa.libs.utils.model_tools import generate_uuid_for_form
 from ..constants import SCHEMA_VERSIONS
+from ...subsequences__old.actions.automatic_transcription import AutomaticTranscriptionAction
+
 
+class InvalidSupplementalFormat(Exception):
+    pass
 
 def migrate_advanced_features(advanced_features: dict) -> dict | None:
 
@@ -13,7 +21,6 @@ def migrate_advanced_features(advanced_features: dict) -> dict | None:
 
     actionConfigs = migrated_advanced_features['_actionConfigs']
     for key, value in advanced_features.items():
-        print(f'{key=}, {value=}')
         if (
             key == 'transcript'
             and value
@@ -35,7 +42,6 @@ def migrate_advanced_features(advanced_features: dict) -> dict | None:
             ]
 
         if key == 'qual':
-            survey_qs = value['qual_survey']
             raise NotImplementedError
 
     return migrated_advanced_features
@@ -51,19 +57,121 @@ def migrate_submission_supplementals(supplemental_data:dict) -> dict:
     supplemental = {
         '_version': SCHEMA_VERSIONS[0],
     }
-    for question_xpath, action_results in supplemental_data:
+    for question_xpath, action_results in supplemental_data.items():
         question_results_by_action = {}
-        for action, results in action_results:
-            if action == 'googlets':
-                pass
-            if action == 'googletx':
-                pass
-            if action == 'qual':
-                pass
-            if action == 'transcript':
-                pass
-            if action == 'translation':
-                pass
-
-def get_automated_transcriptions_by_language(action_results:dict) -> dict:
-    pass
+        automatic_transcript_language, automatic_transcript_result = (
+            get_automatic_transcription(action_results)
+        )
+        manual_transcripts, automatic_transcripts = separate_transcriptions(
+            action_results.get('transcript', None),
+            automatic_transcript_language,
+            automatic_transcript_result,
+        )
+        # should already be sorted by date created descending, but just in case
+        manual_transcripts.sort(reverse=True, key=lambda d: d['_dateCreated'])
+        automatic_transcripts.sort(reverse=True, key=lambda d: d['_dateCreated'])
+
+        if len(manual_transcripts) > 0:
+            question_results_by_action['manual_transcription'] = {
+                '_dateCreated': manual_transcripts[-1]['_dateCreated'],
+                '_dateModified': manual_transcripts[0]['_dateCreated'],
+                '_versions': manual_transcripts,
+            }
+        if len(automatic_transcripts) > 0:
+            question_results_by_action['automatic_transcription'] = {
+                '_dateCreated': automatic_transcripts[-1]['_dateCreated'],
+                '_dateModified': automatic_transcripts[0]['_dateCreated'],
+                '_versions': automatic_transcripts,
+            }
+        supplemental[question_xpath] = question_results_by_action
+
+        # translation
+        # get source
+        tagged_manual_transcripts = [{**transcript, '_actionId': ManualTranscriptionAction.ID} for transcript in manual_transcripts]
+        tagged_automatic_transcripts = [{**transcript, '_actionId': AutomaticTranscriptionAction.ID} for transcript in automatic_transcripts]
+
+        all_tagged_transcripts = [*tagged_manual_transcripts, *tagged_automatic_transcripts]
+        all_tagged_transcripts.sort(reverse=True, key=lambda d: d['_dateCreated'])
+
+        most_recent_transcript_uuids_by_language = {}
+        for transcript in all_tagged_transcripts:
+            if most_recent_transcript_uuids_by_language.get(transcript['language']) is None:
+                most_recent_transcript_uuids_by_language[transcript['language']] = {'_uuid': transcript['_uuid'], '_actionId': transcript['_actionId']}
+
+        translations_dict = action_results.get('translation', {})
+        for language_code, translations in translations_dict.items():
+            pass
+
+
+
+
+    return supplemental
+
+
+def get_automatic_transcription(
+    action_results: dict,
+) -> tuple[str | None, str | None] | None:
+    googlets = action_results.get('googlets', {})
+    return googlets.get('languageCode', None), googlets.get('value', None)
+
+def get_automatic_translation(action_results:dict):
+    googletx = action_results.get('googletx', {})
+    return googletx.get('source', None), googletx.get('languageCode', None), googletx.get('value', None)
+
+
+
+def new_transcript_revision_from_old(old_transcript_revision_dict: dict) -> dict | None:
+    # ignore bad data
+    if (
+        'languageCode' not in old_transcript_revision_dict
+        or 'value' not in old_transcript_revision_dict
+    ):
+        return None
+    return {
+        '_dateCreated': old_transcript_revision_dict.get('dateModified', None),
+        'language': old_transcript_revision_dict['languageCode'],
+        'value': old_transcript_revision_dict['value'],
+        '_uuid': generate_uuid_for_form(),
+        '_dateAccepted': None,
+    }
+
+
+def separate_transcriptions(
+    transcription_dict: dict,
+    automatic_transcript_language: str = None,
+    automatic_transcript_value: str = None,
+) -> tuple[list, list]:
+    if not transcription_dict:
+        return [], []
+    automatic_transcriptions = []
+    manual_transcriptions = []
+    latest_revision = new_transcript_revision_from_old(transcription_dict)
+    if latest_revision:
+        if (
+            latest_revision['value'] == automatic_transcript_value
+            and latest_revision['language'] == automatic_transcript_language
+        ):
+            latest_revision['status'] = 'complete'
+            latest_revision['_dateAccepted'] = timezone.now()
+            automatic_transcriptions.append(latest_revision)
+        else:
+            manual_transcriptions.append(latest_revision)
+
+    for revision in transcription_dict.get('revisions', []):
+        revision_formatted = new_transcript_revision_from_old(revision)
+        if revision_formatted is None:
+            continue
+        if (
+            revision_formatted['language'] == automatic_transcript_language
+            and revision['value'] == automatic_transcript_value
+        ):
+            revision_formatted['status'] = 'complete'
+            revision_formatted['_dateAccepted'] = timezone.now()
+            automatic_transcriptions.append(revision_formatted)
+        else:
+            manual_transcriptions.append(revision_formatted)
+    return manual_transcriptions, automatic_transcriptions
+
+def separate_translations(translation_dict):
+    if not translation_dict:
+        return [],[]

From 89f236f140114ee93a95d90bb8033a7848ca1079 Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Thu, 23 Oct 2025 14:31:54 -0400
Subject: [PATCH 04/18] fixup!: stuff

---
 kobo/apps/subsequences/utils/versioning.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index 931d2b38c9..2557290d1b 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -1,9 +1,7 @@
 from django.utils import timezone
 
-from ..actions import ManualTranscriptionAction
 from ...openrosa.libs.utils.model_tools import generate_uuid_for_form
 from ..constants import SCHEMA_VERSIONS
-from ...subsequences__old.actions.automatic_transcription import AutomaticTranscriptionAction
 
 
 class InvalidSupplementalFormat(Exception):
@@ -87,8 +85,8 @@ def migrate_submission_supplementals(supplemental_data:dict) -> dict:
 
         # translation
         # get source
-        tagged_manual_transcripts = [{**transcript, '_actionId': ManualTranscriptionAction.ID} for transcript in manual_transcripts]
-        tagged_automatic_transcripts = [{**transcript, '_actionId': AutomaticTranscriptionAction.ID} for transcript in automatic_transcripts]
+        tagged_manual_transcripts = [{**transcript, '_actionId': 'manual_transcription'} for transcript in manual_transcripts]
+        tagged_automatic_transcripts = [{**transcript, '_actionId': 'automatic_transcription'} for transcript in automatic_transcripts]
 
         all_tagged_transcripts = [*tagged_manual_transcripts, *tagged_automatic_transcripts]
         all_tagged_transcripts.sort(reverse=True, key=lambda d: d['_dateCreated'])

From cc9ce0135b44941fc41c1fe3c261a8776d09d03f Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Fri, 24 Oct 2025 08:26:39 -0400
Subject: [PATCH 05/18] fixup!: stuff

---
 kobo/apps/subsequences/utils/versioning.py | 70 +++++++++++++++++++++-
 1 file changed, 67 insertions(+), 3 deletions(-)

diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index 2557290d1b..8e14ee4136 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -133,6 +133,25 @@ def new_transcript_revision_from_old(old_transcript_revision_dict: dict) -> dict
         '_dateAccepted': None,
     }
 
+def new_translation_revision_from_old(old_translation_revision_dict: dict, source_uuid, source_action) -> dict | None:
+    # ignore bad data
+    if (
+        'languageCode' not in old_translation_revision_dict
+        or 'value' not in old_translation_revision_dict
+    ):
+        return None
+    return {
+        '_dateCreated': old_translation_revision_dict.get('dateModified', None),
+        'language': old_translation_revision_dict['languageCode'],
+        'value': old_translation_revision_dict['value'],
+        '_uuid': generate_uuid_for_form(),
+        '_dateAccepted': None,
+        '_dependency': {
+            '_actionId': source_action,
+            '_uuid': source_uuid,
+        }
+    }
+
 
 def separate_transcriptions(
     transcription_dict: dict,
@@ -170,6 +189,51 @@ def separate_transcriptions(
             manual_transcriptions.append(revision_formatted)
     return manual_transcriptions, automatic_transcriptions
 
-def separate_translations(translation_dict):
-    if not translation_dict:
-        return [],[]
+def separate_translations(language, translation_dict,
+    automatic_translation_language: str = None,
+    automatic_translation_value: str = None, source_uuid=None, source_action_id=None):
+    """
+    Split one language's old-format translation into new-format revision lists.
+
+    `translation_dict` is the old entry for a single language, shaped like
+    {'dateModified': ..., 'languageCode': 'es', 'value': ..., 'revisions': [...]}.
+    The entry itself (the latest value) and every item of 'revisions' are
+    converted with `new_translation_revision_from_old`, so each result carries
+    a `_dependency` on (`source_action_id`, `source_uuid`). Items without
+    'languageCode' or 'value' (such as `{}`) are skipped.
+
+    A revision whose language and value both equal
+    `automatic_translation_language` / `automatic_translation_value` is
+    classified as automatic: it gets status 'complete' and `_dateAccepted` set
+    to the current time. Everything else is classified as manual.
+
+    `language` is currently unused.
+
+    Returns `(manual_translations, automatic_translations)`; the latest value,
+    when valid, comes before the older revisions in whichever list it lands.
+    """
+    automatic_translations = []
+    manual_translations = []
+    # same guard as separate_transcriptions: nothing to migrate for this language
+    if not translation_dict:
+        return manual_translations, automatic_translations
+
+    # the top-level entry is the latest value; older values live in 'revisions'
+    for old_revision in [translation_dict, *translation_dict.get('revisions', [])]:
+        revision = new_translation_revision_from_old(
+            old_revision, source_uuid=source_uuid, source_action=source_action_id
+        )
+        if revision is None:
+            continue
+        if (
+            revision['language'] == automatic_translation_language
+            and revision['value'] == automatic_translation_value
+        ):
+            revision['status'] = 'complete'
+            revision['_dateAccepted'] = timezone.now()
+            automatic_translations.append(revision)
+        else:
+            manual_translations.append(revision)
+    return manual_translations, automatic_translations
+
+

From 9b31b20566dcc685187fd215962792552ab2f319 Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Fri, 24 Oct 2025 13:01:13 -0400
Subject: [PATCH 06/18] fixup!: stuff

---
 .../subsequences/tests/test_versioning.py     |  52 +++++++-
 kobo/apps/subsequences/utils/versioning.py    | 121 ++++++++++++------
 2 files changed, 126 insertions(+), 47 deletions(-)

diff --git a/kobo/apps/subsequences/tests/test_versioning.py b/kobo/apps/subsequences/tests/test_versioning.py
index 90aa65957d..4253431d87 100644
--- a/kobo/apps/subsequences/tests/test_versioning.py
+++ b/kobo/apps/subsequences/tests/test_versioning.py
@@ -1,15 +1,17 @@
 from datetime import timedelta
+from unittest.mock import patch
 
 import pytest
-from mock import patch
 from ddt import data, ddt
 from django.test import TestCase
 from django.utils import timezone
 from freezegun import freeze_time
 
 from kobo.apps.subsequences.utils.versioning import (
-    new_transcript_revision_from_old,
-    separate_transcriptions, migrate_advanced_features, migrate_submission_supplementals,
+    determine_source_transcripts,
+    migrate_submission_supplementals,
+    new_revision_from_old,
+    separate_transcriptions,
 )
 
 
@@ -24,7 +26,7 @@ def test_new_transcript_revision_from_old(self):
             'value': 'Transcribed new',
         }
         with freeze_time(now):
-            result = new_transcript_revision_from_old(old)
+            result = new_revision_from_old(old)
         assert result['value'] == old['value']
         assert result['language'] == old['languageCode']
         assert result['_dateCreated'] == old['dateModified']
@@ -33,7 +35,7 @@ def test_new_transcript_revision_from_old(self):
 
     def test_new_transcript_revision_from_old_returns_none_for_bad_data(self):
         old = {'badly': 'formatted'}
-        assert new_transcript_revision_from_old(old) is None
+        assert new_revision_from_old(old) is None
 
     @data(True, False)
     def test_separate_automated_and_manual_transcriptions(self, latest_is_automated):
@@ -132,6 +134,46 @@ def test_migrate_transcriptions(self):
         }
         assert migrated == expected
 
+    def test_determine_source_transcripts(self):
+        manual_transcripts = []
+        automatic_transcripts = []
+        now = timezone.now()
+        for i in range(5):
+            manual = {
+                '_dateCreated': now - timedelta(days=i),
+                'language': 'en',
+                'value': 'Value',
+                '_uuid': f'uuid-{i}-manual',
+                '_dateAccepted': None,
+            }
+            automatic = {
+                **manual,
+                '_uuid': f'uuid-{i}-automatic',
+                '_dateCreated': now - timedelta(days=i + 1),
+            }
+            manual_transcripts.append(manual)
+            automatic_transcripts.append(automatic)
+        # add an old transcript in a different language
+        manual_transcripts.append(
+            {
+                '_dateCreated': now - timedelta(days=5),
+                'language': 'fr',
+                'value': 'Value',
+                '_uuid': 'uuid-5-manual',
+                '_dateAccepted': None,
+            }
+        )
+        most_recent_overall, most_recent_by_language = determine_source_transcripts(
+            manual_transcripts, automatic_transcripts
+        )
+        assert most_recent_overall['_uuid'] == 'uuid-0-manual'
+        assert most_recent_overall['_actionId'] == 'manual_transcription'
+        assert most_recent_by_language['en']['_uuid'] == 'uuid-0-manual'
+        assert most_recent_by_language['en']['_actionId'] == 'manual_transcription'
+        assert most_recent_by_language['fr']['_uuid'] == 'uuid-5-manual'
+        assert most_recent_by_language['fr']['_actionId'] == 'manual_transcription'
+
+
     @pytest.mark.skip()
     def test_migrate_submission_extra_to_supplemental(self):
         old_version = {'Audio_question': {'googlets': {'languageCode': 'en',
diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index 8e14ee4136..a02f0a5e82 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -84,26 +84,64 @@ def migrate_submission_supplementals(supplemental_data:dict) -> dict:
         supplemental[question_xpath] = question_results_by_action
 
         # translation
-        # get source
-        tagged_manual_transcripts = [{**transcript, '_actionId': 'manual_transcription'} for transcript in manual_transcripts]
-        tagged_automatic_transcripts = [{**transcript, '_actionId': 'manual_translation'} for transcript in automatic_transcripts]
-
-        all_tagged_transcripts = [*tagged_manual_transcripts, *tagged_automatic_transcripts]
-        all_tagged_transcripts.sort(reverse=True, key=lambda d: d['_dateCreated'])
-
-        most_recent_transcript_uuids_by_language = {}
-        for transcript in all_tagged_transcripts:
-            if most_recent_transcript_uuids_by_language.get(transcript['language']) is None:
-                most_recent_transcript_uuids_by_language[transcript['language']] = {'_uuid': transcript['_uuid'], '_actionId': transcript['_actionId']}
+        # determine what to use as the source transcript
+        most_recent_transcript, most_recent_transcript_by_language = (
+            determine_source_transcripts(manual_transcripts, automatic_transcripts)
+        )
+        (
+            automatic_translation_source_language,
+            automatic_translation_language,
+            automatic_translation_value,
+        ) = get_automatic_translation(action_results)
 
         translations_dict = action_results.get('translation', {})
+        automatic_translations = {}
+        manual_translations = {}
         for language_code, translations in translations_dict.items():
+            automatic_translations_for_language = separate_translations(
+                language_code,
+                automatic_translation_source_language,
+                automatic_translation_language,
+                automatic_translation_value,
+                most_recent_transcript,
+                most_recent_transcript_by_language,
+            )
             pass
 
+    return supplemental
 
 
+def determine_source_transcripts(manual_transcripts, automatic_transcripts):
+    # First combine manual and automatic transcripts and sort by dateCreated descending
+    # tag them with the action so we don't lose track
+    tagged_manual_transcripts = [
+        {**transcript, '_actionId': 'manual_transcription'}
+        for transcript in manual_transcripts
+    ]
+    tagged_automatic_transcripts = [
+        {**transcript, '_actionId': 'automatic_transcription'}
+        for transcript in automatic_transcripts
+    ]
+
+    all_tagged_transcripts = [*tagged_manual_transcripts, *tagged_automatic_transcripts]
+    all_tagged_transcripts.sort(reverse=True, key=lambda d: d['_dateCreated'])
+
+    # take the most recent transcript, manual or automatic, by language
+    most_recent_transcript_uuids_by_language = {}
+    for transcript in all_tagged_transcripts:
+        if most_recent_transcript_uuids_by_language.get(transcript['language']) is None:
+            most_recent_transcript_uuids_by_language[transcript['language']] = {
+                '_uuid': transcript['_uuid'],
+                '_actionId': transcript['_actionId'],
+            }
 
-    return supplemental
+    # the source language of a translation is not always known, so also return the
+    # most recent transcript overall (None when there are no transcripts at all)
+    most_recent_transcript_overall = None
+    if all_tagged_transcripts:
+        newest = all_tagged_transcripts[0]
+        most_recent_transcript_overall = {k: newest[k] for k in ('_uuid', '_actionId')}
+    return most_recent_transcript_overall, most_recent_transcript_uuids_by_language
 
 
 def get_automatic_transcription(
@@ -114,11 +152,14 @@ def get_automatic_transcription(
 
 def get_automatic_translation(action_results:dict):
     googletx = action_results.get('googletx', {})
-    return googletx.get('source', None), googletx.get('languageCode', None), googletx.get('value', None)
-
+    return (
+        googletx.get('source', None),
+        googletx.get('languageCode', None),
+        googletx.get('value', None),
+    )
 
 
-def new_transcript_revision_from_old(old_transcript_revision_dict: dict) -> dict | None:
+def new_revision_from_old(old_transcript_revision_dict: dict) -> dict | None:
     # ignore bad data
     if (
         'languageCode' not in old_transcript_revision_dict
@@ -133,25 +174,6 @@ def new_transcript_revision_from_old(old_transcript_revision_dict: dict) -> dict
         '_dateAccepted': None,
     }
 
-def new_translation_revision_from_old(old_translation_revision_dict: dict, source_uuid, source_action) -> dict | None:
-    # ignore bad data
-    if (
-        'languageCode' not in old_translation_revision_dict
-        or 'value' not in old_translation_revision_dict
-    ):
-        return None
-    return {
-        '_dateCreated': old_translation_revision_dict.get('dateModified', None),
-        'language': old_translation_revision_dict['languageCode'],
-        'value': old_translation_revision_dict['value'],
-        '_uuid': generate_uuid_for_form(),
-        '_dateAccepted': None,
-        '_dependency': {
-            '_actionId': source_action,
-            '_uuid': source_uuid,
-        }
-    }
-
 
 def separate_transcriptions(
     transcription_dict: dict,
@@ -162,7 +184,7 @@ def separate_transcriptions(
         return [], []
     automatic_transcriptions = []
     manual_transcriptions = []
-    latest_revision = new_transcript_revision_from_old(transcription_dict)
+    latest_revision = new_revision_from_old(transcription_dict)
     if latest_revision:
         if (
             latest_revision['value'] == automatic_transcript_value
@@ -175,7 +197,7 @@ def separate_transcriptions(
             manual_transcriptions.append(latest_revision)
 
     for revision in transcription_dict.get('revisions', []):
-        revision_formatted = new_transcript_revision_from_old(revision)
+        revision_formatted = new_revision_from_old(revision)
         if revision_formatted is None:
             continue
         if (
@@ -189,9 +211,16 @@ def separate_transcriptions(
             manual_transcriptions.append(revision_formatted)
     return manual_transcriptions, automatic_transcriptions
 
-def separate_translations(language, translation_dict,
+
+def separate_translations(
+    language,
+    translation_dict,
+    automatic_translation_source_language: str = None,
     automatic_translation_language: str = None,
-    automatic_translation_value: str = None, source_uuid=None, source_action_id=None):
+    automatic_translation_value: str = None,
+    most_recent_transcript=None,
+    most_recent_transcript_by_language=None,
+):
     """
     {'es': {'dateCreated': '2025-10-22T14:30:38Z',
                                    'dateModified': '2025-10-22T17:10:23Z',
@@ -209,7 +238,7 @@ def separate_translations(language, translation_dict,
     """
     automatic_translations = []
     manual_translations = []
-    latest_revision = new_translation_revision_from_old(translation_dict, source_uuid=source_uuid, source_action=source_action_id)
+    latest_revision = new_revision_from_old(translation_dict)
     if latest_revision:
         if (
             latest_revision['value'] == automatic_translation_value
@@ -217,12 +246,17 @@ def separate_translations(language, translation_dict,
         ):
             latest_revision['status'] = 'complete'
             latest_revision['_dateAccepted'] = timezone.now()
+            source = most_recent_transcript_by_language.get(
+                automatic_translation_source_language, most_recent_transcript
+            )
+            latest_revision['source'] = source
             automatic_translations.append(latest_revision)
         else:
+            latest_revision['source'] = most_recent_transcript
             manual_translations.append(latest_revision)
 
     for revision in translation_dict.get('revisions', []):
-        revision_formatted = new_transcript_revision_from_old(revision)
+        revision_formatted = new_revision_from_old(revision)
         if revision_formatted is None:
             continue
         if (
@@ -231,9 +265,12 @@ def separate_translations(language, translation_dict,
         ):
             revision_formatted['status'] = 'complete'
             revision_formatted['_dateAccepted'] = timezone.now()
+            source = most_recent_transcript_by_language.get(
+                automatic_translation_source_language, most_recent_transcript
+            )
+            revision_formatted['source'] = source
             automatic_translations.append(revision_formatted)
         else:
+            revision_formatted['source'] = most_recent_transcript
             manual_translations.append(revision_formatted)
     return manual_translations, automatic_translations
-
-

From ef8655a38cc586ef1f11a08ca4837b3065389245 Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Tue, 28 Oct 2025 10:22:53 -0400
Subject: [PATCH 07/18] fixup!: stuff

---
 .../subsequences/tests/test_versioning.py     | 72 ++++++++++---------
 kobo/apps/subsequences/utils/versioning.py    | 15 ++--
 2 files changed, 49 insertions(+), 38 deletions(-)

diff --git a/kobo/apps/subsequences/tests/test_versioning.py b/kobo/apps/subsequences/tests/test_versioning.py
index 4253431d87..416d8b502a 100644
--- a/kobo/apps/subsequences/tests/test_versioning.py
+++ b/kobo/apps/subsequences/tests/test_versioning.py
@@ -173,9 +173,14 @@ def test_determine_source_transcripts(self):
         assert most_recent_by_language['fr']['_uuid'] == 'uuid-5-manual'
         assert most_recent_by_language['fr']['_actionId'] == 'manual_transcription'
 
+    def test_migrate_translations(self):
+        pass
+
 
     @pytest.mark.skip()
     def test_migrate_submission_extra_to_supplemental(self):
+        now = timezone.now()
+        one_year_ago = now - timedelta(days=365)
         old_version = {'Audio_question': {'googlets': {'languageCode': 'en',
                                  'regionCode': None,
                                  'status': 'complete',
@@ -186,11 +191,10 @@ def test_migrate_submission_extra_to_supplemental(self):
                                  'status': 'complete',
                                  'value': 'Este es un audio que estoy '
                                           'intentando transcribir.'},
-                    'transcript': {'dateCreated': None,
-                                   'dateModified': '2025-10-22 17:09:38',
+                    'transcript': {'dateCreated': one_year_ago,
+                                   'dateModified': now,
                                    'languageCode': 'en',
-                                   'revisions': [{'dateModified': '2025-10-22 '
-                                                                  '14:30:24',
+                                   'revisions': [{'dateModified': one_year_ago,
                                                   'languageCode': 'en',
                                                   'value': 'This is audio that '
                                                            'I am trying to '
@@ -198,10 +202,10 @@ def test_migrate_submission_extra_to_supplemental(self):
                                                  {}],
                                    'value': 'This is audio that I am trying to '
                                             'transcribe but i edited it.'},
-                    'translation': {'es': {'dateCreated': '2025-10-22T14:30:38Z',
-                                           'dateModified': '2025-10-22T17:10:23Z',
+                    'translation': {'es': {'dateCreated': one_year_ago,
+                                           'dateModified': now,
                                            'languageCode': 'es',
-                                           'revisions': [{'dateModified': '2025-10-22T14:30:38Z',
+                                           'revisions': [{'dateModified': one_year_ago,
                                                           'languageCode': 'es',
                                                           'value': 'Este es un '
                                                                    'audio que '
@@ -212,17 +216,21 @@ def test_migrate_submission_extra_to_supplemental(self):
                                                     'estoy intentando '
                                                     'transcribir pero yo lo edité'}}}}
 
+        with patch('kobo.apps.subsequences.utils.versioning.generate_uuid_for_form', side_effect=['uuid1', 'uuid2', 'uuid3', 'uuid4']):
+            with freeze_time(now):
+                migrated = migrate_submission_supplementals(old_version)
+
         new_version = {
             '_version': '20250820',
             'Audio_question': {
                 'automatic_transcription': {
-                    '_dateCreated': '',
-                    '_dateModified': '',
+                    '_dateCreated': one_year_ago,
+                    '_dateModified': one_year_ago,
                     '_versions': [
                         {
-                            '_dateCreated': '',
-                            '_dateAccepted': '',
-                            '_uuid':'',
+                            '_dateCreated': one_year_ago,
+                            '_dateAccepted': now,
+                            '_uuid':'uuid2',
                             'language': 'en',
                             'value': 'This is audio that I am trying to '
                                           'transcribe.',
@@ -232,15 +240,15 @@ def test_migrate_submission_extra_to_supplemental(self):
                 },
                 'automatic_translation': {
                     'es': {
-                        '_dateCreated': '',
-                        '_dateModified': '',
+                        '_dateCreated': one_year_ago,
+                        '_dateModified': one_year_ago,
                         '_versions': [
                             {
-                                '_dateCreated': '',
-                                '_dateAccepted': '',
+                                '_dateCreated': one_year_ago,
+                                '_dateAccepted': now,
                                 '_dependency': {'_actionId': 'manual_transcription',
-                                                '_uuid': 'a0030a86-d207-4249-8335-9a767fbd77eb'},
-                                '_uuid':'',
+                                                '_uuid': 'uuid1'},
+                                '_uuid':'uuid4',
                                 'language': 'es',
                                 'value': 'Esto es un audio que estoy intendando a transcribir',
                                 'status': 'complete'
@@ -249,13 +257,13 @@ def test_migrate_submission_extra_to_supplemental(self):
                     }
                 },
                 'manual_transcription': {
-                    '_dateCreated': '',
-                    '_dateModified': '',
+                    '_dateCreated': now,
+                    '_dateModified': now,
                     '_versions': [
                         {
-                            '_dateCreated': '',
-                            '_dateAccepted': '',
-                            '_uuid':'',
+                            '_dateCreated': now,
+                            '_dateAccepted': None,
+                            '_uuid':'uuid1',
                             'language': 'en',
                             'value': 'This is audio that I am trying to '
                                      'transcribe but i edited it.',
@@ -264,15 +272,15 @@ def test_migrate_submission_extra_to_supplemental(self):
                 },
                 'manual_translation': {
                     'es': {
-                        '_dateCreated': '',
-                        '_dateModified': '',
+                        '_dateCreated': now,
+                        '_dateModified': now,
                         '_versions': [
                             {
-                                '_dateCreated': '',
-                                '_dateAccepted': '',
-                                '_dependency': {'_actionId': 'automatic_transcription',
-                                                '_uuid': 'a0030a86-d207-4249-8335-9a767fbd77eb'},
-                                '_uuid':'',
+                                '_dateCreated': now,
+                                '_dateAccepted': now,
+                                '_dependency': {'_actionId': 'manual_transcription',
+                                                '_uuid': 'uuid1'},
+                                '_uuid':'uuid3',
                                 'language': 'es',
                                 'value': 'Esto es un audio que estoy intendando a transcribir pero yo lo edité',
                                 'status': 'complete'
@@ -282,7 +290,5 @@ def test_migrate_submission_extra_to_supplemental(self):
                 },
             }
         }
-
-
-        self.assertEqual(True, False)  # add assertion here
+        assert migrated == new_version  # add assertion here
 
diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index a02f0a5e82..2f2f4bed65 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -81,7 +81,6 @@ def migrate_submission_supplementals(supplemental_data:dict) -> dict:
                 '_dateModified': automatic_transcripts[0]['_dateCreated'],
                 '_versions': automatic_transcripts,
             }
-        supplemental[question_xpath] = question_results_by_action
 
         # translation
         # determine what to use as the source transcript
@@ -98,15 +97,21 @@ def migrate_submission_supplementals(supplemental_data:dict) -> dict:
         automatic_translations = {}
         manual_translations = {}
         for language_code, translations in translations_dict.items():
-            automatic_translations_for_language = separate_translations(
+            automatic_translations_for_language, manual_translations_for_language = separate_translations(
                 language_code,
+                translations,
                 automatic_translation_source_language,
                 automatic_translation_language,
                 automatic_translation_value,
                 most_recent_transcript,
                 most_recent_transcript_by_language,
             )
-            pass
+            automatic_translations[language_code] = automatic_translations_for_language
+            manual_translations[language_code] = manual_translations_for_language
+        question_results_by_action['automatic_translation'] = automatic_translations
+        question_results_by_action['manual_translation'] = manual_translations
+        supplemental[question_xpath] = question_results_by_action
+
 
     return supplemental
 
@@ -242,7 +247,7 @@ def separate_translations(
     if latest_revision:
         if (
             latest_revision['value'] == automatic_translation_value
-            and latest_revision['language'] == automatic_translation_language
+            and language == automatic_translation_language
         ):
             latest_revision['status'] = 'complete'
             latest_revision['_dateAccepted'] = timezone.now()
@@ -260,7 +265,7 @@ def separate_translations(
         if revision_formatted is None:
             continue
         if (
-            revision_formatted['language'] == automatic_translation_language
+            language == automatic_translation_language
             and revision['value'] == automatic_translation_value
         ):
             revision_formatted['status'] = 'complete'

From 3c35f145fa34152fe5f436f1ccbef57c64972ce7 Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Tue, 28 Oct 2025 13:14:26 -0400
Subject: [PATCH 08/18] fixup!: messy but functional

---
 .../subsequences/tests/test_versioning.py     | 40 +++++++++----------
 kobo/apps/subsequences/utils/versioning.py    | 39 ++++++++++++------
 2 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/kobo/apps/subsequences/tests/test_versioning.py b/kobo/apps/subsequences/tests/test_versioning.py
index 416d8b502a..9cff737787 100644
--- a/kobo/apps/subsequences/tests/test_versioning.py
+++ b/kobo/apps/subsequences/tests/test_versioning.py
@@ -177,10 +177,9 @@ def test_migrate_translations(self):
         pass
 
 
-    @pytest.mark.skip()
     def test_migrate_submission_extra_to_supplemental(self):
         now = timezone.now()
-        one_year_ago = now - timedelta(days=365)
+        one_year_ago = (now - timedelta(days=365)).isoformat()
         old_version = {'Audio_question': {'googlets': {'languageCode': 'en',
                                  'regionCode': None,
                                  'status': 'complete',
@@ -189,10 +188,10 @@ def test_migrate_submission_extra_to_supplemental(self):
                     'googletx': {'languageCode': 'es',
                                  'source': 'en',
                                  'status': 'complete',
-                                 'value': 'Este es un audio que estoy '
-                                          'intentando transcribir.'},
+                                 'value': 'Esto es un audio que estoy '
+                                          'intentando a transcribir.'},
                     'transcript': {'dateCreated': one_year_ago,
-                                   'dateModified': now,
+                                   'dateModified': now.isoformat(),
                                    'languageCode': 'en',
                                    'revisions': [{'dateModified': one_year_ago,
                                                   'languageCode': 'en',
@@ -203,16 +202,16 @@ def test_migrate_submission_extra_to_supplemental(self):
                                    'value': 'This is audio that I am trying to '
                                             'transcribe but i edited it.'},
                     'translation': {'es': {'dateCreated': one_year_ago,
-                                           'dateModified': now,
+                                           'dateModified': now.isoformat(),
                                            'languageCode': 'es',
                                            'revisions': [{'dateModified': one_year_ago,
                                                           'languageCode': 'es',
-                                                          'value': 'Este es un '
+                                                          'value': 'Esto es un '
                                                                    'audio que '
                                                                    'estoy '
-                                                                   'intentando '
+                                                                   'intentando a '
                                                                    'transcribir.'}],
-                                           'value': 'Este es un audio que '
+                                           'value': 'Esto es un audio que '
                                                     'estoy intentando '
                                                     'transcribir pero yo lo edité'}}}}
 
@@ -229,7 +228,7 @@ def test_migrate_submission_extra_to_supplemental(self):
                     '_versions': [
                         {
                             '_dateCreated': one_year_ago,
-                            '_dateAccepted': now,
+                            '_dateAccepted': now.isoformat(),
                             '_uuid':'uuid2',
                             'language': 'en',
                             'value': 'This is audio that I am trying to '
@@ -245,23 +244,23 @@ def test_migrate_submission_extra_to_supplemental(self):
                         '_versions': [
                             {
                                 '_dateCreated': one_year_ago,
-                                '_dateAccepted': now,
+                                '_dateAccepted': now.isoformat(),
                                 '_dependency': {'_actionId': 'manual_transcription',
                                                 '_uuid': 'uuid1'},
                                 '_uuid':'uuid4',
                                 'language': 'es',
-                                'value': 'Esto es un audio que estoy intendando a transcribir',
+                                'value': 'Esto es un audio que estoy intentando a transcribir.',
                                 'status': 'complete'
                             }
                         ]
                     }
                 },
                 'manual_transcription': {
-                    '_dateCreated': now,
-                    '_dateModified': now,
+                    '_dateCreated': now.isoformat(),
+                    '_dateModified': now.isoformat(),
                     '_versions': [
                         {
-                            '_dateCreated': now,
+                            '_dateCreated': now.isoformat(),
                             '_dateAccepted': None,
                             '_uuid':'uuid1',
                             'language': 'en',
@@ -272,18 +271,17 @@ def test_migrate_submission_extra_to_supplemental(self):
                 },
                 'manual_translation': {
                     'es': {
-                        '_dateCreated': now,
-                        '_dateModified': now,
+                        '_dateCreated': now.isoformat(),
+                        '_dateModified': now.isoformat(),
                         '_versions': [
                             {
-                                '_dateCreated': now,
-                                '_dateAccepted': now,
+                                '_dateCreated': now.isoformat(),
+                                '_dateAccepted': None,
                                 '_dependency': {'_actionId': 'manual_transcription',
                                                 '_uuid': 'uuid1'},
                                 '_uuid':'uuid3',
                                 'language': 'es',
-                                'value': 'Esto es un audio que estoy intendando a transcribir pero yo lo edité',
-                                'status': 'complete'
+                                'value': 'Esto es un audio que estoy intentando transcribir pero yo lo edité',
                             }
                         ]
                     }
diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index 2f2f4bed65..15e82c3c26 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -97,7 +97,7 @@ def migrate_submission_supplementals(supplemental_data:dict) -> dict:
         automatic_translations = {}
         manual_translations = {}
         for language_code, translations in translations_dict.items():
-            automatic_translations_for_language, manual_translations_for_language = separate_translations(
+            manual_translations_for_language, automatic_translations_for_language  = separate_translations(
                 language_code,
                 translations,
                 automatic_translation_source_language,
@@ -106,9 +106,22 @@ def migrate_submission_supplementals(supplemental_data:dict) -> dict:
                 most_recent_transcript,
                 most_recent_transcript_by_language,
             )
-            automatic_translations[language_code] = automatic_translations_for_language
-            manual_translations[language_code] = manual_translations_for_language
-        question_results_by_action['automatic_translation'] = automatic_translations
+            if len(automatic_translations_for_language) > 0:
+                automatic_translations_for_language.sort(reverse=True, key =lambda x: x['_dateCreated'])
+                automatic_translations[language_code] = {
+                    '_dateCreated': automatic_translations_for_language[-1]['_dateCreated'],
+                    '_dateModified': automatic_translations_for_language[0]['_dateCreated']
+                }
+                automatic_translations[language_code]['_versions'] = automatic_translations_for_language
+            if len(manual_translations_for_language) > 0:
+                manual_translations_for_language.sort(reverse=True, key =lambda x: x['_dateCreated'])
+                manual_translations[language_code] = {
+                    '_dateCreated': manual_translations_for_language[-1]['_dateCreated'],
+                    '_dateModified': manual_translations_for_language[0]['_dateCreated']
+                }
+                manual_translations[language_code]['_versions'] = manual_translations_for_language
+        if automatic_translations != {}:
+            question_results_by_action['automatic_translation'] = automatic_translations
         question_results_by_action['manual_translation'] = manual_translations
         supplemental[question_xpath] = question_results_by_action
 
@@ -196,7 +209,7 @@ def separate_transcriptions(
             and latest_revision['language'] == automatic_transcript_language
         ):
             latest_revision['status'] = 'complete'
-            latest_revision['_dateAccepted'] = timezone.now()
+            latest_revision['_dateAccepted'] = timezone.now().isoformat()
             automatic_transcriptions.append(latest_revision)
         else:
             manual_transcriptions.append(latest_revision)
@@ -210,7 +223,7 @@ def separate_transcriptions(
             and revision['value'] == automatic_transcript_value
         ):
             revision_formatted['status'] = 'complete'
-            revision_formatted['_dateAccepted'] = timezone.now()
+            revision_formatted['_dateAccepted'] = timezone.now().isoformat()
             automatic_transcriptions.append(revision_formatted)
         else:
             manual_transcriptions.append(revision_formatted)
@@ -225,7 +238,7 @@ def separate_translations(
     automatic_translation_value: str = None,
     most_recent_transcript=None,
     most_recent_transcript_by_language=None,
-):
+) -> tuple[list, list]:
     """
     {'es': {'dateCreated': '2025-10-22T14:30:38Z',
                                    'dateModified': '2025-10-22T17:10:23Z',
@@ -250,14 +263,14 @@ def separate_translations(
             and language == automatic_translation_language
         ):
             latest_revision['status'] = 'complete'
-            latest_revision['_dateAccepted'] = timezone.now()
+            latest_revision['_dateAccepted'] = timezone.now().isoformat()
             source = most_recent_transcript_by_language.get(
                 automatic_translation_source_language, most_recent_transcript
             )
-            latest_revision['source'] = source
+            latest_revision['_dependency'] = source
             automatic_translations.append(latest_revision)
         else:
-            latest_revision['source'] = most_recent_transcript
+            latest_revision['_dependency'] = most_recent_transcript
             manual_translations.append(latest_revision)
 
     for revision in translation_dict.get('revisions', []):
@@ -269,13 +282,13 @@ def separate_translations(
             and revision['value'] == automatic_translation_value
         ):
             revision_formatted['status'] = 'complete'
-            revision_formatted['_dateAccepted'] = timezone.now()
+            revision_formatted['_dateAccepted'] = timezone.now().isoformat()
             source = most_recent_transcript_by_language.get(
                 automatic_translation_source_language, most_recent_transcript
             )
-            revision_formatted['source'] = source
+            revision_formatted['_dependency'] = source
             automatic_translations.append(revision_formatted)
         else:
-            revision_formatted['source'] = most_recent_transcript
+            revision_formatted['_dependency'] = most_recent_transcript
             manual_translations.append(revision_formatted)
     return manual_translations, automatic_translations

From 335ff049686a4e1b034245fd711deb7849873e66 Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Tue, 28 Oct 2025 15:27:37 -0400
Subject: [PATCH 09/18] fixup!: cleaning

---
 .../subsequences/tests/test_versioning.py     | 351 ++++++++-------
 kobo/apps/subsequences/utils/versioning.py    | 409 ++++++++----------
 2 files changed, 360 insertions(+), 400 deletions(-)

diff --git a/kobo/apps/subsequences/tests/test_versioning.py b/kobo/apps/subsequences/tests/test_versioning.py
index 9cff737787..1ebce79aed 100644
--- a/kobo/apps/subsequences/tests/test_versioning.py
+++ b/kobo/apps/subsequences/tests/test_versioning.py
@@ -1,23 +1,41 @@
-from datetime import timedelta
+from datetime import datetime, timedelta
 from unittest.mock import patch
 
-import pytest
-from ddt import data, ddt
+from ddt import data, ddt, unpack
 from django.test import TestCase
 from django.utils import timezone
 from freezegun import freeze_time
 
 from kobo.apps.subsequences.utils.versioning import (
-    determine_source_transcripts,
+    _determine_source_transcript,
+    _new_revision_from_old,
+    _separate_manual_and_automatic_versions,
     migrate_submission_supplementals,
-    new_revision_from_old,
-    separate_transcriptions,
 )
 
 
 @ddt
 class TestVersioning(TestCase):
-    def test_new_transcript_revision_from_old(self):
+    def setUp(self):
+        super().setUp()
+        # works for translations or transcriptions
+        self.now = timezone.now().isoformat()
+        self.yesterday = (timezone.now() - timedelta(days=1)).isoformat()
+        self.action_dict = {
+            'dateCreated': None,
+            'dateModified': self.now,
+            'languageCode': 'en',
+            'revisions': [
+                {
+                    'dateModified': self.yesterday,
+                    'languageCode': 'en',
+                    'value': 'Old value',
+                }
+            ],
+            'value': 'Latest value',
+        }
+
+    def test_new_revision_from_old(self):
         now = timezone.now()
         old = {
             'dateCreated': None,
@@ -26,7 +44,7 @@ def test_new_transcript_revision_from_old(self):
             'value': 'Transcribed new',
         }
         with freeze_time(now):
-            result = new_revision_from_old(old)
+            result = _new_revision_from_old(old)
         assert result['value'] == old['value']
         assert result['language'] == old['languageCode']
         assert result['_dateCreated'] == old['dateModified']
@@ -35,30 +53,15 @@ def test_new_transcript_revision_from_old(self):
 
     def test_new_transcript_revision_from_old_returns_none_for_bad_data(self):
         old = {'badly': 'formatted'}
-        assert new_revision_from_old(old) is None
+        assert _new_revision_from_old(old) is None
 
     @data(True, False)
-    def test_separate_automated_and_manual_transcriptions(self, latest_is_automated):
-        now = timezone.now()
-        yesterday = timezone.now() - timedelta(days=1)
-        transcript_dict = {
-            'dateCreated': None,
-            'dateModified': now,
-            'languageCode': 'en',
-            'revisions': [
-                {
-                    'dateModified': yesterday,
-                    'languageCode': 'en',
-                    'value': 'Old transcript',
-                }
-            ],
-            'value': 'Latest transcript',
-        }
+    def test_separate_automatic_and_manual(self, latest_is_automated):
         automated_transcription_value = (
-            'Latest transcript' if latest_is_automated else 'Old transcript'
+            'Latest value' if latest_is_automated else 'Old value'
         )
-        manual, automated = separate_transcriptions(
-            transcript_dict, 'en', automated_transcription_value
+        manual, automated = _separate_manual_and_automatic_versions(
+            self.action_dict, 'en', automated_transcription_value
         )
         new_automated_transcript = automated[0]
         new_manual_transcript = manual[0]
@@ -69,153 +72,143 @@ def test_separate_automated_and_manual_transcriptions(self, latest_is_automated)
             new_manual_transcript if latest_is_automated else new_automated_transcript
         )
 
-        assert expected_most_recent_transcript['_dateCreated'] == now
-        assert expected_most_recent_transcript['value'] == 'Latest transcript'
-        assert expected_old_transcript['_dateCreated'] == yesterday
-        assert expected_old_transcript['value'] == 'Old transcript'
+        assert expected_most_recent_transcript['_dateCreated'] == self.now
+        assert expected_most_recent_transcript['value'] == 'Latest value'
+        assert expected_old_transcript['_dateCreated'] == self.yesterday
+        assert expected_old_transcript['value'] == 'Old value'
 
-    def test_migrate_transcriptions(self):
-        now = timezone.now()
-        one_year_ago = now - timedelta(days=365)
-        old_version = {'Audio_question': {'googlets': {'languageCode': 'en',
-                                 'regionCode': None,
-                                 'status': 'complete',
-                                 'value': 'This is audio that I am trying to '
-                                          'transcribe.'},
-                    'transcript': {'dateCreated': one_year_ago,
-                                   'dateModified': now,
-                                   'languageCode': 'en',
-                                   'revisions': [{'dateModified': one_year_ago,
-                                                  'languageCode': 'en',
-                                                  'value': 'This is audio that '
-                                                           'I am trying to '
-                                                           'transcribe.'},
-                                                 {}],
-                                   'value': 'This is audio that I am trying to '
-                                            'transcribe but i edited it.'},
-                                          }
-                       }
-        with patch('kobo.apps.subsequences.utils.versioning.generate_uuid_for_form', side_effect=['uuid1', 'uuid2']):
-            with freeze_time(now):
-                migrated = migrate_submission_supplementals(old_version)
-        expected = {
-            '_version': '20250820',
-            'Audio_question': {
-                                  'automatic_transcription': {
-                                      '_dateCreated': one_year_ago,
-                                      '_dateModified': one_year_ago,
-                                      '_versions': [
-                                          {
-                                              '_dateCreated': one_year_ago,
-                                              '_dateAccepted': now,
-                                              '_uuid':'uuid2',
-                                              'language': 'en',
-                                              'value': 'This is audio that I am trying to '
-                                                       'transcribe.',
-                                              'status': 'complete',
-                                          }
-                                      ]
-                                  },
-                                  'manual_transcription': {
-                                      '_dateCreated': now,
-                                      '_dateModified': now,
-                                      '_versions': [
-                                          {
-                                              '_dateCreated': now,
-                                              '_dateAccepted': None,
-                                              '_uuid':'uuid1',
-                                              'language': 'en',
-                                              'value': 'This is audio that I am trying to '
-                                                       'transcribe but i edited it.',
-                                          }
-                                      ]
-                                  },
-            }
-        }
-        assert migrated == expected
+    def test_separate_automatic_and_manual_forces_language_if_given(self):
+        manual, automated = _separate_manual_and_automatic_versions(
+            self.action_dict, None, None, language='en'
+        )
+        for formatted_item in manual:
+            assert formatted_item['language'] == 'en'
+
+    def test_separate_automatic_and_manual_without_automatic_value(self):
+        manual, automatic = _separate_manual_and_automatic_versions(
+            self.action_dict, None, None
+        )
+        assert len(manual) == 2
+        assert len(automatic) == 0
 
-    def test_determine_source_transcripts(self):
-        manual_transcripts = []
-        automatic_transcripts = []
+    @data(
+        # known language, date created, expected result uuid
+        # there is a transcript of the same language with an older date
+        ('de', '2024-12-31', 'uuid4'),
+        # there are transcripts of the same language but none older than the translation
+        ('de', '2023-01-01', 'uuid3'),
+        # there are no transcripts of the same language
+        ('fr', '2024-12-31', 'uuid1'),
+        # we don't know the source language but there are older transcripts
+        (None, '2024-12-31', 'uuid2'),
+        # we don't know the source language and there are no older transcripts
+        (None, '2023-01-01', 'uuid1'),
+    )
+    @unpack
+    def test_determine_source_transcription(
+        self, source_language, date_created, expected_source_uuid
+    ):
         now = timezone.now()
-        for i in range(5):
-            manual = {
-                '_dateCreated': now - timedelta(days=i),
+        one_day_ago = now - timedelta(days=1)
+        jan_1_2024 = datetime(2024, 1, 1, tzinfo=timezone.utc)
+        jan_2_2024 = datetime(2024, 1, 2, tzinfo=timezone.utc)
+        transcripts = [
+            {
+                '_uuid': 'uuid1',
+                '_dateCreated': now.isoformat(),
                 'language': 'en',
-                'value': 'Value',
-                '_uuid': f'uuid-{i}-manual',
-                '_dateAccepted': None,
-            }
-            automatic = {
-                **manual,
-                '_uuid': f'uuid-{i}-automatic',
-                '_dateCreated': now - timedelta(days=i + 1),
-            }
-            manual_transcripts.append(manual)
-            automatic_transcripts.append(automatic)
-        # add an old transcript in a different language
-        manual_transcripts.append(
+                '_actionId': 'manual_transcription',
+            },
             {
-                '_dateCreated': now - timedelta(days=5),
-                'language': 'fr',
-                'value': 'Value',
-                '_uuid': f'uuid-5-manual',
-                '_dateAccepted': None,
-            }
-        )
-        most_recent_overall, most_recent_by_language = determine_source_transcripts(
-            manual_transcripts, automatic_transcripts
+                '_uuid': 'uuid2',
+                '_dateCreated': jan_1_2024.isoformat(),
+                'language': 'en',
+                '_actionId': 'automatic_transcription',
+            },
+            {
+                '_uuid': 'uuid3',
+                '_dateCreated': one_day_ago.isoformat(),
+                'language': 'de',
+                '_actionId': 'manual_transcription',
+            },
+            {
+                '_uuid': 'uuid4',
+                '_dateCreated': jan_2_2024.isoformat(),
+                'language': 'de',
+                '_actionId': 'automatic_transcription',
+            },
+        ]
+        translation_revision = {'_dateCreated': date_created}
+        source_transcript = _determine_source_transcript(
+            translation_revision, transcripts, automatic_source_language=source_language
         )
-        assert most_recent_overall['_uuid'] == 'uuid-0-manual'
-        assert most_recent_overall['_actionId'] == 'manual_transcription'
-        assert most_recent_by_language['en']['_uuid'] == 'uuid-0-manual'
-        assert most_recent_by_language['en']['_actionId'] == 'manual_transcription'
-        assert most_recent_by_language['fr']['_uuid'] == 'uuid-5-manual'
-        assert most_recent_by_language['fr']['_actionId'] == 'manual_transcription'
-
-    def test_migrate_translations(self):
-        pass
-
+        assert source_transcript['_uuid'] == expected_source_uuid
 
+    # test the whole transformation process
     def test_migrate_submission_extra_to_supplemental(self):
         now = timezone.now()
+        one_day_ago = (now - timedelta(days=1)).isoformat()
         one_year_ago = (now - timedelta(days=365)).isoformat()
-        old_version = {'Audio_question': {'googlets': {'languageCode': 'en',
-                                 'regionCode': None,
-                                 'status': 'complete',
-                                 'value': 'This is audio that I am trying to '
-                                          'transcribe.'},
-                    'googletx': {'languageCode': 'es',
-                                 'source': 'en',
-                                 'status': 'complete',
-                                 'value': 'Esto es un audio que estoy '
-                                          'intentando a transcribir.'},
-                    'transcript': {'dateCreated': one_year_ago,
-                                   'dateModified': now.isoformat(),
-                                   'languageCode': 'en',
-                                   'revisions': [{'dateModified': one_year_ago,
-                                                  'languageCode': 'en',
-                                                  'value': 'This is audio that '
-                                                           'I am trying to '
-                                                           'transcribe.'},
-                                                 {}],
-                                   'value': 'This is audio that I am trying to '
-                                            'transcribe but i edited it.'},
-                    'translation': {'es': {'dateCreated': one_year_ago,
-                                           'dateModified': now.isoformat(),
-                                           'languageCode': 'es',
-                                           'revisions': [{'dateModified': one_year_ago,
-                                                          'languageCode': 'es',
-                                                          'value': 'Esto es un '
-                                                                   'audio que '
-                                                                   'estoy '
-                                                                   'intentando a '
-                                                                   'transcribir.'}],
-                                           'value': 'Esto es un audio que '
-                                                    'estoy intentando '
-                                                    'transcribir pero yo lo edité'}}}}
+        a_year_and_a_day_ago = (now - timedelta(days=366)).isoformat()
+        old_version = {
+            'Audio_question': {
+                'googlets': {
+                    'languageCode': 'en',
+                    'regionCode': None,
+                    'status': 'complete',
+                    'value': 'This is audio that I am trying to ' 'transcribe.',
+                },
+                'googletx': {
+                    'languageCode': 'es',
+                    'source': 'en',
+                    'status': 'complete',
+                    'value': 'Esto es un audio que estoy ' 'intentando a transcribir.',
+                },
+                'transcript': {
+                    'dateCreated': one_day_ago,
+                    'dateModified': one_day_ago,
+                    'languageCode': 'en',
+                    'revisions': [
+                        {
+                            'dateModified': a_year_and_a_day_ago,
+                            'languageCode': 'en',
+                            'value': 'This is audio that '
+                            'I am trying to '
+                            'transcribe.',
+                        },
+                        {},
+                    ],
+                    'value': 'This is audio that I am trying to '
+                    'transcribe but i edited it.',
+                },
+                'translation': {
+                    'es': {
+                        'dateCreated': one_year_ago,
+                        'dateModified': now.isoformat(),
+                        'languageCode': 'es',
+                        'revisions': [
+                            {
+                                'dateModified': one_year_ago,
+                                'languageCode': 'es',
+                                'value': 'Esto es un '
+                                'audio que '
+                                'estoy '
+                                'intentando a '
+                                'transcribir.',
+                            }
+                        ],
+                        'value': 'Esto es un audio que '
+                        'estoy intentando '
+                        'transcribir pero yo lo edité',
+                    }
+                },
+            }
+        }
 
-        with patch('kobo.apps.subsequences.utils.versioning.generate_uuid_for_form', side_effect=['uuid1', 'uuid2', 'uuid3', 'uuid4']):
+        with patch(
+            'kobo.apps.subsequences.utils.versioning.generate_uuid_for_form',
+            side_effect=['uuid1', 'uuid2', 'uuid3', 'uuid4'],
+        ):
             with freeze_time(now):
                 migrated = migrate_submission_supplementals(old_version)
 
@@ -223,11 +216,11 @@ def test_migrate_submission_extra_to_supplemental(self):
             '_version': '20250820',
             'Audio_question': {
                 'automatic_transcription': {
-                    '_dateCreated': one_year_ago,
-                    '_dateModified': one_year_ago,
+                    '_dateCreated': a_year_and_a_day_ago,
+                    '_dateModified': a_year_and_a_day_ago,
                     '_versions': [
                         {
-                            '_dateCreated': one_year_ago,
+                            '_dateCreated': a_year_and_a_day_ago,
                             '_dateAccepted': now.isoformat(),
                             '_uuid':'uuid2',
                             'language': 'en',
@@ -245,22 +238,25 @@ def test_migrate_submission_extra_to_supplemental(self):
                             {
                                 '_dateCreated': one_year_ago,
                                 '_dateAccepted': now.isoformat(),
-                                '_dependency': {'_actionId': 'manual_transcription',
-                                                '_uuid': 'uuid1'},
-                                '_uuid':'uuid4',
+                                '_dependency': {
+                                    '_actionId': 'automatic_transcription',
+                                    '_uuid': 'uuid2',
+                                },
+                                '_uuid': 'uuid4',
                                 'language': 'es',
-                                'value': 'Esto es un audio que estoy intentando a transcribir.',
-                                'status': 'complete'
+                                'value': 'Esto es un audio que estoy intentando a'
+                                ' transcribir.',
+                                'status': 'complete',
                             }
                         ]
                     }
                 },
                 'manual_transcription': {
-                    '_dateCreated': now.isoformat(),
-                    '_dateModified': now.isoformat(),
+                    '_dateCreated': one_day_ago,
+                    '_dateModified': one_day_ago,
                     '_versions': [
                         {
-                            '_dateCreated': now.isoformat(),
+                            '_dateCreated': one_day_ago,
                             '_dateAccepted': None,
                             '_uuid':'uuid1',
                             'language': 'en',
@@ -281,12 +277,13 @@ def test_migrate_submission_extra_to_supplemental(self):
                                                 '_uuid': 'uuid1'},
                                 '_uuid':'uuid3',
                                 'language': 'es',
-                                'value': 'Esto es un audio que estoy intentando transcribir pero yo lo edité',
+                                'value': 'Esto es un audio que estoy intentando'
+                                ' transcribir pero yo lo edité',
                             }
                         ]
                     }
                 },
             }
         }
-        assert migrated == new_version  # add assertion here
+        assert migrated == new_version
 
diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index 15e82c3c26..fc45e5859d 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -4,8 +4,135 @@
 from ..constants import SCHEMA_VERSIONS
 
 
-class InvalidSupplementalFormat(Exception):
-    pass
+def _add_translation_sources(
+    version_list, all_tagged_transcripts, automatic_translation_source_language=None
+):
+    for translation in version_list:
+        # determine and record the most likely source transcript
+        source = _determine_source_transcript(
+            translation,
+            all_tagged_transcripts,
+            automatic_source_language=automatic_translation_source_language,
+        )
+        translation['_dependency'] = {
+            '_uuid': source['_uuid'],
+            '_actionId': source['_actionId'],
+        }
+
+
+def _combine_source_transcripts(manual_transcripts, automatic_transcripts):
+    # Combine manual and automatic transcripts and sort by dateCreated descending
+    # tag them with the action so we don't lose track
+    tagged_manual_transcripts = [
+        {**transcript, '_actionId': 'manual_transcription'}
+        for transcript in manual_transcripts
+    ]
+    tagged_automatic_transcripts = [
+        {**transcript, '_actionId': 'automatic_transcription'}
+        for transcript in automatic_transcripts
+    ]
+
+    all_tagged_transcripts = [*tagged_manual_transcripts, *tagged_automatic_transcripts]
+    all_tagged_transcripts.sort(reverse=True, key=lambda d: d['_dateCreated'])
+    return all_tagged_transcripts
+
+
+def _determine_source_transcript(
+    translation_revision, all_transcripts, automatic_source_language=None
+):
+    if automatic_source_language:  # we know the source language
+        transcripts_matching_language = [
+            transcript
+            for transcript in all_transcripts
+            if transcript['language'] == automatic_source_language
+        ]
+        for transcript in transcripts_matching_language:
+            # is there a transcript in the source language created earlier than the
+            # translation?
+            if transcript['_dateCreated'] < translation_revision['_dateCreated']:
+                return transcript
+        # if not, is there *any* transcript in the source language? take the most
+        # recent one
+        if len(transcripts_matching_language) > 0:
+            return transcripts_matching_language[0]
+    else:
+        # is there a transcript older than the translation?
+        for transcript in all_transcripts:
+            if transcript['_dateCreated'] < translation_revision['_dateCreated']:
+                return transcript
+    # default to the most recent transcript
+    return all_transcripts[0]
+
+
+def _new_revision_from_old(old_transcript_revision_dict: dict) -> dict | None:
+    # ignore bad data
+    if (
+        'languageCode' not in old_transcript_revision_dict
+        or 'value' not in old_transcript_revision_dict
+    ):
+        return None
+    return {
+        '_dateCreated': old_transcript_revision_dict.get('dateModified', None),
+        'language': old_transcript_revision_dict['languageCode'],
+        'value': old_transcript_revision_dict['value'],
+        '_uuid': generate_uuid_for_form(),
+        '_dateAccepted': None,
+    }
+
+
+def _separate_manual_and_automatic_versions(
+    old_action_dictionary,
+    automatic_result_language,
+    automatic_result_value,
+    # translations have an expected language
+    language=None,
+):
+    automatic_versions = []
+    manual_versions = []
+    latest_revision = {
+        key: val
+        for key, val in old_action_dictionary.items()
+        if key in ['value', 'languageCode', 'dateModified']
+    }
+    # add the latest revision to the list of all revisions for easier processing
+    all_revisions = [latest_revision, *old_action_dictionary.get('revisions', [])]
+    for revision in all_revisions:
+        if language:
+            # force the expected language if given
+            revision['languageCode'] = language
+        revision_formatted = _new_revision_from_old(revision)
+        if revision_formatted is None:
+            continue
+        # if the language and value match that of the automatic result,
+        # assume this one was generated automatically
+        matches_automatic_result = (
+            revision_formatted['language'] == automatic_result_language
+            and revision_formatted['value'] == automatic_result_value
+        )
+        correct_version_list_to_append = (
+            automatic_versions if matches_automatic_result else manual_versions
+        )
+        if matches_automatic_result:
+            # automatic versions also need a status and a date accepted
+            revision_formatted['status'] = 'complete'
+            revision_formatted['_dateAccepted'] = timezone.now().isoformat()
+        correct_version_list_to_append.append(revision_formatted)
+
+    # they should be sorted anyway, but just make sure in case the input values
+    # weren't sorted correctly
+    manual_versions.sort(reverse=True, key=lambda d: d['_dateCreated'])
+    automatic_versions.sort(reverse=True, key=lambda d: d['_dateCreated'])
+
+    return manual_versions, automatic_versions
+
+
+def _version_list_to_summary_dict(list_of_versions: list[dict]) -> dict:
+    return {
+        '_dateCreated': list_of_versions[-1]['_dateCreated'],
+        '_dateModified': list_of_versions[0]['_dateCreated'],
+        '_versions': list_of_versions,
+    }
+
 
 def migrate_advanced_features(advanced_features: dict) -> dict | None:
 
@@ -45,11 +172,7 @@ def migrate_advanced_features(advanced_features: dict) -> dict | None:
     return migrated_advanced_features
 
 
-def set_version(schema: dict) -> dict:
-    schema['_version'] = SCHEMA_VERSIONS[0]
-    return schema
-
-def migrate_submission_supplementals(supplemental_data:dict) -> dict:
+def migrate_submission_supplementals(supplemental_data: dict) -> dict | None:
     if supplemental_data.get('_version', None) == SCHEMA_VERSIONS[0]:
         return
     supplemental = {
@@ -57,238 +180,78 @@ def migrate_submission_supplementals(supplemental_data:dict) -> dict:
     }
     for question_xpath, action_results in supplemental_data.items():
         question_results_by_action = {}
-        automatic_transcript_language, automatic_transcript_result = (
-            get_automatic_transcription(action_results)
-        )
-        manual_transcripts, automatic_transcripts = separate_transcriptions(
-            action_results.get('transcript', None),
-            automatic_transcript_language,
-            automatic_transcript_result,
+
+        # get all the automatic result data
+        automatic_transcript = action_results.get('googlets', {})
+        automatic_transcript_language = automatic_transcript.get('languageCode', None)
+        automatic_transcript_value = automatic_transcript.get('value', None)
+        automatic_translation = action_results.get('googletx', {})
+        automatic_translation_language = automatic_translation.get('languageCode')
+        automatic_translation_value = automatic_translation.get('value')
+        automatic_translation_source_language = automatic_translation.get('source')
+
+        # divide transcripts into manual and automatic
+        manual_transcripts, automatic_transcripts = (
+            _separate_manual_and_automatic_versions(
+                action_results.get('transcript', None),
+                automatic_transcript_language,
+                automatic_transcript_value,
+            )
         )
-        # should already be sorted by date created descending, but just in case
-        manual_transcripts.sort(reverse=True, key=lambda d: d['_dateCreated'])
-        automatic_transcripts.sort(reverse=True, key=lambda d: d['_dateCreated'])
 
         if len(manual_transcripts) > 0:
-            question_results_by_action['manual_transcription'] = {
-                '_dateCreated': manual_transcripts[-1]['_dateCreated'],
-                '_dateModified': manual_transcripts[0]['_dateCreated'],
-                '_versions': manual_transcripts,
-            }
+            question_results_by_action['manual_transcription'] = (
+                _version_list_to_summary_dict(manual_transcripts)
+            )
         if len(automatic_transcripts) > 0:
-            question_results_by_action['automatic_transcription'] = {
-                '_dateCreated': automatic_transcripts[-1]['_dateCreated'],
-                '_dateModified': automatic_transcripts[0]['_dateCreated'],
-                '_versions': automatic_transcripts,
-            }
-
-        # translation
-        # determine what to use as the source transcript
-        most_recent_transcript, most_recent_transcript_by_language = (
-            determine_source_transcripts(manual_transcripts, automatic_transcripts)
-        )
-        (
-            automatic_translation_source_language,
-            automatic_translation_language,
-            automatic_translation_value,
-        ) = get_automatic_translation(action_results)
+            question_results_by_action['automatic_transcription'] = (
+                _version_list_to_summary_dict(automatic_transcripts)
+            )
 
+        # process translations
         translations_dict = action_results.get('translation', {})
         automatic_translations = {}
         manual_translations = {}
+
+        # divide translations into manual and automatic by language
         for language_code, translations in translations_dict.items():
-            manual_translations_for_language, automatic_translations_for_language  = separate_translations(
-                language_code,
-                translations,
-                automatic_translation_source_language,
-                automatic_translation_language,
-                automatic_translation_value,
-                most_recent_transcript,
-                most_recent_transcript_by_language,
+            manual_translations_for_language, automatic_translations_for_language = (
+                _separate_manual_and_automatic_versions(
+                    translations,
+                    automatic_translation_language,
+                    automatic_translation_value,
+                    language_code,
+                )
+            )
+
+            all_tagged_transcripts = _combine_source_transcripts(
+                manual_transcripts, automatic_transcripts
             )
             if len(automatic_translations_for_language) > 0:
-                automatic_translations_for_language.sort(reverse=True, key =lambda x: x['_dateCreated'])
-                automatic_translations[language_code] = {
-                    '_dateCreated': automatic_translations_for_language[-1]['_dateCreated'],
-                    '_dateModified': automatic_translations_for_language[0]['_dateCreated']
-                }
-                automatic_translations[language_code]['_versions'] = automatic_translations_for_language
+                _add_translation_sources(
+                    automatic_translations_for_language,
+                    all_tagged_transcripts,
+                    automatic_translation_source_language,
+                )
+                automatic_translations[language_code] = _version_list_to_summary_dict(
+                    automatic_translations_for_language
+                )
             if len(manual_translations_for_language) > 0:
-                manual_translations_for_language.sort(reverse=True, key =lambda x: x['_dateCreated'])
-                manual_translations[language_code] = {
-                    '_dateCreated': manual_translations_for_language[-1]['_dateCreated'],
-                    '_dateModified': manual_translations_for_language[0]['_dateCreated']
-                }
-                manual_translations[language_code]['_versions'] = manual_translations_for_language
+                _add_translation_sources(
+                    manual_translations_for_language, all_tagged_transcripts
+                )
+                manual_translations[language_code] = _version_list_to_summary_dict(
+                    manual_translations_for_language
+                )
         if automatic_translations != {}:
             question_results_by_action['automatic_translation'] = automatic_translations
-        question_results_by_action['manual_translation'] = manual_translations
+        if manual_translations != {}:
+            question_results_by_action['manual_translation'] = manual_translations
         supplemental[question_xpath] = question_results_by_action
 
-
     return supplemental
 
 
-def determine_source_transcripts(manual_transcripts, automatic_transcripts):
-    # First combine manual and automatic transcripts and sort by dateCreated descending
-    # tag them with the action so we don't lose track
-    tagged_manual_transcripts = [
-        {**transcript, '_actionId': 'manual_transcription'}
-        for transcript in manual_transcripts
-    ]
-    tagged_automatic_transcripts = [
-        {**transcript, '_actionId': 'automatic_translation'}
-        for transcript in automatic_transcripts
-    ]
-
-    all_tagged_transcripts = [*tagged_manual_transcripts, *tagged_automatic_transcripts]
-    all_tagged_transcripts.sort(reverse=True, key=lambda d: d['_dateCreated'])
-
-    # take the most recent transcript, manual or automatic, by language
-    most_recent_transcript_uuids_by_language = {}
-    for transcript in all_tagged_transcripts:
-        if most_recent_transcript_uuids_by_language.get(transcript['language']) is None:
-            most_recent_transcript_uuids_by_language[transcript['language']] = {
-                '_uuid': transcript['_uuid'],
-                '_actionId': transcript['_actionId'],
-            }
-
-    # we don't always know the source language of a translation, so also get the most recent transcript overall
-    most_recent_transcript_overall = all_tagged_transcripts[0]
-    most_recent_transcript_overall = {
-        '_uuid': most_recent_transcript_overall['_uuid'],
-        '_actionId': most_recent_transcript_overall['_actionId'],
-    }
-    return most_recent_transcript_overall, most_recent_transcript_uuids_by_language
-
-
-def get_automatic_transcription(
-    action_results: dict,
-) -> tuple[str | None, str | None] | None:
-    googlets = action_results.get('googlets', {})
-    return googlets.get('languageCode', None), googlets.get('value', None)
-
-def get_automatic_translation(action_results:dict):
-    googletx = action_results.get('googletx', {})
-    return (
-        googletx.get('source', None),
-        googletx.get('languageCode', None),
-        googletx.get('value', None),
-    )
-
-
-def new_revision_from_old(old_transcript_revision_dict: dict) -> dict | None:
-    # ignore bad data
-    if (
-        'languageCode' not in old_transcript_revision_dict
-        or 'value' not in old_transcript_revision_dict
-    ):
-        return None
-    return {
-        '_dateCreated': old_transcript_revision_dict.get('dateModified', None),
-        'language': old_transcript_revision_dict['languageCode'],
-        'value': old_transcript_revision_dict['value'],
-        '_uuid': generate_uuid_for_form(),
-        '_dateAccepted': None,
-    }
-
-
-def separate_transcriptions(
-    transcription_dict: dict,
-    automatic_transcript_language: str = None,
-    automatic_transcript_value: str = None,
-) -> tuple[list, list]:
-    if not transcription_dict:
-        return [], []
-    automatic_transcriptions = []
-    manual_transcriptions = []
-    latest_revision = new_revision_from_old(transcription_dict)
-    if latest_revision:
-        if (
-            latest_revision['value'] == automatic_transcript_value
-            and latest_revision['language'] == automatic_transcript_language
-        ):
-            latest_revision['status'] = 'complete'
-            latest_revision['_dateAccepted'] = timezone.now().isoformat()
-            automatic_transcriptions.append(latest_revision)
-        else:
-            manual_transcriptions.append(latest_revision)
-
-    for revision in transcription_dict.get('revisions', []):
-        revision_formatted = new_revision_from_old(revision)
-        if revision_formatted is None:
-            continue
-        if (
-            revision_formatted['language'] == automatic_transcript_language
-            and revision['value'] == automatic_transcript_value
-        ):
-            revision_formatted['status'] = 'complete'
-            revision_formatted['_dateAccepted'] = timezone.now().isoformat()
-            automatic_transcriptions.append(revision_formatted)
-        else:
-            manual_transcriptions.append(revision_formatted)
-    return manual_transcriptions, automatic_transcriptions
-
-
-def separate_translations(
-    language,
-    translation_dict,
-    automatic_translation_source_language: str = None,
-    automatic_translation_language: str = None,
-    automatic_translation_value: str = None,
-    most_recent_transcript=None,
-    most_recent_transcript_by_language=None,
-) -> tuple[list, list]:
-    """
-    {'es': {'dateCreated': '2025-10-22T14:30:38Z',
-                                   'dateModified': '2025-10-22T17:10:23Z',
-                                   'languageCode': 'es',
-                                   'revisions': [{'dateModified': '2025-10-22T14:30:38Z',
-                                                  'languageCode': 'es',
-                                                  'value': 'Este es un '
-                                                           'audio que '
-                                                           'estoy '
-                                                           'intentando '
-                                                           'transcribir.'}],
-                                   'value': 'Este es un audio que '
-                                            'estoy intentando '
-                                            'transcribir pero yo lo edité'}}
-    """
-    automatic_translations = []
-    manual_translations = []
-    latest_revision = new_revision_from_old(translation_dict)
-    if latest_revision:
-        if (
-            latest_revision['value'] == automatic_translation_value
-            and language == automatic_translation_language
-        ):
-            latest_revision['status'] = 'complete'
-            latest_revision['_dateAccepted'] = timezone.now().isoformat()
-            source = most_recent_transcript_by_language.get(
-                automatic_translation_source_language, most_recent_transcript
-            )
-            latest_revision['_dependency'] = source
-            automatic_translations.append(latest_revision)
-        else:
-            latest_revision['_dependency'] = most_recent_transcript
-            manual_translations.append(latest_revision)
-
-    for revision in translation_dict.get('revisions', []):
-        revision_formatted = new_revision_from_old(revision)
-        if revision_formatted is None:
-            continue
-        if (
-            language == automatic_translation_language
-            and revision['value'] == automatic_translation_value
-        ):
-            revision_formatted['status'] = 'complete'
-            revision_formatted['_dateAccepted'] = timezone.now().isoformat()
-            source = most_recent_transcript_by_language.get(
-                automatic_translation_source_language, most_recent_transcript
-            )
-            revision_formatted['_dependency'] = source
-            automatic_translations.append(revision_formatted)
-        else:
-            revision_formatted['_dependency'] = most_recent_transcript
-            manual_translations.append(revision_formatted)
-    return manual_translations, automatic_translations
+def set_version(schema: dict) -> dict:
+    schema['_version'] = SCHEMA_VERSIONS[0]
+    return schema

From c638c6210f78bbee555f5ff1c7cf8fc16c40e78a Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Tue, 28 Oct 2025 15:29:12 -0400
Subject: [PATCH 10/18] fixup!: accidental change

---
 kobo/apps/subsequences/tests/test_models.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kobo/apps/subsequences/tests/test_models.py b/kobo/apps/subsequences/tests/test_models.py
index 81bf26f267..6e49e1c530 100644
--- a/kobo/apps/subsequences/tests/test_models.py
+++ b/kobo/apps/subsequences/tests/test_models.py
@@ -337,4 +337,3 @@ def test_revise_data_raise_error_wrong_question_name(self):
                     },
                 },
             )
-

From 73da17c94ac80e337351e67eb4058bdd5c5e26b8 Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Tue, 28 Oct 2025 15:30:28 -0400
Subject: [PATCH 11/18] fixup!: accidental change

---
 kobo/settings/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kobo/settings/base.py b/kobo/settings/base.py
index e64acfefe5..03c4d13887 100644
--- a/kobo/settings/base.py
+++ b/kobo/settings/base.py
@@ -1587,7 +1587,7 @@ def dj_stripe_request_callback_method():
 
 # Google Cloud Storage
 # Not fully supported as a generic storage backend
-GS_BUCKET_NAME = 'kobo-transcription-test' #env.str('GS_BUCKET_NAME', None)
+GS_BUCKET_NAME = env.str('GS_BUCKET_NAME', None)
 
 
 """ Django error logging configuration """

From 397d8839f1de553d339fb8fa469bb5ea767f6b9f Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Wed, 29 Oct 2025 08:31:55 -0400
Subject: [PATCH 12/18] fixup!: new uuid

---
 kobo/apps/subsequences/tests/test_versioning.py | 2 +-
 kobo/apps/subsequences/utils/versioning.py      | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/kobo/apps/subsequences/tests/test_versioning.py b/kobo/apps/subsequences/tests/test_versioning.py
index 1ebce79aed..e45b0e00a9 100644
--- a/kobo/apps/subsequences/tests/test_versioning.py
+++ b/kobo/apps/subsequences/tests/test_versioning.py
@@ -206,7 +206,7 @@ def test_migrate_submission_extra_to_supplemental(self):
         }
 
         with patch(
-            'kobo.apps.subsequences.utils.versioning.generate_uuid_for_form',
+            'kobo.apps.subsequences.utils.versioning.uuid.uuid4',
             side_effect=['uuid1', 'uuid2', 'uuid3', 'uuid4'],
         ):
             with freeze_time(now):
diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index fc45e5859d..95436a561b 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -1,6 +1,7 @@
+import uuid
+
 from django.utils import timezone
 
-from ...openrosa.libs.utils.model_tools import generate_uuid_for_form
 from ..constants import SCHEMA_VERSIONS
 
 
@@ -75,7 +76,7 @@ def _new_revision_from_old(old_transcript_revision_dict: dict) -> dict | None:
         '_dateCreated': old_transcript_revision_dict.get('dateModified', None),
         'language': old_transcript_revision_dict['languageCode'],
         'value': old_transcript_revision_dict['value'],
-        '_uuid': generate_uuid_for_form(),
+        '_uuid': uuid.uuid4(),
         '_dateAccepted': None,
     }
 

From 3fd8f593963657a5421aae05ffe95816c3d66db4 Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Wed, 29 Oct 2025 08:44:17 -0400
Subject: [PATCH 13/18] fixup!: format

---
 kobo/apps/subsequences/tests/test_versioning.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/kobo/apps/subsequences/tests/test_versioning.py b/kobo/apps/subsequences/tests/test_versioning.py
index e45b0e00a9..52961f17b8 100644
--- a/kobo/apps/subsequences/tests/test_versioning.py
+++ b/kobo/apps/subsequences/tests/test_versioning.py
@@ -222,10 +222,9 @@ def test_migrate_submission_extra_to_supplemental(self):
                         {
                             '_dateCreated': a_year_and_a_day_ago,
                             '_dateAccepted': now.isoformat(),
-                            '_uuid':'uuid2',
+                            '_uuid': 'uuid2',
                             'language': 'en',
-                            'value': 'This is audio that I am trying to '
-                                          'transcribe.',
+                            'value': 'This is audio that I am trying to transcribe.',
                             'status': 'complete',
                         }
                     ]
@@ -258,7 +257,7 @@ def test_migrate_submission_extra_to_supplemental(self):
                         {
                             '_dateCreated': one_day_ago,
                             '_dateAccepted': None,
-                            '_uuid':'uuid1',
+                            '_uuid': 'uuid1',
                             'language': 'en',
                             'value': 'This is audio that I am trying to '
                                      'transcribe but i edited it.',
@@ -275,7 +274,7 @@ def test_migrate_submission_extra_to_supplemental(self):
                                 '_dateAccepted': None,
                                 '_dependency': {'_actionId': 'manual_transcription',
                                                 '_uuid': 'uuid1'},
-                                '_uuid':'uuid3',
+                                '_uuid': 'uuid3',
                                 'language': 'es',
                                 'value': 'Esto es un audio que estoy intentando'
                                 ' transcribir pero yo lo edité',
@@ -286,4 +285,3 @@ def test_migrate_submission_extra_to_supplemental(self):
             }
         }
         assert migrated == new_version
-

From 746766cfb250381da883f3c44851460e021aaa1e Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Wed, 29 Oct 2025 09:52:24 -0400
Subject: [PATCH 14/18] fixup!: stuff

---
 kobo/apps/subsequences/schemas.py          | 2 +-
 kobo/apps/subsequences/utils/versioning.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kobo/apps/subsequences/schemas.py b/kobo/apps/subsequences/schemas.py
index 333ff79c32..2ef53865c4 100644
--- a/kobo/apps/subsequences/schemas.py
+++ b/kobo/apps/subsequences/schemas.py
@@ -29,7 +29,7 @@
 
 
 def validate_submission_supplement(asset: 'kpi.models.Asset', supplement: dict):
-    jsonschema.validate(get_submission_supplement_schema(asset), supplement)
+    jsonschema.validate(supplement, get_submission_supplement_schema(asset))
 
 
 def get_submission_supplement_schema(asset: 'kpi.models.Asset') -> dict:
diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index 95436a561b..b5af63036f 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -76,7 +76,7 @@ def _new_revision_from_old(old_transcript_revision_dict: dict) -> dict | None:
         '_dateCreated': old_transcript_revision_dict.get('dateModified', None),
         'language': old_transcript_revision_dict['languageCode'],
         'value': old_transcript_revision_dict['value'],
-        '_uuid': uuid.uuid4(),
+        '_uuid': str(uuid.uuid4()),
         '_dateAccepted': None,
     }
 

From 263227f4458f901a853eb22daa70295d2bc5d583 Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Fri, 31 Oct 2025 08:34:34 -0400
Subject: [PATCH 15/18] fixup!: changes from review

---
 kobo/apps/subsequences/utils/versioning.py | 247 ++++++++++-----------
 1 file changed, 123 insertions(+), 124 deletions(-)

diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index b5af63036f..b4f1b02656 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -5,6 +5,128 @@
 from ..constants import SCHEMA_VERSIONS
 
 
+def migrate_advanced_features(advanced_features: dict) -> dict | None:
+
+    if advanced_features.get('_version') == SCHEMA_VERSIONS[0]:
+        return
+
+    migrated_advanced_features = {'_version': SCHEMA_VERSIONS[0], '_actionConfigs': {}}
+
+    actionConfigs = migrated_advanced_features['_actionConfigs']
+    for key, value in advanced_features.items():
+        if (
+            key == 'transcript'
+            and value
+            and 'languages' in value
+            and value['languages']
+        ):
+            actionConfigs['manual_transcription'] = [
+                {'language': language} for language in value['languages']
+            ]
+
+        if (
+            key == 'translation'
+            and value
+            and 'languages' in value
+            and value['languages']
+        ):
+            actionConfigs['manual_translation'] = [
+                {'language': language} for language in value['languages']
+            ]
+
+        if key == 'qual':
+            raise NotImplementedError
+
+    return migrated_advanced_features
+
+
+def migrate_submission_supplementals(supplemental_data: dict) -> dict | None:
+    if supplemental_data.get('_version') == SCHEMA_VERSIONS[0]:
+        return
+    supplemental = {
+        '_version': SCHEMA_VERSIONS[0],
+    }
+    for question_xpath, action_results in supplemental_data.items():
+        question_results_by_action = {}
+
+        # get all the automatic result data
+        automatic_transcript = action_results.get('googlets', {})
+        automatic_transcript_language = automatic_transcript.get('languageCode')
+        automatic_transcript_value = automatic_transcript.get('value')
+        automatic_translation = action_results.get('googletx', {})
+        automatic_translation_language = automatic_translation.get('languageCode')
+        automatic_translation_value = automatic_translation.get('value')
+        automatic_translation_source_language = automatic_translation.get('source')
+
+        # divide transcripts into manual and automatic
+        manual_transcripts, automatic_transcripts = (
+            _separate_manual_and_automatic_versions(
+                action_results.get('transcript'),
+                automatic_transcript_language,
+                automatic_transcript_value,
+            )
+        )
+
+        if len(manual_transcripts) > 0:
+            question_results_by_action['manual_transcription'] = (
+                _version_list_to_summary_dict(manual_transcripts)
+            )
+        if len(automatic_transcripts) > 0:
+            question_results_by_action['automatic_google_transcription'] = (
+                _version_list_to_summary_dict(automatic_transcripts)
+            )
+
+        # process translations
+        translations_dict = action_results.get('translation', {})
+        automatic_translations = {}
+        manual_translations = {}
+
+        # divide translations into manual and automatic by language
+        for language_code, translations in translations_dict.items():
+            manual_translations_for_language, automatic_translations_for_language = (
+                _separate_manual_and_automatic_versions(
+                    translations,
+                    automatic_translation_language,
+                    automatic_translation_value,
+                    language_code,
+                )
+            )
+
+            all_tagged_transcripts = _combine_source_transcripts(
+                manual_transcripts, automatic_transcripts
+            )
+            if len(automatic_translations_for_language) > 0:
+                _add_translation_sources(
+                    automatic_translations_for_language,
+                    all_tagged_transcripts,
+                    automatic_translation_source_language,
+                )
+                automatic_translations[language_code] = _version_list_to_summary_dict(
+                    automatic_translations_for_language
+                )
+            if len(manual_translations_for_language) > 0:
+                _add_translation_sources(
+                    manual_translations_for_language, all_tagged_transcripts
+                )
+                manual_translations[language_code] = _version_list_to_summary_dict(
+                    manual_translations_for_language
+                )
+        if automatic_translations != {}:
+            question_results_by_action['automatic_google_translation'] = (
+                automatic_translations
+            )
+        if manual_translations != {}:
+            question_results_by_action['manual_translation'] = manual_translations
+        supplemental[question_xpath] = question_results_by_action
+
+    return supplemental
+
+
+def set_version(schema: dict) -> dict:
+    schema['_version'] = SCHEMA_VERSIONS[0]
+    return schema
+
+
 def _add_translation_sources(
     version_list, all_tagged_transcripts, automatic_translation_source_language=None
 ):
@@ -73,7 +195,7 @@ def _new_revision_from_old(old_transcript_revision_dict: dict) -> dict | None:
     ):
         return None
     return {
-        '_dateCreated': old_transcript_revision_dict.get('dateModified', None),
+        '_dateCreated': old_transcript_revision_dict.get('dateModified'),
         'language': old_transcript_revision_dict['languageCode'],
         'value': old_transcript_revision_dict['value'],
         '_uuid': str(uuid.uuid4()),
@@ -133,126 +255,3 @@ def _version_list_to_summary_dict(list_of_versions: list[dict]) -> dict:
         '_dateModified': list_of_versions[0]['_dateCreated'],
         '_versions': list_of_versions,
     }
-
-
-def migrate_advanced_features(advanced_features: dict) -> dict | None:
-
-    if advanced_features.get('_version') == SCHEMA_VERSIONS[0]:
-        return
-
-    migrated_advanced_features = {
-        '_version': SCHEMA_VERSIONS[0],
-        '_actionConfigs': {}
-    }
-
-    actionConfigs = migrated_advanced_features['_actionConfigs']
-    for key, value in advanced_features.items():
-        if (
-            key == 'transcript'
-            and value
-            and 'languages' in value
-            and value['languages']
-        ):
-            actionConfigs['manual_transcription'] = [
-                {'language': language} for language in value['languages']
-            ]
-
-        if (
-            key == 'translation'
-            and value
-            and 'languages' in value
-            and value['languages']
-        ):
-            actionConfigs['manual_translation'] = [
-                {'language': language} for language in value['languages']
-            ]
-
-        if key == 'qual':
-            raise NotImplementedError
-
-    return migrated_advanced_features
-
-
-def migrate_submission_supplementals(supplemental_data: dict) -> dict | None:
-    if supplemental_data.get('_version', None) == SCHEMA_VERSIONS[0]:
-        return
-    supplemental = {
-        '_version': SCHEMA_VERSIONS[0],
-    }
-    for question_xpath, action_results in supplemental_data.items():
-        question_results_by_action = {}
-
-        # get all the automatic result data
-        automatic_transcript = action_results.get('googlets', {})
-        automatic_transcript_language = automatic_transcript.get('languageCode', None)
-        automatic_transcript_value = automatic_transcript.get('value', None)
-        automatic_translation = action_results.get('googletx', {})
-        automatic_translation_language = automatic_translation.get('languageCode')
-        automatic_translation_value = automatic_translation.get('value')
-        automatic_translation_source_language = automatic_translation.get('source')
-
-        # divide transcripts into manual and automatic
-        manual_transcripts, automatic_transcripts = (
-            _separate_manual_and_automatic_versions(
-                action_results.get('transcript', None),
-                automatic_transcript_language,
-                automatic_transcript_value,
-            )
-        )
-
-        if len(manual_transcripts) > 0:
-            question_results_by_action['manual_transcription'] = (
-                _version_list_to_summary_dict(manual_transcripts)
-            )
-        if len(automatic_transcripts) > 0:
-            question_results_by_action['automatic_transcription'] = (
-                _version_list_to_summary_dict(automatic_transcripts)
-            )
-
-        # process translations
-        translations_dict = action_results.get('translation', {})
-        automatic_translations = {}
-        manual_translations = {}
-
-        # divide translations into manual and automatic by language
-        for language_code, translations in translations_dict.items():
-            manual_translations_for_language, automatic_translations_for_language = (
-                _separate_manual_and_automatic_versions(
-                    translations,
-                    automatic_translation_language,
-                    automatic_translation_value,
-                    language_code,
-                )
-            )
-
-            all_tagged_transcripts = _combine_source_transcripts(
-                manual_transcripts, automatic_transcripts
-            )
-            if len(automatic_translations_for_language) > 0:
-                _add_translation_sources(
-                    automatic_translations_for_language,
-                    all_tagged_transcripts,
-                    automatic_translation_source_language,
-                )
-                automatic_translations[language_code] = _version_list_to_summary_dict(
-                    automatic_translations_for_language
-                )
-            if len(manual_translations_for_language) > 0:
-                _add_translation_sources(
-                    manual_translations_for_language, all_tagged_transcripts
-                )
-                manual_translations[language_code] = _version_list_to_summary_dict(
-                    manual_translations_for_language
-                )
-        if automatic_translations != {}:
-            question_results_by_action['automatic_translation'] = automatic_translations
-        if manual_translations != {}:
-            question_results_by_action['manual_translation'] = manual_translations
-        supplemental[question_xpath] = question_results_by_action
-
-    return supplemental
-
-
-def set_version(schema: dict) -> dict:
-    schema['_version'] = SCHEMA_VERSIONS[0]
-    return schema

From 8569da140d777ee1cf71aac692862908b035786a Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Tue, 4 Nov 2025 08:45:50 -0500
Subject: [PATCH 16/18] fixup!: action names

---
 kobo/apps/subsequences/tests/test_versioning.py             | 6 +++---
 kobo/apps/subsequences/utils/versioning.py                  | 2 +-
 .../tests/test_submission_extras_api_post.py                | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/kobo/apps/subsequences/tests/test_versioning.py b/kobo/apps/subsequences/tests/test_versioning.py
index 52961f17b8..a29a662b95 100644
--- a/kobo/apps/subsequences/tests/test_versioning.py
+++ b/kobo/apps/subsequences/tests/test_versioning.py
@@ -215,7 +215,7 @@ def test_migrate_submission_extra_to_supplemental(self):
         new_version = {
             '_version': '20250820',
             'Audio_question': {
-                'automatic_transcription': {
+                'automatic_google_transcription': {
                     '_dateCreated': a_year_and_a_day_ago,
                     '_dateModified': a_year_and_a_day_ago,
                     '_versions': [
@@ -229,7 +229,7 @@ def test_migrate_submission_extra_to_supplemental(self):
                         }
                     ]
                 },
-                'automatic_translation': {
+                'automatic_google_translation': {
                     'es': {
                         '_dateCreated': one_year_ago,
                         '_dateModified': one_year_ago,
@@ -238,7 +238,7 @@ def test_migrate_submission_extra_to_supplemental(self):
                                 '_dateCreated': one_year_ago,
                                 '_dateAccepted': now.isoformat(),
                                 '_dependency': {
-                                    '_actionId': 'automatic_transcription',
+                                    '_actionId': 'automatic_google_transcription',
                                     '_uuid': 'uuid2',
                                 },
                                 '_uuid': 'uuid4',
diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index b4f1b02656..62a9e8ba00 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -151,7 +151,7 @@ def _combine_source_transcripts(manual_transcripts, automatic_transcripts):
         for transcript in manual_transcripts
     ]
     tagged_automatic_transcripts = [
-        {**transcript, '_actionId': 'automatic_transcription'}
+        {**transcript, '_actionId': 'automatic_google_transcription'}
         for transcript in automatic_transcripts
     ]
 
diff --git a/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py b/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py
index 73a86bd91e..97fee2ff9f 100644
--- a/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py
+++ b/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py
@@ -332,6 +332,7 @@ def setUp(self):
                 'values': ['q1'],
             }
         })
+        breakpoint()
         self.act1 = next(self.asset.get_advanced_feature_instances())
 
     def test_simplest(self):

From e2a43ed93a92c9db85df71766630c19f877186c5 Mon Sep 17 00:00:00 2001
From: Rebecca Graber <becca.graber@kobotoolbox.org>
Date: Wed, 5 Nov 2025 15:18:20 -0500
Subject: [PATCH 17/18] fixup: accidental change

---
 .../subsequences__old/tests/test_submission_extras_api_post.py   | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py b/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py
index 97fee2ff9f..73a86bd91e 100644
--- a/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py
+++ b/kobo/apps/subsequences__old/tests/test_submission_extras_api_post.py
@@ -332,7 +332,6 @@ def setUp(self):
                 'values': ['q1'],
             }
         })
-        breakpoint()
         self.act1 = next(self.asset.get_advanced_feature_instances())
 
     def test_simplest(self):

From 65881505c88ea86f38ed24ba8683071ad2a35b4e Mon Sep 17 00:00:00 2001
From: rgraber <becca.graber@kobotoolbox.org>
Date: Thu, 13 Nov 2025 08:19:17 -0500
Subject: [PATCH 18/18] fixup!: auto accept manual

---
 kobo/apps/subsequences/tests/test_versioning.py | 6 +++---
 kobo/apps/subsequences/utils/versioning.py      | 7 ++++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/kobo/apps/subsequences/tests/test_versioning.py b/kobo/apps/subsequences/tests/test_versioning.py
index a29a662b95..b84efc1824 100644
--- a/kobo/apps/subsequences/tests/test_versioning.py
+++ b/kobo/apps/subsequences/tests/test_versioning.py
@@ -49,7 +49,7 @@ def test_new_revision_from_old(self):
         assert result['language'] == old['languageCode']
         assert result['_dateCreated'] == old['dateModified']
         assert result['_uuid'] is not None
-        assert result['_dateAccepted'] is None
+        assert result['_dateAccepted'] == now.isoformat()
 
     def test_new_transcript_revision_from_old_returns_none_for_bad_data(self):
         old = {'badly': 'formatted'}
@@ -256,7 +256,7 @@ def test_migrate_submission_extra_to_supplemental(self):
                     '_versions': [
                         {
                             '_dateCreated': one_day_ago,
-                            '_dateAccepted': None,
+                            '_dateAccepted': now.isoformat(),
                             '_uuid': 'uuid1',
                             'language': 'en',
                             'value': 'This is audio that I am trying to '
@@ -271,7 +271,7 @@ def test_migrate_submission_extra_to_supplemental(self):
                         '_versions': [
                             {
                                 '_dateCreated': now.isoformat(),
-                                '_dateAccepted': None,
+                                '_dateAccepted': now.isoformat(),
                                 '_dependency': {'_actionId': 'manual_transcription',
                                                 '_uuid': 'uuid1'},
                                 '_uuid': 'uuid3',
diff --git a/kobo/apps/subsequences/utils/versioning.py b/kobo/apps/subsequences/utils/versioning.py
index 62a9e8ba00..9d29d38b9c 100644
--- a/kobo/apps/subsequences/utils/versioning.py
+++ b/kobo/apps/subsequences/utils/versioning.py
@@ -188,6 +188,7 @@ def _determine_source_transcript(
 
 
 def _new_revision_from_old(old_transcript_revision_dict: dict) -> dict | None:
+    now = timezone.now().isoformat()
     # ignore bad data
     if (
         'languageCode' not in old_transcript_revision_dict
@@ -199,7 +200,8 @@ def _new_revision_from_old(old_transcript_revision_dict: dict) -> dict | None:
         'language': old_transcript_revision_dict['languageCode'],
         'value': old_transcript_revision_dict['value'],
         '_uuid': str(uuid.uuid4()),
-        '_dateAccepted': None,
+        # all preexisting translations/transcripts are considered accepted
+        '_dateAccepted': now,
     }
 
 
@@ -236,9 +238,8 @@ def _separate_manual_and_automatic_versions(
             automatic_versions if matches_automatic_result else manual_versions
         )
         if matches_automatic_result:
-            # automatic versions also need a status and a date accepted
+            # automatic versions also need a status
             revision_formatted['status'] = 'complete'
-            revision_formatted['_dateAccepted'] = timezone.now().isoformat()
         correct_version_list_to_append.append(revision_formatted)
 
     # they should be sorted anyway, but just make sure in case the input values