Skip to content

Commit 4a5e604

Browse files
authored
Add beam job to delete voiceover models related to non-curated exploration (oppia#22362)
* Add beam job to delete voiceover models * Adds test case into the shard * Fixes backend coverage * updates copyright year * Fixes backend tests * Fixes lint issues
1 parent f7a894d commit 4a5e604

File tree

4 files changed

+674
-1
lines changed

4 files changed

+674
-1
lines changed
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
# coding: utf-8
2+
#
3+
# Copyright 2025 The Oppia Authors. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS-IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Jobs used for deleting instances of EntityVoiceoversModel and
18+
ExplorationVoiceArtistsLinkModel.
19+
"""
20+
21+
from __future__ import annotations
22+
23+
import logging
24+
25+
from core.domain import opportunity_services
26+
from core.jobs import base_jobs
27+
from core.jobs.io import ndb_io
28+
from core.jobs.types import job_run_result
29+
from core.platform import models
30+
31+
import apache_beam as beam
32+
from typing import Optional
33+
34+
MYPY = False
35+
if MYPY: # pragma: no cover
36+
from mypy_imports import datastore_services
37+
from mypy_imports import voiceover_models
38+
39+
datastore_services = models.Registry.import_datastore_services()
40+
41+
(voiceover_models,) = models.Registry.import_models([
42+
models.Names.VOICEOVER])
43+
44+
45+
class DeleteNonCuratedInstanceofExplorationVoiceArtistsLinkModelJob(
        base_jobs.JobBase):
    """Job that deletes the ExplorationVoiceArtistsLinkModel instances which
    correspond to non-curated explorations.

    A model is selected only when the curated check ran successfully and
    reported the exploration as non-curated. Models whose check failed are
    left untouched so that a failed lookup can never cause a deletion.
    """

    # Subclasses set this to False to get a report without any deletion.
    DATASTORE_UPDATES_ALLOWED = True

    @staticmethod
    def is_exploration_curated(exploration_id: str) -> Optional[bool]:
        """Checks whether the provided exploration ID is curated or not.

        Args:
            exploration_id: str. The given exploration ID.

        Returns:
            bool|None. The value returned by
            opportunity_services.is_exploration_available_for_contribution
            for the given ID, or None if that check raised an exception
            (the exception is logged). None is falsy, so callers that only
            test truthiness behave as they did when False was returned.
        """
        try:
            with datastore_services.get_ndb_context():
                return (
                    opportunity_services.
                    is_exploration_available_for_contribution(exploration_id)
                )
        except Exception:
            # The ID is passed as a logging argument so that the message is
            # only formatted when the record is actually emitted.
            logging.exception(
                'Not able to check whether exploration is curated or not'
                ' for exploration ID %s.', exploration_id)
            # "Unknown" must stay distinguishable from "not curated": this
            # job deletes data, so a failed check must not select a model.
            return None

    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
        """Returns a PCollection of results for the exploration IDs which are
        deleted from the exploration voice artist link model.

        Returns:
            PCollection. A PCollection of results for the exploration IDs which
            are deleted from the exploration voice artist link model.
        """
        # Bind the static check to a local name so that the filter function
        # below does not need to close over `self`.
        check_curated = self.is_exploration_curated

        def is_confirmed_non_curated(
            model: voiceover_models.ExplorationVoiceArtistsLinkModel
        ) -> bool:
            """Returns True only if the curated check succeeded and reported
            the exploration as non-curated.
            """
            curated = check_curated(model.id)
            return curated is not None and not curated

        models_to_be_deleted = (
            self.pipeline
            | 'Get all exploration voice artist link models' >>
                ndb_io.GetModels(
                    voiceover_models.ExplorationVoiceArtistsLinkModel.get_all()
                )
            | 'Filter non-curated model instances' >> beam.Filter(
                is_confirmed_non_curated)
        )

        deleted_models_report_pcollection = (
            models_to_be_deleted
            | 'Report deleted model IDs' >> beam.Map(
                lambda model: job_run_result.JobRunResult.as_stdout(
                    'Deleted ExplorationVoiceArtistsLinkModel: %s' % model.id)
            )
        )

        # The report above is produced in both modes; the deletion branch is
        # only attached to the pipeline when datastore updates are allowed.
        if self.DATASTORE_UPDATES_ALLOWED:
            unused_delete_result = (
                models_to_be_deleted
                | 'Extract model keys' >> beam.Map(lambda model: model.key)
                | 'Deleting models' >> ndb_io.DeleteModels()
            )

        return deleted_models_report_pcollection
110+
111+
112+
class AuditExplorationVoiceArtistsLinkModelJob(
        DeleteNonCuratedInstanceofExplorationVoiceArtistsLinkModelJob):
    """Audit-only variant of the job that handles the
    ExplorationVoiceArtistsLinkModel instances of non-curated explorations.

    The pipeline is inherited unchanged from the parent job; this class only
    switches DATASTORE_UPDATES_ALLOWED off.
    """

    # Turns off the datastore-update branch of the inherited run() method.
    DATASTORE_UPDATES_ALLOWED = False
119+
120+
121+
class DeleteNonCuratedInstanceofEntityVoiceoversModelJob(
        base_jobs.JobBase):
    """Job that deletes the EntityVoiceoversModel instances which correspond
    to non-curated explorations.

    A model is selected only when the curated check ran successfully and
    reported the exploration as non-curated. Models whose check failed are
    left untouched so that a failed lookup can never cause a deletion.
    """

    # Subclasses set this to False to get a report without any deletion.
    DATASTORE_UPDATES_ALLOWED = True

    @staticmethod
    def is_exploration_curated(exploration_id: str) -> Optional[bool]:
        """Checks whether the provided exploration ID is curated or not.

        Args:
            exploration_id: str. The given exploration ID.

        Returns:
            bool|None. The value returned by
            opportunity_services.is_exploration_available_for_contribution
            for the given ID, or None if that check raised an exception
            (the exception is logged). None is falsy, so callers that only
            test truthiness behave as they did when False was returned.
        """
        try:
            with datastore_services.get_ndb_context():
                return (
                    opportunity_services.
                    is_exploration_available_for_contribution(exploration_id)
                )
        except Exception:
            # The ID is passed as a logging argument so that the message is
            # only formatted when the record is actually emitted.
            logging.exception(
                'Not able to check whether exploration is curated or not'
                ' for exploration ID %s.', exploration_id)
            # "Unknown" must stay distinguishable from "not curated": this
            # job deletes data, so a failed check must not select a model.
            return None

    def run(self) -> beam.PCollection[job_run_result.JobRunResult]:
        """Returns a PCollection of results for the exploration IDs which are
        deleted from the entity voiceover models.

        Returns:
            PCollection. A PCollection of results for the exploration IDs which
            are deleted from the entity voiceover models.
        """
        # Bind the static check to a local name so that the filter function
        # below does not need to close over `self`.
        check_curated = self.is_exploration_curated

        def is_confirmed_non_curated(
            model: voiceover_models.EntityVoiceoversModel
        ) -> bool:
            """Returns True only if the curated check succeeded and reported
            the exploration as non-curated.
            """
            # NOTE(review): entity_id is used as an exploration ID without
            # looking at the entity type - confirm that every
            # EntityVoiceoversModel instance belongs to an exploration.
            curated = check_curated(model.entity_id)
            return curated is not None and not curated

        models_to_be_deleted = (
            self.pipeline
            | 'Get all entity voiceover models' >>
                ndb_io.GetModels(
                    voiceover_models.EntityVoiceoversModel.get_all()
                )
            | 'Filter non-curated model instances' >> beam.Filter(
                is_confirmed_non_curated)
        )

        # The filter works on entity_id, while the report prints the model's
        # own id.
        deleted_models_report_pcollection = (
            models_to_be_deleted
            | 'Report deleted model IDs' >> beam.Map(
                lambda model: job_run_result.JobRunResult.as_stdout(
                    'Deleted EntityVoiceoversModel: %s' % model.id)
            )
        )

        # The report above is produced in both modes; the deletion branch is
        # only attached to the pipeline when datastore updates are allowed.
        if self.DATASTORE_UPDATES_ALLOWED:
            unused_delete_result = (
                models_to_be_deleted
                | 'Extract model keys' >> beam.Map(lambda model: model.key)
                | 'Deleting models' >> ndb_io.DeleteModels()
            )

        return deleted_models_report_pcollection
186+
187+
188+
class AuditEntityVoiceoversModelJob(
        DeleteNonCuratedInstanceofEntityVoiceoversModelJob):
    """Audit-only variant of the job that handles the EntityVoiceoversModel
    instances of non-curated explorations.

    The pipeline is inherited unchanged from the parent job; this class only
    switches DATASTORE_UPDATES_ALLOWED off.
    """

    # Turns off the datastore-update branch of the inherited run() method.
    DATASTORE_UPDATES_ALLOWED = False

0 commit comments

Comments
 (0)