Skip to content

Commit 6d7a3cb

Browse files
fixes(unique fields) dotCMS#32765 : [Unique Fields] Upgrade with inconsistencies leave the system unusable (dotCMS#33018)
### Proposed Changes

* Includes a new step in the Database Unique Field Validation process where duplicate records are fixed and reported, instead of having dotCMS start up with an invalid `unique_fields` table.
* Records with the same hash belong to Contentlets that have the same unique value with different casing. This is NOT correct. So, what the code does now is:
  * Remove the primary key constraint from the DB table, and load the unique value data as usual.
  * Leave one record "as is" and add a "prefix" to the other ones.
  * Re-generate the hash in order to have a different value.
  * Bring the primary key back and let dotCMS start as usual.
  * Generate a log file listing every record with conflicting data. This way, users can inspect it and manually fix the corrupted Contentlets.
* Such a log file can be displayed and downloaded from the `Logs` portlet in the back-end.

This PR fixes: dotCMS#32765
1 parent cd4f0c3 commit 6d7a3cb

File tree

8 files changed

+861
-231
lines changed

8 files changed

+861
-231
lines changed

dotCMS/src/main/java/com/dotcms/contenttype/business/uniquefields/UniqueFieldsValidationInitializer.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@
33
import com.dotcms.config.DotInitializer;
44
import com.dotcms.content.elasticsearch.business.ESContentletAPIImpl;
55
import com.dotcms.contenttype.business.uniquefields.extratable.UniqueFieldDataBaseUtil;
6+
import com.dotcms.exception.ExceptionUtil;
67
import com.dotmarketing.common.db.DotDatabaseMetaData;
78
import com.dotmarketing.db.DbConnectionFactory;
89
import com.dotmarketing.exception.DotDataException;
10+
import com.dotmarketing.exception.DotRuntimeException;
911
import com.dotmarketing.util.Logger;
12+
import io.vavr.control.Try;
1013

1114
import javax.enterprise.context.Dependent;
1215
import javax.inject.Inject;
@@ -21,7 +24,8 @@
2124
* <li>If it does not exist and the Database validation is enabled, then it creates it and
2225
* populates it.</li>
2326
* <li>If it exists and the Database validation is enabled, do nothing.</li>
24-
* <li>If it does not exist and the Database validation is disabled, do nothing.</li>
27+
* <li>If it does not exist and the Database validation is disabled, do nothing.</li>
28+
* <li>If any error occurs, drop the table and fail to start.</li>
2529
* </ul>
2630
*
2731
* @author Freddy Rodriguez
@@ -43,7 +47,6 @@ public UniqueFieldsValidationInitializer(final UniqueFieldDataBaseUtil uniqueFie
4347
public void init() {
4448
final boolean featureFlagDbUniqueFieldValidation = ESContentletAPIImpl.getFeatureFlagDbUniqueFieldValidation();
4549
final boolean uniqueFieldsTableExists = uniqueFieldsTableExists();
46-
4750
try {
4851
if (featureFlagDbUniqueFieldValidation && !uniqueFieldsTableExists) {
4952
Logger.info(this, "Creating and populating the Unique Fields table");
@@ -53,7 +56,16 @@ public void init() {
5356
this.uniqueFieldDataBaseUtil.dropUniqueFieldsValidationTable();
5457
}
5558
} catch (final DotDataException e) {
56-
Logger.error(UniqueFieldsValidationInitializer.class, e);
59+
try {
60+
// Drop the table so that the process can run again on the next restart
61+
this.uniqueFieldDataBaseUtil.dropUniqueFieldsValidationTable();
62+
} catch (final DotDataException ex) {
63+
// Failed to drop the unique_fields table, or doesn't exist yet
64+
};
65+
final String errorMsg = String.format("Failed to create and populate the Unique Fields table: " +
66+
"%s", ExceptionUtil.getErrorMessage(e));
67+
Logger.warnAndDebug(this.getClass(), errorMsg, e);
68+
throw new DotRuntimeException(errorMsg, e);
5769
}
5870
}
5971

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
package com.dotcms.contenttype.business.uniquefields.extratable;
2+
3+
import static com.dotcms.content.elasticsearch.business.ESContentletAPIImpl.UNIQUE_PER_SITE_FIELD_VARIABLE_NAME;
4+
import static com.dotcms.contenttype.business.uniquefields.extratable.UniqueFieldCriteria.CONTENTLET_IDS_ATTR;
5+
import static com.dotcms.contenttype.business.uniquefields.extratable.UniqueFieldCriteria.CONTENT_TYPE_ID_ATTR;
6+
import static com.dotcms.contenttype.business.uniquefields.extratable.UniqueFieldCriteria.FIELD_VALUE_ATTR;
7+
import static com.dotcms.contenttype.business.uniquefields.extratable.UniqueFieldCriteria.FIELD_VARIABLE_NAME_ATTR;
8+
import static com.dotcms.contenttype.business.uniquefields.extratable.UniqueFieldCriteria.LANGUAGE_ID_ATTR;
9+
import static com.dotcms.contenttype.business.uniquefields.extratable.UniqueFieldCriteria.LIVE_ATTR;
10+
import static com.dotcms.contenttype.business.uniquefields.extratable.UniqueFieldCriteria.SITE_ID_ATTR;
11+
import static com.dotcms.contenttype.business.uniquefields.extratable.UniqueFieldCriteria.UNIQUE_PER_SITE_ATTR;
12+
import static com.dotcms.contenttype.business.uniquefields.extratable.UniqueFieldCriteria.VARIANT_ATTR;
13+
14+
/**
15+
* This is a simple utility class that exposes the different SQL queries used to perform CRUD
16+
* operations on the {@code unique_fields} table.
17+
*
18+
* @author Jose Castro
19+
* @since Aug 6th, 2025
20+
*/
21+
public final class SqlQueries {
22+
23+
private SqlQueries() {}
24+
25+
public static final String INSERT_SQL = "INSERT INTO unique_fields (unique_key_val, supporting_values) " +
26+
"VALUES (encode(sha256(convert_to(?::text, 'UTF8')), 'hex'), ?)";
27+
28+
public static final String RECALCULATE_UNIQUE_KEY_VAL = "UPDATE unique_fields\n" +
29+
"SET unique_key_val = encode(sha256(" +
30+
"convert_to(\n" +
31+
"CONCAT(" +
32+
" jsonb_extract_path_text(supporting_values, '" + CONTENT_TYPE_ID_ATTR + "')::text || \n" +
33+
" jsonb_extract_path_text(supporting_values, '" + FIELD_VARIABLE_NAME_ATTR + "')::text || \n" +
34+
" jsonb_extract_path_text(supporting_values, '" + LANGUAGE_ID_ATTR + "')::text || \n" +
35+
" jsonb_extract_path_text(supporting_values, '" + FIELD_VALUE_ATTR + "')::text\n" +
36+
" %s \n" +
37+
"),'UTF8'\n" +
38+
")\n" +
39+
"), 'hex'), \n" +
40+
"supporting_values = jsonb_set(supporting_values, '{" + UNIQUE_PER_SITE_ATTR + "}', '%s') \n" +
41+
"WHERE supporting_values->>'" + CONTENT_TYPE_ID_ATTR + "' = ?\n" +
42+
"AND supporting_values->>'" + FIELD_VARIABLE_NAME_ATTR + "' = ?";
43+
44+
public static final String UPDATE_CONTENT_LIST ="UPDATE unique_fields " +
45+
"SET supporting_values = jsonb_set(supporting_values, '{" + CONTENTLET_IDS_ATTR + "}', ?::jsonb) " +
46+
"WHERE unique_key_val = encode(sha256(convert_to(?::text, 'UTF8')), 'hex')";
47+
48+
public static final String UPDATE_CONTENT_LIST_WITH_HASH ="UPDATE unique_fields " +
49+
"SET supporting_values = jsonb_set(supporting_values, '{" + CONTENTLET_IDS_ATTR + "}', ?::jsonb) " +
50+
"WHERE unique_key_val = ?";
51+
52+
public static final String GET_UNIQUE_FIELDS_BY_CONTENTLET = "SELECT * FROM unique_fields " +
53+
"WHERE supporting_values->'" + CONTENTLET_IDS_ATTR + "' @> ?::jsonb " +
54+
"AND supporting_values->>'" + VARIANT_ATTR + "' = ? " +
55+
"AND (supporting_values->>'"+ LANGUAGE_ID_ATTR + "')::BIGINT = ? " +
56+
"AND supporting_values->>'" + FIELD_VARIABLE_NAME_ATTR + "' = ?";
57+
58+
public static final String DELETE_UNIQUE_FIELDS_BY_CONTENTLET = "DELETE FROM unique_fields " +
59+
"WHERE supporting_values->'" + CONTENTLET_IDS_ATTR + "' @> ?::jsonb AND supporting_values->>'" + VARIANT_ATTR + "' = ? " +
60+
"AND (supporting_values->>'"+ LANGUAGE_ID_ATTR + "')::BIGINT = ? " +
61+
"AND (supporting_values->>'" + LIVE_ATTR + "')::BOOLEAN = ?";
62+
63+
public static final String SET_LIVE_BY_CONTENTLET = "UPDATE unique_fields " +
64+
"SET supporting_values = jsonb_set(supporting_values, '{" + LIVE_ATTR + "}', ?::jsonb) " +
65+
"WHERE supporting_values->'" + CONTENTLET_IDS_ATTR + "' @> ?::jsonb " +
66+
"AND supporting_values->>'" + VARIANT_ATTR + "' = ? " +
67+
"AND (supporting_values->>'"+ LANGUAGE_ID_ATTR + "')::BIGINT = ? " +
68+
"AND (supporting_values->>'" + LIVE_ATTR + "')::BOOLEAN = false";
69+
70+
public static final String GET_UNIQUE_FIELDS_BY_CONTENTLET_AND_LANGUAGE = "SELECT * FROM unique_fields " +
71+
"WHERE supporting_values->'" + CONTENTLET_IDS_ATTR + "' @> ?::jsonb AND (supporting_values->>'" + LANGUAGE_ID_ATTR +"')::BIGINT = ?";
72+
73+
public static final String GET_UNIQUE_FIELDS_BY_CONTENTLET_AND_VARIANT= "SELECT * FROM unique_fields " +
74+
"WHERE supporting_values->'" + CONTENTLET_IDS_ATTR + "' @> ?::jsonb AND supporting_values->>'" + VARIANT_ATTR + "' = ?";
75+
76+
public static final String DELETE_UNIQUE_FIELDS = "DELETE FROM unique_fields WHERE unique_key_val = ?";
77+
78+
public static final String GET_UNIQUE_FIELDS_BY_UNIQUE_FIELD_CRITERIA = "SELECT * FROM unique_fields " +
79+
"WHERE unique_key_val = encode(sha256(convert_to(?::text, 'UTF8')), 'hex')";
80+
81+
public static final String DELETE_UNIQUE_FIELDS_BY_FIELD = "DELETE FROM unique_fields " +
82+
"WHERE supporting_values->>'" + FIELD_VARIABLE_NAME_ATTR + "' = ?";
83+
84+
public static final String DELETE_UNIQUE_FIELDS_BY_CONTENT_TYPE = "DELETE FROM unique_fields " +
85+
"WHERE supporting_values->>'" + CONTENT_TYPE_ID_ATTR + "' = ?";
86+
87+
public static final String POPULATE_UNIQUE_FIELDS_VALUES_QUERY = "INSERT INTO unique_fields (unique_key_val, supporting_values) " +
88+
"SELECT encode(" +
89+
" sha256(" +
90+
" convert_to(" +
91+
" CONCAT(" +
92+
" content_type_id::text," +
93+
" field_var_name::text," +
94+
" language_id::text," +
95+
" LOWER(field_value)::text," +
96+
" CASE WHEN uniquePerSite = 'true' THEN COALESCE(host_id::text, '') ELSE '' END" +
97+
" )," +
98+
" 'UTF8'" +
99+
" )" +
100+
" )," +
101+
" 'hex'" +
102+
" ) AS unique_key_val, " +
103+
" json_build_object('" + CONTENT_TYPE_ID_ATTR + "', content_type_id, " +
104+
"'" + FIELD_VARIABLE_NAME_ATTR + "', field_var_name, " +
105+
"'" + LANGUAGE_ID_ATTR + "', language_id, " +
106+
"'" + FIELD_VALUE_ATTR +"', LOWER(field_value), " +
107+
"'" + SITE_ID_ATTR + "', host_id, " +
108+
"'" + VARIANT_ATTR + "', variant_id, " +
109+
"'" + UNIQUE_PER_SITE_ATTR + "', " + "uniquePerSite, " +
110+
"'" + LIVE_ATTR + "', live, " +
111+
"'" + CONTENTLET_IDS_ATTR + "', contentlet_identifier) AS supporting_values " +
112+
"FROM (" +
113+
" SELECT structure.inode AS content_type_id," +
114+
" field.velocity_var_name AS field_var_name," +
115+
" contentlet.language_id AS language_id," +
116+
" (CASE WHEN field_variable.variable_value = 'true' THEN identifier.host_inode ELSE '' END) AS host_id," +
117+
" jsonb_extract_path_text(contentlet_as_json -> 'fields', field.velocity_var_name)::jsonb ->>'value' AS field_value," +
118+
" ARRAY_AGG(DISTINCT contentlet.identifier) AS contentlet_identifier," +
119+
" (CASE WHEN COUNT(DISTINCT contentlet_version_info.variant_id) > 1 THEN 'DEFAULT' ELSE MAX(contentlet_version_info.variant_id) END) AS variant_id, " +
120+
" ((CASE WHEN COUNT(*) > 1 AND COUNT(DISTINCT contentlet_version_info.live_inode = contentlet.inode) > 1 THEN 0 " +
121+
" ELSE MAX((CASE WHEN contentlet_version_info.live_inode = contentlet.inode THEN 1 ELSE 0 END)::int) " +
122+
" END) = 1) AS live," +
123+
" (MAX(CASE WHEN field_variable.variable_value = 'true' THEN 1 ELSE 0 END)) = 1 AS uniquePerSite" +
124+
" FROM contentlet" +
125+
" INNER JOIN structure ON structure.inode = contentlet.structure_inode" +
126+
" INNER JOIN field ON structure.inode = field.structure_inode" +
127+
" INNER JOIN identifier ON contentlet.identifier = identifier.id" +
128+
" INNER JOIN contentlet_version_info ON contentlet_version_info.live_inode = contentlet.inode OR" +
129+
" contentlet_version_info.working_inode = contentlet.inode" +
130+
" LEFT JOIN field_variable ON field_variable.field_id = field.inode AND field_variable.variable_key = '" + UNIQUE_PER_SITE_FIELD_VARIABLE_NAME + "'" +
131+
" WHERE jsonb_extract_path_text(contentlet_as_json -> 'fields', field.velocity_var_name) IS NOT NULL" +
132+
" AND field.unique_ = true" +
133+
" GROUP BY structure.inode," +
134+
" field.velocity_var_name," +
135+
" contentlet.language_id," +
136+
" (CASE WHEN field_variable.variable_value = 'true' THEN identifier.host_inode ELSE '' END)," +
137+
" jsonb_extract_path_text(contentlet_as_json -> 'fields', field.velocity_var_name)::jsonb ->>'value') as data_to_populate";
138+
139+
/**
140+
* Returns the number of records that share the same hash, a.k.a. unique key value. Such records
141+
* must be fixed for dotCMS to start up correctly.
142+
*/
143+
public static final String GET_RECORDS_WITH_SAME_HASH = "SELECT unique_key_val, COUNT(unique_key_val) " +
144+
"FROM unique_fields u " +
145+
"GROUP BY unique_key_val " +
146+
"HAVING COUNT(unique_key_val) > 1;";
147+
148+
/**
149+
* Returns all unique fields with the same hash, a.k.a. the same unique key value.
150+
*/
151+
public static final String GET_UNIQUE_FIELDS_BY_HASH = "SELECT * FROM unique_fields " +
152+
"WHERE unique_key_val = ?";
153+
154+
/**
155+
* Updates the unique value of the conflicting entries in the {@code unique_fields} table. That
156+
* is, entries that belong to separate Contentlets that have the exact same unique value. The
157+
* solution is to set a prefix to it, and re-generate the hash.
158+
*/
159+
public static final String FIX_DUPLICATE_ENTRY = "UPDATE unique_fields " +
160+
"SET unique_key_val = encode(sha256(convert_to(?::text, 'UTF8')), 'hex'), " +
161+
"supporting_values = ? " +
162+
"WHERE unique_key_val = ? AND supporting_values->'" + CONTENTLET_IDS_ATTR + "' @> ?::jsonb ";
163+
164+
}
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
package com.dotcms.contenttype.business.uniquefields.extratable;
2+
3+
import com.fasterxml.jackson.annotation.JsonGetter;
4+
5+
import java.util.ArrayList;
6+
import java.util.List;
7+
import java.util.Map;
8+
9+
/**
10+
* Represents a unique field conflict between Contentlets that share the same unique value. Such
11+
* conflicts are meant to be reported to user in the back-end so that they can be manually fix them.
12+
*
13+
* @author Jose Castro
14+
* @since Aug 6th, 2025
15+
*/
16+
public class UniqueFieldConflict {
17+
18+
private final String fieldName;
19+
private final String contentTypeId;
20+
private final String originalValue;
21+
private final List<Map<String, Object>> conflictingData;
22+
23+
private UniqueFieldConflict(final Builder builder) {
24+
this.fieldName = builder.fieldName;
25+
this.contentTypeId = builder.contentTypeId;
26+
this.originalValue = builder.originalValue;
27+
this.conflictingData = builder.conflictingData;
28+
}
29+
30+
/**
31+
* Returns the name of the field whose value is unique and conflicts with the value of one or
32+
* more Contentlets.
33+
*
34+
* @return The Velocity Variable Name of the field.
35+
*/
36+
@JsonGetter("fieldName")
37+
public String fieldName() {
38+
return this.fieldName;
39+
}
40+
41+
/**
42+
* Returns the ID of the Content Type that contains the field with the conflicting unique
43+
* value.
44+
*
45+
* @return The ID of the Content Type.
46+
*/
47+
@JsonGetter("contentTypeId")
48+
public String contentTypeId() {
49+
return this.contentTypeId;
50+
}
51+
52+
/**
53+
* Returns the value of the field that is unique and conflicts with the value of one or more
54+
* Contentlets.
55+
*
56+
* @return The conflicting unique value.
57+
*/
58+
@JsonGetter("originalValue")
59+
public String originalValue() {
60+
return this.originalValue;
61+
}
62+
63+
/**
64+
* Returns the list of conflicting Contentlets that share the same unique value. It provides the
65+
* Contentlet ID and its language ID.
66+
*
67+
* @return The list of conflicting Contentlets.
68+
*/
69+
@JsonGetter("conflictingData")
70+
public List<Map<String, Object>> conflictingData() {
71+
return this.conflictingData;
72+
}
73+
74+
@Override
75+
public String toString() {
76+
return "UniqueFieldConflict{" +
77+
"fieldName='" + fieldName + '\'' +
78+
", contentTypeId='" + contentTypeId + '\'' +
79+
", originalValue='" + originalValue + '\'' +
80+
", conflictingData=" + conflictingData +
81+
'}';
82+
}
83+
84+
/**
85+
* Allows you to create an instance of the {@link UniqueFieldConflict} class.
86+
*/
87+
public static class Builder {
88+
89+
private String fieldName;
90+
private String contentTypeId;
91+
private String originalValue;
92+
private List<Map<String, Object>> conflictingData;
93+
94+
public Builder fieldName(final String fieldName) {
95+
this.fieldName = fieldName;
96+
return this;
97+
}
98+
99+
public String fieldName() {
100+
return this.fieldName;
101+
}
102+
103+
public Builder contentTypeId(final String contentTypeId) {
104+
this.contentTypeId = contentTypeId;
105+
return this;
106+
}
107+
108+
public Builder originalValue(final String originalValue) {
109+
this.originalValue = originalValue;
110+
return this;
111+
}
112+
113+
public Builder conflictingData(final Map<String, Object> conflictingData) {
114+
if (null == this.conflictingData) {
115+
this.conflictingData = new ArrayList<>();
116+
}
117+
this.conflictingData.add(conflictingData);
118+
return this;
119+
}
120+
121+
public Builder conflictingData(final List<Map<String, Object>> conflictingData) {
122+
this.conflictingData = conflictingData;
123+
return this;
124+
}
125+
126+
public UniqueFieldConflict build() {
127+
return new UniqueFieldConflict(this);
128+
}
129+
130+
}
131+
132+
}

0 commit comments

Comments
 (0)