Skip to content

Commit 4c532d2

Browse files
authored
CNDB-12503 remove column name restrictions in SAI (#1539)
Trying to create an SAI or other index on a column, which name is either long or has non-alphanumeric characters, fails, while the table was successfully created with such name. This happens if an index name is not provided. The reason for this limitation is that the column name is used for generating the default index name, which is used in the file names. Fixes riptano/cndb#12503, fixes riptano/cndb#12609 This PR removes the check for column names during an index creation. Thus it allows to create indexes on existing columns with any names including qualified names and long names. 1. The check was preventing qualified column names with characters, which cannot be used in file names. This was never an issue as the index name construction from table and column names already removes any non-alphanumeric or non-underscore characters. 2. The check was preventing long column names. This limitation is removed by - Adding calculation of maximum amount characters, which can be used in index name, so it will allow to construct file names without exceeding the limit of 255 chars (255 is added as a constant). - Then the index name is truncated. 3. The uniqueness of index names is already fixed by existing implementation, which adds counter prefix when needed. This change to generated index name does not affect existing indexes as the existing name from `IndexMetadata` is always serialized to JSON, thus avoiding to re-generating the name. This fix required to fix CQL tester, since it was assuming that the restriction is in place. So column name regex pattern matcher is fixed in `createIndex` to allow non-restricted column names. There might still be possible corner cases on some column names used for generating index names, which will not work with `createIndex`. In such case `execute` should be used. It also includes several fixes in affected files: - Improve the name and usage of constants. - Fix warnings with minor improvements in affected files.
1 parent c755234 commit 4c532d2

File tree

12 files changed

+275
-46
lines changed

12 files changed

+275
-46
lines changed

src/java/org/apache/cassandra/cql3/statements/schema/CreateIndexStatement.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ public Keyspaces apply(Keyspaces schema)
180180
long indexesOnAllTables = StreamSupport.stream(Keyspace.all().spliterator(), false).flatMap(ks -> ks.getColumnFamilyStores().stream())
181181
.flatMap(ks -> ks.indexManager.listIndexes().stream())
182182
.map(i -> i.getIndexMetadata().getIndexClassName())
183-
.filter(otherClassName -> className.equals(otherClassName)).count();
183+
.filter(className::equals).count();
184184
guardRails.totalThreshold.guard(indexesOnAllTables + 1, indexDescription, false, state);
185185
}
186186

@@ -224,10 +224,6 @@ private void validateIndexTarget(TableMetadata table, IndexMetadata.Kind kind, I
224224
if (null == column)
225225
throw ire("Column '%s' doesn't exist", target.column);
226226

227-
if ((kind == IndexMetadata.Kind.CUSTOM) && !SchemaConstants.isValidName(target.column.toString()))
228-
throw ire("Column '%s' is longer than the permissible name length of %d characters or" +
229-
" contains non-alphanumeric-underscore characters", target.column, SchemaConstants.NAME_LENGTH);
230-
231227
if (column.type.referencesDuration())
232228
{
233229
if (column.type.isCollection())
@@ -275,7 +271,7 @@ private String generateIndexName(KeyspaceMetadata keyspace, List<IndexTarget> ta
275271
{
276272
String baseName = targets.size() == 1
277273
? IndexMetadata.generateDefaultIndexName(tableName, targets.get(0).column)
278-
: IndexMetadata.generateDefaultIndexName(tableName);
274+
: IndexMetadata.generateDefaultIndexName(tableName, null);
279275
return keyspace.findAvailableIndexName(baseName);
280276
}
281277

src/java/org/apache/cassandra/db/lifecycle/LogRecord.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ public static LogRecord makeAbort(long updateTime)
153153

154154
public static LogRecord make(Type type, SSTable table)
155155
{
156-
String absoluteTablePath = table.descriptor.baseFileUri() + Component.separator;
156+
String absoluteTablePath = table.descriptor.baseFileUri() + Component.SEPARATOR;
157157
return make(type, getExistingFiles(absoluteTablePath), table.getComponentSize(), absoluteTablePath);
158158
}
159159

@@ -162,7 +162,7 @@ public static Map<SSTable, LogRecord> make(Type type, Iterable<SSTableReader> ta
162162
// contains a mapping from sstable absolute path (everything up until the 'Data'/'Index'/etc part of the filename) to the sstable
163163
Map<String, SSTable> absolutePaths = new HashMap<>();
164164
for (SSTableReader table : tables)
165-
absolutePaths.put(table.descriptor.baseFileUri() + Component.separator, table);
165+
absolutePaths.put(table.descriptor.baseFileUri() + Component.SEPARATOR, table);
166166

167167
// maps sstable base file name to the actual files on disk
168168
Map<String, List<File>> existingFiles = getExistingFiles(absolutePaths.keySet());

src/java/org/apache/cassandra/index/sai/disk/format/Version.java

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
import org.apache.cassandra.index.sai.disk.v6.V6OnDiskFormat;
3737
import org.apache.cassandra.index.sai.disk.v7.V7OnDiskFormat;
3838
import org.apache.cassandra.index.sai.utils.TypeUtil;
39+
import org.apache.cassandra.io.sstable.format.SSTableFormat;
40+
import org.apache.cassandra.schema.SchemaConstants;
3941
import org.apache.cassandra.utils.bytecomparable.ByteComparable;
4042

4143
import static com.google.common.base.Preconditions.checkArgument;
@@ -103,6 +105,48 @@ public static Version latest()
103105
return LATEST;
104106
}
105107

108+
/**
109+
* Calculates the maximum allowed length for SAI index names to ensure generated filenames
110+
* do not exceed the system's filename length limit (defined in {@link SchemaConstants#FILENAME_LENGTH}).
111+
* This accounts for all additional components in the filename.
112+
*/
113+
public static int calculateIndexNameAllowedLength()
114+
{
115+
int addedLength = getAddedLengthFromDescriptorAndVersion();
116+
assert addedLength < SchemaConstants.FILENAME_LENGTH;
117+
return SchemaConstants.FILENAME_LENGTH - addedLength;
118+
}
119+
120+
/**
121+
* Calculates the length of the added prefixes and suffixes from Descriptor constructor
122+
* and {@link Version#stargazerFileNameFormat}.
123+
*
124+
* @return the length of the added prefixes and suffixes
125+
*/
126+
private static int getAddedLengthFromDescriptorAndVersion()
127+
{
128+
// Prefixes and suffixes constructed by Version.stargazerFileNameFormat
129+
int versionNameLength = latest().toString().length();
130+
// room for up to 999 generations
131+
int generationLength = 3 + SAI_SEPARATOR.length();
132+
int addedLength = SAI_DESCRIPTOR.length()
133+
+ versionNameLength
134+
+ generationLength
135+
+ IndexComponentType.PRIMARY_KEY_BLOCK_OFFSETS.representation.length()
136+
+ SAI_SEPARATOR.length() * 3
137+
+ EXTENSION.length();
138+
139+
// Prefixes from Descriptor constructor
140+
int separatorLength = 1;
141+
int indexVersionLength = 2;
142+
int tableIdLength = 28;
143+
addedLength += indexVersionLength
144+
+ SSTableFormat.Type.BTI.name().length()
145+
+ tableIdLength
146+
+ separatorLength * 3;
147+
return addedLength;
148+
}
149+
106150
@Override
107151
public int hashCode()
108152
{
@@ -184,7 +228,7 @@ default String format(IndexComponentType indexComponentType, IndexContext indexC
184228
*/
185229
public static Optional<ParsedFileName> tryParseFileName(String filename)
186230
{
187-
if (!filename.endsWith(".db"))
231+
if (!filename.endsWith(EXTENSION))
188232
return Optional.empty();
189233

190234
// For flexibility, we handle both "full" filename, of the form "<descriptor>-SAI+....db", or just the component

src/java/org/apache/cassandra/io/sstable/Component.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
*/
3535
public class Component
3636
{
37-
public static final char separator = '-';
37+
public static final char SEPARATOR = '-';
3838

3939
final static EnumSet<Type> TYPES = EnumSet.allOf(Type.class);
4040

@@ -164,8 +164,8 @@ public static Component parse(String name)
164164
public File getFile(String absolutePath)
165165
{
166166
File ret;
167-
if (absolutePath.lastIndexOf(separator) != (absolutePath.length() - 1))
168-
ret = new File(PathUtils.getPath(absolutePath + separator + name));
167+
if (absolutePath.lastIndexOf(SEPARATOR) != (absolutePath.length() - 1))
168+
ret = new File(PathUtils.getPath(absolutePath + SEPARATOR + name));
169169
else
170170
ret = new File(PathUtils.getPath(absolutePath + name));
171171

src/java/org/apache/cassandra/io/sstable/Descriptor.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
import org.apache.cassandra.utils.Pair;
4040
import org.apache.cassandra.utils.UUIDGen;
4141

42-
import static org.apache.cassandra.io.sstable.Component.separator;
42+
import static org.apache.cassandra.io.sstable.Component.SEPARATOR;
4343

4444
/**
4545
* A SSTable is described by the keyspace and column family it contains data
@@ -108,7 +108,7 @@ public Descriptor(Version version, File directory, String ksname, String cfname,
108108

109109
hashCode = Objects.hashCode(version, this.directory, id, ksname, cfname, formatType);
110110

111-
filenamePart = version.toString() + separator + id + separator + formatType.name;
111+
filenamePart = version.toString() + SEPARATOR + id + SEPARATOR + formatType.name;
112112
String locationURI = directory.toUri().toString();
113113
if (!locationURI.endsWith(java.io.File.separator))
114114
locationURI = locationURI + java.io.File.separatorChar;
@@ -145,7 +145,7 @@ public String tmpFilenameForStreaming(Component component)
145145

146146
public String filenameFor(Component component)
147147
{
148-
return baseFilename() + separator + component.name();
148+
return baseFilename() + SEPARATOR + component.name();
149149
}
150150

151151
public File fileFor(Component component)
@@ -163,9 +163,9 @@ public String baseFilename()
163163

164164
private void appendFileName(StringBuilder buff)
165165
{
166-
buff.append(version).append(separator);
166+
buff.append(version).append(SEPARATOR);
167167
buff.append(id.toString());
168-
buff.append(separator).append(formatType.name);
168+
buff.append(SEPARATOR).append(formatType.name);
169169
}
170170

171171
public String baseFileUri()
@@ -186,7 +186,7 @@ public String relativeFilenameFor(Component component)
186186
buff.append(directory.name()).append(File.pathSeparator());
187187
}
188188

189-
return buff.append(filenamePart).append(separator).append(component.name()).toString();
189+
return buff.append(filenamePart).append(SEPARATOR).append(component.name()).toString();
190190
}
191191

192192
public SSTableFormat getFormat()

src/java/org/apache/cassandra/schema/IndexMetadata.java

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@
2020

2121
import java.io.IOException;
2222
import java.lang.reflect.InvocationTargetException;
23-
import java.util.*;
23+
import java.util.HashMap;
24+
import java.util.List;
25+
import java.util.Map;
26+
import java.util.UUID;
2427
import java.util.concurrent.ConcurrentHashMap;
2528
import java.util.stream.Collectors;
2629

@@ -40,11 +43,14 @@
4043
import org.apache.cassandra.index.Index;
4144
import org.apache.cassandra.index.internal.CassandraIndex;
4245
import org.apache.cassandra.index.sai.StorageAttachedIndex;
46+
import org.apache.cassandra.index.sai.disk.format.Version;
4347
import org.apache.cassandra.io.util.DataInputPlus;
4448
import org.apache.cassandra.io.util.DataOutputPlus;
4549
import org.apache.cassandra.utils.FBUtilities;
4650
import org.apache.cassandra.utils.UUIDSerializer;
4751

52+
import javax.annotation.Nullable;
53+
4854
import static org.apache.cassandra.schema.SchemaConstants.PATTERN_NON_WORD_CHAR;
4955
import static org.apache.cassandra.schema.SchemaConstants.isValidName;
5056

@@ -57,6 +63,7 @@ public final class IndexMetadata
5763

5864
public static final Serializer serializer = new Serializer();
5965

66+
static final String INDEX_POSTFIX = "_idx";
6067
/**
6168
* A mapping of user-friendly index names to their fully qualified index class names.
6269
*/
@@ -106,20 +113,55 @@ public static IndexMetadata fromIndexTargets(List<IndexTarget> targets,
106113
return new IndexMetadata(name, newOptions, kind);
107114
}
108115

109-
public static String generateDefaultIndexName(String table, ColumnIdentifier column)
116+
/**
117+
* Generates a default index name from the table and column names.
118+
* Characters other than alphanumeric and underscore are removed.
119+
* Long index names are truncated to fit the length allowing constructing filenames.
120+
*
121+
* @param table the table name
122+
* @param column the column identifier. Can be null if the index is not column specific.
123+
* @return the generated index name
124+
*/
125+
public static String generateDefaultIndexName(String table, @Nullable ColumnIdentifier column)
110126
{
111-
return PATTERN_NON_WORD_CHAR.matcher(table + '_' + column.toString() + "_idx").replaceAll("");
127+
String indexNameUncleaned = table;
128+
if (column != null)
129+
indexNameUncleaned += '_' + column.toString();
130+
String indexNameUntrimmed = PATTERN_NON_WORD_CHAR.matcher(indexNameUncleaned).replaceAll("");
131+
String indexNameTrimmed = indexNameUntrimmed
132+
.substring(0,
133+
Math.min(calculateGeneratedIndexNameMaxLength(),
134+
indexNameUntrimmed.length()));
135+
return indexNameTrimmed + INDEX_POSTFIX;
112136
}
113137

114-
public static String generateDefaultIndexName(String table)
138+
/**
139+
* Calculates the maximum length of the generated index name to fit file names.
140+
* It includes the generated suffixes in account.
141+
* The calculation depends on how index implements file names construciton from index names.
142+
* This needs to be addressed, see CNDB-13240.
143+
*
144+
* @return the allowed length of the generated index name
145+
*/
146+
private static int calculateGeneratedIndexNameMaxLength()
115147
{
116-
return PATTERN_NON_WORD_CHAR.matcher(table + '_' + "idx").replaceAll("");
148+
// Speculative assumption that uniqueness breaker will fit into 999.
149+
// The value is used for trimming the index name if needed.
150+
// Introducing validation of index name length is TODO for CNDB-13198.
151+
int uniquenessSuffixLength = 4;
152+
int indexNameAddition = uniquenessSuffixLength + INDEX_POSTFIX.length();
153+
int allowedIndexNameLength = Version.calculateIndexNameAllowedLength();
154+
155+
assert allowedIndexNameLength >= indexNameAddition : "cannot happen with current implementation as allowedIndexNameLength is approximately 255 - ~76. However, allowedIndexNameLength was " + allowedIndexNameLength + " and indexNameAddition was " + indexNameAddition;
156+
157+
return allowedIndexNameLength - indexNameAddition;
117158
}
118159

119160
public void validate(TableMetadata table)
120161
{
121162
if (!isValidName(name, true))
122-
throw new ConfigurationException("Illegal index name " + name);
163+
throw new ConfigurationException(String.format("Index name must not be empty, or contain non-alphanumeric-underscore characters (got \"%s\")",
164+
name));
123165

124166
if (kind == null)
125167
throw new ConfigurationException("Index kind is null for index " + name);

src/java/org/apache/cassandra/schema/SchemaConstants.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ public final class SchemaConstants
5858
/* virtual keyspace names */
5959
public static final Set<String> VIRTUAL_KEYSPACE_NAMES = ImmutableSet.of(SCHEMA_VIRTUAL_KEYSPACE_NAME, SYSTEM_VIEWS_KEYSPACE_NAME);
6060

61+
/**
62+
* Longest acceptable file name. Longer names lead to file write or read errors.
63+
*/
64+
public static final int FILENAME_LENGTH = 255;
65+
6166
/**
6267
* longest permissible KS or CF name. Our main concern is that filename not be more than 255 characters;
6368
* the filename will contain both the KS and CF names. Since non-schema-name components only take up
@@ -66,8 +71,10 @@ public final class SchemaConstants
6671
*
6772
* Note: This extended to 222 for CNDB tenant specific keyspaces. The windows restriction is not valid here
6873
* because CNDB does not support windows.
74+
* 222 is maximum filename length of 255 chars minus a separator char and
75+
* 32 chars for table UUID.
6976
*/
70-
public static final int NAME_LENGTH = 222;
77+
public static final int NAME_LENGTH = FILENAME_LENGTH - 32 - 1;
7178

7279
// 59adb24e-f3cd-3e02-97f0-5b395827453f
7380
public static final UUID emptyVersion;

test/unit/org/apache/cassandra/cql3/CQLTester.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -234,11 +234,12 @@ public abstract class CQLTester
234234
public static final List<ProtocolVersion> PROTOCOL_VERSIONS = new ArrayList<>(ProtocolVersion.SUPPORTED.size());
235235

236236
private static final String CREATE_INDEX_NAME_REGEX = "(\\s*(\\w*|\"\\w*\")\\s*)";
237+
private static final String CREATE_INDEX_NAME_QUOTED_REGEX = "(\\s*(\\w*|\"[^\"]*\")\\s*)";
237238
private static final String CREATE_INDEX_REGEX = String.format("\\A\\s*CREATE(?:\\s+CUSTOM)?\\s+INDEX" +
238239
"(?:\\s+IF\\s+NOT\\s+EXISTS)?\\s*" +
239240
"%s?\\s*ON\\s+(%<s\\.)?%<s\\s*" +
240-
"(\\((?:\\s*\\w+\\s*\\()?%<s\\))?",
241-
CREATE_INDEX_NAME_REGEX);
241+
"(\\((?:\\s*\\w+\\s*\\()?%s\\))?",
242+
CREATE_INDEX_NAME_REGEX, CREATE_INDEX_NAME_QUOTED_REGEX);
242243
private static final Pattern CREATE_INDEX_PATTERN = Pattern.compile(CREATE_INDEX_REGEX, Pattern.CASE_INSENSITIVE);
243244

244245
public static final NettyOptions IMMEDIATE_CONNECTION_SHUTDOWN_NETTY_OPTIONS = new NettyOptions()
@@ -1061,26 +1062,27 @@ protected static Pair<String, String> getCreateIndexName(String keyspace, String
10611062
keyspace = parsedKeyspace;
10621063

10631064
String index = matcher.group(2);
1065+
boolean isQuotedGeneratedIndexName = false;
10641066
if (Strings.isNullOrEmpty(index))
10651067
{
10661068
String table = matcher.group(7);
10671069
if (Strings.isNullOrEmpty(table))
10681070
throw new IllegalArgumentException("Table name should be specified: " + formattedQuery);
10691071

10701072
String column = matcher.group(9);
1073+
isQuotedGeneratedIndexName = ParseUtils.isQuoted(column, '\"');
10711074

10721075
String baseName = Strings.isNullOrEmpty(column)
1073-
? IndexMetadata.generateDefaultIndexName(table)
1076+
? IndexMetadata.generateDefaultIndexName(table, null)
10741077
: IndexMetadata.generateDefaultIndexName(table, new ColumnIdentifier(column, true));
10751078

10761079
KeyspaceMetadata ks = Schema.instance.getKeyspaceMetadata(keyspace);
10771080
assertNotNull(ks);
10781081
index = ks.findAvailableIndexName(baseName);
10791082
}
1080-
10811083
index = ParseUtils.isQuoted(index, '\"')
10821084
? ParseUtils.unDoubleQuote(index)
1083-
: index.toLowerCase();
1085+
: isQuotedGeneratedIndexName ? index : index.toLowerCase();
10841086

10851087
return Pair.create(keyspace, index);
10861088
}

test/unit/org/apache/cassandra/db/DirectoriesTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -744,7 +744,7 @@ private void testDirectoriesSymlinksHelper(boolean oldStyle) throws IOException
744744
for (TableMetadata tm : CFM)
745745
{
746746
Path keyspacedir = Files.createDirectories(ddir2.resolve(tm.keyspace));
747-
String tabledir = tm.name + (oldStyle ? "" : Component.separator + tm.id.toHexString());
747+
String tabledir = tm.name + (oldStyle ? "" : Component.SEPARATOR + tm.id.toHexString());
748748
Files.createSymbolicLink(keyspacedir.resolve(tabledir), symlinktarget);
749749
}
750750

@@ -806,7 +806,7 @@ public void testDataDirectoriesIterator() throws IOException
806806

807807
private String getNewFilename(TableMetadata tm, boolean oldStyle)
808808
{
809-
return tm.keyspace + File.pathSeparator() + tm.name + (oldStyle ? "" : Component.separator + tm.id.toHexString()) + "/na-1-big-Data.db";
809+
return tm.keyspace + File.pathSeparator() + tm.name + (oldStyle ? "" : Component.SEPARATOR + tm.id.toHexString()) + "/na-1-big-Data.db";
810810
}
811811

812812
private List<Directories.DataDirectoryCandidate> getWriteableDirectories(DataDirectory[] dataDirectories, long writeSize)

0 commit comments

Comments
 (0)