Skip to content

Commit 40103c9

Browse files
author
Theo van Kraay
committed
edits
1 parent 33bd486 commit 40103c9

File tree

1 file changed

+101
-95
lines changed

1 file changed

+101
-95
lines changed

articles/managed-instance-apache-cassandra/search-lucene-index.md

Lines changed: 101 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -24,42 +24,46 @@ This quickstart demonstrates how to query using Lucene Index.
2424
- Deploy an Azure Managed Instance for Apache Cassandra cluster. You can do this via the [portal](create-cluster-portal.md). Lucene indexes are enabled by default.
2525
- Connect to your cluster from [CQLSH](https://learn.microsoft.com/en-us/azure/managed-instance-apache-cassandra/create-cluster-portal#connecting-from-cqlsh).
2626

27-
## Create a managed instance cluster
27+
## Create keyspace and table
2828

2929
1. In your `CQLSH` command window, create a keyspace and table as below:
30-
31-
```SQL
32-
CREATE KEYSPACE demo
33-
WITH REPLICATION = {'class': 'NetworkTopologyStrategy', 'datacenter-1': 3};
34-
USE demo;
35-
CREATE TABLE tweets (
36-
id INT PRIMARY KEY,
37-
user TEXT,
38-
body TEXT,
39-
time TIMESTAMP,
40-
latitude FLOAT,
41-
longitude FLOAT
42-
);
43-
```
30+
31+
```SQL
32+
CREATE KEYSPACE demo
33+
WITH REPLICATION = {'class': 'NetworkTopologyStrategy', 'datacenter-1': 3};
34+
USE demo;
35+
CREATE TABLE tweets (
36+
id INT PRIMARY KEY,
37+
user TEXT,
38+
body TEXT,
39+
time TIMESTAMP,
40+
latitude FLOAT,
41+
longitude FLOAT
42+
);
43+
```
44+
45+
## Create custom secondary index using Lucene
4446

4547
1. Now create a custom secondary index on the table using Lucene Index:
4648

47-
```SQL
48-
CREATE CUSTOM INDEX tweets_index ON tweets ()
49-
USING 'com.stratio.cassandra.lucene.Index'
50-
WITH OPTIONS = {
51-
'refresh_seconds': '1',
52-
'schema': '{
53-
fields: {
54-
id: {type: "integer"},
55-
user: {type: "string"},
56-
body: {type: "text", analyzer: "english"},
57-
time: {type: "date", pattern: "yyyy/MM/dd"},
58-
place: {type: "geo_point", latitude: "latitude", longitude: "longitude"}
59-
}
60-
}'
61-
};
62-
```
49+
```SQL
50+
CREATE CUSTOM INDEX tweets_index ON tweets ()
51+
USING 'com.stratio.cassandra.lucene.Index'
52+
WITH OPTIONS = {
53+
'refresh_seconds': '1',
54+
'schema': '{
55+
fields: {
56+
id: {type: "integer"},
57+
user: {type: "string"},
58+
body: {type: "text", analyzer: "english"},
59+
time: {type: "date", pattern: "yyyy/MM/dd"},
60+
place: {type: "geo_point", latitude: "latitude", longitude: "longitude"}
61+
}
62+
}'
63+
};
64+
```
65+
66+
## Insert data
6367

6468
1. Insert the following sample tweets:
6569

@@ -71,92 +75,94 @@ This quickstart demonstrates how to query using Lucene Index.
7175
INSERT INTO tweets (id,user,body,time,latitude,longitude) VALUES (5,'quetzal','Click my link, like my stuff!', '2023-04-01T11:21:59.001+0000', 40.3930, -3.7329);
7276
```
7377

78+
## Search data
79+
7480
1. The index you created earlier indexes all the columns in the table with the specified types, and it is refreshed once per second. Alternatively, you can explicitly refresh all the index shards by running an empty search with the consistency level set to ALL:
7581

76-
```SQL
77-
CONSISTENCY ALL
78-
SELECT * FROM tweets WHERE expr(tweets_index, '{refresh:true}');
79-
CONSISTENCY QUORUM
80-
```
82+
```SQL
83+
CONSISTENCY ALL
84+
SELECT * FROM tweets WHERE expr(tweets_index, '{refresh:true}');
85+
CONSISTENCY QUORUM
86+
```
8187

8288
1. Now, you can search for tweets within a certain date range:
8389

84-
```SQL
85-
SELECT * FROM tweets WHERE expr(tweets_index, '{filter: {type: "range", field: "time", lower: "2023/03/01", upper: "2023/05/01"}}');
86-
```
90+
```SQL
91+
SELECT * FROM tweets WHERE expr(tweets_index, '{filter: {type: "range", field: "time", lower: "2023/03/01", upper: "2023/05/01"}}');
92+
```
8793
1. The same search can be performed while forcing an explicit refresh of the involved index shards:
8894

89-
```SQL
90-
SELECT * FROM tweets WHERE expr(tweets_index, '{
91-
filter: {type: "range", field: "time", lower: "2023/03/01", upper: "2023/05/01"},
92-
refresh: true
93-
}') limit 100;
94-
```
95+
```SQL
96+
SELECT * FROM tweets WHERE expr(tweets_index, '{
97+
filter: {type: "range", field: "time", lower: "2023/03/01", upper: "2023/05/01"},
98+
refresh: true
99+
}') limit 100;
100+
```
95101

96102
1. Now, to search for the top 100 most relevant tweets where the body field contains the phrase “Click my link” within the aforementioned date range:
97103

98-
```SQL
99-
SELECT * FROM tweets WHERE expr(tweets_index, '{
100-
filter: {type: "range", field: "time", lower: "2023/03/01", upper: "2023/05/01"},
101-
query: {type: "phrase", field: "body", value: "Click my link", slop: 1}
102-
}') LIMIT 100;
103-
```
104+
```SQL
105+
SELECT * FROM tweets WHERE expr(tweets_index, '{
106+
filter: {type: "range", field: "time", lower: "2023/03/01", upper: "2023/05/01"},
107+
query: {type: "phrase", field: "body", value: "Click my link", slop: 1}
108+
}') LIMIT 100;
109+
```
104110

105111
1. To refine the search to get only the tweets written by users whose names start with "q":
106112

107-
```SQL
108-
SELECT * FROM tweets WHERE expr(tweets_index, '{
109-
filter: [
110-
{type: "range", field: "time", lower: "2023/03/01", upper: "2023/05/01"},
111-
{type: "prefix", field: "user", value: "q"}
112-
],
113-
query: {type: "phrase", field: "body", value: "Click my link", slop: 1}
114-
}') LIMIT 100;
115-
```
113+
```SQL
114+
SELECT * FROM tweets WHERE expr(tweets_index, '{
115+
filter: [
116+
{type: "range", field: "time", lower: "2023/03/01", upper: "2023/05/01"},
117+
{type: "prefix", field: "user", value: "q"}
118+
],
119+
query: {type: "phrase", field: "body", value: "Click my link", slop: 1}
120+
}') LIMIT 100;
121+
```
116122

117123
1. To get the 100 most recent filtered results, you can use the sort option:
118124

119-
```SQL
120-
SELECT * FROM tweets WHERE expr(tweets_index, '{
121-
filter: [
122-
{type: "range", field: "time", lower: "2023/03/01", upper: "2023/05/01"},
123-
{type: "prefix", field: "user", value: "q"}
124-
],
125-
query: {type: "phrase", field: "body", value: "Click my link", slop: 1},
126-
sort: {field: "time", reverse: true}
127-
}') limit 100;
128-
```
125+
```SQL
126+
SELECT * FROM tweets WHERE expr(tweets_index, '{
127+
filter: [
128+
{type: "range", field: "time", lower: "2023/03/01", upper: "2023/05/01"},
129+
{type: "prefix", field: "user", value: "q"}
130+
],
131+
query: {type: "phrase", field: "body", value: "Click my link", slop: 1},
132+
sort: {field: "time", reverse: true}
133+
}') limit 100;
134+
```
129135

130136
1. The previous search can be restricted to tweets created close to a geographical position:
131137

132-
```SQL
133-
SELECT * FROM tweets WHERE expr(tweets_index, '{
134-
filter: [
135-
{type: "range", field: "time", lower: "2023/03/01", upper: "2023/05/01"},
136-
{type: "prefix", field: "user", value: "q"},
137-
{type: "geo_distance", field: "place", latitude: 40.3930, longitude: -3.7328, max_distance: "1km"}
138-
],
139-
query: {type: "phrase", field: "body", value: "Click my link", slop: 1},
140-
sort: {field: "time", reverse: true}
141-
}') limit 100;
142-
```
138+
```SQL
139+
SELECT * FROM tweets WHERE expr(tweets_index, '{
140+
filter: [
141+
{type: "range", field: "time", lower: "2023/03/01", upper: "2023/05/01"},
142+
{type: "prefix", field: "user", value: "q"},
143+
{type: "geo_distance", field: "place", latitude: 40.3930, longitude: -3.7328, max_distance: "1km"}
144+
],
145+
query: {type: "phrase", field: "body", value: "Click my link", slop: 1},
146+
sort: {field: "time", reverse: true}
147+
}') limit 100;
148+
```
143149

144150
1. It is also possible to sort the results by distance to a geographical position:
145151

146-
```SQL
147-
SELECT * FROM tweets WHERE expr(tweets_index, '{
148-
filter: [
149-
{type: "range", field: "time", lower: "2023/03/01", upper: "2023/05/01"},
150-
{type: "prefix", field: "user", value: "q"},
151-
{type: "geo_distance", field: "place", latitude: 40.3930, longitude: -3.7328, max_distance: "1km"}
152-
],
153-
query: {type: "phrase", field: "body", value: "Click my link", slop: 1},
154-
sort: [
155-
{field: "time", reverse: true},
156-
{field: "place", type: "geo_distance", latitude: 40.3930, longitude: -3.7328}
157-
]
158-
}') limit 100;
159-
```
152+
```SQL
153+
SELECT * FROM tweets WHERE expr(tweets_index, '{
154+
filter: [
155+
{type: "range", field: "time", lower: "2023/03/01", upper: "2023/05/01"},
156+
{type: "prefix", field: "user", value: "q"},
157+
{type: "geo_distance", field: "place", latitude: 40.3930, longitude: -3.7328, max_distance: "1km"}
158+
],
159+
query: {type: "phrase", field: "body", value: "Click my link", slop: 1},
160+
sort: [
161+
{field: "time", reverse: true},
162+
{field: "place", type: "geo_distance", latitude: 40.3930, longitude: -3.7328}
163+
]
164+
}') limit 100;
165+
```
160166

161167
For more in-depth information and samples, see [Stratio's Cassandra Lucene Index](https://github.com/Stratio/cassandra-lucene-index/blob/branch-3.0.14/doc/documentation.rst).
162168

0 commit comments

Comments
 (0)