Skip to content

Commit 337e574

Browse files
authored
Continue cleanup (#339)
1 parent 081b66d commit 337e574

File tree

9 files changed

+35
-23
lines changed

9 files changed

+35
-23
lines changed

.gitattributes

Whitespace-only changes.

docs/quick-start.md

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,14 @@ The second command will download and run a prebuilt AsterixDB docker container f
3636
After it finishes, you should see the messages as shown in the following screenshot:
3737
![docker][docker]
3838

39-
**Step 4**: Run the following command to ingest sample tweets (about 324K) and US population data into AsterixDB.
39+
**Step 4**: Run the following command to ingest sample tweets (about 47K) and US population data into AsterixDB.
4040

4141

4242
```
4343
~/cloudberry> ./script/ingestAllTwitterToLocalCluster.sh
4444
```
4545

46-
This step is downloading about 70MB of data, and it may take 5 minutes, again, depending on your network speed. You should see the messages as shown in the following screenshot:
46+
When it finishes, you should see the messages shown in the following screenshot:
4747
![ingestion][ingestion]
4848

4949
**Step 5**: Compile and run the Cloudberry server.
@@ -80,17 +80,24 @@ The instructions above assume that we use an AsterixDB instance in a Docker cont
8080

8181
**Step 8**: Follow the instructions on the [AsterixDB Installation Guide](https://ci.apache.org/projects/asterixdb/index.html) to install an AsterixDB cluster. Select your preferred installation option.
8282

83-
**Step 9**: Ingest twitter data.
83+
**Step 9**: Ingest Twitter data into AsterixDB.
8484

85-
**Step 10**: Change the Cloudberry middleware configuration to connect to this new AsterixDB cluster. You can modify the AsterixDB hostname in the configuration file `neo/conf/application.conf` and change the `asterixdb.url` value to the AsterixDB hostname.
85+
You need to pass the RESTful API URL of the AsterixDB cluster and the name of one of its NCs to the ingestion script, as follows:
86+
87+
```
88+
~/cloudberry> ./script/ingestAllTwitterToLocalCluster.sh http://YourAsterixDBServerIP:19002/aql ONE_OF_NC_NAMES
89+
```
90+
91+
**Step 10**: Change the Cloudberry middleware configuration to connect to this new AsterixDB cluster.
92+
You can modify the AsterixDB hostname in the configuration file `neo/conf/application.conf` by changing the `asterixdb.url` value.
8693

8794
```
8895
asterixdb.url = "http://YourAsterixDBHostName:19002/query/service"
8996
```
9097

9198
## Build your own application
9299

93-
For more information about Cloudberry, please read its [documentation](/documentation).
100+
TwitterMap is one example of how to use Cloudberry. To develop your own application, please see the [documentation](/documentation) for more information.
94101

95102
[architecture]: /img/quick-start-architecture.png
96103
{: width="800px"}

script/.gitattributes

Whitespace-only changes.

script/dockerClean.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env bash
22
#clean up the existing images
3-
docker stop -f cc nc1
3+
docker stop cc nc1
44
docker rm -f cc nc1
55
docker volume rm dbstore
66
# remove the local image to fetch the newest remote version

script/fileFeed.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
11
#!/usr/bin/env bash
2+
link=${1-"localhost"}
3+
host=$(basename $(dirname $link))
4+
host=${host%%:*}
5+
port=${2-"10001"}
26
sbt "project noah" --error "run-main edu.uci.ics.cloudberry.noah.feed.FileFeedDriver \
3-
-u localhost -p ${1:-10001}"
7+
-u $host -p $port"

script/ingestAllTwitterToLocalCluster.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,11 @@
1818
#===============================================================================
1919

2020
host=${1:-'http://localhost:19002/aql'}
21+
nc=${2:-"nc1"}
2122
echo "Ingesting sample tweets..."
22-
./script/ingestTwitterToLocalCluster.sh $host
23+
./script/ingestTwitterToLocalCluster.sh $host $nc
2324

2425
echo "Ingesting population data..."
25-
./script/ingestPopulationToLocalCluster.sh $host
26+
./script/ingestPopulationToLocalCluster.sh $host $nc
2627

2728
echo "Data ingestion completed!"

script/ingestPopulationToLocalCluster.sh

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@
2020
set -o nounset # Treat unset variables as an error
2121

2222
host=${1:-'http://localhost:19002/aql'}
23+
nc=${2:-"nc1"}
2324
# ddl to register the twitter dataset
24-
cat <<'EOF' | curl -XPOST --data-binary @- $host
25+
cat <<EOF | curl -XPOST --data-binary @- $host
2526
use dataverse twitter;
2627
create type typeStatePopulation if not exists as open{
2728
name:string,
@@ -52,7 +53,7 @@ create dataset dsCityPopulation(typeCityPopulation) if not exists primary key ci
5253
5354
create feed StatePopulationFeed using socket_adapter
5455
(
55-
("sockets"="nc1:10002"),
56+
("sockets"="$nc:10002"),
5657
("address-type"="nc"),
5758
("type-name"="typeStatePopulation"),
5859
("format"="adm")
@@ -62,7 +63,7 @@ start feed StatePopulationFeed;
6263
6364
create feed CountyPopulationFeed using socket_adapter
6465
(
65-
("sockets"="nc1:10003"),
66+
("sockets"="$nc:10003"),
6667
("address-type"="nc"),
6768
("type-name"="typeCountyPopulation"),
6869
("format"="adm")
@@ -72,7 +73,7 @@ start feed CountyPopulationFeed;
7273
7374
create feed CityPopulationFeed using socket_adapter
7475
(
75-
("sockets"="nc1:10004"),
76+
("sockets"="$nc:10004"),
7677
("address-type"="nc"),
7778
("type-name"="typeCityPopulation"),
7879
("format"="adm")
@@ -83,13 +84,13 @@ EOF
8384

8485
echo 'Created population datasets in AsterixDB.'
8586
#Serve socket feed using local file
86-
cat ./noah/src/main/resources/population/adm/allStatePopulation.adm | ./script/fileFeed.sh 10002
87+
cat ./noah/src/main/resources/population/adm/allStatePopulation.adm | ./script/fileFeed.sh $host 10002
8788
echo 'Ingested state population dataset.'
8889

89-
cat ./noah/src/main/resources/population/adm/allCountyPopulation.adm | ./script/fileFeed.sh 10003
90+
cat ./noah/src/main/resources/population/adm/allCountyPopulation.adm | ./script/fileFeed.sh $host 10003
9091
echo 'Ingested county population dataset.'
9192

92-
cat ./noah/src/main/resources/population/adm/allCityPopulation.adm | ./script/fileFeed.sh 10004
93+
cat ./noah/src/main/resources/population/adm/allCityPopulation.adm | ./script/fileFeed.sh $host 10004
9394
echo 'Ingested city population dataset.'
9495

9596
cat <<'EOF' | curl -XPOST --data-binary @- $host

script/ingestTwitterToLocalCluster.sh

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@
2020
set -o nounset # Treat unset variables as an error
2121

2222
# ddl to register the twitter dataset
23-
host=${1:-'http://localhost:19002/aql'}
24-
cat <<'EOF' | curl -XPOST --data-binary @- $host
23+
host=${1:-"http://localhost:19002/aql"}
24+
nc=${2:-"nc1"}
25+
cat <<EOF | curl -XPOST --data-binary @- $host
2526
drop dataverse twitter if exists;
2627
create dataverse twitter if not exists;
2728
use dataverse twitter
@@ -84,7 +85,7 @@ create index text_idx if not exists on ds_tweet("text") type fulltext;
8485
8586
create feed TweetFeed using socket_adapter
8687
(
87-
("sockets"="nc1:10001"),
88+
("sockets"="$nc:10001"),
8889
("address-type"="nc"),
8990
("type-name"="typeTweet"),
9091
("format"="adm")
@@ -94,11 +95,9 @@ start feed TweetFeed;
9495
EOF
9596

9697

97-
[ -f ./script/sample.adm.gz ] || { echo "Downloading the data..."; ./script/getSampleTweetsFromGDrive.sh; }
98-
#Serve socket feed using local file
99-
#git lfs fetch
98+
#[ -f ./script/sample.adm.gz ] || { echo "Downloading the data..."; ./script/getSampleTweetsFromGDrive.sh; }
10099

101100
echo "Start ingestion ..."
102-
gunzip -c ./script/sample.adm.gz | ./script/fileFeed.sh
101+
gunzip -c ./script/sample.adm.gz | ./script/fileFeed.sh $host 10001
103102
echo "Ingested sample tweets."
104103

script/sample.adm.gz

10.1 MB
Binary file not shown.

0 commit comments

Comments
 (0)