|
1 | 1 |
|
2 | | -## 1.格式: |
| 2 | +## 1.格式 |
3 | 3 | ``` |
4 | 4 | CREATE TABLE tableName( |
5 | 5 | columnFamily:columnName type as alias, |
|
31 | 31 | | PERIOD FOR SYSTEM_TIME | 关键字表明该定义的表为维表信息| |
32 | 32 | | PRIMARY KEY(keyInfo) | 维表主键定义;hbase 维表rowkey的构造方式;可选择的构造包括 md5(alias + alias), '常量',也包括上述方式的自由组合 | |
33 | 33 |
|
34 | | -## 3.参数 |
| 34 | +## 4.参数 |
| 35 | + |
| 36 | +参数详细说明请看[参数详细说明](./sideParams.md) |
35 | 37 |
|
36 | 38 | |参数名称|含义|是否必填|默认值| |
37 | 39 | |----|---|---|----| |
|
43 | 45 | | partitionedJoin | 是否在維表join之前先根据 設定的key 做一次keyby操作(可以減少维表的数据缓存量)|否|false| |
44 | 46 |
|
45 | 47 | -------------- |
46 | | -> 缓存策略 |
47 | | - * NONE: 不做内存缓存 |
48 | | - * LRU: |
49 | | - * cacheSize: 缓存的条目数量 |
50 | | - * cacheTTLMs:缓存的过期时间(ms) |
51 | 48 |
|
52 | | -## 4.样例 |
| 49 | +## 5.样例 |
| 50 | +### LRU维表示例 |
| 51 | +``` |
| 52 | +CREATE TABLE sideTable ( |
| 53 | + wtz:message varchar as message, |
| 54 | + wtz:info varchar as info , |
| 55 | + PRIMARY KEY (rowkey), |
| 56 | + PERIOD FOR SYSTEM_TIME |
| 57 | +) WITH ( |
| 58 | + type = 'hbase', |
| 59 | + zookeeperQuorum = '192.168.80.105:2181,192.168.80.106:2181,192.168.80.107:2181', |
| 60 | + zookeeperParent = '/hbase', |
| 61 | + tableName = 'testFlinkStreamSql', |
| 62 | + parallelism = '1', |
| 63 | + cache = 'LRU', |
| 64 | + cacheSize ='10000', |
| 65 | + cacheTTLMs ='60000', |
| 66 | + parallelism ='1', |
| 67 | + partitionedJoin='false' |
| 68 | +); |
| 69 | +``` |
| 70 | + |
| 71 | +### ALL维表示例 |
| 72 | +``` |
| 73 | +CREATE TABLE sideTable ( |
| 74 | + wtz:message varchar as message, |
| 75 | + wtz:info varchar as info , |
| 76 | + PRIMARY KEY (rowkey), |
| 77 | + PERIOD FOR SYSTEM_TIME |
| 78 | +) WITH ( |
| 79 | + type = 'hbase', |
| 80 | + zookeeperQuorum = '192.168.80.105:2181,192.168.80.106:2181,192.168.80.107:2181', |
| 81 | + zookeeperParent = '/hbase', |
| 82 | + tableName = 'testFlinkStreamSql', |
| 83 | + parallelism = '1', |
| 84 | + cache = 'ALL', |
| 85 | + cacheTTLMs ='60000', |
| 86 | + parallelism ='1', |
| 87 | + partitionedJoin='false' |
| 88 | +); |
53 | 89 | ``` |
54 | | -CREATE TABLE sideTable( |
55 | | - cf:name varchar as name, |
56 | | - cf:info int as info, |
57 | | - PRIMARY KEY(md5(name) + 'test'), |
58 | | - PERIOD FOR SYSTEM_TIME |
59 | | - )WITH( |
60 | | - type ='hbase', |
61 | | - zookeeperQuorum ='rdos1:2181', |
62 | | - zookeeperParent ='/hbase', |
63 | | - tableName ='workerinfo', |
64 | | - cache ='LRU', |
65 | | - cacheSize ='10000', |
66 | | - cacheTTLMs ='60000', |
67 | | - parallelism ='1', |
68 | | - partitionedJoin='true' |
| 90 | + |
| 91 | +### hbase异步维表关联完整案例 |
| 92 | +``` |
| 93 | +CREATE TABLE MyTable( |
| 94 | + id varchar, |
| 95 | + name varchar, |
| 96 | + address varchar |
| 97 | +)WITH( |
| 98 | + type = 'kafka10', |
| 99 | + bootstrapServers = '172.16.101.224:9092', |
| 100 | + zookeeperQuorm = '172.16.100.188:2181/kafka', |
| 101 | + offsetReset = 'latest', |
| 102 | + topic = 'tiezhu_test_in', |
| 103 | + groupId = 'flink_sql', |
| 104 | + timezone = 'Asia/Shanghai', |
| 105 | + topicIsPattern = 'false', |
| 106 | + parallelism = '1' |
| 107 | +); |
| 108 | +
|
| 109 | +CREATE TABLE MyResult( |
| 110 | + id varchar, |
| 111 | + name varchar, |
| 112 | + address varchar, |
| 113 | + message varchar, |
| 114 | + info varchar |
| 115 | +)WITH( |
| 116 | + type = 'console' |
69 | 117 | ); |
| 118 | + |
| 119 | + CREATE TABLE sideTable ( |
| 120 | + wtz:message varchar as message, |
| 121 | + wtz:info varchar as info , |
| 122 | + PRIMARY KEY (rowkey), |
| 123 | + PERIOD FOR SYSTEM_TIME |
| 124 | +) WITH ( |
| 125 | + type = 'hbase', |
| 126 | + zookeeperQuorum = '192.168.80.105:2181,192.168.80.106:2181,192.168.80.107:2181', |
| 127 | + zookeeperParent = '/hbase', |
| 128 | + tableName = 'testFlinkStreamSql', |
| 129 | + parallelism = '1', |
| 130 | + cache = 'LRU', |
| 131 | + cacheSize ='10000', |
| 132 | + cacheTTLMs ='60000', |
| 133 | + parallelism ='1', |
| 134 | + partitionedJoin='false' |
| 135 | +); |
70 | 136 |
|
| 137 | +insert |
| 138 | +into |
| 139 | + MyResult |
| 140 | + select |
| 141 | + a.name, |
| 142 | + a.id, |
| 143 | + a.address, |
| 144 | + b.message, |
| 145 | + b.info |
| 146 | + from |
| 147 | + MyTable a |
| 148 | + left join |
| 149 | + sideTable b |
| 150 | + on a.id=b.rowkey; |
71 | 151 | ``` |
72 | 152 |
|
| 153 | +## 6.hbase中数据存储形式 |
| 154 | + |
| 155 | +在hbase中,数据是以列簇的形式存储,其中rowKey作为主键,按字典排序。 |
73 | 156 |
|
| 157 | +在样例中,wtz为列族名,message, info为列名,数据在hbase中的存储情况为: |
| 158 | +``` |
| 159 | +hbase(main):002:0> scan 'testFlinkStreamSql' |
| 160 | +ROW COLUMN+CELL |
| 161 | + 0 column=wtz:info, timestamp=1587089266719, value=hadoop |
| 162 | + 0 column=wtz:message, timestamp=1587089245780, value=hbase |
| 163 | + 1 column=wtz:info, timestamp=1587088818432, value=flink |
| 164 | + 1 column=wtz:message, timestamp=1587088796633, value=dtstack |
| 165 | + 2 column=wtz:info, timestamp=1587088858564, value=sql |
| 166 | + 2 column=wtz:message, timestamp=1587088840507, value=stream |
| 167 | +``` |
0 commit comments