Skip to content

Commit d03dfd8

Browse files
committed
Merge pull request #49 from gianm/rowkey-hashing
Rollup-aware partitioning by default for Maps, and a Partitioner interface for other types.
2 parents d709dd9 + 6e35d77 commit d03dfd8

File tree

12 files changed

+729
-139
lines changed

12 files changed

+729
-139
lines changed
Lines changed: 19 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,29 @@
11
/*
2-
* Tranquility.
3-
* Copyright 2013, 2014, 2015 Metamarkets Group, Inc.
2+
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. Metamarkets licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
49
*
5-
* Licensed under the Apache License, Version 2.0 (the "License");
6-
* you may not use this file except in compliance with the License.
7-
* You may obtain a copy of the License at
10+
* http://www.apache.org/licenses/LICENSE-2.0
811
*
9-
* http://www.apache.org/licenses/LICENSE-2.0
10-
*
11-
* Unless required by applicable law or agreed to in writing, software
12-
* distributed under the License is distributed on an "AS IS" BASIS,
13-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14-
* See the License for the specific language governing permissions and
15-
* limitations under the License.
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
1618
*/
19+
1720
package com.metamx.tranquility.beam
1821

19-
import com.google.common.hash.Hashing
20-
import com.metamx.common.scala.Logging
21-
import com.twitter.util.Future
22+
import com.metamx.tranquility.partition.HashCodePartitioner
2223

23-
/**
24-
* Partitions events based on their hashCode modulo the number of delegate beams, and propagates the partitioned events
25-
* via the appropriate beam.
26-
*/
2724
class HashPartitionBeam[A](
28-
val delegates: IndexedSeq[Beam[A]]
29-
) extends Beam[A] with Logging
25+
beams: IndexedSeq[Beam[A]]
26+
) extends MergingPartitioningBeam[A](new HashCodePartitioner[A], beams)
3027
{
31-
def propagate(events: Seq[A]) = {
32-
val futures = events.groupBy(event => Hashing.consistentHash(event.hashCode, delegates.size)) map {
33-
case (i, group) =>
34-
delegates(i).propagate(group)
35-
}
36-
Future.collect(futures.toList).map(_.sum)
37-
}
38-
39-
def close() = {
40-
Future.collect(delegates map (_.close())) map (_ => ())
41-
}
42-
43-
override def toString = "HashPartitionBeam(%s)" format delegates.mkString(", ")
28+
override def toString = s"HashPartitionBeam(${beams.mkString(", ")})"
4429
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. Metamarkets licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package com.metamx.tranquility.beam
21+
22+
import com.metamx.common.scala.Logging
23+
import com.metamx.tranquility.partition.Partitioner
24+
import com.twitter.util.Future
25+
26+
/**
27+
* Partitions events based on the output of a Partitioner, and propagates the partitioned events via the
28+
* appropriate underlying beams.
29+
*/
30+
class MergingPartitioningBeam[A](
31+
val partitioner: Partitioner[A],
32+
val beams: IndexedSeq[Beam[A]]
33+
) extends Beam[A] with Logging
34+
{
35+
def propagate(events: Seq[A]) = {
36+
val futures = events.groupBy(partitioner.partition(_, beams.size)) map {
37+
case (i, group) =>
38+
beams(i).propagate(group)
39+
}
40+
Future.collect(futures.toList).map(_.sum)
41+
}
42+
43+
def close() = {
44+
Future.collect(beams map (_.close())) map (_ => ())
45+
}
46+
47+
override def toString = s"MergingPartitioningBeam(${beams.mkString(", ")})"
48+
}

core/src/main/scala/com/metamx/tranquility/druid/DruidBeams.scala

Lines changed: 88 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,12 @@ import com.metamx.emitter.service.ServiceEmitter
3030
import com.metamx.tranquility.beam.Beam
3131
import com.metamx.tranquility.beam.ClusteredBeam
3232
import com.metamx.tranquility.beam.ClusteredBeamTuning
33-
import com.metamx.tranquility.beam.HashPartitionBeam
33+
import com.metamx.tranquility.beam.MergingPartitioningBeam
3434
import com.metamx.tranquility.finagle.BeamService
3535
import com.metamx.tranquility.finagle.FinagleRegistry
3636
import com.metamx.tranquility.finagle.FinagleRegistryConfig
37+
import com.metamx.tranquility.partition.GenericTimeAndDimsPartitioner
38+
import com.metamx.tranquility.partition.Partitioner
3739
import com.metamx.tranquility.typeclass.JavaObjectWriter
3840
import com.metamx.tranquility.typeclass.JsonWriter
3941
import com.metamx.tranquility.typeclass.ObjectWriter
@@ -50,29 +52,29 @@ import scala.collection.JavaConverters._
5052
import scala.language.reflectiveCalls
5153

5254
/**
53-
* Builds Beams or Finagle services that send events to the Druid indexing service.
54-
*
55-
* {{{
56-
* val curator = CuratorFrameworkFactory.newClient("localhost:2181", new BoundedExponentialBackoffRetry(100, 30000, 30))
57-
* curator.start()
58-
* val dataSource = "foo"
59-
* val dimensions = Seq("bar")
60-
* val aggregators = Seq(new LongSumAggregatorFactory("baz", "baz"))
61-
* val service = DruidBeams
62-
* .builder[Map[String, Any]](eventMap => new DateTime(eventMap("timestamp")))
63-
* .curator(curator)
64-
* .discoveryPath("/test/discovery")
65-
* .location(DruidLocation(new DruidEnvironment("druid:local:indexer", "druid:local:firehose:%s"), dataSource))
66-
* .rollup(DruidRollup(dimensions, aggregators, QueryGranularity.MINUTE))
67-
* .tuning(new ClusteredBeamTuning(Granularity.HOUR, 10.minutes, 1, 1))
68-
* .buildService()
69-
* val future = service(Seq(Map("timestamp" -> "2010-01-02T03:04:05.678Z", "bar" -> "hey", "baz" -> 3)))
70-
* println("result = %s" format Await.result(future))
71-
* }}}
72-
*
73-
* Your event type (in this case, {{{Map[String, Any]}}} must be serializable via Jackson to JSON that Druid can
74-
* understand. If Jackson is not an appropriate choice, you can provide an ObjectWriter via {{{.objectWriter(...)}}}.
75-
*/
55+
* Builds Beams or Finagle services that send events to the Druid indexing service.
56+
*
57+
* {{{
58+
* val curator = CuratorFrameworkFactory.newClient("localhost:2181", new BoundedExponentialBackoffRetry(100, 30000, 30))
59+
* curator.start()
60+
* val dataSource = "foo"
61+
* val dimensions = Seq("bar")
62+
* val aggregators = Seq(new LongSumAggregatorFactory("baz", "baz"))
63+
* val service = DruidBeams
64+
* .builder[Map[String, Any]](eventMap => new DateTime(eventMap("timestamp")))
65+
* .curator(curator)
66+
* .discoveryPath("/test/discovery")
67+
* .location(DruidLocation(new DruidEnvironment("druid:local:indexer", "druid:local:firehose:%s"), dataSource))
68+
* .rollup(DruidRollup(dimensions, aggregators, QueryGranularity.MINUTE))
69+
* .tuning(new ClusteredBeamTuning(Granularity.HOUR, 10.minutes, 1, 1))
70+
* .buildService()
71+
* val future = service(Seq(Map("timestamp" -> "2010-01-02T03:04:05.678Z", "bar" -> "hey", "baz" -> 3)))
72+
* println("result = %s" format Await.result(future))
73+
* }}}
74+
*
75+
* Your event type (in this case, {{{Map[String, Any]}}} must be serializable via Jackson to JSON that Druid can
76+
* understand. If Jackson is not an appropriate choice, you can provide an ObjectWriter via {{{.objectWriter(...)}}}.
77+
*/
7678
object DruidBeams
7779
{
7880
val DefaultTimestampSpec = new TimestampSpec("timestamp", "iso", null)
@@ -108,52 +110,77 @@ object DruidBeams
108110

109111
def rollup(rollup: DruidRollup) = new Builder[EventType](config.copy(_rollup = Some(rollup)))
110112

111-
def timestampSpec(timestampSpec: TimestampSpec) = new Builder[EventType](config.copy(_timestampSpec = Some(timestampSpec)))
113+
def timestampSpec(timestampSpec: TimestampSpec) = {
114+
new Builder[EventType](config.copy(_timestampSpec = Some(timestampSpec)))
115+
}
112116

113-
def clusteredBeamZkBasePath(path: String) = new Builder[EventType](config.copy(_clusteredBeamZkBasePath = Some(path)))
117+
def clusteredBeamZkBasePath(path: String) = {
118+
new Builder[EventType](config.copy(_clusteredBeamZkBasePath = Some(path)))
119+
}
114120

115121
def clusteredBeamIdent(ident: String) = new Builder[EventType](config.copy(_clusteredBeamIdent = Some(ident)))
116122

117-
def druidBeamConfig(beamConfig: DruidBeamConfig) = new Builder[EventType](config.copy(_druidBeamConfig = Some(beamConfig)))
123+
def druidBeamConfig(beamConfig: DruidBeamConfig) = {
124+
new Builder[EventType](config.copy(_druidBeamConfig = Some(beamConfig)))
125+
}
118126

119127
def emitter(emitter: ServiceEmitter) = new Builder[EventType](config.copy(_emitter = Some(emitter)))
120128

121-
def finagleRegistry(registry: FinagleRegistry) = new Builder[EventType](config.copy(_finagleRegistry = Some(registry)))
129+
def finagleRegistry(registry: FinagleRegistry) = {
130+
new Builder[EventType](config.copy(_finagleRegistry = Some(registry)))
131+
}
122132

123133
def timekeeper(timekeeper: Timekeeper) = new Builder[EventType](config.copy(_timekeeper = Some(timekeeper)))
124134

125-
def beamDecorateFn(f: (Interval, Int) => Beam[EventType] => Beam[EventType]) = new
126-
Builder(config.copy(_beamDecorateFn = Some(f)))
135+
def beamDecorateFn(f: (Interval, Int) => Beam[EventType] => Beam[EventType]) = {
136+
new Builder(config.copy(_beamDecorateFn = Some(f)))
137+
}
138+
139+
def beamMergeFn(f: Seq[Beam[EventType]] => Beam[EventType]) = {
140+
if (config._partitioner.nonEmpty) {
141+
throw new IllegalStateException("Cannot set both 'beamMergeFn' and 'partitioner'")
142+
}
143+
new Builder[EventType](config.copy(_beamMergeFn = Some(f)))
144+
}
127145

128-
def beamMergeFn(f: Seq[Beam[EventType]] => Beam[EventType]) = new Builder[EventType](config.copy(_beamMergeFn = Some(f)))
146+
def partitioner(partitioner: Partitioner[EventType]) = {
147+
if (config._beamMergeFn.nonEmpty) {
148+
throw new IllegalStateException("Cannot set both 'beamMergeFn' and 'partitioner'")
149+
}
150+
new Builder[EventType](config.copy(_partitioner = Some(partitioner)))
151+
}
129152

130153
def alertMap(d: Dict) = new Builder[EventType](config.copy(_alertMap = Some(d)))
131154

132155
@deprecated("use .objectWriter(...)", "0.2.21")
133-
def eventWriter(writer: ObjectWriter[EventType]) = new Builder[EventType](config.copy(_objectWriter = Some(writer)))
156+
def eventWriter(writer: ObjectWriter[EventType]) = {
157+
new Builder[EventType](config.copy(_objectWriter = Some(writer)))
158+
}
134159

135-
def objectWriter(writer: ObjectWriter[EventType]) = new Builder[EventType](config.copy(_objectWriter = Some(writer)))
160+
def objectWriter(writer: ObjectWriter[EventType]) = {
161+
new Builder[EventType](config.copy(_objectWriter = Some(writer)))
162+
}
136163

137164
def objectWriter(writer: JavaObjectWriter[EventType]) = {
138165
new Builder[EventType](config.copy(_objectWriter = Some(ObjectWriter.wrap(writer))))
139166
}
140167

141-
def eventTimestamped(timeFn: EventType => DateTime) = new Builder[EventType](
142-
config.copy(
143-
_timestamper = Some(
144-
new Timestamper[EventType]
145-
{
146-
def timestamp(a: EventType) = timeFn(a)
147-
}
168+
def eventTimestamped(timeFn: EventType => DateTime) = {
169+
new Builder[EventType](
170+
config.copy(
171+
_timestamper = Some(
172+
new Timestamper[EventType]
173+
{
174+
def timestamp(a: EventType) = timeFn(a)
175+
}
176+
)
148177
)
149178
)
150-
)
179+
}
151180

152181
def buildBeam(): Beam[EventType] = {
153182
val things = config.buildAll()
154-
implicit val eventTimestamped = things.timestamper getOrElse {
155-
throw new IllegalArgumentException("WTF?! Should have had a Timestamperable event...")
156-
}
183+
implicit val eventTimestamped = things.timestamper
157184
val lifecycle = new Lifecycle
158185
val indexService = new IndexService(
159186
things.location.environment,
@@ -222,6 +249,7 @@ object DruidBeams
222249
_finagleRegistry: Option[FinagleRegistry] = None,
223250
_timekeeper: Option[Timekeeper] = None,
224251
_beamDecorateFn: Option[(Interval, Int) => Beam[EventType] => Beam[EventType]] = None,
252+
_partitioner: Option[Partitioner[EventType]] = None,
225253
_beamMergeFn: Option[Seq[Beam[EventType]] => Beam[EventType]] = None,
226254
_alertMap: Option[Dict] = None,
227255
_objectWriter: Option[ObjectWriter[EventType]] = None,
@@ -280,19 +308,30 @@ object DruidBeams
280308
val beamDecorateFn = _beamDecorateFn getOrElse {
281309
(interval: Interval, partition: Int) => (beam: Beam[EventType]) => beam
282310
}
283-
val beamMergeFn = _beamMergeFn getOrElse {
284-
(beams: Seq[Beam[EventType]]) => new HashPartitionBeam[EventType](beams.toIndexedSeq)
285-
}
286311
val alertMap = _alertMap getOrElse Map.empty
287312
val objectWriter = _objectWriter getOrElse {
288313
new JsonWriter[EventType]
289314
{
290-
protected def viaJsonGenerator(a: EventType, jg: JsonGenerator) {
315+
override protected def viaJsonGenerator(a: EventType, jg: JsonGenerator): Unit = {
291316
scalaObjectMapper.writeValue(jg, a)
292317
}
293318
}
294319
}
295-
val timestamper = _timestamper
320+
val timestamper = _timestamper getOrElse {
321+
throw new IllegalArgumentException("WTF?! Should have had a Timestamperable event...")
322+
}
323+
val beamMergeFn = _beamMergeFn getOrElse {
324+
val partitioner = _partitioner getOrElse {
325+
GenericTimeAndDimsPartitioner.create(
326+
timestamper,
327+
timestampSpec,
328+
rollup
329+
)
330+
}
331+
(beams: Seq[Beam[EventType]]) => {
332+
new MergingPartitioningBeam[EventType](partitioner, beams.toIndexedSeq)
333+
}
334+
}
296335
}
297336
}
298337

0 commit comments

Comments
 (0)