14. Building Amazon Kinesis Consumer app Amazon Kinesis
Datastream Store, Shuffle&Sort API, SDK KCL AWS Lambda Process
SparkKinesisStormkinesis-spout KCL StormSpark
15. Amazon Kinesis Datastream Store, Shuffle&Sort Process Run
SparkSQL on Kinesis Stream SQL
16. Run SparkSQL on Kinesis Stream

import org.apache.spark.streaming.kinesis.KinesisUtils

val kinesisStreams = (0 until numStreams).map { i =>
  KinesisUtils.createStream(
    ssc, streamName, endpointUrl, kinesisCheckpointInterval,
    InitialPositionInStream.LATEST, StorageLevel.MEMORY_ONLY
  )
}
val unionStreams = ssc.union(kinesisStreams)
val words = unionStreams.flatMap(...)
17.
import org.apache.spark.streaming.kinesis.KinesisUtils

val kinesisStreams = (0 until numStreams).map { i =>
  KinesisUtils.createStream(
    ssc, streamName, endpointUrl, kinesisCheckpointInterval,
    InitialPositionInStream.LATEST, StorageLevel.MEMORY_ONLY
  )
}

val unionStreams = ssc.union(kinesisStreams)

val words = unionStreams.flatMap(...)
Run SparkSQL on Kinesis Stream: DStream, DStream UNION,
DStream Transformation
18.
words.foreachRDD(foreachFunc = (rdd: RDD[String], time: Time) => {

  val sqlContext = SQLContextSingleton.getInstance(rdd.sparkContext)

  sqlContext.read.json(rdd).registerTempTable("words")

  val wordCountsDataFrame =
    sqlContext.sql("""select level, count(*) as total
      from words
      group by level""")

  println(s"========= $time =========")
  wordCountsDataFrame.show()

})
DStream Run SparkSQL on Kinesis Stream JSON