Spark操作SequenceFile 2015-04-24 15:30

样例

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
package net.cheyo

import org.apache.spark._
import org.apache.spark.SparkContext._

object SequenceFileTest {
  def main(args: Array[String]) {
    val conf = new SparkConf()
    conf.setAppName("SequenceFileTest")
    conf.setMaster("local[3]")
    val sc = new SparkContext(conf)
    val data = List(("ABC", 1), ("BCD", 2), ("CDE", 3), ("DEF", 4), ("FGH", 5))
    val rdd = sc.parallelize(data, 1)
    val dir = "file:///D:/sequenceFile-" + System.currentTimeMillis()
    rdd.saveAsSequenceFile(dir)
    val rdd2 = sc.sequenceFile[String, Int](dir + "/part-00000")
    println(rdd2.collect().map(elem => (elem._1 + ", " + elem._2)).toList)
    sc.stop()
  }
}
Tags: #Spark    Post on Spark