# Create an RDD from a local Python list using SparkContext.parallelize.
# Each element of dataList is a (str, int) tuple.
dataList = [("Java", 20000), ("Python", 100000), ("Scala", 3000)]

# NOTE(review): `spark` is not defined in this snippet; it is assumed to be an
# already-created SparkSession provided by the surrounding code or shell.
rdd = spark.sparkContext.parallelize(dataList)