# Example: group the elements of an RDD by parity using RDD.groupBy.
# NOTE(review): `sc` is not defined in this snippet -- assumed to be an
# already-created SparkContext; confirm against the surrounding code.
rdd = sc.parallelize([1, 1, 2, 3, 5, 8])

# Key every element by `x % 2` (0 for even, 1 for odd) and collect the
# resulting (key, values) pairs.
result = rdd.groupBy(lambda x: x % 2).collect()

# Sort the pairs by key and the values inside each group so the displayed
# output does not depend on the order in which `result` was produced.
sorted((key, sorted(values)) for key, values in result)
# [(0, [2, 8]), (1, [1, 1, 3, 5])]