# DDL-formatted schema string handed to the reader below, declaring each
# column's name and type explicitly.
DDLSchema = "user_id string, user_first_touch_timestamp long, email string"

# Load the users CSV located at `usersCsvPath` using the explicit schema.
# NOTE(review): the separator is a single space as written; if the source
# file is actually tab-delimited this should be "\t" -- confirm against the data.
usersDF = (
    spark.read
    .option("sep", " ")
    .option("header", True)  # treat the first line of the file as column names
    .schema(DDLSchema)
    .csv(usersCsvPath)
)