# Build a bag-of-words vectorizer that prunes very rare and very common terms.
# Float thresholds are proportions of documents (document frequency), not raw
# counts:
#   min_df=0.05 -> ignore terms that appear in fewer than 5% of the documents
#   max_df=0.9  -> ignore terms that appear in more than 90% of the documents
# stop_words='english' additionally drops the built-in English stop-word list.
count_vectorizer = CountVectorizer(min_df=0.05, max_df=0.9, stop_words='english')