import spacy

nlp = spacy.load('en_core_web_sm')

# Tokenize the texts so the tokens can be looped through individually
docs = list(nlp.pipe(yourdataframe['yourtextcolumn'].apply(str)))

# Iterate over the parsed Doc objects
filtered_docs = []
for doc in docs:
    # Remove stop words by keeping only the non-stop tokens
    tokens = [token for token in doc if not token.is_stop]
    filtered_docs.append(tokens)
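# A minimal follow-on sketch (not in the original): if the filtered tokens
# will feed a downstream vectorizer, join them back into strings first;
# using token.lemma_ here is an optional, assumed normalization choice.
cleaned_texts = [' '.join(token.lemma_ for token in tokens) for tokens in filtered_docs]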
# Alternatively, you can remove stop words in the CountVectorizer step instead
import spacy
from sklearn.feature_extraction.text import CountVectorizer

nlp = spacy.load('en_core_web_sm')

# CountVectorizer's stop_words parameter is documented as a list, so convert spaCy's set
cv = CountVectorizer(max_df=0.95, min_df=2, stop_words=list(nlp.Defaults.stop_words))
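# A minimal usage sketch (the fit step isn't shown above): build the
# document-term matrix directly from the raw text column; stop words are
# then dropped inside the vectorizer rather than in a spaCy loop.
dtm = cv.fit_transform(yourdataframe['yourtextcolumn'].apply(str))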