from nltk.tokenize import word_tokenize
from nltk.util import ngrams


def get_ngrams(text, n):
    # Tokenize the text into words, then build n-grams of length n
    n_grams = ngrams(word_tokenize(text), n)
    # Join the tokens of each n-gram with underscores, e.g. "this_is"
    return ['_'.join(grams) for grams in n_grams]


get_ngrams("this is a sentence", 2)
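For the sample call above, the function returns ['this_is', 'is_a', 'a_sentence']. Note that word_tokenize relies on NLTK's punkt tokenizer data, which may need to be fetched first with nltk.download('punkt').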