#mefiz.com
# Extract the text of a PDF with Apache Tika and split it into lines.
from tika import parser

# parser.from_file() returns a dict; the extracted text is read from its
# 'content' entry below.
rawText = parser.from_file('January2019.pdf')

# 'content' can be None when Tika extracts no text from the document
# (e.g. an image-only PDF); fall back to an empty string so rawList is
# an empty list instead of the script failing with AttributeError.
rawList = (rawText['content'] or '').splitlines()