# Read table data from a PDF into DataFrames and export it to a file
# (this walkthrough writes JSON; see the conversion note further down).
#
# NOTE: the lines starting with `!` are IPython/Jupyter shell escapes, so this
# file is meant to be run as notebook cells, not as a plain Python script.

# tabula-py requires a Java environment, so first check which Java (if any)
# is available on this machine.
!java -version
# Install tabula-py quietly (-q suppresses most of pip's output).
!pip install -q tabula-py
import tabula
# Show the environment details tabula-py reports (see the tabula-py docs for
# the exact contents).
tabula.environment_info()
# NOTE(review): duplicate of the import above — presumably kept so this cell
# can be run on its own; it is redundant when the cells run top to bottom.
import tabula
# The sample PDF is referenced by URL rather than by a local file path.
pdf_path = "https://github.com/chezou/tabula-py/raw/master/tests/resources/data.pdf"
# stream=True requests tabula's stream-mode extraction (see the read_pdf docs).
dfs = tabula.read_pdf(pdf_path, stream=True)
# read_pdf returns a list of DataFrames, so report how many tables were found.
print(len(dfs))
# Bare expression: a notebook displays the first table when this is the last
# line of a cell; in a plain script it would have no visible effect.
dfs[0]
# You can also convert a PDF directly into a JSON, CSV, or TSV file.
# Here the output is written to "test.json" in the current working directory.
tabula.convert_into(pdf_path, "test.json", output_format="json")
# Print the generated file to confirm the conversion (`cat` needs a Unix-like
# shell).
!cat test.json