Search
 
SCRIPT & CODE EXAMPLE
 
CODE EXAMPLE FOR PYTHON

Scraping tables in an HTML file with BeautifulSoup

import pandas as pd
import base64
import os
from bs4 import BeautifulSoup

# Scrape the <td> cells of selected tables in a saved HTML page, group them
# into fixed-width rows, and write the result to 'output.csv'.
file = 'file.html'
# Empty frame used only to declare the column layout of the CSV.
output = pd.DataFrame(columns=['Zone Entry', 'Date', 'Time', 'Zone Exit', 'Date', 'Time', 'Dwell', 'Fee'])
table_ids = ['ctl00_contentHolderBody_Table0', 'ctl00_contentHolderBody_Table1', 'ctl00_contentHolderBody_Table2']
# One CSV row is built from this many consecutive <td> cells.
cells_per_row = len(output.columns)
all_rows = []

# Binary mode hands the raw bytes to BeautifulSoup so it can detect the
# document encoding itself instead of relying on the platform default.
with open(file, 'rb') as f:
    soup = BeautifulSoup(f, 'html.parser')

for table_id in table_ids:
    table = soup.find('table', id=table_id)
    if table is None:
        # A missing table used to crash with AttributeError on None.
        print('warning: table {!r} not found in {}; skipping'.format(table_id, file))
        continue

    cells = table.find_all('td')
    leftover = len(cells) % cells_per_row
    if leftover:
        # A trailing partial row used to raise IndexError; report and drop it.
        print('warning: table {!r} has {} trailing cell(s) that do not fill a row; ignored'.format(table_id, leftover))

    for start in range(0, len(cells) - leftover, cells_per_row):
        # decode_contents() returns the markup inside the cell, so the result
        # no longer depends on the opening tag being exactly "<td>".
        all_rows.append([
            cell.decode_contents().strip()
            for cell in cells[start:start + cells_per_row]
        ])

# Use the declared column names as the CSV header.
pd.DataFrame(all_rows, columns=output.columns).to_csv('output.csv')
 
PREVIOUS NEXT
Tagged: #Scraping #tables #HTML #file #BeautifulSoup
ADD COMMENT
Topic
Name
3+7 =