from bs4 import BeautifulSoup
from lxml import etree
import requests
# Fetch a web page and print the text of the first element matched by an
# XPath expression. The test URL and XPath below select the main heading of
# a Wikipedia article.
URL = "https://en.wikipedia.org/wiki/Nike,_Inc."
xpath_address = """//*[@id="firstHeading"]"""

# Without a timeout requests can block indefinitely on an unresponsive
# server; raise_for_status() stops us from parsing an HTTP error page as if
# it were the article.
response = requests.get(URL, timeout=10)
response.raise_for_status()

# BeautifulSoup parses the raw bytes; its serialised form is then handed to
# lxml, which provides the XPath support.
soup = BeautifulSoup(response.content, "html.parser")
dom = etree.HTML(str(soup))

matches = dom.xpath(xpath_address)
if not matches:
    # Same exception type the bare [0] lookup raised, but with a message
    # that says which expression failed.
    raise IndexError("no element matches XPath %r" % (xpath_address,))

# .text only holds the text that precedes the first child element, so it is
# None when the heading's text is wrapped in a child tag. itertext() walks
# the element and all of its descendants instead.
print("".join(matches[0].itertext()))
from bs4 import BeautifulSoup
from lxml import etree
import requests
# Fetch a Wikipedia article while sending browser-like request headers, then
# print the text of its main heading.
URL = "https://en.wikipedia.org/wiki/Nike,_Inc."

# The User-Agent value is built from adjacent string literals so that the
# long string can span two source lines without breaking the literal.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'
    ),
    'Accept-Language': 'en-US, en;q=0.5',
}

# Without a timeout requests can block indefinitely on an unresponsive
# server; raise_for_status() stops us from parsing an HTTP error page as if
# it were the article.
webpage = requests.get(URL, headers=HEADERS, timeout=10)
webpage.raise_for_status()

# BeautifulSoup parses the raw bytes; its serialised form is then handed to
# lxml, which provides the XPath support.
soup = BeautifulSoup(webpage.content, "html.parser")
dom = etree.HTML(str(soup))

matches = dom.xpath('//*[@id="firstHeading"]')
if not matches:
    # Same exception type the bare [0] lookup raised, but with a message
    # that explains what was missing.
    raise IndexError('no element with id "firstHeading" found in ' + URL)

# .text only holds the text that precedes the first child element, so it is
# None when the heading's text is wrapped in a child tag. itertext() walks
# the element and all of its descendants instead.
print("".join(matches[0].itertext()))
# Download an HTML page with the standard-library URL opener and evaluate an
# XPath expression against it using lxml's HTML parser.
try:
    # Python 2
    from urllib2 import urlopen
except ImportError:
    # Python 3
    from urllib.request import urlopen
from contextlib import closing

from lxml import etree

url = "http://www.example.com/servlet/av/ResultTemplate=AVResult.html"
htmlparser = etree.HTMLParser()

# closing() guarantees the HTTP response is closed once parsing finishes.
# It is used instead of a bare `with urlopen(...)` because the Python 2
# response object is not a context manager.
with closing(urlopen(url)) as response:
    tree = etree.parse(response, htmlparser)

# NOTE(review): `xpathselector` is not defined anywhere in the visible code;
# it must be bound to an XPath expression string before this line runs,
# otherwise a NameError is raised. The result of the query is not stored.
tree.xpath(xpathselector)