from bs4 import BeautifulSoup
import requests
# Fetch a Google search results page and print the target of every anchor
# whose href starts with the "/url?q=" redirect prefix, with that prefix
# stripped.
REDIRECT_PREFIX = '/url?q='

# A timeout keeps the script from hanging forever on an unresponsive server.
page = requests.get("https://www.google.dz/search?q=see", timeout=10)
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser" warning).
soup = BeautifulSoup(page.content, "html.parser")
# href=True skips anchors that have no href attribute at all; indexing
# link['href'] on those would raise KeyError.
links = soup.find_all("a", href=True)
for link in links:
    href = link['href']
    if href.startswith(REDIRECT_PREFIX):
        # Strip only the leading prefix; str.replace would also delete any
        # later occurrence of the same text inside the target URL.
        print(href[len(REDIRECT_PREFIX):])
#Scrapes Python's URL, version number and logo from its Wikipedia page:
# $ pip3 install requests beautifulsoup4
import requests, bs4, os, sys
# Download Python's Wikipedia page, read the website URL, the stable version
# and the logo image from the infobox table, save the logo into the current
# directory, and print all three on one line.
URL = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
try:
    # Timeouts keep the script from blocking forever on a stalled connection.
    html = requests.get(URL, timeout=10).text
    document = bs4.BeautifulSoup(html, 'html.parser')
    table = document.find('table', class_='infobox vevent')
    # NOTE(review): every lookup below assumes the element exists; if find()
    # returns None the chained access raises AttributeError/TypeError, which
    # is not handled here.
    # string= is the current name of the (deprecated) text= filter.
    python_url = table.find('th', string='Website').next_sibling.a['href']
    # First text fragment of the cell that follows the "Stable release" header.
    version = next(table.find('th', string='Stable release').next_sibling.strings)
    logo_url = table.find('img')['src']
    # Prepending "https:" assumes a protocol-relative src ("//host/path").
    logo = requests.get(f'https:{logo_url}', timeout=10).content
    filename = os.path.basename(logo_url)
    with open(filename, 'wb') as file:
        file.write(logo)
    print(f'{python_url}, {version}, file://{os.path.abspath(filename)}')
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
    # Timeout is caught as well because a read timeout is not a
    # ConnectionError subclass and would otherwise escape as a traceback.
    print("You've got problems with connection.", file=sys.stderr)
#import requests module to send request to the website
import requests
#from bs4 module import BeautifulSoup class
from bs4 import BeautifulSoup
# Template: fetch a page and parse it with BeautifulSoup.
# Placeholder -- replace with the address of the page to scrape. It is kept
# as a string so the file is valid Python; requests rejects it until a real
# URL is filled in.
website_url = "<website_url_here>"
# The server's response body (raw bytes) is stored in 'r'.
r = requests.get(url=website_url, timeout=10).content
# Use the BeautifulSoup class with 'html.parser' or 'lxml'.
soup = BeautifulSoup(r, 'html.parser')