A little less and a little more than you need, sorry:
# Print every text node of the parsed HTML that contains "Hand wash",
# skipping text whose parent tag is listed in `blacklist`.

# NOTE(review): `data` is not used anywhere below, and `contents_of_that_file`
# is not defined in this snippet -- presumably it is set elsewhere; confirm
# which of the two is meant to feed the parser.
data = myfile.readlines()
soup = BeautifulSoup(str(contents_of_that_file), 'html.parser')

# text=True matches every text node in the document, whatever tag it sits in.
# NOTE: Beautiful Soup 4.4.0+ documents this argument as `string`; `text` is
# the older spelling and is kept here so the snippet behaves as before.
text = soup.find_all(text=True)

# Parent tag names whose text nodes are skipped.
blacklist = [
    '[document]',
    'noscript',
    'header',
    'html',
    'meta',
    'head',
    'input',
    'script',
    'dc:date',
    'title',
    # there may be more elements you don't want, such as "style", etc.
]

for t in text:
    # Guard clause: ignore text that lives directly inside a blacklisted tag.
    if t.parent.name in blacklist:
        continue
    # Strip once and reuse the result for both the test and the output.
    stripped = t.strip()
    if "Hand wash" in stripped:
        print(">>" + stripped + "<<")
A little less and a little more than you need, sorry:
# Print every text node of the parsed HTML that contains "Hand wash",
# skipping text whose parent tag is listed in `blacklist`.

# NOTE(review): `data` is not used anywhere below, and `contents_of_that_file`
# is not defined in this snippet -- presumably it is set elsewhere; confirm
# which of the two is meant to feed the parser.
data = myfile.readlines()
soup = BeautifulSoup(str(contents_of_that_file), 'html.parser')

# text=True matches every text node in the document, whatever tag it sits in.
# NOTE: Beautiful Soup 4.4.0+ documents this argument as `string`; `text` is
# the older spelling and is kept here so the snippet behaves as before.
text = soup.find_all(text=True)

# Parent tag names whose text nodes are skipped.
blacklist = [
    '[document]',
    'noscript',
    'header',
    'html',
    'meta',
    'head',
    'input',
    'script',
    'dc:date',
    'title',
    # there may be more elements you don't want, such as "style", etc.
]

for t in text:
    # Guard clause: ignore text that lives directly inside a blacklisted tag.
    if t.parent.name in blacklist:
        continue
    # Strip once and reuse the result for both the test and the output.
    stripped = t.strip()
    if "Hand wash" in stripped:
        print(">>" + stripped + "<<")