# Get the first non-whitespace thing in <p>. recursive=False restricts the
# search to the direct children of <p>.
# NOTE(review): `soup` and `match` are defined outside this excerpt.
this_thing = soup.find('p').find(match, recursive=False)
# Print each non-whitespace element in <p> together with the next matching
# sibling; the loop stops once find_next_sibling() has nothing left to return.
while this_thing:
    next_thing = this_thing.find_next_sibling(match)
    print(f"{repr(this_thing)} is followed by {repr(next_thing)}")
    this_thing = next_thing
Take a look at https://code.launchpad.net/~leonardr/beautifulsoup/+git/beautifulsoup/+merge/459082. I'd want to play around with terminology, and make the base class capable of being passed into the BeautifulSoup constructor as parse_only. But I'm pretty happy with this overall. It would let you write code that looked like this:
from bs4 import BeautifulSoup, NavigableString
from bs4.strainer import ElementMatcher
def non_whitespace(element):
    """Return True unless *element* is a whitespace-only NavigableString.

    Anything that is not a NavigableString is accepted, as is any
    NavigableString whose text contains at least one non-whitespace
    character.
    """
    return not (isinstance(element, NavigableString) and element.text.isspace())
# Wrap the predicate in an ElementMatcher (the class proposed in the merge
# request) so it can be handed to the find-style methods.
match = ElementMatcher(non_whitespace)

# Sample document: a <p> whose children are tags and text separated by
# whitespace-only strings.
html_doc = """
<p>
<b>bold</b>
<i>italic</i>
and
<u>underline</u>
<br />
</p>
"""
# Parse the sample document with the lxml parser.
soup = BeautifulSoup(html_doc, 'lxml')
# Get the first non-whitespace thing in <p>. recursive=False restricts the
# search to the direct children of <p>.
this_thing = soup.find('p').find(match, recursive=False)
# Print each non-whitespace element in <p> together with the next matching
# sibling; the loop stops once find_next_sibling() has nothing left to return.
while this_thing:
    next_thing = this_thing.find_next_sibling(match)
    print(f"{repr(this_thing)} is followed by {repr(next_thing)}")
    this_thing = next_thing