class UniqueAttributeValueList(List[str]):
def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs)
def append(self, value: Any):
if value not in self: super().append(value)
def extend(self, values: List[Any]):
for value in values: self.append(value)
def remove(self, values: Any | List[Any]) -> None:
if not isinstance(values, list):
values = [values]
for value in values:
if value in self: super().remove(value)
if not self: print("DELETE ATTRIBUTE???")
# ???
1. Right now it works only if the original HTML defines the attribute. If the attribute doesn't exist in the HTML, then I get a KeyError because there is no object in the attributes dictionary to operate on.
2. If I remove all the values, I still get class="", but somehow I want to remove the attribute completely.
I think I have something sort of working!
Using the 4.13 branch:
==== _htmlparser import HTMLParserTreeB uilder
import bs4
from typing import (
Any,
List,
Type,
)
from bs4.builder import TreeBuilder
from bs4.builder.
default_builder: Type[TreeBuilder] = HTMLParserTreeB uilder
class UniqueAttribute ValueList( List[str] ):
super( ).__init_ _(*args, **kwargs)
def __init__(self, *args, **kwargs):
def append(self, value: Any):
super( ).append( value)
if value not in self:
def extend(self, values: List[Any]):
self. append( value)
for value in values:
def remove(self, values: Any | List[Any]) -> None:
super( ).remove( value)
print( "DELETE ATTRIBUTE???")
if not isinstance(values, list):
values = [values]
for value in values:
if value in self:
if not self:
# ???
builder = default_builder( valued_ attributes= {"*": set(["class"])}, value_list_ class=UniqueAtt ributeValueList
multi_
attribute_
)
markup = '<a class=""/>' up(markup, builder=builder)
soup = bs4.BeautifulSo
tag = soup.a
tag['class' ].append( '1') ].append( '1') 'class' ].extend( ['2', '2', '3']) 'class' ].remove( "2") 'class' ].remove( ["1", "3"])
print(tag)
tag['class'
print(tag)
tag.attrs[
print(tag)
tag.attrs[
print(tag)
tag.attrs[
print(tag)
====
gives the following output:
====
<a class="1"></a>
<a class="1"></a>
<a class="1 2 3"></a>
<a class="1 3"></a>
DELETE ATTRIBUTE???
<a class=""></a>
====
There are two things I need to figure out.
1. Right now it works only if the original HTML defines the attribute. If the attribute doesn't exist in the HTML, then I get a KeyError because there is no object in the attributes dictionary to operate on.
2. If I remove all the values, I still get class="", but somehow I want to remove the attribute completely.