Comment 7 for bug 2052943

Revision history for this message
Chris Papademetrious (chrispitude) wrote :

I think I have something sort of working!

Using the 4.13 branch:

====
import bs4
from typing import (
    Any,
    List,
    Type,
)
from bs4.builder import TreeBuilder
from bs4.builder._htmlparser import HTMLParserTreeBuilder

# Tree builder class that is instantiated further down with the custom
# attribute-value list class.
default_builder: Type[TreeBuilder] = HTMLParserTreeBuilder

class UniqueAttributeValueList(List[str]):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def append(self, value: Any):
        if value not in self:
            super().append(value)

    def extend(self, values: List[Any]):
        for value in values:
            self.append(value)

    def remove(self, values: Any | List[Any]) -> None:
        if not isinstance(values, list):
            values = [values]
        for value in values:
            if value in self:
                super().remove(value)
        if not self:
            print("DELETE ATTRIBUTE???")
            # ???

# Configure the builder so that "class" (keyed under "*") is parsed as a
# multi-valued attribute and its values are stored in the custom list class.
builder = default_builder(
    multi_valued_attributes={"*": {"class"}},
    attribute_value_list_class=UniqueAttributeValueList,
)

markup = '<a class=""/>'
soup = bs4.BeautifulSoup(markup, builder=builder)
tag = soup.a

# Appending the same value twice should leave a single "1".
tag["class"].append("1")
print(tag)
tag["class"].append("1")
print(tag)

# Extending with repeated values should add each new value only once.
tag["class"].extend(["2", "2", "3"])
print(tag)

# Remove a single value, then a list of values (which empties the list).
tag["class"].remove("2")
print(tag)
tag["class"].remove(["1", "3"])
print(tag)
====

gives the following output:

====
<a class="1"></a>
<a class="1"></a>
<a class="1 2 3"></a>
<a class="1 3"></a>
DELETE ATTRIBUTE???
<a class=""></a>
====

There are two things I need to figure out.

1. Right now it works only if the original HTML defines the attribute. If the attribute doesn't exist in the HTML, then I get a KeyError because there is no object in the attributes dictionary to operate on.

2. If I remove all the values, I still get class="", but somehow I want to remove the attribute completely.