Comment 6 for bug 1974105

Revision history for this message
Luke P (lkp80877984) wrote : Re: slow performance within Tag.index(child)

"wasn't designed for"
Well, to your credit, it does that pretty darn well!

Here is a test case (btw, I'm learning Python, so you're welcome to give other feedback):

```
class TestMonkeyPatchBs4(unittest.TestCase):
    """Randomized round-trip test of bs4 insert/extract/replace_with/index.

    For every seed a random edit script is recorded against the string
    ``"abcdex"`` until the string is empty.  The inverse of each edit is then
    replayed, in reverse order, on an empty ``<body>``; the resulting document
    must contain one ``<p>`` per character of ``"abcdex"``.
    """

    def test_bs4(self):
        """Run the round trip for seeds 0..999 and check the rebuilt markup."""
        expected = (
            b"<html><body><p>a</p><p>b</p><p>c</p><p>d</p><p>e</p><p>x</p>"
            b"</body></html>"
        )
        for seed in range(1000):
            # subTest names the failing seed and lets the other seeds still run.
            with self.subTest(seed=seed):
                # A private generator yields the same stream as
                # random.seed(seed) without touching global RNG state.
                ops = self._record_ops(random.Random(seed))
                soup = self._replay_inverse(ops)
                self.assertEqual(expected, soup.encode(encoding="utf-8"))

    @staticmethod
    def _record_ops(rng):
        """Return a random edit script that reduces ``"abcdex"`` to ``""``.

        Entries are tuples:
        ``("+", i)``        a character was inserted at index ``i``;
        ``("-", i, char)``  ``char`` was removed from index ``i``;
        ``("!", i, char)``  ``char`` at index ``i`` had its case flipped;
        ``("@",)``          marker: verify ``index`` at this point.
        """
        pending = (
            ["+f", "+g", "+h", "+i", "+j"] + ["!?"] * 5 + ["-?"] * 5 + ["@"] * 2
        )
        rng.shuffle(pending)

        # Six starting characters and only five scheduled removals: the string
        # can never be empty while scheduled ops remain, so randrange below
        # always has a non-empty range (the trailing "x" exists for this).
        current = "abcdex"
        ops = []
        for proposed in reversed(pending):
            # Drawn for every op, including "@", so the random stream (and
            # therefore each seed's scenario) does not depend on the op kind.
            i = rng.randrange(len(current))
            char = current[i]
            kind = proposed[0]
            if kind == "+":
                ops.append(("+", i))
                current = current[:i] + proposed[1] + current[i:]
            elif kind == "-":
                ops.append(("-", i, char))
                current = current[:i] + current[i + 1 :]
            elif kind == "!":
                ops.append(("!", i, char))
                current = current[:i] + char.swapcase() + current[i + 1 :]
            else:
                ops.append(("@",))

        # Drain whatever is left so the script always ends at the empty string.
        while current:
            i = rng.randrange(len(current))
            ops.append(("-", i, current[i]))
            current = current[:i] + current[i + 1 :]
        return ops

    def _replay_inverse(self, ops):
        """Apply the inverse of ``ops`` in reverse order and return the soup."""
        soup = bs4.BeautifulSoup("<html></html>", "html.parser")
        body = soup.new_tag("body")
        soup.html.append(body)

        def paragraph(text):
            p_tag = soup.new_tag("p")
            p_tag.append(text)
            return p_tag

        for op in reversed(ops):
            kind = op[0]
            if kind == "+":
                # inverse of an insertion: extract
                body.contents[op[1]].extract()
            elif kind == "-":
                # inverse of a removal: insert
                body.insert(op[1], paragraph(op[2]))
            elif kind == "!":
                # inverse of a replacement: replace_with the original char
                body.contents[op[1]].replace_with(paragraph(op[2]))
            else:
                # exercise the index method on every current child
                for position, child in enumerate(body.contents):
                    self.assertEqual(position, body.index(child))
        return soup
```

Cheers,
Luke