From 32faf8a2b2f5cc0627d33126ecae96a8986b3e71 Mon Sep 17 00:00:00 2001 From: Elon Bing Date: Tue, 4 Sep 2018 23:58:58 +0200 Subject: [PATCH] Fix bug #1790299 (Launchpad): Quotes author shown with html code https://bugs.launchpad.net/variety/+bug/1790299 caused Goodreads quotes' authors to appear as "Author Name" --- data/plugins/quotes/GoodreadsSource.py | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/data/plugins/quotes/GoodreadsSource.py b/data/plugins/quotes/GoodreadsSource.py index f1d9f79..bee743d 100644 --- a/data/plugins/quotes/GoodreadsSource.py +++ b/data/plugins/quotes/GoodreadsSource.py @@ -94,24 +94,9 @@ class GoodreadsSource(IQuoteSource): for div in soup.find_all('div', 'quoteText'): logger.debug(lambda: "Parsing quote for div\n%s" % div) try: - quote_text = "" - first_a = div.find('a') - for elem in div.contents: - if elem == first_a: - break - else: - quote_text += str(elem) - quote_text = quote_text.replace('<br>', '\n').replace('<br/>', '\n').replace('―', '').strip() - - if first_a: - author = first_a.contents[0] - link = "https://www.goodreads.com" + div.find('a')["href"] - i = div.find('i') - if i: - author = author + ', ' + (i.find('a') or i).contents[0] - else: - link = None - author = re.match(r'(\n\s+)+((.*)$)', quote_text, re.MULTILINE) + quote_text = "\n".join(div.find_all(text=True, recursive=False)).replace('―', '').strip() + author = div.find("span", attrs={"class": "authorOrTitle"}).string.strip().strip(',') + link = "https://www.goodreads.com" + div.find('a')["href"] quotes.append({"quote": quote_text, "author": author, "sourceName": "Goodreads", "link": link}) except Exception: logger.exception(lambda: "Could not extract Goodreads quote") -- 2.18.0