From 7488474f4bbc706274539de977a3b93bc1ce1fa8 Mon Sep 17 00:00:00 2001 From: ashibe89 Date: Mon, 7 Mar 2022 12:41:00 +0900 Subject: [PATCH] AmzonJP FIX to get series and black curtain tag --- src/calibre/ebooks/metadata/sources/amazon.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index a4a2690b12..3b278c4dc7 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -97,7 +97,7 @@ def parse_details_page(url, log, timeout, browser, domain): return if domain == 'jp': for a in root.xpath('//a[@href]'): - if 'black-curtain-redirect.html' in a.get('href'): + if ('black-curtain-redirect.html' in a.get('href')) or ('/black-curtain/save-eligibility/black-curtain' in a.get('href')): url = a.get('href') if url: if url.startswith('/'): @@ -733,7 +733,10 @@ def parse_series(self, root): a = series[0].xpath('descendant::a') if a: raw = self.tostring(a[0], encoding='unicode', method='text', with_tail=False) - m = re.search(r'(?:Book|Libro|Buch)\s+(?P<index>[0-9.]+)\s+(?:of|de|von)\s+([0-9.]+)\s*:\s*(?P<series>.+)', raw.strip()) + if self.domain == 'jp': + m = re.search(r'(?P<index>[0-9.]+)\s*(?:巻|冊)\s*\(全\s*([0-9.]+)\s*(?:巻|冊)\):\s*(?P<series>.+)', raw.strip()) + else: + m = re.search(r'(?:Book|Libro|Buch)\s+(?P<index>[0-9.]+)\s+(?:of|de|von)\s+([0-9.]+)\s*:\s*(?P<series>.+)', raw.strip()) if m is not None: ans = (m.group('series').strip(), float(m.group('index'))) -- 2.33.0.windows.2