From b162d36b9ed5dedd905029b8003a042e980d6790 Mon Sep 17 00:00:00 2001
From: cary-rowen
Date: Tue, 27 Aug 2024 23:50:19 +0800
Subject: [PATCH] fix(epub): ensure date parsing handles non-string inputs gracefully

Added type checking and conversion for the publication date metadata in EPUB
files to prevent `TypeError` during date parsing. If the date cannot be parsed,
a default value "Unknown Publication Date" is returned.
---
 bookworm/document/formats/epub.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/bookworm/document/formats/epub.py b/bookworm/document/formats/epub.py
index 1dadc087..e6a99dac 100644
--- a/bookworm/document/formats/epub.py
+++ b/bookworm/document/formats/epub.py
@@ -82,8 +82,15 @@ def metadata(self):
             desc = HTMLParser(info.get("description", "")).text()
         except:
             desc = None
+        date_value = info.get("date", "")
+        if not isinstance(date_value, str):
+            log.warning(f"Unexpected date format: {type(date_value)}. Converting to string.")
+            if isinstance(date_value, (int, float)):
+                date_value = str(date_value)
+            else:
+                date_value = ""
         if pubdate := dateparser.parse(
-            info.get("date", ""),
+            date_value,
             languages=[
                 self.language.two_letter_language_code,
             ],
@@ -92,7 +99,7 @@ def metadata(self):
                 pubdate, date_only=True, format="long", localized=True
             )
         else:
-            publish_date = ""
+            publish_date = "Unknown Publication Date"
         return BookMetadata(
             title=self.epub.title,
             author=author.removeprefix("By ").strip(),