From 2d0af0ab496b84c97e1564d97eb5415f9ec6236b Mon Sep 17 00:00:00 2001
From: Wulf Rajek <40003252+AnonTester@users.noreply.github.com>
Date: Tue, 5 Apr 2022 01:26:35 +0100
Subject: [PATCH] Add pubdate, publisher and identifiers metadata #2163

---
 cps/comic.py     |  8 +++++--
 cps/constants.py |  2 +-
 cps/editbooks.py | 17 ++++++++++++++-
 cps/epub.py      | 22 ++++++++++++++++++--
 cps/fb2.py       |  4 +++-
 cps/uploader.py  | 54 +++++++-----------------------------------------
 6 files changed, 53 insertions(+), 54 deletions(-)

diff --git a/cps/comic.py b/cps/comic.py
index 2549579e..8f3a6f61 100644
--- a/cps/comic.py
+++ b/cps/comic.py
@@ -130,7 +130,9 @@ def get_comic_info(tmp_file_path, original_file_name, original_file_extension, r
                 series=loaded_metadata.series or "",
                 series_id=loaded_metadata.issue or "",
                 languages=loaded_metadata.language,
-                publisher="")
+                publisher="",
+                pubdate="",
+                identifiers=[])
 
     return BookMeta(
         file_path=tmp_file_path,
@@ -143,4 +145,6 @@ def get_comic_info(tmp_file_path, original_file_name, original_file_extension, r
         series="",
         series_id="",
         languages="",
-        publisher="")
+        publisher="",
+        pubdate="",
+        identifiers=[])
diff --git a/cps/constants.py b/cps/constants.py
index f40d16b0..762336dd 100644
--- a/cps/constants.py
+++ b/cps/constants.py
@@ -152,7 +152,7 @@ def selected_roles(dictionary):
 
 # :rtype: BookMeta
 BookMeta = namedtuple('BookMeta', 'file_path, extension, title, author, cover, description, tags, series, '
-                                  'series_id, languages, publisher')
+                                  'series_id, languages, publisher, pubdate, identifiers')
 
 STABLE_VERSION = {'version': '0.6.19 Beta'}
 
diff --git a/cps/editbooks.py b/cps/editbooks.py
index c07e5d24..db87d891 100755
--- a/cps/editbooks.py
+++ b/cps/editbooks.py
@@ -983,8 +983,13 @@ def create_book_on_upload(modify_date, meta):
     # combine path and normalize path from Windows systems
     path = os.path.join(author_dir, title_dir).replace('\\', '/')
 
+    try:  # meta.pubdate may be empty, year-only ("2011") or otherwise not YYYY-MM-DD
+        pubdate = datetime.strptime(meta.pubdate[:10], "%Y-%m-%d")
+    except (TypeError, ValueError):  # unparsable or missing date must not abort the upload
+        pubdate = datetime(101, 1, 1)  # same placeholder date that was hard-coded before
+
     # Calibre adds books with utc as timezone
-    db_book = db.Books(title, "", sort_authors, datetime.utcnow(), datetime(101, 1, 1),
+    db_book = db.Books(title, "", sort_authors, datetime.utcnow(), pubdate,
                        '1', datetime.utcnow(), path, meta.cover, db_author, [], "")
 
     modify_date |= modify_database_object(input_authors, db_book.authors, db.Authors, calibre_db.session,
@@ -1017,6 +1022,16 @@ def create_book_on_upload(modify_date, meta):
 
     # flush content, get db_book.id available
     calibre_db.session.flush()
+
+    # Handle identifiers now that db_book.id is available
+    identifier_list = []
+    for type_key, type_value in meta.identifiers:
+        identifier_list.append(db.Identifiers(type_value, type_key, db_book.id))
+    modification, warning = modify_identifiers(identifier_list, db_book.identifiers, calibre_db.session)
+    if warning:
+        flash(_("Identifiers are not Case Sensitive, Overwriting Old Identifier"), category="warning")
+    modify_date |= modification
+
     return db_book, input_authors, title_dir, renamed_authors
 
 
diff --git a/cps/epub.py b/cps/epub.py
index 80c12c35..d358d038 100644
--- a/cps/epub.py
+++ b/cps/epub.py
@@ -63,13 +63,15 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
 
     epub_metadata = {}
 
-    for s in ['title', 'description', 'creator', 'language', 'subject']:
+    for s in ['title', 'description', 'creator', 'language', 'subject', 'publisher', 'date']:
         tmp = p.xpath('dc:%s/text()' % s, namespaces=ns)
         if len(tmp) > 0:
             if s == 'creator':
                 epub_metadata[s] = ' & '.join(split_authors(tmp))
             elif s == 'subject':
                 epub_metadata[s] = ', '.join(tmp)
+            elif s == 'date':
+                epub_metadata[s] = tmp[0][:10]
             else:
                 epub_metadata[s] = tmp[0]
         else:
@@ -78,6 +80,12 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
     if epub_metadata['subject'] == 'Unknown':
         epub_metadata['subject'] = ''
 
+    if epub_metadata['publisher'] == u'Unknown':
+        epub_metadata['publisher'] = ''
+
+    if epub_metadata['date'] == u'Unknown':
+        epub_metadata['date'] = ''
+
     if epub_metadata['description'] == u'Unknown':
         description = tree.xpath("//*[local-name() = 'description']/text()")
         if len(description) > 0:
@@ -92,6 +100,14 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
 
     cover_file = parse_epub_cover(ns, tree, epub_zip, cover_path, tmp_file_path)
 
+    identifiers = []
+    for node in p.xpath('dc:identifier', namespaces=ns):
+        attr_values = list(node.attrib.values())  # may be empty: attributes are optional
+        identifier_name = attr_values[-1] if attr_values else None  # last attribute value names the type
+        if identifier_name in (None, 'uuid', 'calibre') or node.text is None:
+            continue  # skip untyped/empty identifiers and the uuid/calibre ones
+        identifiers.append([identifier_name, node.text])
+
     if not epub_metadata['title']:
         title = original_file_name
     else:
@@ -108,7 +124,9 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
         series=epub_metadata['series'].encode('utf-8').decode('utf-8'),
         series_id=epub_metadata['series_id'].encode('utf-8').decode('utf-8'),
         languages=epub_metadata['language'],
-        publisher="")
+        publisher=epub_metadata['publisher'].encode('utf-8').decode('utf-8'),
+        pubdate=epub_metadata['date'],
+        identifiers=identifiers)
 
 
 def parse_epub_cover(ns, tree, epub_zip, cover_path, tmp_file_path):
diff --git a/cps/fb2.py b/cps/fb2.py
index 21586736..c4b89fd6 100644
--- a/cps/fb2.py
+++ b/cps/fb2.py
@@ -77,4 +77,6 @@ def get_fb2_info(tmp_file_path, original_file_extension):
         series="",
         series_id="",
         languages="",
-        publisher="")
+        publisher="",
+        pubdate="",
+        identifiers=[])
diff --git a/cps/uploader.py b/cps/uploader.py
index 992d188c..7a0359b3 100644
--- a/cps/uploader.py
+++ b/cps/uploader.py
@@ -107,52 +107,10 @@ def default_meta(tmp_file_path, original_file_name, original_file_extension):
         series="",
         series_id="",
         languages="",
-        publisher="")
-
-
-def parse_xmp(pdf_file):
-    """
-    Parse XMP Metadata and prepare for BookMeta object
-    """
-    try:
-        xmp_info = pdf_file.getXmpMetadata()
-    except Exception as ex:
-        log.debug('Can not read XMP metadata {}'.format(ex))
-        return None
-
-    if xmp_info:
-        try:
-            xmp_author = xmp_info.dc_creator # list
-        except AttributeError:
-            xmp_author = ['']
-
-        if xmp_info.dc_title:
-            xmp_title = xmp_info.dc_title['x-default']
-        else:
-            xmp_title = ''
-
-        if xmp_info.dc_description:
-            xmp_description = xmp_info.dc_description['x-default']
-        else:
-            xmp_description = ''
-
-        languages = []
-        try:
-            for i in xmp_info.dc_language:
-                #calibre-web currently only takes one language.
-                languages.append(isoLanguages.get_lang3(i))
-        except AttributeError:
-            languages.append('')
-
-        xmp_tags = ', '.join(xmp_info.dc_subject)
-        xmp_publisher = ', '.join(xmp_info.dc_publisher)
-
-        return {'author': xmp_author,
-                    'title': xmp_title,
-                    'subject': xmp_description,
-                    'tags': xmp_tags, 'languages': languages,
-                    'publisher': xmp_publisher
-                    }
+        publisher="",
+        pubdate="",
+        identifiers=[]
+        )
 
 
 def parse_xmp(pdf_file):
@@ -251,7 +209,9 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
         series="",
         series_id="",
         languages=','.join(languages),
-        publisher=publisher)
+        publisher=publisher,
+        pubdate="",
+        identifiers=[])
 
 
 def pdf_preview(tmp_file_path, tmp_dir):