From d957b2d20ff248e6dd9a631e57ab84df3edc7990 Mon Sep 17 00:00:00 2001 From: Ozzieisaacs Date: Wed, 9 Dec 2020 11:04:29 +0100 Subject: [PATCH] Fix detection of the correct encoding for txt-reader --- cps/gdriveutils.py | 31 ++++++++++++++++++++++--------- cps/web.py | 11 +++++++++-- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/cps/gdriveutils.py b/cps/gdriveutils.py index 1c5dec05..7a86e020 100644 --- a/cps/gdriveutils.py +++ b/cps/gdriveutils.py @@ -20,6 +20,7 @@ from __future__ import division, print_function, unicode_literals import os import json import shutil +import chardet from flask import Response, stream_with_context from sqlalchemy import create_engine @@ -30,16 +31,25 @@ from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.exc import OperationalError, InvalidRequestError try: - from pydrive.auth import GoogleAuth - from pydrive.drive import GoogleDrive - from pydrive.auth import RefreshError from apiclient import errors from httplib2 import ServerNotFoundError - gdrive_support = True importError = None -except ImportError as err: - importError = err + gdrive_support = True +except ImportError as e: + importError = e gdrive_support = False +try: + from pydrive2.auth import GoogleAuth + from pydrive2.drive import GoogleDrive + from pydrive2.auth import RefreshError +except ImportError as err: + try: + from pydrive.auth import GoogleAuth + from pydrive.drive import GoogleDrive + from pydrive.auth import RefreshError + except ImportError as err: + importError = err + gdrive_support = False from . 
import logger, cli, config from .constants import CONFIG_DIR as _CONFIG_DIR @@ -545,21 +555,24 @@ def partial(total_byte_len, part_size_limit): return s # downloads files in chunks from gdrive -def do_gdrive_download(df, headers): +def do_gdrive_download(df, headers, convert_encoding=False): total_size = int(df.metadata.get('fileSize')) download_url = df.metadata.get('downloadUrl') s = partial(total_size, 1024 * 1024) # I'm downloading BIG files, so 100M chunk size is fine for me - def stream(): + def stream(convert_encoding): for byte in s: headers = {"Range": 'bytes=%s-%s' % (byte[0], byte[1])} resp, content = df.auth.Get_Http_Object().request(download_url, headers=headers) if resp.status == 206: + if convert_encoding: + result = chardet.detect(content) + content = content.decode(result['encoding']).encode('utf-8') yield content else: log.warning('An error occurred: %s', resp) return - return Response(stream_with_context(stream()), headers=headers) + return Response(stream_with_context(stream(convert_encoding)), headers=headers) _SETTINGS_YAML_TEMPLATE = """ diff --git a/cps/web.py b/cps/web.py index 5fbb39af..33bb9a91 100644 --- a/cps/web.py +++ b/cps/web.py @@ -29,13 +29,14 @@ import mimetypes import traceback import binascii import re +import chardet # dependency of requests from babel.dates import format_date from babel import Locale as LC from babel.core import UnknownLocaleError from flask import Blueprint, jsonify from flask import render_template, request, redirect, send_from_directory, make_response, g, flash, abort, url_for -from flask import session as flask_session +from flask import session as flask_session, send_file from flask_babel import gettext as _ from flask_login import login_user, logout_user, login_required, current_user, confirm_login from sqlalchemy.exc import IntegrityError, InvalidRequestError, OperationalError @@ -1495,8 +1496,14 @@ def serve_book(book_id, book_format, anyname): headers = Headers() headers["Content-Type"] = 
mimetypes.types_map.get('.' + book_format, "application/octet-stream") df = getFileFromEbooksFolder(book.path, data.name + "." + book_format) - return do_gdrive_download(df, headers) + return do_gdrive_download(df, headers, (book_format.upper() == 'TXT')) else: + if book_format.upper() == 'TXT': + rawdata = open(os.path.join(config.config_calibre_dir, book.path, data.name + "." + book_format), + "rb").read() + result = chardet.detect(rawdata) + return make_response( + rawdata.decode(result['encoding']).encode('utf-8')) return send_from_directory(os.path.join(config.config_calibre_dir, book.path), data.name + "." + book_format)