diff -durN ht-orig/__init__.py ht/__init__.py
--- ht-orig/__init__.py	2014-08-08 07:52:28.000000000 +0100
+++ ht/__init__.py	2015-09-09 15:59:23.669991995 +0100
@@ -37,4 +37,41 @@
         # Apply the changes
         ac = self.actual_plugin_
         if ac is not None:
-            ac.changeConfig()
\ No newline at end of file
+            ac.changeConfig()
+
+    def cli_main(self, args):
+        '''
+        Hyphenate books in place from the command line.
+
+        :param args: argument list; args[0] is skipped (presumably the
+            plugin name supplied by calibre-debug -r -- TODO confirm) and
+            every following entry is the path of a book to hyphenate.
+        '''
+        from calibre.ebooks.oeb.polish.container import get_container
+        from calibre_plugins.hyphenatethis.config import prefs
+        from calibre_plugins.hyphenatethis.hyphenator.hyphenator import Hyphenator
+        from calibre_plugins.hyphenatethis.workers.hypher import Hypher
+        from calibre_plugins.hyphenatethis.hyphenator.hutils import get_dict_by_lang, get_dict_left_righ_min
+
+        # The command line always hyphenates with the English dictionary.
+        lang = 'eng'
+        d = get_dict_by_lang(lang)
+        if d is None:
+            # get_dict_left_righ_min() opens the path; stop with a clear message instead
+            raise SystemExit('No hyphenation dictionary available for language "%s".' % lang)
+
+        # The dictionary, its margins and the hyphenator do not depend on
+        # the book, so build them once instead of once per argument.
+        override = prefs['lroverride'].get(lang + '.dic')
+        if override is not None:
+            lmin, rmin = override['LEFTHYPHENMIN'], override['RIGHTHYPHENMIN']
+        else:
+            lmin, rmin = get_dict_left_righ_min(d)
+        hyphenator = Hyphenator(d, lmin, rmin)
+
+        min_len = prefs['length']
+        h = Hypher()
+        for arg in args[1:]:
+            ebook = get_container(arg, tweak_mode=True)
+            h.hyphenate(ebook, hyphenator, min_len)
+            ebook.commit()
diff -durN ht-orig/changelog.txt ht/changelog.txt
--- ht-orig/changelog.txt	2014-08-08 07:53:01.000000000 +0100
+++ ht/changelog.txt	2015-09-09 16:02:50.822993728 +0100
@@ -1,3 +1,6 @@
+[B]Version 0.0.9[/B] 2015-09-09
+Command-line hyphenation support.
+
 [B]Version 0.0.8[/B] 2014-08-08
 Get ready for Calibre 2 with Qt5!
 
@@ -30,4 +33,4 @@
 
 [B]Version 0.0.1[/B] 2013-03-18
 The very first version of the plugin.
-Soft-hyphenation of EPUB and AZW3.
\ No newline at end of file
+Soft-hyphenation of EPUB and AZW3.
diff -durN ht-orig/config.py ht/config.py
--- ht-orig/config.py	1970-01-01 01:00:00.000000000 +0100
+++ ht/config.py	2015-09-09 15:19:25.645143682 +0100
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2014, Saulius P. '
+__docformat__ = 'restructuredtext en'
+
+import os
+from calibre.utils.config import JSONConfig
+from calibre.constants import config_dir
+
+DICT_ZIP = config_dir+os.sep+'plugins'+os.sep+'hyphthisdicts.zip'
+
+# This is where all preferences for this plugin will be stored
+# Remember that this name (i.e. plugins/interface_demo) is also
+# in a global namespace, so make it as unique as possible.
+# You should always prefix your config file name with plugins/,
+# so as to ensure you don't accidentally clobber a calibre config file
+
+prefs = JSONConfig('plugins'+os.sep+'hyphenatethis')
+
+# Set defaults
+prefs.defaults['length'] = 5
+prefs.defaults['lroverride'] = {}
+prefs.defaults['tags_ignore'] = 'h1, h2, h3'
+prefs.defaults['tags_consider'] = ''
+prefs.defaults['custom_col'] = ['', 'Hyphenated', 'Hyphens removed']
diff -durN ht-orig/gui/config.py ht/gui/config.py
--- ht-orig/gui/config.py	2014-08-08 07:55:04.000000000 +0100
+++ ht/gui/config.py	2015-09-09 15:19:16.875855668 +0100
@@ -14,33 +14,15 @@
     from PyQt4.Qt import QWidget, QPushButton, QGridLayout, QHBoxLayout, \
         QVBoxLayout, QLabel, QLineEdit, QListWidget, QUrl, Qt, QCheckBox, QGroupBox
 
-from calibre.utils.config import JSONConfig
 from calibre.gui2 import choose_files, error_dialog, open_url
 from calibre.utils.zipfile import ZipFile
 from lxml import etree
 from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
-from calibre.constants import config_dir
 from calibre.utils.localization import canonicalize_lang, get_language
 from calibre.ptempfile import PersistentTemporaryFile
+from calibre_plugins.hyphenatethis.config import DICT_ZIP, prefs
 
 OOR_NS = 'http://openoffice.org/2001/registry'
-DICT_ZIP = config_dir+os.sep+'plugins'+os.sep+'hyphthisdicts.zip'
-
-# This is where all preferences for this plugin will be stored
-# Remember that this name (i.e. plugins/interface_demo) is also
-# in a global namespace, so make it as unique as possible.
-# You should always prefix your config file name with plugins/,
-# so as to ensure you dont accidentally clobber a calibre config file
-
-prefs = JSONConfig('plugins'+os.sep+'hyphenatethis')
-
-# Set defaults
-prefs.defaults['length'] = 5
-prefs.defaults['lroverride'] = {}
-prefs.defaults['tags_ignore'] = 'h1, h2, h3'
-prefs.defaults['tags_consider'] = ''
-prefs.defaults['custom_col'] = ['', 'Hyphenated', 'Hyphens removed']
-
 
 class HyphenateThisConfigWidget(QWidget):
 
@@ -339,4 +321,4 @@
         prefs['tags_ignore'] = str(self.ti.text())
         prefs['tags_consider'] = str(self.tc.text())
         prefs['custom_col'] = [str(self.col.text()), unicode(str(self.hyvalue.text())), unicode(str(self.remvalue.text()))]
-        
\ No newline at end of file
+        
diff -durN ht-orig/gui/prompt.py ht/gui/prompt.py
--- ht-orig/gui/prompt.py	2014-08-08 08:04:20.000000000 +0100
+++ ht/gui/prompt.py	2015-09-08 13:38:51.212045573 +0100
@@ -12,7 +12,6 @@
     from PyQt5.Qt import QModelIndex, QItemSelectionModel, QDialog, Qt, QVariant, QVBoxLayout, QLabel, QListView, QDialogButtonBox, QSize
 except ImportError:
     from PyQt4.Qt import QModelIndex, QItemSelectionModel, QDialog, Qt, QVariant, QVBoxLayout, QLabel, QListView, QDialogButtonBox, QSize
-#from calibre_plugins.savetoformat.gui.config import prefs
 from calibre.gui2.dialogs.select_formats import Formats
 
 class SelectHyphenateFormats(QDialog):
diff -durN ht-orig/hyphenatethisaction.py ht/hyphenatethisaction.py
--- ht-orig/hyphenatethisaction.py	2014-08-08 07:53:38.000000000 +0100
+++ ht/hyphenatethisaction.py	2015-09-08 13:36:40.612729924 +0100
@@ -10,7 +10,7 @@
 from calibre.gui2 import error_dialog, Dispatcher
 from calibre_plugins.hyphenatethis.workers.hjob import HyphenateThisThread
 import os
-from calibre_plugins.hyphenatethis.gui.config import DICT_ZIP
+from calibre_plugins.hyphenatethis.config import DICT_ZIP
 
 __license__ = 'GPL 3'
 __copyright__ = '2014, Saulius P. '
@@ -187,4 +187,4 @@
     def _endRemoving(self, job):
         if job.failed is True:
             error_dialog(self.gui, _('Removing hyphens failed'), job.description, show=True)
-        
\ No newline at end of file
+        
diff -durN ht-orig/hyphenator/hutils.py ht/hyphenator/hutils.py
--- ht-orig/hyphenator/hutils.py	2013-03-28 21:58:17.000000000 +0000
+++ ht/hyphenator/hutils.py	2015-09-08 13:39:33.878555649 +0100
@@ -4,7 +4,7 @@
 __copyright__ = '2013, Saulius P. '
 __docformat__ = 'restructuredtext en'
 
-from calibre_plugins.hyphenatethis.gui.config import DICT_ZIP
+from calibre_plugins.hyphenatethis.config import DICT_ZIP
 from calibre.utils.zipfile import ZipFile
 from calibre.ptempfile import PersistentTemporaryFile
 
@@ -21,7 +21,7 @@
     return None
 
 def get_dict_left_righ_min(path):
-    from calibre_plugins.hyphenatethis.gui.config import prefs
+    from calibre_plugins.hyphenatethis.config import prefs
     dct = open(path).read(64)
     regl = re.compile(HYP_REGEX_LEFT_MIN)
     regr = re.compile(HYP_REGEX_RIGHT_MIN)
@@ -33,4 +33,4 @@
         rmin = int(regr.search(dct).group(1))
     except:
         rmin = 2
-    return (lmin, rmin)
\ No newline at end of file
+    return (lmin, rmin)
diff -durN ht-orig/workers/hjob.py ht/workers/hjob.py
--- ht-orig/workers/hjob.py	2014-08-08 07:53:53.000000000 +0100
+++ ht/workers/hjob.py	2015-09-09 15:57:44.718113913 +0100
@@ -1,10 +1,8 @@
 # -*- coding: utf-8 -*-
-import re
 from calibre.ebooks.oeb.polish.container import get_container
-from lxml import etree
-from calibre.ebooks.oeb.parse_utils import XHTML_NS
 import traceback
 from calibre.ebooks.metadata.book.base import Metadata
+from calibre_plugins.hyphenatethis.workers.hypher import Hypher
 
 __license__ = 'GPL 3'
 __copyright__ = '2014, Saulius P. '
@@ -29,7 +27,7 @@
         self.gui = gui
         self.job_manager = self.gui.job_manager
         self._run = True
-        self._r = re.compile(r"\w+|[^\w]", re.UNICODE)
+        self.hypher = Hypher()
 
     def _hyphenate(self, job):
         (idItem, fmt, mi, hyphs) = job.args
@@ -39,59 +37,26 @@
         self._updateJob(job, 0.02, _('Exploding the book...'))
         ebook = get_container(src)
         h = hyphs[mi.language]
-        
-        from calibre_plugins.hyphenatethis.gui.config import prefs
+
+        from calibre_plugins.hyphenatethis.config import prefs
         min_len = prefs['length']
-
-        spines = []
-        for spine in ebook.spine_items:
-            spines.append(spine)
-
-        l = len(spines)+1
-        p = 1
-
-        for spine in spines:
-            parsed = ebook.parsed(ebook.abspath_to_name(spine))
-            ilist, q = self._createQuery()
-            try:
-                texts = etree.XPath(q, namespaces={'h':XHTML_NS})(parsed)
-            except:
-                job.log_write(traceback.format_exc())
-                job.description = self.PARSE_ERROR
-                raise self.PARSE_ERROR
-            for t in texts:
-                parent = t.getparent()
-                if parent.tag in ilist: continue
-                newt = ''
-                wlist = self._r.findall(t)
-                for w in wlist:
-                    if len(w) < min_len or u'-' in w:
-                        newt += w
-                    else:
-                        newt += h.inserted(w).replace('-', u'\u00AD')
-
-                if t.is_text:
-                    parent.text = newt
-                elif t.is_tail:
-                    parent.tail = newt
-
-            with open(spine, 'w') as f:
-                f.write(etree.tostring(parsed, xml_declaration=True, pretty_print=True, encoding='UTF-8'))
-
-            self._updateJob(job, float(p/l), _('Hyphenating...'))
-            p += 1
-
-        suff = '_hyph.'+fmt
-        out = PersistentTemporaryFile(suffix=suff)
-        ebook.commit(outpath=out.name)
-
-        with open(out.name, 'rb') as f:
-            self.gui.library_view.model().db.add_format(idItem, fmt, f, index_is_id=True)
-
+
+        try:
+            self.hypher.hyphenate(ebook, h, min_len, self, job)
+            suff = '_hyph.'+fmt
+            out = PersistentTemporaryFile(suffix=suff)
+            ebook.commit(outpath=out.name)
+            with open(out.name, 'rb') as f:
+                self.gui.library_view.model().db.add_format(idItem, fmt, f, index_is_id=True)
+        except Exception:
+            job.log_write(traceback.format_exc())
+            job.description = self.PARSE_ERROR
+            # Re-raise the real error; PARSE_ERROR is only the description text set above
+            raise
+
         self._setCustomCol(prefs, 1, idItem, job)
         self._updateJob(job, 1.0, _('Book hyphenated.'))
 
-
     def _removehyphens(self, job):
         (idItem, fmt) = job.args
 
@@ -99,48 +64,24 @@
 
         src = self.gui.library_view.model().db.format(idItem, fmt, index_is_id=True, as_path=True)
         self._updateJob(job, 0.02, _('Exploding the book...'))
+
         ebook = get_container(src)
 
-        from calibre_plugins.hyphenatethis.gui.config import prefs
-
-        spines = []
-        for spine in ebook.spine_items:
-            spines.append(spine)
-
-        l = len(spines)+1
-        p = 1
-
-        for spine in spines:
-            parsed = ebook.parsed(ebook.abspath_to_name(spine))
-            ilist, q = self._createQuery()
-            try:
-                texts = etree.XPath(q, namespaces={'h':XHTML_NS})(parsed)
-            except:
-                job.log_write(traceback.format_exc())
-                job.description = self.PARSE_ERROR
-                raise self.PARSE_ERROR
-            for t in texts:
-                parent = t.getparent()
-                if parent.tag in ilist: continue
-                newt = t.replace(u'\u00AD', '')
-                if t.is_text:
-                    parent.text = newt
-                elif t.is_tail:
-                    parent.tail = newt
-
-            with open(spine, 'w') as f:
-                f.write(etree.tostring(parsed, xml_declaration=True, pretty_print=True, encoding='UTF-8'))
-
-            self._updateJob(job, float(p/l), _('Removing hyphens...'))
-            p += 1
-
-        suff = '_remhyph.'+fmt
-        out = PersistentTemporaryFile(suffix=suff)
-        ebook.commit(outpath=out.name)
-
-        with open(out.name, 'rb') as f:
-            self.gui.library_view.model().db.add_format(idItem, fmt, f, index_is_id=True)
-
+        from calibre_plugins.hyphenatethis.config import prefs
+
+        try:
+            self.hypher.dehyphenate(ebook, self, job)
+            suff = '_remhyph.'+fmt
+            out = PersistentTemporaryFile(suffix=suff)
+            ebook.commit(outpath=out.name)
+            with open(out.name, 'rb') as f:
+                self.gui.library_view.model().db.add_format(idItem, fmt, f, index_is_id=True)
+        except Exception:
+            job.log_write(traceback.format_exc())
+            job.description = self.PARSE_ERROR
+            # Re-raise the real error; PARSE_ERROR is only the description text set above
+            raise
+
         self._setCustomCol(prefs, 2, idItem, job)
         self._updateJob(job, float(1.0), _('Hyphens removed'))
 
@@ -162,26 +103,6 @@
                 job.log_write('Value could not be set to column %s.' % prefs['custom_col'][0])
 
 
-    def _createQuery(self):
-        from calibre_plugins.hyphenatethis.gui.config import prefs
-        if prefs['tags_ignore'].strip() == '' and prefs['tags_consider'].strip() == '':
-            return [], '//h:body//text()'
-        ignore = ''
-        ilist = []
-        if prefs['tags_ignore'].strip() != '':
-            for i in prefs['tags_ignore'].split(','):
-                ignore += " and local-name() != '%s'" % i.strip()
-                ilist.append(("{%s}"+i.strip()) % XHTML_NS)
-            ignore = ignore[5:]
-        consider = ''
-        if prefs['tags_consider'].strip() != '':
-            for i in prefs['tags_consider'].split(','):
-                consider += " or local-name() = '%s'" % i.strip()
-            consider = "%s" % consider[4:]
-            if ignore != '':
-                consider = ' and (%s)' % consider
-        return ilist, "//h:body//*[%s%s]//text()" % (ignore, consider)
-
     def hyphenate(self, callback, idItem, fmt, mi, hyphs):
         description = _('Hyphenation of the book "%s" starting.') % mi.title
         job = HyphenateThisJob(callback, description, self.job_manager, idItem, fmt, mi, hyphs)
@@ -332,4 +253,4 @@
         self._log_file.write(what)
 
 class DecodingError(Exception):
-    pass
\ No newline at end of file
+    pass
diff -durN ht-orig/workers/hypher.py ht/workers/hypher.py
--- ht-orig/workers/hypher.py	1970-01-01 01:00:00.000000000 +0100
+++ ht/workers/hypher.py	2015-09-09 15:58:05.066443611 +0100
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+
+import re
+from lxml import etree
+from calibre.ebooks.oeb.parse_utils import XHTML_NS
+
+SOFT_HYPHEN = u'\u00AD'
+
+
+class Hypher(object):
+    '''
+    Inserts soft hyphens into, or removes them from, the text of every
+    spine item of an e-book container.
+    '''
+
+    # Splits text into runs of word characters and single non-word characters
+    _WORD_RE = re.compile(r"\w+|[^\w]", re.UNICODE)
+
+    def hyphenate(self, ebook, h, min_len, updater = None, job = None):
+        '''
+        Insert a soft hyphen at every hyphenation point of every word.
+
+        :param ebook: container whose spine items are rewritten on disk
+        :param h: hyphenator; h.inserted(word) returns the word with '-'
+            at each hyphenation point
+        :param min_len: words shorter than this are left untouched
+        :param updater: optional object providing _updateJob(job, fraction, text)
+        :param job: optional job handed to updater; progress is reported
+            only when both updater and job are given
+        '''
+        def transform(text):
+            parts = []
+            for w in self._WORD_RE.findall(text):
+                if len(w) < min_len or u'-' in w:
+                    parts.append(w)
+                else:
+                    parts.append(h.inserted(w).replace('-', SOFT_HYPHEN))
+            return ''.join(parts)
+
+        self._process(ebook, transform, lambda: _('Hyphenating...'), updater, job)
+
+    def dehyphenate(self, ebook, updater = None, job = None):
+        '''
+        Remove every soft hyphen from the text of the book.
+
+        Parameters are the same as for hyphenate().
+        '''
+        def transform(text):
+            return text.replace(SOFT_HYPHEN, '')
+
+        self._process(ebook, transform, lambda: _('Removing hyphens...'), updater, job)
+
+    def _process(self, ebook, transform, message, updater, job):
+        '''
+        Replace each selected text node of each spine item by
+        transform(text), write the item back and report progress.
+
+        message is a callable returning the progress text; it is only
+        called when progress is actually reported.
+        '''
+        ilist, q = self._createQuery()
+        find_texts = etree.XPath(q, namespaces={'h':XHTML_NS})
+        spines = list(ebook.spine_items)
+        total = len(spines)+1
+
+        for done, spine in enumerate(spines, 1):
+            parsed = ebook.parsed(ebook.abspath_to_name(spine))
+            for t in find_texts(parsed):
+                parent = t.getparent()
+                # Text below an ignored tag is still matched through its
+                # non-ignored ancestors, so filter on the direct parent here
+                if parent.tag in ilist:
+                    continue
+                newt = transform(t)
+                if t.is_text:
+                    parent.text = newt
+                elif t.is_tail:
+                    parent.tail = newt
+
+            # etree.tostring() with an encoding returns bytes: write in binary mode
+            with open(spine, 'wb') as f:
+                f.write(etree.tostring(parsed, xml_declaration=True, pretty_print=True, encoding='UTF-8'))
+
+            if updater and job:
+                # float() first: "done/total" is integer division on Python 2
+                updater._updateJob(job, float(done)/total, message())
+
+    def _createQuery(self):
+        '''
+        Build the XPath query selecting the text nodes to process.
+
+        :return: (ilist, query) where ilist holds the namespaced names of
+            the ignored tags and query is the XPath expression
+        '''
+        from calibre_plugins.hyphenatethis.config import prefs
+        ignored = [i.strip() for i in prefs['tags_ignore'].split(',') if i.strip()]
+        considered = [i.strip() for i in prefs['tags_consider'].split(',') if i.strip()]
+        if not ignored and not considered:
+            return [], '//h:body//text()'
+
+        ilist = ['{%s}%s' % (XHTML_NS, i) for i in ignored]
+        ignore = ' and '.join("local-name() != '%s'" % i for i in ignored)
+        consider = ' or '.join("local-name() = '%s'" % i for i in considered)
+        if ignore and consider:
+            predicate = '%s and (%s)' % (ignore, consider)
+        else:
+            predicate = ignore or consider
+        return ilist, '//h:body//*[%s]//text()' % predicate