Skip to content
Commits on Source (2)
ocrizer (0.4.2) UNRELEASED; urgency=medium
[ Colomban Wendling ]
* ocrize.py: fix error recovery when converting a PDF to an image fails.
* ocrengines/finereader_ocrizer.py: add post-processing to make the
output ODF files more readable with Orca.
* Fix converting TXT to ODT when LibreOffice is already running.
This fixes the Tesseract backend with the default option when
LibreOffice is already running.
[ Alex ARNAUD ]
* ocrize.py: log the OCR engine name to the console on every run, not
only when scanning a document.
-- Colomban Wendling <cwendling@hypra.fr> Tue, 02 Oct 2018 17:17:41 +0200
......
......@@ -103,7 +103,6 @@ class OCRizer():
if not self.args.scan_document:
return
self.logger.info(_('OCR Engine : ' + self.args.ocr_engine))
self.send_notification(_("Attempting to scan a new image"))
sane_version = sane.init()
......@@ -211,7 +210,8 @@ class OCRizer():
return os.path.join(expended_output_dir, _("scanned_document_") + str(next_num).zfill(3))
def convert_txt_to_odt(self, filename):
cmd_args = ["soffice", "--headless", "--convert-to", "odt", "--outdir", os.path.split(filename)[0], filename]
temp_lo_user_dir = tempfile.mkdtemp()
cmd_args = ["soffice", "-env:UserInstallation=file://" + temp_lo_user_dir, "--headless", "--convert-to", "odt", "--outdir", os.path.split(filename)[0], filename]
if self.args.verbose >= 3:
print(" ".join(cmd_args))
try:
......@@ -219,6 +219,7 @@ class OCRizer():
except Exception as e:
e.notification_message = _("An internal error occured")
raise e
shutil.rmtree(temp_lo_user_dir, ignore_errors=True)
self.temp_files.append(filename) # add for cleanup
return re.sub("(?i)\.txt", ".odt", filename)
......@@ -345,6 +346,7 @@ class OCRizer():
def launch(self):
self.send_notification(_("Starting Optical Character Recognition"))
self.logger.info(_('OCR Engine : ') + self.args.ocr_engine)
try:
self.scan()
self.send_notification(_("Treating input image, please wait"))
......