Conversion of a TIFF image in a Python script - OCR using Tesseract

Posted by PYTHON TEAM on Ask Ubuntu See other posts from Ask Ubuntu or by PYTHON TEAM
Published on 2013-05-18T09:47:02Z Indexed on 2013/06/27 4:31 UTC
Read the original article Hit count: 567

I want to convert a TIFF image file to a text document. My code works perfectly, as expected, when converting TIFF images that use a usual font, but it does not work for a French script font. My TIFF image file contains text, and the font of that text is in French script format. Here is my code:

import Image

import subprocess

import util

import errors

# Module-level configuration shared by the OCR helper functions below.

tesseract_exe_name = 'tesseract'  # Name of executable to be called at command line

# This file must be .bmp or other Tesseract-compatible format
scratch_image_name = "temp.bmp"

scratch_text_name_root = "temp"  # Leave out the .txt extension

cleanup_scratch_flag = True  # Temporary files cleaned up after OCR operation

def call_tesseract(input_filename, output_filename):
    """Run the external tesseract executable on input_filename.

    output_filename is passed to tesseract as the output name root, i.e.
    without the .txt extension. The call blocks until the process exits;
    on a non-zero exit status errors.check_for_errors() is invoked.
    """
    command = [tesseract_exe_name, input_filename, output_filename]
    exit_status = subprocess.Popen(command).wait()
    if exit_status == 0:
        return
    # Tesseract reported a failure; hand over to the errors module.
    errors.check_for_errors()

def image_to_string(im, cleanup = cleanup_scratch_flag):
    """Run tesseract on the image object im and return the fetched text.

    The image is first written to the scratch image file via
    util.image_to_scratch, tesseract is run on that file, and the result
    is read back with util.retrieve_text. When cleanup is true the scratch
    files are removed afterwards, whether or not the operation succeeded.
    """
    try:
        util.image_to_scratch(im, scratch_image_name)
        call_tesseract(scratch_image_name, scratch_text_name_root)
        # The finally clause still runs before this value is handed back.
        return util.retrieve_text(scratch_text_name_root)
    finally:
        if cleanup:
            util.perform_cleanup(scratch_image_name, scratch_text_name_root)

def image_file_to_string(filename, cleanup = cleanup_scratch_flag, graceful_errors=True):
    """Applies tesseract directly to the file filename and fetches resulting text.

    If tesseract fails with errors.Tesser_General_Exception and
    graceful_errors=True, the file is opened with Image.open and run
    through image_to_string instead (which goes via the scratch image file).
    With graceful_errors=False the exception is re-raised to the caller.
    If cleanup=True, delete scratch files after operation."""
    try:
        try:
            call_tesseract(filename, scratch_text_name_root)
            text = util.retrieve_text(scratch_text_name_root)
        except errors.Tesser_General_Exception:
            if graceful_errors:
                # Fall back to the scratch-image route for this file.
                im = Image.open(filename)
                text = image_to_string(im, cleanup)
            else:
                raise
    finally:
        # Runs on success and on failure alike.
        if cleanup:
            util.perform_cleanup(scratch_image_name, scratch_text_name_root)
    return text
if __name__=='__main__':
    # Demo / smoke test. Uses single-argument print(...) calls and
    # "except ... as ..." so the block parses on Python 2.6+ and Python 3
    # while printing the same text as the original print statements.

    # OCR an image that has been opened as an image object.
    im = Image.open("/home/oomsys/phototest.tif")
    text = image_to_string(im)
    print(text)

    # Direct file OCR with the fallback disabled: a tesseract failure is
    # reported here instead of being retried.
    try:
        text = image_file_to_string('fnord.tif', graceful_errors=False)
    except errors.Tesser_General_Exception as value:
        print("fnord.tif is incompatible filetype.  Try graceful_errors=True")
        print(value)

    # Same file again, this time allowing the fallback conversion.
    text = image_file_to_string('fnord.tif', graceful_errors=True)
    print("fnord.tif contents: %s" % (text,))

    text = image_file_to_string('fonts_test.png', graceful_errors=True)
    print(text)

© Ask Ubuntu or respective owner

Related posts about application-development

Related posts about python