diff options
-rw-r--r-- | graphics/tesseract/README | 3 | ||||
-rw-r--r-- | graphics/tesseract/patches/tesseract-2.03-java.patch | 17 | ||||
-rw-r--r-- | graphics/tesseract/patches/tesseract-2.03-missing_includes.patch | 37 | ||||
-rw-r--r-- | graphics/tesseract/patches/tesseract-2.03-patch.patch | 53 | ||||
-rw-r--r-- | graphics/tesseract/tesseract.SlackBuild | 89 | ||||
-rw-r--r-- | graphics/tesseract/tesseract.info | 10 |
6 files changed, 189 insertions, 20 deletions
diff --git a/graphics/tesseract/README b/graphics/tesseract/README index 31bfc8b694..f9be7a1c4c 100644 --- a/graphics/tesseract/README +++ b/graphics/tesseract/README @@ -10,11 +10,12 @@ all available language packs. Note that you can install more than one (or even all) of the language packs, as they do not conflict with each other. The build script defaults to use English, but this is easily changed by passing an alternate value on the command line. + Here is the relevant code from the build script: # Language pack(s) to use # We'll install English by default, but you can pass another one (or all) # of them on the command line (space delimited). If you pass more than one # (again, space delimited), you must enclose the string in quotes. Examples: # TESSLANG=fra ./tesseract.SlackBuild - # TESSLANG="deu eng fra ita nld spa" ./tesseract.SlackBuild + # TESSLANG="deu deu-f eng fra ita nld por spa vie" ./tesseract.SlackBuild TESSLANG=${TESSLANG:-eng} # Default to English diff --git a/graphics/tesseract/patches/tesseract-2.03-java.patch b/graphics/tesseract/patches/tesseract-2.03-java.patch new file mode 100644 index 0000000000..f384dbdfc1 --- /dev/null +++ b/graphics/tesseract/patches/tesseract-2.03-java.patch @@ -0,0 +1,17 @@ +Index: tesseract-2.03/java/makefile +=================================================================== +--- tesseract-2.03.orig/java/makefile 2008-04-24 21:52:12.000000000 +0200 ++++ tesseract-2.03/java/makefile 2008-04-24 21:53:03.000000000 +0200 +@@ -39,8 +39,11 @@ + clean : + rm -f ScrollView.jar *.class + ++distclean : clean ++ rm -f Makefile ++ + # all-am does nothing, to make the java part optional. +-all all-am : ++all all-am install : + + # dist runs the autoconf makefile to archive the files correctly. + dist distdir : diff --git a/graphics/tesseract/patches/tesseract-2.03-missing_includes.patch b/graphics/tesseract/patches/tesseract-2.03-missing_includes.patch new file mode 100644 index 0000000000..24931c08a6 --- /dev/null +++ b/graphics/tesseract/patches/tesseract-2.03-missing_includes.patch @@ -0,0 +1,37 @@ +diff -Nur a/viewer/scrollview.cpp b/viewer/scrollview.cpp +--- a/viewer/scrollview.cpp 2008-04-21 19:06:24.000000000 -0500 ++++ b/viewer/scrollview.cpp 2009-06-22 21:33:53.747817922 -0500 +@@ -36,6 +36,9 @@ + #include <vector> + #include <string> + ++#include <string.h> ++#include <limits.h> ++ + #include "svutil.h" + + #ifdef HAVE_LIBLEPT +diff -Nur a/viewer/svmnode.cpp b/viewer/svmnode.cpp +--- a/viewer/svmnode.cpp 2008-04-18 23:44:06.000000000 -0500 ++++ b/viewer/svmnode.cpp 2009-06-22 21:34:12.375586651 -0500 +@@ -28,6 +28,8 @@ + + #include <iostream> + ++#include <string.h> ++ + #include "scrollview.h" + + // Create the empty root menu node. with just a caption. All other nodes should +diff -Nur a/viewer/svutil.cpp b/viewer/svutil.cpp +--- a/viewer/svutil.cpp 2008-04-21 19:07:25.000000000 -0500 ++++ b/viewer/svutil.cpp 2009-06-22 21:34:31.419367272 -0500 +@@ -38,6 +38,8 @@ + + #include <iostream> + #include <string> ++#include <string.h> ++#include <stdlib.h> + + const int kBufferSize = 65536; + const int kMaxMsgSize = 4096; diff --git a/graphics/tesseract/patches/tesseract-2.03-patch.patch b/graphics/tesseract/patches/tesseract-2.03-patch.patch new file mode 100644 index 0000000000..f2868dfa5c --- /dev/null +++ b/graphics/tesseract/patches/tesseract-2.03-patch.patch @@ -0,0 +1,53 @@ +*** a/ccmain/baseapi.cpp +--- b/ccmain/baseapi.cpp +*************** +*** 954,960 **** + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + OUTLINE *outline = it.data(); + outline->compute_bb(); +! result.bounding_union(outline->bounding_box()); + } + return result; + } +--- 954,960 ---- + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + OUTLINE *outline = it.data(); + outline->compute_bb(); +! result = result.bounding_union(outline->bounding_box()); + } + return result; + } +*************** +*** 966,972 **** + for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { + C_BLOB *blob = c_it.data(); + //bboxes.push(tessy_rectangle(blob->bounding_box())); +! result.bounding_union(blob->bounding_box()); + } + return result; + } +--- 966,972 ---- + for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { + C_BLOB *blob = c_it.data(); + //bboxes.push(tessy_rectangle(blob->bounding_box())); +! result = result.bounding_union(blob->bounding_box()); + } + return result; + } +*************** +*** 1026,1032 **** + for (int i = 0; i < n; i++) { + PBLOB *blob = it.data(); + TBOX current = pblob_get_bbox(blob); +! bln_rect.bounding_union(current); + + TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), + str, *len); +--- 1026,1032 ---- + for (int i = 0; i < n; i++) { + PBLOB *blob = it.data(); + TBOX current = pblob_get_bbox(blob); +! bln_rect = bln_rect.bounding_union(current); + + TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), + str, *len); diff --git a/graphics/tesseract/tesseract.SlackBuild b/graphics/tesseract/tesseract.SlackBuild index bfc36e41bd..63835762dd 100644 --- a/graphics/tesseract/tesseract.SlackBuild +++ b/graphics/tesseract/tesseract.SlackBuild @@ -1,14 +1,39 @@ #!/bin/sh -# Pierre Cazenave revision date 10/11/2007 +# Pierre Cazenave 10/11/2007. +# Updated 25/01/2009. +# Updated 08/04/2009. +# Updated 28/05/2009 for Slackware64. # Modified by Robby Workman <rworkman@slackbuilds.org> for better -# consistency with our other scripts +# consistency with our other scripts +# Thanks to S+*n_Pe*rm*n for a bug report from OCRopus. + +# Copyright 2009 Pierre Cazenave <pwcazenave {at} gmail [dot] com> +# All rights reserved. +# +# Redistribution and use of this script, with or without modification, is +# permitted provided that the following conditions are met: +# +# 1. Redistributions of this script must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED +# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PRGNAM=tesseract -VERSION=2.01 +VERSION=2.03 ARCH=${ARCH:-i486} -BUILD=${BUILD:-2} +BUILD=${BUILD:-5} TAG=${TAG:-_SBo} + CWD=$(pwd) TMP=${TMP:-/tmp/SBo} PKG=$TMP/package-$PRGNAM @@ -19,17 +44,22 @@ OUTPUT=${OUTPUT:-/tmp} # of them on the command line (space delimited). If you pass more than one # (again, space delimited), you must enclose the string in quotes. Examples: # TESSLANG=fra ./tesseract.SlackBuild -# TESSLANG="deu eng fra ita nld spa" ./tesseract.SlackBuild +# TESSLANG="deu deu-f eng fra ita nld por spa vie" ./tesseract.SlackBuild TESSLANG=${TESSLANG:-eng} # Default to English if [ "$ARCH" = "i486" ]; then SLKCFLAGS="-O2 -march=i486 -mtune=i686" + LIBDIRSUFFIX="" elif [ "$ARCH" = "i686" ]; then SLKCFLAGS="-O2 -march=i686 -mtune=i686" + LIBDIRSUFFIX="" elif [ "$ARCH" = "x86_64" ]; then SLKCFLAGS="-O2 -fPIC" + LIBDIRSUFFIX="64" fi +set -e + rm -rf $TMP/$PRGNAM-$VERSION $PKG mkdir -p $TMP $PKG $OUTPUT cd $TMP @@ -38,9 +68,21 @@ cd $PRGNAM-$VERSION chown -R root:root . chmod -R u+w,go+r-w,a-s . +# Patch the source with bug report from sister project OCRopus +# http://ocropus.googlecode.com/svn/trunk/tesseract-2.03-patch.diff +# Thanks to S+*n_Pe*rm*n for the bug report. +patch -p1 < $CWD/patches/tesseract-2.03-patch.patch +# Also patch for the java make install error. +# http://tesseract-ocr.googlegroups.com/attach/cd42bea980dbe946/java +# (renamed from java to tesseract-2.03-java.patch) +patch -p1 < $CWD/patches/tesseract-2.03-java.patch +# Fix up some missing includes needed for gcc in 13.0 +patch -p1 < $CWD/patches/tesseract-2.03-missing_includes.patch + CFLAGS="$SLKCFLAGS" \ ./configure \ --prefix=/usr \ + --libdir=/usr/lib${LIBDIRSUFFIX} \ --sysconfdir=/etc \ --localstatedir=/var \ --mandir=/usr/man \ @@ -48,16 +90,29 @@ CFLAGS="$SLKCFLAGS" \ make -# Let's extract the desired language tarballs +# Let's extract the desired language tarballs, with a hack for the different +# version numbers for certain language files. I shied away from wildcards as +# they're a bit unpredictable... for _language in $(echo "$TESSLANG") ; do - if [ -r $CWD/tesseract-2.00.$_language.tar.gz ]; then - tar xf $CWD/tesseract-2.00.$_language.tar.gz - SUCCESS=yes + if [ "$_language" == "deu-f" -o "$_language" == "por" -o "$_language" == "vie" ]; then + if [ -r $CWD/tesseract-2.01.$_language.tar.gz ]; then + tar xf $CWD/tesseract-2.01.$_language.tar.gz + SUCCESS=yes + else + echo "$CWD/tesseract-2.01.$_language.tar.gz not found." + sleep 5 + fi; else - echo "$CWD/tesseract-2.00.$_language.tar.gz not found." - sleep 5 - fi ; + if [ -r $CWD/tesseract-2.00.$_language.tar.gz ]; then + tar xf $CWD/tesseract-2.00.$_language.tar.gz + SUCCESS=yes + else + echo "$CWD/tesseract-2.00.$_language.tar.gz not found." + sleep 5 + fi; + fi done + if [ ! "$SUCCESS" = "yes" ]; then echo "No language packs were found, so this package will not work as is." echo "See $CWD/README for more information." @@ -67,16 +122,22 @@ fi make install DESTDIR=$PKG ( cd $PKG - find . | xargs file | grep "executable" | grep ELF | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null - find . | xargs file | grep "shared object" | grep ELF | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null + find . | xargs file | grep "executable" | grep ELF | cut -f 1 -d : | \ + xargs strip --strip-unneeded 2> /dev/null || true + find . | xargs file | grep "shared object" | grep ELF | cut -f 1 -d : | \ + xargs strip --strip-unneeded 2> /dev/null ) mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION cp -a AUTHORS COPYING ChangeLog INSTALL NEWS README ReleaseNotes \ $PKG/usr/doc/$PRGNAM-$VERSION cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild +cat $CWD/README > $PKG/usr/doc/$PRGNAM-$VERSION/README$TAG find $PKG/usr/doc/$PRGNAM-$VERSION -type f -exec chmod 644 {} \; +# remove zero size files (from extra potentially not installed language packs). +find $PKG/usr/share/tessdata -type f -size 0 -exec rm {} + + mkdir -p $PKG/install cat $CWD/slack-desc > $PKG/install/slack-desc diff --git a/graphics/tesseract/tesseract.info b/graphics/tesseract/tesseract.info index de9f56aac5..848deec1ee 100644 --- a/graphics/tesseract/tesseract.info +++ b/graphics/tesseract/tesseract.info @@ -1,8 +1,8 @@ PRGNAM="tesseract" -VERSION="2.01" +VERSION="2.03" HOMEPAGE="http://code.google.com/p/tesseract-ocr/" -DOWNLOAD="http://tesseract-ocr.googlecode.com/files/tesseract-2.01.tar.gz" -MD5SUM="fb0e6e7652b985049c11a4bc8e593885" +DOWNLOAD="http://tesseract-ocr.googlecode.com/files/tesseract-2.03.tar.gz" +MD5SUM="5777b70b11df16c1ac9aa155d7cfc553" MAINTAINER="Pierre Cazenave" -EMAIL="pwcazenave@gmail.com" -APPROVED="rworkman" +EMAIL="pwcazenave <at> gmail {dot} com" +APPROVED="dsomero,rworkman" |