Mon Jul 8 18:37:03 2019 UTC ()
tesseract: updated to 4.1.0

4.1.0 Release
Added new renderers: Alto, LSTMBox, WordStrBox.
Added character boxes in hOCR output.
Added Python training scripts (experimental) as an alternative to the shell scripts.
Better support for AVX / AVX2 / SSE.
Disable OpenMP support by default.
Fix for bounding box problem.
Implemented support for whitelist/blacklist in LSTM engine.
Improved cmake configuration.
Code modernization and improvements.
A lot of bug fixes...


(adam)
diff -r1.47 -r1.48 pkgsrc/graphics/tesseract/Makefile
diff -r1.11 -r1.12 pkgsrc/graphics/tesseract/PLIST
diff -r1.21 -r1.22 pkgsrc/graphics/tesseract/distinfo
diff -r1.1 -r0 pkgsrc/graphics/tesseract/patches/patch-doc_Makefile.am
diff -r1.3 -r1.4 pkgsrc/graphics/tesseract/patches/patch-tessdata_Makefile.am

cvs diff -r1.47 -r1.48 pkgsrc/graphics/tesseract/Makefile (expand / switch to unified diff)

--- pkgsrc/graphics/tesseract/Makefile 2019/05/04 16:05:33 1.47
+++ pkgsrc/graphics/tesseract/Makefile 2019/07/08 18:37:03 1.48
@@ -1,17 +1,16 @@ @@ -1,17 +1,16 @@
1# $NetBSD: Makefile,v 1.47 2019/05/04 16:05:33 leot Exp $ 1# $NetBSD: Makefile,v 1.48 2019/07/08 18:37:03 adam Exp $
2 2
3DISTNAME= tesseract-4.0.0 3DISTNAME= tesseract-4.1.0
4PKGREVISION= 6 
5CATEGORIES= graphics 4CATEGORIES= graphics
6MASTER_SITES= ${MASTER_SITE_GITHUB:=tesseract-ocr/} 5MASTER_SITES= ${MASTER_SITE_GITHUB:=tesseract-ocr/}
7DISTFILES= ${DEFAULT_DISTFILES} 6DISTFILES= ${DEFAULT_DISTFILES}
8 7
9MAINTAINER= pkgsrc-users@NetBSD.org 8MAINTAINER= pkgsrc-users@NetBSD.org
10HOMEPAGE= https://github.com/tesseract-ocr/tesseract 9HOMEPAGE= https://github.com/tesseract-ocr/tesseract
11COMMENT= Open Source OCR Engine 10COMMENT= Open Source OCR Engine
12LICENSE= apache-2.0 11LICENSE= apache-2.0
13 12
14LANGVER= 4.0.0 13LANGVER= 4.0.0
15DISTFILES+= tessdata-${LANGVER}${EXTRACT_SUFX} 14DISTFILES+= tessdata-${LANGVER}${EXTRACT_SUFX}
16SITES.tessdata-${LANGVER}.tar.gz= -${MASTER_SITES:Q}tessdata/archive/${LANGVER}.tar.gz 15SITES.tessdata-${LANGVER}.tar.gz= -${MASTER_SITES:Q}tessdata/archive/${LANGVER}.tar.gz
17 16

cvs diff -r1.11 -r1.12 pkgsrc/graphics/tesseract/PLIST (expand / switch to unified diff)

--- pkgsrc/graphics/tesseract/PLIST 2018/11/18 18:07:20 1.11
+++ pkgsrc/graphics/tesseract/PLIST 2019/07/08 18:37:03 1.12
@@ -1,39 +1,38 @@ @@ -1,39 +1,38 @@
1@comment $NetBSD: PLIST,v 1.11 2018/11/18 18:07:20 adam Exp $ 1@comment $NetBSD: PLIST,v 1.12 2019/07/08 18:37:03 adam Exp $
2bin/ambiguous_words 2bin/ambiguous_words
3bin/classifier_tester 3bin/classifier_tester
4bin/cntraining 4bin/cntraining
5bin/combine_lang_model 5bin/combine_lang_model
6bin/combine_tessdata 6bin/combine_tessdata
7bin/dawg2wordlist 7bin/dawg2wordlist
8bin/language-specific.sh 8bin/language-specific.sh
9bin/lstmeval 9bin/lstmeval
10bin/lstmtraining 10bin/lstmtraining
11bin/merge_unicharsets 11bin/merge_unicharsets
12bin/mftraining 12bin/mftraining
13bin/set_unicharset_properties 13bin/set_unicharset_properties
14bin/shapeclustering 14bin/shapeclustering
15bin/tesseract 15bin/tesseract
16bin/tesstrain.sh 16bin/tesstrain.sh
17bin/tesstrain_utils.sh 17bin/tesstrain_utils.sh
18bin/text2image 18bin/text2image
19bin/unicharset_extractor 19bin/unicharset_extractor
20bin/wordlist2dawg 20bin/wordlist2dawg
21include/tesseract/apitypes.h 21include/tesseract/apitypes.h
22include/tesseract/baseapi.h 22include/tesseract/baseapi.h
23include/tesseract/capi.h 23include/tesseract/capi.h
24include/tesseract/genericvector.h 24include/tesseract/genericvector.h
25include/tesseract/helpers.h 25include/tesseract/helpers.h
26include/tesseract/host.h 
27include/tesseract/ltrresultiterator.h 26include/tesseract/ltrresultiterator.h
28include/tesseract/ocrclass.h 27include/tesseract/ocrclass.h
29include/tesseract/osdetect.h 28include/tesseract/osdetect.h
30include/tesseract/pageiterator.h 29include/tesseract/pageiterator.h
31include/tesseract/platform.h 30include/tesseract/platform.h
32include/tesseract/publictypes.h 31include/tesseract/publictypes.h
33include/tesseract/renderer.h 32include/tesseract/renderer.h
34include/tesseract/resultiterator.h 33include/tesseract/resultiterator.h
35include/tesseract/serialis.h 34include/tesseract/serialis.h
36include/tesseract/strngs.h 35include/tesseract/strngs.h
37include/tesseract/tess_version.h 36include/tesseract/tess_version.h
38include/tesseract/tesscallback.h 37include/tesseract/tesscallback.h
39include/tesseract/thresholder.h 38include/tesseract/thresholder.h
@@ -68,47 +67,51 @@ share/tessdata/bel.traineddata @@ -68,47 +67,51 @@ share/tessdata/bel.traineddata
68share/tessdata/ben.traineddata 67share/tessdata/ben.traineddata
69share/tessdata/bod.traineddata 68share/tessdata/bod.traineddata
70share/tessdata/bos.traineddata 69share/tessdata/bos.traineddata
71share/tessdata/bre.traineddata 70share/tessdata/bre.traineddata
72share/tessdata/bul.traineddata 71share/tessdata/bul.traineddata
73share/tessdata/cat.traineddata 72share/tessdata/cat.traineddata
74share/tessdata/ceb.traineddata 73share/tessdata/ceb.traineddata
75share/tessdata/ces.traineddata 74share/tessdata/ces.traineddata
76share/tessdata/chi_sim.traineddata 75share/tessdata/chi_sim.traineddata
77share/tessdata/chi_sim_vert.traineddata 76share/tessdata/chi_sim_vert.traineddata
78share/tessdata/chi_tra.traineddata 77share/tessdata/chi_tra.traineddata
79share/tessdata/chi_tra_vert.traineddata 78share/tessdata/chi_tra_vert.traineddata
80share/tessdata/chr.traineddata 79share/tessdata/chr.traineddata
 80share/tessdata/configs/alto
81share/tessdata/configs/ambigs.train 81share/tessdata/configs/ambigs.train
82share/tessdata/configs/api_config 82share/tessdata/configs/api_config
83share/tessdata/configs/bigram 83share/tessdata/configs/bigram
84share/tessdata/configs/box.train 84share/tessdata/configs/box.train
85share/tessdata/configs/box.train.stderr 85share/tessdata/configs/box.train.stderr
86share/tessdata/configs/digits 86share/tessdata/configs/digits
 87share/tessdata/configs/get.images
87share/tessdata/configs/hocr 88share/tessdata/configs/hocr
88share/tessdata/configs/inter 89share/tessdata/configs/inter
89share/tessdata/configs/kannada 90share/tessdata/configs/kannada
90share/tessdata/configs/linebox 91share/tessdata/configs/linebox
91share/tessdata/configs/logfile 92share/tessdata/configs/logfile
92share/tessdata/configs/lstm.train 93share/tessdata/configs/lstm.train
 94share/tessdata/configs/lstmbox
93share/tessdata/configs/lstmdebug 95share/tessdata/configs/lstmdebug
94share/tessdata/configs/makebox 96share/tessdata/configs/makebox
95share/tessdata/configs/pdf 97share/tessdata/configs/pdf
96share/tessdata/configs/quiet 98share/tessdata/configs/quiet
97share/tessdata/configs/rebox 99share/tessdata/configs/rebox
98share/tessdata/configs/strokewidth 100share/tessdata/configs/strokewidth
99share/tessdata/configs/tsv 101share/tessdata/configs/tsv
100share/tessdata/configs/txt 102share/tessdata/configs/txt
101share/tessdata/configs/unlv 103share/tessdata/configs/unlv
 104share/tessdata/configs/wordstrbox
102share/tessdata/cos.traineddata 105share/tessdata/cos.traineddata
103share/tessdata/cym.traineddata 106share/tessdata/cym.traineddata
104share/tessdata/dan.traineddata 107share/tessdata/dan.traineddata
105share/tessdata/dan_frak.traineddata 108share/tessdata/dan_frak.traineddata
106share/tessdata/deu.traineddata 109share/tessdata/deu.traineddata
107share/tessdata/deu_frak.traineddata 110share/tessdata/deu_frak.traineddata
108share/tessdata/div.traineddata 111share/tessdata/div.traineddata
109share/tessdata/dzo.traineddata 112share/tessdata/dzo.traineddata
110share/tessdata/ell.traineddata 113share/tessdata/ell.traineddata
111share/tessdata/eng.traineddata 114share/tessdata/eng.traineddata
112share/tessdata/eng.user-patterns 115share/tessdata/eng.user-patterns
113share/tessdata/eng.user-words 116share/tessdata/eng.user-words
114share/tessdata/enm.traineddata 117share/tessdata/enm.traineddata

cvs diff -r1.21 -r1.22 pkgsrc/graphics/tesseract/distinfo (expand / switch to unified diff)

--- pkgsrc/graphics/tesseract/distinfo 2019/05/04 16:05:33 1.21
+++ pkgsrc/graphics/tesseract/distinfo 2019/07/08 18:37:03 1.22
@@ -1,12 +1,11 @@ @@ -1,12 +1,11 @@
1$NetBSD: distinfo,v 1.21 2019/05/04 16:05:33 leot Exp $ 1$NetBSD: distinfo,v 1.22 2019/07/08 18:37:03 adam Exp $
2 2
3SHA1 (tessdata-4.0.0.tar.gz) = 94557a6ecdf8ff8bec131598759e7d3b0bca1911 3SHA1 (tessdata-4.0.0.tar.gz) = 94557a6ecdf8ff8bec131598759e7d3b0bca1911
4RMD160 (tessdata-4.0.0.tar.gz) = 2e826e866b56ff8b9cb2c6613f04d8c4a4ff98d7 4RMD160 (tessdata-4.0.0.tar.gz) = 2e826e866b56ff8b9cb2c6613f04d8c4a4ff98d7
5SHA512 (tessdata-4.0.0.tar.gz) = cd71bb99d44eefb53b359ba64b472c509fff773b2737a8d51e10d5d52d9a3a7ff870d470b1c72a7c78be3263b5ecfbb58a6eab13cf7128d8599681676cdcef6b 5SHA512 (tessdata-4.0.0.tar.gz) = cd71bb99d44eefb53b359ba64b472c509fff773b2737a8d51e10d5d52d9a3a7ff870d470b1c72a7c78be3263b5ecfbb58a6eab13cf7128d8599681676cdcef6b
6Size (tessdata-4.0.0.tar.gz) = 669258747 bytes 6Size (tessdata-4.0.0.tar.gz) = 669258747 bytes
7SHA1 (tesseract-4.0.0.tar.gz) = 243a4919d44bc64d1e7e4cac660c716c845a8d03 7SHA1 (tesseract-4.1.0.tar.gz) = 6e88cc4fd9f1681142bf74dc2df0559202cff3c2
8RMD160 (tesseract-4.0.0.tar.gz) = 0e95d343639ab98c6d3fbc528053b627b6e12282 8RMD160 (tesseract-4.1.0.tar.gz) = 034ffd9690478e28945c09001ce51f7fdceb2ff5
9SHA512 (tesseract-4.0.0.tar.gz) = 69e57d4ba1fc43d212fd0fff69a2b5d48a3b37cfee7054fdc083cbb7e04d92317609a32e457229661d70ce8d9b16c9d25e81bfc3861db660dd2c8f292202d447 9SHA512 (tesseract-4.1.0.tar.gz) = d617f5c5b826640b2871dbe3d7973bcc5e66fafd837921a20e009d683806ed50f0f258aa455019d99fc54f5cb65c2fa0380e3a3c92b39ab0684b8799c730b09d
10Size (tesseract-4.0.0.tar.gz) = 1961372 bytes 10Size (tesseract-4.1.0.tar.gz) = 1965053 bytes
11SHA1 (patch-doc_Makefile.am) = fd9cc782e766428de5709b77d7a2476be55ec7d8 11SHA1 (patch-tessdata_Makefile.am) = 8fe773d1c6318392296ba06996b51692edf32919
12SHA1 (patch-tessdata_Makefile.am) = 1fdbed9dafc1527eb52f354c8b78ba82f854b350 

File Deleted: pkgsrc/graphics/tesseract/patches/Attic/patch-doc_Makefile.am

cvs diff -r1.3 -r1.4 pkgsrc/graphics/tesseract/patches/patch-tessdata_Makefile.am (expand / switch to unified diff)

--- pkgsrc/graphics/tesseract/patches/patch-tessdata_Makefile.am 2019/05/04 16:05:33 1.3
+++ pkgsrc/graphics/tesseract/patches/patch-tessdata_Makefile.am 2019/07/08 18:37:03 1.4
@@ -1,22 +1,22 @@ @@ -1,22 +1,22 @@
1$NetBSD: patch-tessdata_Makefile.am,v 1.3 2019/05/04 16:05:33 leot Exp $ 1$NetBSD: patch-tessdata_Makefile.am,v 1.4 2019/07/08 18:37:03 adam Exp $
2 2
3Revert a trunk commit that broke install-lang for tesseract<4. 3Revert a trunk commit that broke install-lang for tesseract<4.
4 4
5--- tessdata/Makefile.am.orig 2018-10-29 08:53:12.000000000 +0000 5--- tessdata/Makefile.am.orig 2019-07-07 12:34:08.000000000 +0000
6+++ tessdata/Makefile.am 6+++ tessdata/Makefile.am
7@@ -29,6 +29,27 @@ langdata = bul.traineddata mlt.trainedda 7@@ -7,6 +7,27 @@ SUBDIRS = configs tessconfigs
8 chi_tra.traineddata ita.traineddata spa_old.traineddata \ 8
9 deu-frak.traineddata aze.traineddata 9 langdata =
10  10
11+.PHONY: install-langs 11+.PHONY: install-langs
12+install-langs: 12+install-langs:
13+ @if [ ! -d $(DESTDIR)$(datadir) ]; then mkdir -p $(DESTDIR)$(datadir); fi; 13+ @if [ ! -d $(DESTDIR)$(datadir) ]; then mkdir -p $(DESTDIR)$(datadir); fi;
14+ @if test "${LANGS}" != ""; then \ 14+ @if test "${LANGS}" != ""; then \
15+ for lang_code in ${LANGS}; do \ 15+ for lang_code in ${LANGS}; do \
16+ echo "installing data for $$lang_code"; \ 16+ echo "installing data for $$lang_code"; \
17+ $(INSTALL) -m 644 $(srcdir)/$$lang_code.* $(DESTDIR)$(datadir); \ 17+ $(INSTALL) -m 644 $(srcdir)/$$lang_code.* $(DESTDIR)$(datadir); \
18+ done; \ 18+ done; \
19+ else \ 19+ else \
20+ for l in ./*.traineddata; do \ 20+ for l in ./*.traineddata; do \
21+ filename=`basename $$l`; \ 21+ filename=`basename $$l`; \
22+ lang_code=$${filename%.*}; \ 22+ lang_code=$${filename%.*}; \