Skip to content

Commit

Permalink
[RELEASE] iText 7 pdfOcr - 1.0.2
Browse files Browse the repository at this point in the history
https://git.itextsupport.com/

* release/1.0.2:
  [RELEASE] 1.0.2-SNAPSHOT -> 1.0.2
  Drop revapi plugin in favor of japicmp plugin
  thai_03 test fails in .NET. Might be related to reading UTF-8 files issue
  Combine HOCR and TXT outputs for more precise text recognition
  Deprecate unused log message constant
  Stabilize test on different Tesseract versions
  Add possibility to set image preprocessing properties
  Tesseract does not respect image rotation when doing OCR
  Remove redundant MethodSignature subclass
  Use tesseract executable from path instead of tesseractDir in tests
  If path to tessdata contains non ASCII characters, code unexpectedly fails
  TextInfo: move from List<Float> to Rectangle
  Use generalized Jenkinsfile in the pipeline-library
  Deprecate Tesseract4LogMessageConstant#CANNOT_CONVERT_IMAGE_TO_GRAYSCALE
  Non-Ascii characters support for the output file
  Use ImageTypeDetector from io module to detect image types
  Use new SystemUtil#runProcessAndWait overload from 7.1.12-SNAPSHOT accepting working directory
  Only run on Jenkins nodes that have label tesseract
  [RELEASE] Update dependency versions
  • Loading branch information
iText-CI committed Oct 8, 2020
2 parents e413823 + 1efcf7c commit 8d2c19d
Show file tree
Hide file tree
Showing 64 changed files with 2,321 additions and 874 deletions.
297 changes: 3 additions & 294 deletions Jenkinsfile
Original file line number Diff line number Diff line change
@@ -1,298 +1,7 @@
#!/usr/bin/env groovy
@Library('pipeline-library')_

// Derive the cron schedule and the SonarQube branch arguments from the branch name.
// Each pattern must match the whole branch name (same full-match test a regex `case` performs).
def schedule, sonarBranchName, sonarBranchTarget
if (env.BRANCH_NAME ==~ /.*master.*/) {
    // Branch name contains "master": monthly schedule, no Sonar target branch.
    schedule = '@monthly'
    sonarBranchName = '-Dsonar.branch.name=master'
    sonarBranchTarget = ''
} else if (env.BRANCH_NAME ==~ /.*develop.*/) {
    // Branch name contains "develop": nightly schedule, analysed against master.
    schedule = '@midnight'
    sonarBranchName = '-Dsonar.branch.name=develop'
    sonarBranchTarget = '-Dsonar.branch.target=master'
} else {
    // Any other branch: no cron schedule, analysed against develop.
    schedule = ''
    sonarBranchName = '-Dsonar.branch.name=' + env.BRANCH_NAME
    sonarBranchTarget = '-Dsonar.branch.target=develop'
}
// Repository name handed to automaticJavaBuild; the 'Branch Artifactory Deploy' stage
// also reassigns it from GIT_URL when that variable is set.
def repoName = "pdfOcr"
// Second argument handed to automaticJavaBuild.
// NOTE(review): presumably a regex selecting the upstream dependency (itextcore) — confirm in pipeline-library.
def dependencyRegex = "itextcore"

// Declarative Jenkins pipeline: builds the project with Maven, runs static analysis and tests
// with SonarQube, and deploys artifacts to Artifactory (release/snapshot repos for master/develop,
// a branch-artifacts area for every other branch).
pipeline {

// Run on any agent matching the label expression '!master'.
agent { label '!master' }

environment {
JDK_VERSION = 'jdk-8-oracle'
// Location of the custom tool named 'Tesseract', resolved through the `tool` step.
tesseractDir = tool name: 'Tesseract', type: 'com.cloudbees.jenkins.plugins.customtools.CustomTool'
}

options {
ansiColor('xterm')
// Keep artifacts for one build only.
buildDiscarder logRotator(artifactNumToKeepStr: '1')
parallelsAlwaysFailFast()
skipStagesAfterUnstable()
// Overall pipeline timeout; individual stages declare their own timeouts as well.
timeout time: 1, unit: 'HOURS'
timestamps()
}

triggers {
// `schedule` is chosen per branch before the pipeline block ('' means no cron schedule).
cron(schedule)
}

tools {
maven 'M3'
jdk "${JDK_VERSION}"
}

stages {
stage('Abort possible previous builds') {
steps {
script {
// NOTE(review): abortPreviousBuilds is not defined in this file — presumably a shared-library step; confirm.
abortPreviousBuilds()
}
}
}
stage('Wait for blocking jobs') {
steps {
script {
// Configures a BuildBlockerProperty whose blocking-job pattern is the itextcore job
// with the same base name as this job.
// NOTE(review): written as a subscript `properties[[...]]` rather than a call
// `properties([[...]])` — confirm this actually applies the property.
properties[[
$class : 'BuildBlockerProperty',
blockLevel : 'GLOBAL',
blockingJobs : "^iText_7_Java/itextcore/$env.JOB_BASE_NAME\$",
scanQueueFor : 'ALL',
useBuildBlocker: true
]]
}
}
}
// Build: clean, install branch-specific dependencies (feature branches only), compile.
stage('Build') {
options {
// The whole Build stage is wrapped in retry(2).
retry(2)
}
stages {
stage('Clean workspace') {
options {
timeout time: 5, unit: 'MINUTES'
}
steps {
withMaven(jdk: "${JDK_VERSION}", maven: 'M3') {
// Clean the build and purge com.itextpdf artifacts from the workspace-local
// Maven repository (.repository) without re-resolving them.
sh 'mvn --threads 2C --no-transfer-progress clean dependency:purge-local-repository ' +
'-Dinclude=com.itextpdf -DresolutionFuzziness=groupId -DreResolve=false ' +
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository"
}
script {
// Best-effort removal of the downloads directory; any failure is deliberately ignored.
try {sh "rm -rf ${env.WORKSPACE.replace('\\','/')}/downloads"} catch (Exception ignored) {}
}
}
}
stage('Install branch dependencies') {
options {
timeout time: 5, unit: 'MINUTES'
}
// Only for branches other than master and develop.
when {
not {
anyOf {
branch "master"
branch "develop"
}
}
}
steps {
script {
// NOTE(review): getAndConfigureJFrogCLI is not defined in this file — presumably a shared-library step; confirm.
getAndConfigureJFrogCLI()
// Download the Java artifacts stored for this job's base name into downloads/.
sh "./jfrog rt dl branch-artifacts/${env.JOB_BASE_NAME}/**/java/ downloads/"
if (fileExists("downloads")) {
dir ("downloads") {
// Install order: main.pom files first, then every other .pom, then the jars.
def mainPomFiles = findFiles glob: '**/main.pom'
mainPomFiles.each { pomFile ->
pomPath = pomFile.path.replace "\\", "/"
sh "mvn org.apache.maven.plugins:maven-install-plugin:3.0.0-M1:install-file --quiet " +
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository " +
"-Dpackaging=pom -Dfile=${pomPath} -DpomFile=${pomPath}"
}
def pomFiles = findFiles glob: '**/*.pom'
pomFiles.each { pomFile ->
// main.pom files were already installed above.
if (pomFile.name != "main.pom") {
pomPath = pomFile.path.replace "\\", "/"
sh "mvn org.apache.maven.plugins:maven-install-plugin:3.0.0-M1:install-file --quiet " +
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\', '/')}/.repository " +
"-Dpackaging=pom -Dfile=${pomPath} -DpomFile=${pomPath}"
}
}
def jarFiles = findFiles glob: '**/*.jar'
jarFiles.each { jarFile ->
jarPath = jarFile.path.replace "\\", "/"
sh "mvn org.apache.maven.plugins:maven-install-plugin:3.0.0-M1:install-file --quiet " +
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\', '/')}/.repository " +
"-Dfile=${jarPath}"
}
}
}
}
}
}
stage('Compile') {
options {
timeout time: 10, unit: 'MINUTES'
}
steps {
withMaven(jdk: "${JDK_VERSION}", maven: 'M3') {
// Package without compiling or running tests (-Dmaven.test.skip=true).
sh 'mvn --threads 2C --no-transfer-progress package -Dmaven.test.skip=true ' +
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository"
}
}
}
}
post {
failure {
// Pause for two minutes after a failed attempt.
// NOTE(review): presumably to let a transient problem clear before the retry — confirm.
sleep time: 2, unit: 'MINUTES'
}
success {
script { currentBuild.result = 'SUCCESS' }
}
}
}
stage('Static Code Analysis') {
options {
timeout time: 1, unit: 'HOURS'
}
steps {
withMaven(jdk: "${JDK_VERSION}", maven: 'M3', mavenLocalRepo: '.repository') {
// Run `verify` with the qa profile, using the PMD analysis cache.
sh 'mvn --no-transfer-progress verify --activate-profiles qa ' +
'-Dpmd.analysisCache=true ' +
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository"
}
// Publish Checkstyle, PMD and SpotBugs findings, then the dependency-check report.
recordIssues(tools: [
checkStyle(),
pmdParser(),
spotBugs(useRankAsPriority: true)
])
dependencyCheckPublisher pattern: 'target/dependency-check-report.xml'
}
}
stage('Run Tests') {
options {
timeout time: 30, unit: 'MINUTES'
}
steps {
withMaven(jdk: "${JDK_VERSION}", maven: 'M3') {
withSonarQubeEnv('Sonar') {
// The ${gsExec}, ${compareExec} and ${tesseractDir} references sit in single-quoted
// Groovy strings, so they are passed through literally for the shell to expand.
// NOTE(review): gsExec and compareExec are not set in this file — presumably
// provided by the agent environment; confirm.
// Runs the test profile under the JaCoCo agent, then the Sonar analysis with the
// branch arguments chosen before the pipeline block.
sh 'mvn --no-transfer-progress --activate-profiles test ' +
'-DgsExec="${gsExec}" -DcompareExec="${compareExec}" ' +
'-DtesseractDir="${tesseractDir}" ' +
'-Dmaven.main.skip=true -Dmaven.test.failure.ignore=false ' +
'org.jacoco:jacoco-maven-plugin:prepare-agent verify org.jacoco:jacoco-maven-plugin:report ' +
'-Dsonar.java.spotbugs.reportPaths="target/spotbugs.xml" ' +
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository " +
'sonar:sonar ' + sonarBranchName + ' ' + sonarBranchTarget
}
}
}
}
stage("Quality Gate") {
options {
timeout time: 1, unit: 'HOURS'
}
steps {
// Wait for the SonarQube quality gate result; abortPipeline is set to true.
waitForQualityGate abortPipeline: true
}
}
stage('Artifactory Deploy') {
options {
timeout time: 5, unit: 'MINUTES'
}
// Only for the master and develop branches.
when {
anyOf {
branch "master"
branch "develop"
}
}
steps {
withMaven(jdk: "${JDK_VERSION}", maven: 'M3') {
script {
// Run `install` with the artifactory profile through the Artifactory Maven build,
// deploying to the 'releases' / 'snapshot' repositories, then publish the build info.
def server = Artifactory.server 'itext-artifactory'
def rtMaven = Artifactory.newMavenBuild()
rtMaven.deployer server: server, releaseRepo: 'releases', snapshotRepo: 'snapshot'
rtMaven.tool = 'M3'
def buildInfo = rtMaven.run pom: 'pom.xml', goals: '--threads 2C --no-transfer-progress install --activate-profiles artifactory ' +
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository".toString()
server.publishBuildInfo buildInfo
}
}
}
}
stage('Branch Artifactory Deploy') {
options {
timeout time: 5, unit: 'MINUTES'
}
// Only for branches other than master and develop.
when {
not {
anyOf {
branch "master"
branch "develop"
}
}
}
steps {
script {
// Nothing is uploaded when GIT_URL is not set.
if (env.GIT_URL) {
// Repository name = the part of GIT_URL between the last '/' and '.git'.
repoName = ("${env.GIT_URL}" =~ /(.*\/)(.*)(\.git)/)[ 0 ][ 2 ]
// Upload module jars, skipping fb-contrib/sb-contrib, findsecbugs-plugin,
// -sources and -javadoc jars.
findFiles(glob: '*/target/*.jar').each { item ->
if (!(item ==~ /.*\/[fs]b-contrib-.*?.jar/) && !(item ==~ /.*\/findsecbugs-plugin-.*?.jar/) && !(item ==~ /.*-sources.jar/) && !(item ==~ /.*-javadoc.jar/)) {
sh "./jfrog rt u \"${item.path}\" branch-artifacts/${env.BRANCH_NAME}/${repoName}/java/ --recursive=false --build-name ${env.BRANCH_NAME} --build-number ${env.BUILD_NUMBER} --props \"vcs.revision=${env.GIT_COMMIT};repo.name=${repoName}\""
}
}
// Upload every pom.xml whose path does not contain "target". A pom inside a
// directory is uploaded as "<directory>.pom"; one with no directory part in its
// path keeps the default name "main.pom".
findFiles(glob: '**/pom.xml').each { item ->
def pomPath = item.path.replace('\\', '/')
if (!(pomPath ==~ /.*target.*/)) {
def resPomName = "main.pom"
def subDirMatcher = (pomPath =~ /^.*(?<=\/|^)(.*)\/pom\.xml/)
if (subDirMatcher.matches()) {
resPomName = "${subDirMatcher[0][1]}.pom"
}
sh "./jfrog rt u \"${item.path}\" branch-artifacts/${env.BRANCH_NAME}/${repoName}/java/${resPomName} --recursive=false --build-name ${env.BRANCH_NAME} --build-number ${env.BUILD_NUMBER} --props \"vcs.revision=${env.GIT_COMMIT};repo.name=${repoName}\""
}
}
}
}
}
}
}

// Build-result reporting: log a message per outcome, clean the workspace on success, and
// notify the #ci Slack channel when a master/develop branch build is fixed or regresses.
post {
always {
echo 'One way or another, I have finished \uD83E\uDD16'
}
success {
echo 'I succeeeded! \u263A'
cleanWs deleteDirs: true
}
unstable {
echo 'I am unstable \uD83D\uDE2E'
}
failure {
echo 'I failed \uD83D\uDCA9'
}
changed {
echo 'Things were different before... \uD83E\uDD14'
}
fixed {
script {
if (env.BRANCH_NAME.contains('master') || env.BRANCH_NAME.contains('develop')) {
slackNotifier "#ci", currentBuild.currentResult, "${env.BRANCH_NAME} - Back to normal"
}
}
}
regression {
script {
if (env.BRANCH_NAME.contains('master') || env.BRANCH_NAME.contains('develop')) {
slackNotifier "#ci", currentBuild.currentResult, "${env.BRANCH_NAME} - First failure"
}
}
}
}

}
// Hands the build over to automaticJavaBuild with the repository name and dependency regex.
// NOTE(review): automaticJavaBuild is not defined in this file — presumably a step from the
// 'pipeline-library' shared library loaded via @Library; confirm there.
automaticJavaBuild(repoName, dependencyRegex)
2 changes: 1 addition & 1 deletion pdfocr-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>com.itextpdf</groupId>
<artifactId>pdfocr-root</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>

<artifactId>pdfocr-api</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2020 iText Group NV
Authors: iText Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.pdfocr;

import com.itextpdf.io.image.ImageData;

/**
 * Handler for image rotation that is stored in image metadata.
 *
 * <p>
 * Rotation information may be stored in image metadata.
 * For OCR and for adding the image to a document, that rotation
 * should be applied to the image itself, so that it is actually rotated
 * rather than rotated only via metadata properties.
 * The interface is responsible for extracting the rotation from metadata
 * and applying it to the image.
 */
public interface IImageRotationHandler {

    /**
     * Applies rotation to image data.
     * If the image is not rotated, does nothing.
     *
     * @param imageData image data to apply rotation to
     * @return rotated image if rotation flag is set or self if no rotation
     */
    ImageData applyRotation(ImageData imageData);

}
Loading

0 comments on commit 8d2c19d

Please sign in to comment.