Compare commits

...

1 Commit
master ... dl4j

Author SHA1 Message Date
ale
b2efcb2f44 some changes 2019-02-20 03:24:51 +01:00
4 changed files with 67 additions and 36 deletions

87
pom.xml
View File

@ -10,19 +10,19 @@
<packaging>war</packaging>
<name>arjion</name>
<description>Demo project of Apache Tika for Spring Boot</description>
<description>Demo project of Apache Tika for Spring Boot and ML</description>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.1.0.BUILD-SNAPSHOT</version>
<version>2.1.3.RELEASE</version>
<relativePath /> <!-- lookup parent from repository -->
</parent>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<java.version>1.8</java.version>
<java.version>11</java.version>
</properties>
<dependencies>
@ -38,6 +38,11 @@
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>x-pack-transport</artifactId>
<version>6.4.2</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
@ -45,37 +50,37 @@
<scope>test</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.tika/tika-app -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-app</artifactId>
<version>1.18</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.tika/tika-app -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-app</artifactId>
<version>1.20</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>1.18</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>1.20</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.github.jai-imageio/jai-imageio-jpeg2000 -->
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-jpeg2000</artifactId>
<version>1.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.github.jai-imageio/jai-imageio-jpeg2000 -->
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-jpeg2000</artifactId>
<version>1.3.0</version>
</dependency>
<dependency>
<groupId>com.levigo.jbig2</groupId>
<artifactId>levigo-jbig2-imageio</artifactId>
<version>2.0</version>
</dependency>
<dependency>
<groupId>com.levigo.jbig2</groupId>
<artifactId>levigo-jbig2-imageio</artifactId>
<version>2.0</version>
</dependency>
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-core</artifactId>
<version>1.4.0</version>
</dependency>
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-core</artifactId>
<version>1.4.0</version>
</dependency>
<dependency>
<groupId>org.webjars</groupId>
@ -88,6 +93,17 @@
<version>3.1.1-1</version>
</dependency>
<dependency>
<groupId>org.nd4j</groupId>
<artifactId>nd4j-native-platform</artifactId>
<version>1.0.0-beta3</version>
</dependency>
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>deeplearning4j-core</artifactId>
<version>1.0.0-beta3</version>
</dependency>
</dependencies>
<build>
@ -116,6 +132,17 @@
<enabled>false</enabled>
</snapshots>
</repository>
<!-- add the elastic repo -->
<repository>
<id>elastic</id>
<url>https://artifacts.elastic.co/maven</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories>
<pluginRepositories>

View File

@ -10,7 +10,6 @@ import java.nio.file.Paths;
import java.text.Normalizer;
import java.util.HashMap;
import java.util.Map;
import javax.servlet.http.HttpServletResponse;
import com.fasterxml.jackson.databind.ObjectMapper;
@ -88,13 +87,17 @@ public class MainController {
if (archivos.length > 0) {
// Recupera la configuración de Tika
TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
// Si no existe el directorio lo creamos
File fup = new File(uploadpath);
if(!fup.exists() || !fup.isDirectory()) {
fup.mkdir();
}
// Itera los archivos recibidos
for (int i = 0; i < archivos.length; i++) {
byte[] bytes = archivos[i].getBytes();
// Normaliza el título de los archivos
String normalized = Normalizer.normalize(archivos[i].getOriginalFilename(), Normalizer.Form.NFD),
filename = normalized.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
Path path = Paths.get(uploadpath + filename);
// Instancias necesarias
Metadata metadata = new Metadata();
Parser parser = new AutoDetectParser(tikaConfig);
@ -127,6 +130,7 @@ public class MainController {
return "exists";
} else {
// Guarda el archivo en el directorio configurado en las properties
Path path = Paths.get(uploadpath + filename);
Files.write(path, bytes);
}
// Añade los parámetros al VO para mostrar en la vista

View File

@ -6,7 +6,7 @@ elasticsearch.port=9300
elasticsearch.nodename=arjion
arjion.indexName=documentos
arjion.documentType=documento
arjion.uploadpath=/upload/
arjion.uploadpath=./upload/
arjion.tesseractpath=/usr/bin
arjion.tesseractdatapath=/usr/share/tesseract-ocr
spring.main.allow-bean-definition-overriding=true

View File

@ -2,10 +2,10 @@
"documento": {
"dynamic_templates": [
{
"metadata_as_keywords": {
"metadata_as_dynamic": {
"path_match": "metadata.*",
"mapping": {
"type": "keyword"
"type": "{dynamic_type}"
}
}
}