Compare commits

...

1 Commits
master ... dl4j

Author SHA1 Message Date
ale
b2efcb2f44 some changes 2019-02-20 03:24:51 +01:00
4 changed files with 67 additions and 36 deletions

37
pom.xml
View File

@ -10,19 +10,19 @@
<packaging>war</packaging> <packaging>war</packaging>
<name>arjion</name> <name>arjion</name>
<description>Demo project of Apache Tika for Spring Boot</description> <description>Demo project of Apache Tika for Spring Boot and ML</description>
<parent> <parent>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId> <artifactId>spring-boot-starter-parent</artifactId>
<version>2.1.0.BUILD-SNAPSHOT</version> <version>2.1.3.RELEASE</version>
<relativePath /> <!-- lookup parent from repository --> <relativePath /> <!-- lookup parent from repository -->
</parent> </parent>
<properties> <properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<java.version>1.8</java.version> <java.version>11</java.version>
</properties> </properties>
<dependencies> <dependencies>
@ -38,6 +38,11 @@
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId> <artifactId>spring-boot-starter-web</artifactId>
</dependency> </dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>x-pack-transport</artifactId>
<version>6.4.2</version>
</dependency>
<dependency> <dependency>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
@ -49,13 +54,13 @@
<dependency> <dependency>
<groupId>org.apache.tika</groupId> <groupId>org.apache.tika</groupId>
<artifactId>tika-app</artifactId> <artifactId>tika-app</artifactId>
<version>1.18</version> <version>1.20</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.tika</groupId> <groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId> <artifactId>tika-parsers</artifactId>
<version>1.18</version> <version>1.20</version>
</dependency> </dependency>
<!-- https://mvnrepository.com/artifact/com.github.jai-imageio/jai-imageio-jpeg2000 --> <!-- https://mvnrepository.com/artifact/com.github.jai-imageio/jai-imageio-jpeg2000 -->
@ -88,6 +93,17 @@
<version>3.1.1-1</version> <version>3.1.1-1</version>
</dependency> </dependency>
<dependency>
<groupId>org.nd4j</groupId>
<artifactId>nd4j-native-platform</artifactId>
<version>1.0.0-beta3</version>
</dependency>
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>deeplearning4j-core</artifactId>
<version>1.0.0-beta3</version>
</dependency>
</dependencies> </dependencies>
<build> <build>
@ -116,6 +132,17 @@
<enabled>false</enabled> <enabled>false</enabled>
</snapshots> </snapshots>
</repository> </repository>
<!-- add the elastic repo -->
<repository>
<id>elastic</id>
<url>https://artifacts.elastic.co/maven</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories> </repositories>
<pluginRepositories> <pluginRepositories>

View File

@ -10,7 +10,6 @@ import java.nio.file.Paths;
import java.text.Normalizer; import java.text.Normalizer;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpServletResponse;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
@ -88,13 +87,17 @@ public class MainController {
if (archivos.length > 0) { if (archivos.length > 0) {
// Recupera la configuración de Tika // Recupera la configuración de Tika
TikaConfig tikaConfig = TikaConfig.getDefaultConfig(); TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
// Si no existe el directorio lo creamos
File fup = new File(uploadpath);
if(!fup.exists() || !fup.isDirectory()) {
fup.mkdir();
}
// Itera los archivos recibidos // Itera los archivos recibidos
for (int i = 0; i < archivos.length; i++) { for (int i = 0; i < archivos.length; i++) {
byte[] bytes = archivos[i].getBytes(); byte[] bytes = archivos[i].getBytes();
// Normaliza el título de los archivos // Normaliza el título de los archivos
String normalized = Normalizer.normalize(archivos[i].getOriginalFilename(), Normalizer.Form.NFD), String normalized = Normalizer.normalize(archivos[i].getOriginalFilename(), Normalizer.Form.NFD),
filename = normalized.replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); filename = normalized.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
Path path = Paths.get(uploadpath + filename);
// Instancias necesarias // Instancias necesarias
Metadata metadata = new Metadata(); Metadata metadata = new Metadata();
Parser parser = new AutoDetectParser(tikaConfig); Parser parser = new AutoDetectParser(tikaConfig);
@ -127,6 +130,7 @@ public class MainController {
return "exists"; return "exists";
} else { } else {
// Guarda el archivo en el directorio configurado en las properties // Guarda el archivo en el directorio configurado en las properties
Path path = Paths.get(uploadpath + filename);
Files.write(path, bytes); Files.write(path, bytes);
} }
// Añade los parámetros al VO para mostrar en la vista // Añade los parámetros al VO para mostrar en la vista

View File

@ -6,7 +6,7 @@ elasticsearch.port=9300
elasticsearch.nodename=arjion elasticsearch.nodename=arjion
arjion.indexName=documentos arjion.indexName=documentos
arjion.documentType=documento arjion.documentType=documento
arjion.uploadpath=/upload/ arjion.uploadpath=./upload/
arjion.tesseractpath=/usr/bin arjion.tesseractpath=/usr/bin
arjion.tesseractdatapath=/usr/share/tesseract-ocr arjion.tesseractdatapath=/usr/share/tesseract-ocr
spring.main.allow-bean-definition-overriding=true spring.main.allow-bean-definition-overriding=true

View File

@ -2,10 +2,10 @@
"documento": { "documento": {
"dynamic_templates": [ "dynamic_templates": [
{ {
"metadata_as_keywords": { "metadata_as_dynamic": {
"path_match": "metadata.*", "path_match": "metadata.*",
"mapping": { "mapping": {
"type": "keyword" "type": "{dynamic_type}"
} }
} }
} }