Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
b2efcb2f44 |
87
pom.xml
87
pom.xml
@ -10,19 +10,19 @@
|
||||
<packaging>war</packaging>
|
||||
|
||||
<name>arjion</name>
|
||||
<description>Demo project of Apache Tika for Spring Boot</description>
|
||||
<description>Demo project of Apache Tika for Spring Boot and ML</description>
|
||||
|
||||
<parent>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-parent</artifactId>
|
||||
<version>2.1.0.BUILD-SNAPSHOT</version>
|
||||
<version>2.1.3.RELEASE</version>
|
||||
<relativePath /> <!-- lookup parent from repository -->
|
||||
</parent>
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
||||
<java.version>1.8</java.version>
|
||||
<java.version>11</java.version>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
@ -38,6 +38,11 @@
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.elasticsearch.client</groupId>
|
||||
<artifactId>x-pack-transport</artifactId>
|
||||
<version>6.4.2</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
@ -45,37 +50,37 @@
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.tika/tika-app -->
|
||||
<dependency>
|
||||
<groupId>org.apache.tika</groupId>
|
||||
<artifactId>tika-app</artifactId>
|
||||
<version>1.18</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.tika/tika-app -->
|
||||
<dependency>
|
||||
<groupId>org.apache.tika</groupId>
|
||||
<artifactId>tika-app</artifactId>
|
||||
<version>1.20</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.tika</groupId>
|
||||
<artifactId>tika-parsers</artifactId>
|
||||
<version>1.18</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.tika</groupId>
|
||||
<artifactId>tika-parsers</artifactId>
|
||||
<version>1.20</version>
|
||||
</dependency>
|
||||
|
||||
<!-- https://mvnrepository.com/artifact/com.github.jai-imageio/jai-imageio-jpeg2000 -->
|
||||
<dependency>
|
||||
<groupId>com.github.jai-imageio</groupId>
|
||||
<artifactId>jai-imageio-jpeg2000</artifactId>
|
||||
<version>1.3.0</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/com.github.jai-imageio/jai-imageio-jpeg2000 -->
|
||||
<dependency>
|
||||
<groupId>com.github.jai-imageio</groupId>
|
||||
<artifactId>jai-imageio-jpeg2000</artifactId>
|
||||
<version>1.3.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.levigo.jbig2</groupId>
|
||||
<artifactId>levigo-jbig2-imageio</artifactId>
|
||||
<version>2.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.levigo.jbig2</groupId>
|
||||
<artifactId>levigo-jbig2-imageio</artifactId>
|
||||
<version>2.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.github.jai-imageio</groupId>
|
||||
<artifactId>jai-imageio-core</artifactId>
|
||||
<version>1.4.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.github.jai-imageio</groupId>
|
||||
<artifactId>jai-imageio-core</artifactId>
|
||||
<version>1.4.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.webjars</groupId>
|
||||
@ -88,6 +93,17 @@
|
||||
<version>3.1.1-1</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.nd4j</groupId>
|
||||
<artifactId>nd4j-native-platform</artifactId>
|
||||
<version>1.0.0-beta3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.deeplearning4j</groupId>
|
||||
<artifactId>deeplearning4j-core</artifactId>
|
||||
<version>1.0.0-beta3</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
@ -116,6 +132,17 @@
|
||||
<enabled>false</enabled>
|
||||
</snapshots>
|
||||
</repository>
|
||||
<!-- add the elastic repo -->
|
||||
<repository>
|
||||
<id>elastic</id>
|
||||
<url>https://artifacts.elastic.co/maven</url>
|
||||
<releases>
|
||||
<enabled>true</enabled>
|
||||
</releases>
|
||||
<snapshots>
|
||||
<enabled>false</enabled>
|
||||
</snapshots>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
<pluginRepositories>
|
||||
|
@ -10,7 +10,6 @@ import java.nio.file.Paths;
|
||||
import java.text.Normalizer;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
@ -88,13 +87,17 @@ public class MainController {
|
||||
if (archivos.length > 0) {
|
||||
// Recupera la configuración de Tika
|
||||
TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
|
||||
// Si no existe el directorio lo creamos
|
||||
File fup = new File(uploadpath);
|
||||
if(!fup.exists() || !fup.isDirectory()) {
|
||||
fup.mkdir();
|
||||
}
|
||||
// Itera los archivos recibidos
|
||||
for (int i = 0; i < archivos.length; i++) {
|
||||
byte[] bytes = archivos[i].getBytes();
|
||||
// Normaliza el título de los archivos
|
||||
String normalized = Normalizer.normalize(archivos[i].getOriginalFilename(), Normalizer.Form.NFD),
|
||||
filename = normalized.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
|
||||
Path path = Paths.get(uploadpath + filename);
|
||||
// Instancias necesarias
|
||||
Metadata metadata = new Metadata();
|
||||
Parser parser = new AutoDetectParser(tikaConfig);
|
||||
@ -127,6 +130,7 @@ public class MainController {
|
||||
return "exists";
|
||||
} else {
|
||||
// Guarda el archivo en el directorio configurado en las properties
|
||||
Path path = Paths.get(uploadpath + filename);
|
||||
Files.write(path, bytes);
|
||||
}
|
||||
// Añade los parámetros al VO para mostrar en la vista
|
||||
|
@ -6,7 +6,7 @@ elasticsearch.port=9300
|
||||
elasticsearch.nodename=arjion
|
||||
arjion.indexName=documentos
|
||||
arjion.documentType=documento
|
||||
arjion.uploadpath=/upload/
|
||||
arjion.uploadpath=./upload/
|
||||
arjion.tesseractpath=/usr/bin
|
||||
arjion.tesseractdatapath=/usr/share/tesseract-ocr
|
||||
spring.main.allow-bean-definition-overriding=true
|
||||
|
@ -2,10 +2,10 @@
|
||||
"documento": {
|
||||
"dynamic_templates": [
|
||||
{
|
||||
"metadata_as_keywords": {
|
||||
"metadata_as_dynamic": {
|
||||
"path_match": "metadata.*",
|
||||
"mapping": {
|
||||
"type": "keyword"
|
||||
"type": "{dynamic_type}"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user