some changes
This commit is contained in:
parent
aa8af0bfd4
commit
b2efcb2f44
87
pom.xml
87
pom.xml
@ -10,19 +10,19 @@
|
|||||||
<packaging>war</packaging>
|
<packaging>war</packaging>
|
||||||
|
|
||||||
<name>arjion</name>
|
<name>arjion</name>
|
||||||
<description>Demo project of Apache Tika for Spring Boot</description>
|
<description>Demo project of Apache Tika for Spring Boot and ML</description>
|
||||||
|
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>org.springframework.boot</groupId>
|
<groupId>org.springframework.boot</groupId>
|
||||||
<artifactId>spring-boot-starter-parent</artifactId>
|
<artifactId>spring-boot-starter-parent</artifactId>
|
||||||
<version>2.1.0.BUILD-SNAPSHOT</version>
|
<version>2.1.3.RELEASE</version>
|
||||||
<relativePath /> <!-- lookup parent from repository -->
|
<relativePath /> <!-- lookup parent from repository -->
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
||||||
<java.version>1.8</java.version>
|
<java.version>11</java.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
@ -38,6 +38,11 @@
|
|||||||
<groupId>org.springframework.boot</groupId>
|
<groupId>org.springframework.boot</groupId>
|
||||||
<artifactId>spring-boot-starter-web</artifactId>
|
<artifactId>spring-boot-starter-web</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.elasticsearch.client</groupId>
|
||||||
|
<artifactId>x-pack-transport</artifactId>
|
||||||
|
<version>6.4.2</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springframework.boot</groupId>
|
<groupId>org.springframework.boot</groupId>
|
||||||
@ -45,37 +50,37 @@
|
|||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- https://mvnrepository.com/artifact/org.apache.tika/tika-app -->
|
<!-- https://mvnrepository.com/artifact/org.apache.tika/tika-app -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.tika</groupId>
|
<groupId>org.apache.tika</groupId>
|
||||||
<artifactId>tika-app</artifactId>
|
<artifactId>tika-app</artifactId>
|
||||||
<version>1.18</version>
|
<version>1.20</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.tika</groupId>
|
<groupId>org.apache.tika</groupId>
|
||||||
<artifactId>tika-parsers</artifactId>
|
<artifactId>tika-parsers</artifactId>
|
||||||
<version>1.18</version>
|
<version>1.20</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- https://mvnrepository.com/artifact/com.github.jai-imageio/jai-imageio-jpeg2000 -->
|
<!-- https://mvnrepository.com/artifact/com.github.jai-imageio/jai-imageio-jpeg2000 -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.jai-imageio</groupId>
|
<groupId>com.github.jai-imageio</groupId>
|
||||||
<artifactId>jai-imageio-jpeg2000</artifactId>
|
<artifactId>jai-imageio-jpeg2000</artifactId>
|
||||||
<version>1.3.0</version>
|
<version>1.3.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.levigo.jbig2</groupId>
|
<groupId>com.levigo.jbig2</groupId>
|
||||||
<artifactId>levigo-jbig2-imageio</artifactId>
|
<artifactId>levigo-jbig2-imageio</artifactId>
|
||||||
<version>2.0</version>
|
<version>2.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.jai-imageio</groupId>
|
<groupId>com.github.jai-imageio</groupId>
|
||||||
<artifactId>jai-imageio-core</artifactId>
|
<artifactId>jai-imageio-core</artifactId>
|
||||||
<version>1.4.0</version>
|
<version>1.4.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.webjars</groupId>
|
<groupId>org.webjars</groupId>
|
||||||
@ -88,6 +93,17 @@
|
|||||||
<version>3.1.1-1</version>
|
<version>3.1.1-1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.nd4j</groupId>
|
||||||
|
<artifactId>nd4j-native-platform</artifactId>
|
||||||
|
<version>1.0.0-beta3</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.deeplearning4j</groupId>
|
||||||
|
<artifactId>deeplearning4j-core</artifactId>
|
||||||
|
<version>1.0.0-beta3</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
@ -116,6 +132,17 @@
|
|||||||
<enabled>false</enabled>
|
<enabled>false</enabled>
|
||||||
</snapshots>
|
</snapshots>
|
||||||
</repository>
|
</repository>
|
||||||
|
<!-- add the elastic repo -->
|
||||||
|
<repository>
|
||||||
|
<id>elastic</id>
|
||||||
|
<url>https://artifacts.elastic.co/maven</url>
|
||||||
|
<releases>
|
||||||
|
<enabled>true</enabled>
|
||||||
|
</releases>
|
||||||
|
<snapshots>
|
||||||
|
<enabled>false</enabled>
|
||||||
|
</snapshots>
|
||||||
|
</repository>
|
||||||
</repositories>
|
</repositories>
|
||||||
|
|
||||||
<pluginRepositories>
|
<pluginRepositories>
|
||||||
|
@ -10,7 +10,6 @@ import java.nio.file.Paths;
|
|||||||
import java.text.Normalizer;
|
import java.text.Normalizer;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import javax.servlet.http.HttpServletResponse;
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
@ -88,13 +87,17 @@ public class MainController {
|
|||||||
if (archivos.length > 0) {
|
if (archivos.length > 0) {
|
||||||
// Recupera la configuración de Tika
|
// Recupera la configuración de Tika
|
||||||
TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
|
TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
|
||||||
|
// Si no existe el directorio lo creamos
|
||||||
|
File fup = new File(uploadpath);
|
||||||
|
if(!fup.exists() || !fup.isDirectory()) {
|
||||||
|
fup.mkdir();
|
||||||
|
}
|
||||||
// Itera los archivos recibidos
|
// Itera los archivos recibidos
|
||||||
for (int i = 0; i < archivos.length; i++) {
|
for (int i = 0; i < archivos.length; i++) {
|
||||||
byte[] bytes = archivos[i].getBytes();
|
byte[] bytes = archivos[i].getBytes();
|
||||||
// Normaliza el título de los archivos
|
// Normaliza el título de los archivos
|
||||||
String normalized = Normalizer.normalize(archivos[i].getOriginalFilename(), Normalizer.Form.NFD),
|
String normalized = Normalizer.normalize(archivos[i].getOriginalFilename(), Normalizer.Form.NFD),
|
||||||
filename = normalized.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
|
filename = normalized.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
|
||||||
Path path = Paths.get(uploadpath + filename);
|
|
||||||
// Instancias necesarias
|
// Instancias necesarias
|
||||||
Metadata metadata = new Metadata();
|
Metadata metadata = new Metadata();
|
||||||
Parser parser = new AutoDetectParser(tikaConfig);
|
Parser parser = new AutoDetectParser(tikaConfig);
|
||||||
@ -127,6 +130,7 @@ public class MainController {
|
|||||||
return "exists";
|
return "exists";
|
||||||
} else {
|
} else {
|
||||||
// Guarda el archivo en el directorio configurado en las properties
|
// Guarda el archivo en el directorio configurado en las properties
|
||||||
|
Path path = Paths.get(uploadpath + filename);
|
||||||
Files.write(path, bytes);
|
Files.write(path, bytes);
|
||||||
}
|
}
|
||||||
// Añade los parámetros al VO para mostrar en la vista
|
// Añade los parámetros al VO para mostrar en la vista
|
||||||
|
@ -6,7 +6,7 @@ elasticsearch.port=9300
|
|||||||
elasticsearch.nodename=arjion
|
elasticsearch.nodename=arjion
|
||||||
arjion.indexName=documentos
|
arjion.indexName=documentos
|
||||||
arjion.documentType=documento
|
arjion.documentType=documento
|
||||||
arjion.uploadpath=/upload/
|
arjion.uploadpath=./upload/
|
||||||
arjion.tesseractpath=/usr/bin
|
arjion.tesseractpath=/usr/bin
|
||||||
arjion.tesseractdatapath=/usr/share/tesseract-ocr
|
arjion.tesseractdatapath=/usr/share/tesseract-ocr
|
||||||
spring.main.allow-bean-definition-overriding=true
|
spring.main.allow-bean-definition-overriding=true
|
||||||
|
@ -2,10 +2,10 @@
|
|||||||
"documento": {
|
"documento": {
|
||||||
"dynamic_templates": [
|
"dynamic_templates": [
|
||||||
{
|
{
|
||||||
"metadata_as_keywords": {
|
"metadata_as_dynamic": {
|
||||||
"path_match": "metadata.*",
|
"path_match": "metadata.*",
|
||||||
"mapping": {
|
"mapping": {
|
||||||
"type": "keyword"
|
"type": "{dynamic_type}"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user