diff --git a/pom.xml b/pom.xml index 2166330..a3b56ff 100644 --- a/pom.xml +++ b/pom.xml @@ -10,19 +10,19 @@ war arjion - Demo project of Apache Tika for Spring Boot + Demo project of Apache Tika for Spring Boot and ML org.springframework.boot spring-boot-starter-parent - 2.1.0.BUILD-SNAPSHOT + 2.1.3.RELEASE UTF-8 UTF-8 - 1.8 + 11 @@ -38,6 +38,11 @@ org.springframework.boot spring-boot-starter-web + + org.elasticsearch.client + x-pack-transport + 6.4.2 + org.springframework.boot @@ -45,38 +50,38 @@ test - - - org.apache.tika - tika-app - 1.18 - + + + org.apache.tika + tika-app + 1.20 + - - org.apache.tika - tika-parsers - 1.18 - + + org.apache.tika + tika-parsers + 1.20 + - - - com.github.jai-imageio - jai-imageio-jpeg2000 - 1.3.0 - + + + com.github.jai-imageio + jai-imageio-jpeg2000 + 1.3.0 + - - com.levigo.jbig2 - levigo-jbig2-imageio - 2.0 - + + com.levigo.jbig2 + levigo-jbig2-imageio + 2.0 + + + + com.github.jai-imageio + jai-imageio-core + 1.4.0 + - - com.github.jai-imageio - jai-imageio-core - 1.4.0 - - org.webjars bootstrap @@ -88,6 +93,17 @@ 3.1.1-1 + + org.nd4j + nd4j-native-platform + 1.0.0-beta3 + + + org.deeplearning4j + deeplearning4j-core + 1.0.0-beta3 + + @@ -116,6 +132,17 @@ false + + + elastic + https://artifacts.elastic.co/maven + + true + + + false + + diff --git a/src/main/java/com/manalejandro/arjion/controllers/MainController.java b/src/main/java/com/manalejandro/arjion/controllers/MainController.java index a15d62b..a12b8ef 100644 --- a/src/main/java/com/manalejandro/arjion/controllers/MainController.java +++ b/src/main/java/com/manalejandro/arjion/controllers/MainController.java @@ -10,7 +10,6 @@ import java.nio.file.Paths; import java.text.Normalizer; import java.util.HashMap; import java.util.Map; - import javax.servlet.http.HttpServletResponse; import com.fasterxml.jackson.databind.ObjectMapper; @@ -88,13 +87,17 @@ public class MainController { if (archivos.length > 0) { // Recupera la configuración de Tika TikaConfig tikaConfig = TikaConfig.getDefaultConfig(); + // Si no existe el directorio lo creamos + File fup = new File(uploadpath); + if(!fup.exists() || !fup.isDirectory()) { + fup.mkdir(); + } // Itera los archivos recibidos for (int i = 0; i < archivos.length; i++) { byte[] bytes = archivos[i].getBytes(); // Normaliza el título de los archivos String normalized = Normalizer.normalize(archivos[i].getOriginalFilename(), Normalizer.Form.NFD), filename = normalized.replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); - Path path = Paths.get(uploadpath + filename); // Instancias necesarias Metadata metadata = new Metadata(); Parser parser = new AutoDetectParser(tikaConfig); @@ -127,6 +130,7 @@ public class MainController { return "exists"; } else { // Guarda el archivo en el directorio configurado en las properties + Path path = Paths.get(uploadpath + filename); Files.write(path, bytes); } // Añade los parámetros al VO para mostrar en la vista diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index d57275f..6a7fc94 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -6,7 +6,7 @@ elasticsearch.port=9300 elasticsearch.nodename=arjion arjion.indexName=documentos arjion.documentType=documento -arjion.uploadpath=/upload/ +arjion.uploadpath=./upload/ arjion.tesseractpath=/usr/bin arjion.tesseractdatapath=/usr/share/tesseract-ocr spring.main.allow-bean-definition-overriding=true diff --git a/src/main/resources/elasticsearch/mapping.json b/src/main/resources/elasticsearch/mapping.json index f5d5753..3ae45c7 100644 --- a/src/main/resources/elasticsearch/mapping.json +++ b/src/main/resources/elasticsearch/mapping.json @@ -2,10 +2,10 @@ "documento": { "dynamic_templates": [ { - "metadata_as_keywords": { + "metadata_as_dynamic": { "path_match": "metadata.*", "mapping": { - "type": "keyword" + "type": "{dynamic_type}" } } }