Compare commits

..

12 Commits

Author SHA1 Message Date
manalejandro
bcacdd1cef search service 2018-07-23 02:02:07 +02:00
manalejandro
aa8af0bfd4 v0.3.0 2018-07-21 15:59:42 +02:00
manalejandro
3f5718522f Dockerfile 2018-07-17 00:51:30 +02:00
manalejandro
72c57f5d31 Dockerfile 2018-07-17 00:08:29 +02:00
manalejandro
c7af93d069 Dockerfile 2018-07-16 23:40:08 +02:00
manalejandro
01f386732d tesseract context trouble 2018-07-16 21:28:08 +02:00
manalejandro
d99f758e41 tesseract language 2018-07-16 15:57:10 +02:00
manalejandro
0d837b5a42 tesseract language 2018-07-15 21:59:21 +02:00
manalejandro
81bb011a34 tesseract ocr 2018-07-15 20:02:24 +02:00
manalejandro
2597cbeee5 some changes 2018-07-15 18:03:58 +02:00
manalejandro
d4f77698c7 jai-imageio-jpeg2000 dependency 2018-07-15 15:21:48 +02:00
manalejandro
93f951ca22 download filename 2018-07-15 04:30:38 +02:00
15 changed files with 269 additions and 47 deletions

15
Dockerfile Normal file
View File

@ -0,0 +1,15 @@
# Builds an image that runs Arjion (Spring Boot) next to a local Elasticsearch 6.x node.
FROM debian:stable-slim
RUN apt-get update && apt-get -y upgrade
# /upload matches arjion.uploadpath; the man directories are presumably needed by the
# JDK package scripts on the slim base image -- TODO confirm.
RUN mkdir /upload /usr/share/man/man1 /usr/share/man/man8
# Refresh the package lists in the same layer as the install so a cached "update"
# layer can never be paired with a newer "install" instruction.
RUN apt-get update \
    && apt-get -y install --no-install-recommends apt apt-transport-https apt-utils readline-common curl gnupg software-properties-common dirmngr openjdk-8-jdk procps
# Register the Elastic 6.x APT repository and its signing key.
RUN echo "deb https://artifacts.elastic.co/packages/6.x/apt stable main" > /etc/apt/sources.list.d/elastic-6.x.list
RUN apt-key adv --recv-keys D27D666CD88E42B4
RUN apt-get update \
    && apt-get -y install --no-install-recommends maven tesseract-ocr tesseract-ocr-spa elasticsearch git
# Use the cluster name "elasticsearch" instead of the commented-out sample value.
RUN sed -i "s/#cluster.name: my-application/cluster.name: elasticsearch/" /etc/elasticsearch/elasticsearch.yml
RUN git clone https://gitlab.com/manalejandro/arjion
RUN mvn clean install -f /arjion/pom.xml
# Start Elasticsearch first, then the application in the foreground.
RUN echo "/etc/init.d/elasticsearch start && mvn spring-boot:run -f /arjion/pom.xml" > entrypoint.sh
# EXPOSE only accepts a container port; the host mapping is given with "docker run -p 8080:8080".
EXPOSE 8080
ENTRYPOINT ["bash", "entrypoint.sh"]

View File

@ -4,6 +4,11 @@
### Proof of Concept with [SpringBoot 2.1.0](https://start.spring.io/), [ElasticSearch](https://www.elastic.co/) and [Apache Tika](https://tika.apache.org/) ### Proof of Concept with [SpringBoot 2.1.0](https://start.spring.io/), [ElasticSearch](https://www.elastic.co/) and [Apache Tika](https://tika.apache.org/)
## Docker image
$ docker build -t debian:arjion --rm https://gitlab.com/manalejandro/arjion/raw/master/Dockerfile
$ docker run -ti -p 8080:8080 debian:arjion
## License ## License
MIT MIT

25
pom.xml
View File

@ -52,6 +52,31 @@
<version>1.18</version> <version>1.18</version>
</dependency> </dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>1.18</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.github.jai-imageio/jai-imageio-jpeg2000 -->
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-jpeg2000</artifactId>
<version>1.3.0</version>
</dependency>
<dependency>
<groupId>com.levigo.jbig2</groupId>
<artifactId>levigo-jbig2-imageio</artifactId>
<version>2.0</version>
</dependency>
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-core</artifactId>
<version>1.4.0</version>
</dependency>
<dependency> <dependency>
<groupId>org.webjars</groupId> <groupId>org.webjars</groupId>
<artifactId>bootstrap</artifactId> <artifactId>bootstrap</artifactId>

View File

@ -3,14 +3,17 @@ package com.manalejandro.arjion.controllers;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URLEncoder;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.text.Normalizer; import java.text.Normalizer;
import java.util.ArrayList; import java.util.HashMap;
import java.util.Map;
import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpServletResponse;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.manalejandro.arjion.model.Archivo; import com.manalejandro.arjion.model.Archivo;
import com.manalejandro.arjion.model.Documento; import com.manalejandro.arjion.model.Documento;
import com.manalejandro.arjion.services.MainService; import com.manalejandro.arjion.services.MainService;
@ -24,12 +27,14 @@ import org.apache.tika.language.LanguageIdentifier;
import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser; import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BodyContentHandler; import org.apache.tika.sax.BodyContentHandler;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.ByteArrayResource; import org.springframework.core.io.ByteArrayResource;
import org.springframework.http.HttpHeaders; import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity; import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller; import org.springframework.stereotype.Controller;
import org.springframework.ui.Model; import org.springframework.ui.Model;
@ -49,6 +54,12 @@ public class MainController {
@Value("${arjion.uploadpath}") @Value("${arjion.uploadpath}")
private String uploadpath; private String uploadpath;
@Value("${arjion.tesseractpath}")
private String tesseractpath;
@Value("${arjion.tesseractdatapath}")
private String tesseractdatapath;
@Autowired @Autowired
public MainController(MainService mainService) { public MainController(MainService mainService) {
this.mainService = mainService; this.mainService = mainService;
@ -75,8 +86,7 @@ public class MainController {
documentoVO.setCount(mainService.count()); documentoVO.setCount(mainService.count());
documentoVO.setDocumentos(mainService.findAllDocumento()); documentoVO.setDocumentos(mainService.findAllDocumento());
if (archivos.length > 0) { if (archivos.length > 0) {
documentoVO.setArchivos(new ArrayList<Archivo>()); // Recupera la configuración de Tika
// Recupera la conficuración de Tika
TikaConfig tikaConfig = TikaConfig.getDefaultConfig(); TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
// Itera los archivos recibidos // Itera los archivos recibidos
for (int i = 0; i < archivos.length; i++) { for (int i = 0; i < archivos.length; i++) {
@ -87,26 +97,41 @@ public class MainController {
Path path = Paths.get(uploadpath + filename); Path path = Paths.get(uploadpath + filename);
// Instancias necesarias // Instancias necesarias
Metadata metadata = new Metadata(); Metadata metadata = new Metadata();
AutoDetectParser parser = new AutoDetectParser(tikaConfig); Parser parser = new AutoDetectParser(tikaConfig);
PDFParserConfig pdfConfig = new PDFParserConfig();
TesseractOCRConfig tesseractConfig = new TesseractOCRConfig();
tesseractConfig.setTesseractPath(tesseractpath);
tesseractConfig.setTessdataPath(tesseractdatapath);
tesseractConfig.setLanguage("spa+eng");
pdfConfig.setExtractInlineImages(true);
ParseContext parseContext = new ParseContext();
parseContext.set(TesseractOCRConfig.class, tesseractConfig);
parseContext.set(PDFParserConfig.class, pdfConfig);
// Usa -1 para no tener límite de 100000 chars // Usa -1 para no tener límite de 100000 chars
ContentHandler handler = new BodyContentHandler(-1); ContentHandler handler = new BodyContentHandler(-1);
// Castea los bytes al Stream de Tika // Castea los bytes al Stream de Tika
TikaInputStream stream = TikaInputStream.get(bytes); TikaInputStream stream = TikaInputStream.get(bytes);
// Parsea el contenido // Parsea el contenido
parser.parse(stream, handler, metadata, new ParseContext()); parser.parse(stream, handler, metadata, parseContext);
// Identifica el idioma del archivo // Identifica el idioma del archivo
LanguageIdentifier identifier = new LanguageIdentifier(handler.toString()); LanguageIdentifier identifier = new LanguageIdentifier(handler.toString());
// Almacena en elasticsearch // Almacena en elasticsearch
String[] names = metadata.names();
Map<String, String> meta = new HashMap<String, String>();
for (int j = 0; j < names.length; j++) {
meta.put(names[j], metadata.get(names[j]));
}
ObjectMapper mapper = new ObjectMapper();
if (!mainService.save(new Documento(filename, Long.valueOf(archivos[i].getSize()).intValue(), if (!mainService.save(new Documento(filename, Long.valueOf(archivos[i].getSize()).intValue(),
metadata.toString(), handler.toString(), identifier.getLanguage()))) { mapper.valueToTree(meta), handler.toString(), identifier.getLanguage()))) {
return "exists"; return "exists";
} else { } else {
// Guarda el archivo en el directorio configurado en las properties // Guarda el archivo en el directorio configurado en las properties
Files.write(path, bytes); Files.write(path, bytes);
} }
// Añade los parámetros al VO para mostrar en la vista // Añade los parámetros al VO para mostrar en la vista
documentoVO.getArchivos().add(new Archivo(filename, String.valueOf(archivos[i].getSize()), documentoVO.getArchivos().add(new Archivo(filename, Long.valueOf(archivos[i].getSize()).intValue(), meta,
metadata.toString(), handler.toString(), identifier.getLanguage())); handler.toString(), identifier.getLanguage()));
} }
} }
model.addAttribute("documentoVO", documentoVO); model.addAttribute("documentoVO", documentoVO);
@ -116,7 +141,10 @@ public class MainController {
@GetMapping(path = "/detail") @GetMapping(path = "/detail")
public String detail(final Model model, @RequestParam(value = "nombre", required = true) String nombre) { public String detail(final Model model, @RequestParam(value = "nombre", required = true) String nombre) {
DetailVO detailVO = new DetailVO(); DetailVO detailVO = new DetailVO();
detailVO.setDocumento(mainService.findOne(nombre)); ObjectMapper mapper = new ObjectMapper();
Documento doc = mainService.findOne(nombre);
detailVO.setArchivo(new Archivo(doc.getNombre(), doc.getTamano(),
mapper.convertValue(doc.getMetadata(), Map.class), doc.getContenido(), doc.getLenguaje()));
model.addAttribute("detailVO", detailVO); model.addAttribute("detailVO", detailVO);
return "detail"; return "detail";
} }
@ -130,9 +158,8 @@ public class MainController {
ByteArrayResource resource = new ByteArrayResource(Files.readAllBytes(path)); ByteArrayResource resource = new ByteArrayResource(Files.readAllBytes(path));
String type = file.toURL().openConnection().guessContentTypeFromName(filename); String type = file.toURL().openConnection().guessContentTypeFromName(filename);
HttpHeaders responseHeaders = new HttpHeaders(); HttpHeaders responseHeaders = new HttpHeaders();
responseHeaders.add("content-disposition", "attachment; filename=" + filename); responseHeaders.add("Content-Disposition", "attachment; filename=" + URLEncoder.encode(filename, "UTF-8"));
responseHeaders.add("Content-Type", type); responseHeaders.add("Content-Type", type);
return ResponseEntity.ok().contentLength(file.length()).headers(responseHeaders) return ResponseEntity.ok().contentLength(file.length()).headers(responseHeaders).body(resource);
.contentType(MediaType.parseMediaType("application/octet-stream")).body(resource);
} }
} }

View File

@ -1,14 +1,16 @@
package com.manalejandro.arjion.model; package com.manalejandro.arjion.model;
import java.util.Map;
public class Archivo { public class Archivo {
private String nombre; private String nombre;
private String tamano; private Integer tamano;
private String metadata; private Map metadata;
private String contenido; private String contenido;
private String lenguaje; private String lenguaje;
public Archivo(String nombre, String tamano, String metadata, String contenido, String lenguaje) { public Archivo(String nombre, Integer tamano, Map metadata, String contenido, String lenguaje) {
this.nombre = nombre; this.nombre = nombre;
this.tamano = tamano; this.tamano = tamano;
this.metadata = metadata; this.metadata = metadata;
@ -26,14 +28,14 @@ public class Archivo {
/** /**
* @return the tamano * @return the tamano
*/ */
public String getTamano() { public Integer getTamano() {
return tamano; return tamano;
} }
/** /**
* @return the metadata * @return the metadata
*/ */
public String getMetadata() { public Map getMetadata() {
return metadata; return metadata;
} }
@ -54,14 +56,14 @@ public class Archivo {
/** /**
* @param tamano the tamano to set * @param tamano the tamano to set
*/ */
public void setTamano(String tamano) { public void setTamano(Integer tamano) {
this.tamano = tamano; this.tamano = tamano;
} }
/** /**
* @param metadata the metadata to set * @param metadata the metadata to set
*/ */
public void setMetadata(String metadata) { public void setMetadata(Map metadata) {
this.metadata = metadata; this.metadata = metadata;
} }

View File

@ -0,0 +1,52 @@
package com.manalejandro.arjion.model;
import java.util.ArrayList;
import java.util.List;
/**
 * Holder for the outcome of a search: the documents found, a single
 * suggestion text and a list of autocomplete entries.
 */
public class Consulta {

    private List<Documento> documentos = new ArrayList<>();
    private String suggest;
    private List<String> autocomplete = new ArrayList<>();

    /**
     * @return the documents held by this result (an empty list until replaced)
     */
    public List<Documento> getDocumentos() {
        return this.documentos;
    }

    /**
     * @param documentos the documents to hold
     */
    public void setDocumentos(List<Documento> documentos) {
        this.documentos = documentos;
    }

    /**
     * @return the suggestion text, or {@code null} if none was set
     */
    public String getSuggest() {
        return this.suggest;
    }

    /**
     * @param suggest the suggestion text to hold
     */
    public void setSuggest(String suggest) {
        this.suggest = suggest;
    }

    /**
     * @return the autocomplete entries (an empty list until replaced)
     */
    public List<String> getAutocomplete() {
        return this.autocomplete;
    }

    /**
     * @param autocomplete the autocomplete entries to hold
     */
    public void setAutocomplete(List<String> autocomplete) {
        this.autocomplete = autocomplete;
    }
}

View File

@ -1,13 +1,14 @@
package com.manalejandro.arjion.model; package com.manalejandro.arjion.model;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.JsonNode;
import org.springframework.data.annotation.Id; import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.Document; import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Mapping; import org.springframework.data.elasticsearch.annotations.Mapping;
import org.springframework.data.elasticsearch.annotations.Setting; import org.springframework.data.elasticsearch.annotations.Setting;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
@Document(indexName = "#{@indexName}", type = "#{@documentType}") @Document(indexName = "#{@indexName}", type = "#{@documentType}")
@Setting(settingPath = "/elasticsearch/settings.json") @Setting(settingPath = "/elasticsearch/settings.json")
@Mapping(mappingPath = "/elasticsearch/mapping.json") @Mapping(mappingPath = "/elasticsearch/mapping.json")
@ -15,15 +16,14 @@ public class Documento {
@Id @Id
public String nombre; public String nombre;
public Integer tamano; public Integer tamano;
public String metadata; public JsonNode metadata;
public String contenido; public String contenido;
public String lenguaje; public String lenguaje;
@JsonCreator @JsonCreator
public Documento(@JsonProperty("nombre") String nombre, @JsonProperty("tamano") Integer tamano, public Documento(@JsonProperty("nombre") String nombre, @JsonProperty("tamano") Integer tamano,
@JsonProperty("metadata") String metadata, @JsonProperty("contenido") String contenido, @JsonProperty("metadata") JsonNode metadata, @JsonProperty("contenido") String contenido,
@JsonProperty("lenguaje") String lenguaje) { @JsonProperty("lenguaje") String lenguaje) {
super();
this.nombre = nombre; this.nombre = nombre;
this.tamano = tamano; this.tamano = tamano;
this.metadata = metadata; this.metadata = metadata;
@ -65,14 +65,14 @@ public class Documento {
* @return the metadata * @return the metadata
*/ */
@JsonProperty("metadata") @JsonProperty("metadata")
public String getMetadata() { public JsonNode getMetadata() {
return metadata; return metadata;
} }
/** /**
* @param metadata the metadata to set * @param metadata the metadata to set
*/ */
public void setMetadata(String metadata) { public void setMetadata(JsonNode metadata) {
this.metadata = metadata; this.metadata = metadata;
} }

View File

@ -2,8 +2,11 @@ package com.manalejandro.arjion.services;
import java.util.List; import java.util.List;
import com.manalejandro.arjion.model.Consulta;
import com.manalejandro.arjion.model.Documento; import com.manalejandro.arjion.model.Documento;
import org.springframework.data.domain.Pageable;
public interface MainService { public interface MainService {
public boolean save(Documento doc); public boolean save(Documento doc);
@ -13,4 +16,8 @@ public interface MainService {
public List<Documento> findAllDocumento(); public List<Documento> findAllDocumento();
public Documento findOne(String nombre); public Documento findOne(String nombre);
public Integer maxTamano();
public Consulta search(String busqueda, String[] tipo, Integer tamano, Pageable pageable);
} }

View File

@ -1,22 +1,46 @@
package com.manalejandro.arjion.services; package com.manalejandro.arjion.services;
import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.manalejandro.arjion.model.Consulta;
import com.manalejandro.arjion.model.Documento; import com.manalejandro.arjion.model.Documento;
import com.manalejandro.arjion.repositories.MainRepository; import com.manalejandro.arjion.repositories.MainRepository;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.SuggestBuilders;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.ApplicationContext;
import org.springframework.data.domain.Pageable;
import org.springframework.data.domain.Sort;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
@Service @Service
public class MainServiceImpl implements MainService { public class MainServiceImpl implements MainService {
private final ApplicationContext appContext;
private final MainRepository mainRepository; private final MainRepository mainRepository;
@Value("#{@indexName}")
private String index;
@Value("#{@documentType}")
private String document;
@Autowired @Autowired
public MainServiceImpl(MainRepository mainRepository) { public MainServiceImpl(MainRepository mainRepository, ApplicationContext appContext) {
this.mainRepository = mainRepository; this.mainRepository = mainRepository;
this.appContext = appContext;
} }
@Override @Override
@ -48,4 +72,56 @@ public class MainServiceImpl implements MainService {
public Documento findOne(String nombre) { public Documento findOne(String nombre) {
return mainRepository.findById(nombre).get(); return mainRepository.findById(nombre).get();
} }
/**
 * Returns the size of the largest stored document.
 *
 * @return the {@code tamano} of the first document when sorted by
 *         {@code tamano} descending, or {@code 0} when the repository
 *         returns no documents
 */
@Override
public Integer maxTamano() {
    java.util.Iterator<Documento> porTamano = mainRepository.findAll(new Sort(Sort.Direction.DESC, "tamano"))
            .iterator();
    // An empty index used to fail with NoSuchElementException on next().
    if (!porTamano.hasNext()) {
        return 0;
    }
    return porTamano.next().getTamano();
}
/**
 * Searches the document index and collects hits, a phrase suggestion and
 * highlighted autocomplete entries.
 *
 * @param busqueda free-text query; {@code null}, the literal {@code "null"}
 *                 and the empty string all mean "no text query", in which
 *                 case no suggesters are requested
 * @param tipo     values for a terms filter on {@code tipo}; ignored when
 *                 {@code null} or empty
 * @param tamano   inclusive upper bound for {@code tamano}; ignored when
 *                 {@code null} or negative
 * @param pageable page size and offset of the requested page
 * @return the populated {@link Consulta}; hits whose source cannot be read
 *         as a {@link Documento} are skipped
 */
@Override
public Consulta search(String busqueda, String[] tipo, Integer tamano, Pageable pageable) {
    Client client = (Client) appContext.getBean("client");
    boolean hasQuery = busqueda != null && !"null".equals(busqueda) && !busqueda.isEmpty();

    BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
    if (hasQuery) {
        boolQueryBuilder.must(QueryBuilders.matchQuery("nombre", busqueda));
        boolQueryBuilder.should(QueryBuilders.matchQuery("contenido", busqueda));
    }
    if (tipo != null && tipo.length > 0) {
        boolQueryBuilder.filter(QueryBuilders.termsQuery("tipo", tipo));
    }
    if (tamano != null && tamano >= 0) {
        boolQueryBuilder.must(QueryBuilders.rangeQuery("tamano").to(tamano).includeUpper(true));
    }
    AggregationBuilder aggregation = AggregationBuilders.terms("by_xarchivo").field("x_archivo").size(10000);
    System.out.println(boolQueryBuilder);

    // "from" is an offset in hits, not a page index, so use the pageable's offset.
    org.elasticsearch.action.search.SearchRequestBuilder request = client.prepareSearch(index)
            .setQuery(boolQueryBuilder).addAggregation(aggregation).setSize(pageable.getPageSize())
            .setFrom(Math.toIntExact(pageable.getOffset()));
    if (hasQuery) {
        // Suggesters need a text; only request them when there is one.
        request.suggest(new SuggestBuilder()
                .addSuggestion("suggest", SuggestBuilders.completionSuggestion("nombre").text(busqueda).size(10))
                .addSuggestion("phrase", SuggestBuilders.phraseSuggestion("nombre").text(busqueda).size(1)
                        .realWordErrorLikelihood((float) 0.95).maxErrors((float) 0.5).gramSize(2)));
    }
    SearchResponse response = request.execute().actionGet();

    Consulta consulta = new Consulta();
    String phrase = "";
    if (hasQuery && response.getSuggest() != null) {
        // Only the first option of the first "phrase" entry is used.
        for (Entry<? extends Option> entry : response.getSuggest().getSuggestion("phrase").getEntries()) {
            if (!entry.getOptions().isEmpty()) {
                phrase = entry.getOptions().get(0).getText().string();
            }
            break;
        }
        // Strip everything but letters, digits and spaces before the words are
        // embedded in the highlighting regex below.
        String[] words = busqueda.replaceAll("[^\\p{Alnum}\\p{IsAlphabetic} ]", "").split(" ");
        for (Entry<? extends Option> entry : response.getSuggest().getSuggestion("suggest").getEntries()) {
            for (Option option : entry.getOptions()) {
                String suggestText = option.getText().string().trim();
                for (String item : words) {
                    if (item.length() > 0) {
                        consulta.getAutocomplete().add(
                                suggestText.replaceAll("(?i)((?!<)" + item + "(?![^<>]*>))", "<strong>$1</strong>"));
                    }
                }
            }
        }
    }
    consulta.setSuggest(phrase);

    // Deserialize each hit from its JSON source; calling toString() on the hit
    // array only yields an identity string, which is not parseable JSON.
    ObjectMapper mapper = new ObjectMapper();
    List<Documento> documentos = new ArrayList<>();
    for (org.elasticsearch.search.SearchHit hit : response.getHits().getHits()) {
        try {
            documentos.add(mapper.readValue(hit.getSourceAsString(), Documento.class));
        } catch (IOException e) {
            // Best effort, as before: report the unreadable hit and keep the rest.
            e.printStackTrace();
        }
    }
    consulta.setDocumentos(documentos);
    return consulta;
}
} }

View File

@ -1,22 +1,22 @@
package com.manalejandro.arjion.vo; package com.manalejandro.arjion.vo;
import com.manalejandro.arjion.model.Documento; import com.manalejandro.arjion.model.Archivo;
public class DetailVO { public class DetailVO {
private Documento documento; private Archivo archivo;
/** /**
* @return the documento * @return the archivo
*/ */
public Documento getDocumento() { public Archivo getArchivo() {
return documento; return archivo;
} }
/** /**
* @param documento the documento to set * @param archivo the archivo to set
*/ */
public void setDocumento(Documento documento) { public void setArchivo(Archivo archivo) {
this.documento = documento; this.archivo = archivo;
} }
} }

View File

@ -1,5 +1,6 @@
package com.manalejandro.arjion.vo; package com.manalejandro.arjion.vo;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import com.manalejandro.arjion.model.Archivo; import com.manalejandro.arjion.model.Archivo;
@ -7,9 +8,9 @@ import com.manalejandro.arjion.model.Documento;
public class DocumentoVO { public class DocumentoVO {
private List<Archivo> archivos; private List<Archivo> archivos = new ArrayList<Archivo>();
private long count; private long count;
private List<Documento> documentos; private List<Documento> documentos = new ArrayList<Documento>();
/** /**
* @return the archivos * @return the archivos

View File

@ -7,6 +7,8 @@ elasticsearch.nodename=arjion
arjion.indexName=documentos arjion.indexName=documentos
arjion.documentType=documento arjion.documentType=documento
arjion.uploadpath=/upload/ arjion.uploadpath=/upload/
arjion.tesseractpath=/usr/bin
arjion.tesseractdatapath=/usr/share/tesseract-ocr
spring.main.allow-bean-definition-overriding=true spring.main.allow-bean-definition-overriding=true
spring.thymeleaf.enabled=true spring.thymeleaf.enabled=true
spring.thymeleaf.prefix=classpath:/templates/ spring.thymeleaf.prefix=classpath:/templates/

View File

@ -1,5 +1,15 @@
{ {
"documento": { "documento": {
"dynamic_templates": [
{
"metadata_as_keywords": {
"path_match": "metadata.*",
"mapping": {
"type": "keyword"
}
}
}
],
"properties": { "properties": {
"@timestamp": { "@timestamp": {
"type": "date", "type": "date",
@ -15,7 +25,7 @@
"type": "long" "type": "long"
}, },
"metadata": { "metadata": {
"type": "text" "type": "object"
}, },
"contenido": { "contenido": {
"type": "text" "type": "text"

View File

@ -16,20 +16,20 @@
<a th:href="@{/}"> <a th:href="@{/}">
<h1 class="text-primary">Arjion</h1> <h1 class="text-primary">Arjion</h1>
</a> </a>
<h3 class="text-warning">[[${detailVO.documento.nombre}]]</h3> <h3 class="text-warning">[[${detailVO.archivo.nombre}]]</h3>
</header> </header>
<section class="col-md-12"> <section class="col-md-12">
<hr> <hr>
</section> </section>
<section> <section>
<span class="col-md-1 text-muted">Tamaño</span> <span class="col-md-1 text-muted">Tamaño</span>
<span class="col-md-11 text-muted">[[${detailVO.documento.tamano}]] bytes</span> <span class="col-md-11 text-muted">[[${detailVO.archivo.tamano}]] bytes</span>
<span class="col-md-1 text-muted">Lenguaje</span> <span class="col-md-1 text-muted">Lenguaje</span>
<span class="col-md-11 text-muted">[[${detailVO.documento.lenguaje}]]</span> <span class="col-md-11 text-muted">[[${detailVO.archivo.lenguaje}]]</span>
<span class="col-md-1 text-success">Metadatos</span> <span class="col-md-1 text-success">Metadatos</span>
<span class="col-md-11 text-success">[[${detailVO.documento.metadata}]]</span> <span class="col-md-11 text-success"><ul><li th:each="meta : ${detailVO.archivo.metadata}"><span th:text="${meta.key}"></span>: <span th:text="${meta.value}"></span></li></ul></span>
<span class="col-md-1 text-warning">Contenido</span> <span class="col-md-1 text-warning">Contenido</span>
<pre class="col-md-11 text-warning">[[${detailVO.documento.contenido}]]</pre> <pre class="col-md-11 text-warning">[[${detailVO.archivo.contenido}]]</pre>
</div> </div>
</section> </section>
<section class="col-md-12"> <section class="col-md-12">
@ -37,7 +37,7 @@
</section> </section>
<footer class="col-md-12 text-center"> <footer class="col-md-12 text-center">
<span class="col-md-12"> <span class="col-md-12">
<button class="btn btn-primary" th:onclick="'window.location.pathname=\'' + @{/} + '\''">Volver</button> <a class="btn btn-primary" th:href="@{/}">Volver</a>
</span> </span>
<span>2018</span> <span>2018</span>
</footer> </footer>

View File

@ -49,7 +49,7 @@
<span class="col-md-1 text-muted">Lenguaje</span> <span class="col-md-1 text-muted">Lenguaje</span>
<span class="col-md-11 text-muted">[[${arc.lenguaje}]]</span> <span class="col-md-11 text-muted">[[${arc.lenguaje}]]</span>
<span class="col-md-1 text-success">Metadatos</span> <span class="col-md-1 text-success">Metadatos</span>
<span class="col-md-11 text-success">[[${arc.metadata}]]</span> <span class="col-md-11 text-success"><ul><li th:each="meta : ${arc.metadata}"><span th:text="${meta.key}"></span>: <span th:text="${meta.value}"></span></li></ul></span>
<span class="col-md-1 text-warning">Contenido</span> <span class="col-md-1 text-warning">Contenido</span>
<pre class="col-md-11 text-warning">[[${arc.contenido}]]</pre> <pre class="col-md-11 text-warning">[[${arc.contenido}]]</pre>
<span class="col-md-12"> <span class="col-md-12">