Compare commits
14 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
bcacdd1cef | ||
|
aa8af0bfd4 | ||
|
3f5718522f | ||
|
72c57f5d31 | ||
|
c7af93d069 | ||
|
01f386732d | ||
|
d99f758e41 | ||
|
0d837b5a42 | ||
|
81bb011a34 | ||
|
2597cbeee5 | ||
|
d4f77698c7 | ||
|
93f951ca22 | ||
|
236e3b4047 | ||
|
bb3ccad495 |
15
Dockerfile
Normal file
15
Dockerfile
Normal file
@ -0,0 +1,15 @@
|
||||
# Image for the Arjion proof of concept: Elasticsearch 6.x, Tesseract OCR and the
# Spring Boot application built from source, all in a single container.
FROM debian:stable-slim
# Refresh the package index and upgrade in the same layer, so a cached index is
# never reused by a later rebuild of the upgrade step.
RUN apt-get update && apt-get -y upgrade
# /upload is the directory configured as arjion.uploadpath.
# NOTE(review): the man directories are presumably required by the JDK package's
# post-install step on the slim image - confirm before removing.
RUN mkdir /upload /usr/share/man/man1 /usr/share/man/man8
# Base tooling plus the JDK. The index is refreshed in the same RUN so the
# install never depends on a stale cached layer.
RUN apt-get update && apt-get -y install --no-install-recommends apt apt-transport-https apt-utils readline-common curl gnupg software-properties-common dirmngr openjdk-8-jdk procps
# Register the Elastic 6.x APT repository and its signing key.
RUN echo "deb https://artifacts.elastic.co/packages/6.x/apt stable main" > /etc/apt/sources.list.d/elastic-6.x.list
# NOTE(review): no --keyserver is given here; confirm the default keyserver is reachable at build time.
RUN apt-key adv --recv-keys D27D666CD88E42B4
# Refresh the index (now including the Elastic repository) and install the rest in one layer.
RUN apt-get update && apt-get -y install --no-install-recommends maven tesseract-ocr tesseract-ocr-spa elasticsearch git
# Name the Elasticsearch cluster "elasticsearch".
RUN sed -i "s/#cluster.name: my-application/cluster.name: elasticsearch/" /etc/elasticsearch/elasticsearch.yml
# Fetch and build the application.
RUN git clone https://gitlab.com/manalejandro/arjion
RUN mvn clean install -f /arjion/pom.xml
# Entry point script: start Elasticsearch, then run the application.
RUN echo "/etc/init.d/elasticsearch start && mvn spring-boot:run -f /arjion/pom.xml" > entrypoint.sh
# EXPOSE only documents the container port; publishing to the host is done with `docker run -p`.
EXPOSE 8080
ENTRYPOINT ["bash", "entrypoint.sh"]
|
@ -4,6 +4,11 @@
|
||||
|
||||
### Proof of Concept with [SpringBoot 2.1.0](https://start.spring.io/), [ElasticSearch](https://www.elastic.co/) and [Apache Tika](https://tika.apache.org/)
|
||||
|
||||
## Docker image
|
||||
|
||||
$ docker build -t debian:arjion --rm https://gitlab.com/manalejandro/arjion/raw/master/Dockerfile
|
||||
$ docker run -ti -p 8080:8080 debian:arjion
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
27
pom.xml
27
pom.xml
@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.manalejandro</groupId>
|
||||
<artifactId>arjion</artifactId>
|
||||
<version>0.1.0-SNAPSHOT</version>
|
||||
<version>0.2.0-SNAPSHOT</version>
|
||||
<packaging>war</packaging>
|
||||
|
||||
<name>arjion</name>
|
||||
@ -52,6 +52,31 @@
|
||||
<version>1.18</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.tika</groupId>
|
||||
<artifactId>tika-parsers</artifactId>
|
||||
<version>1.18</version>
|
||||
</dependency>
|
||||
|
||||
<!-- https://mvnrepository.com/artifact/com.github.jai-imageio/jai-imageio-jpeg2000 -->
|
||||
<dependency>
|
||||
<groupId>com.github.jai-imageio</groupId>
|
||||
<artifactId>jai-imageio-jpeg2000</artifactId>
|
||||
<version>1.3.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.levigo.jbig2</groupId>
|
||||
<artifactId>levigo-jbig2-imageio</artifactId>
|
||||
<version>2.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.github.jai-imageio</groupId>
|
||||
<artifactId>jai-imageio-core</artifactId>
|
||||
<version>1.4.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.webjars</groupId>
|
||||
<artifactId>bootstrap</artifactId>
|
||||
|
@ -1,12 +1,23 @@
|
||||
package com.manalejandro.arjion.controllers;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.text.Normalizer;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.manalejandro.arjion.model.Archivo;
|
||||
import com.manalejandro.arjion.model.Documento;
|
||||
import com.manalejandro.arjion.services.MainService;
|
||||
import com.manalejandro.arjion.vo.DetailVO;
|
||||
import com.manalejandro.arjion.vo.DocumentoVO;
|
||||
|
||||
import org.apache.tika.config.TikaConfig;
|
||||
@ -16,8 +27,15 @@ import org.apache.tika.language.LanguageIdentifier;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.parser.AutoDetectParser;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.apache.tika.parser.ocr.TesseractOCRConfig;
|
||||
import org.apache.tika.parser.pdf.PDFParserConfig;
|
||||
import org.apache.tika.sax.BodyContentHandler;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.core.io.ByteArrayResource;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.ui.Model;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
@ -31,12 +49,27 @@ import org.xml.sax.SAXException;
|
||||
@Controller
|
||||
public class MainController {
|
||||
|
||||
private final MainService mainService;
|
||||
|
||||
@Value("${arjion.uploadpath}")
|
||||
private String uploadpath;
|
||||
|
||||
@Value("${arjion.tesseractpath}")
|
||||
private String tesseractpath;
|
||||
|
||||
@Value("${arjion.tesseractdatapath}")
|
||||
private String tesseractdatapath;
|
||||
|
||||
/**
 * Creates the controller with the service it delegates every document operation to.
 *
 * @param mainService service used to store, count, list and look up documents
 */
@Autowired
public MainController(final MainService mainService) {
    this.mainService = mainService;
}
|
||||
|
||||
@RequestMapping(path = "/")
|
||||
public String indexPage(final Model model) {
|
||||
DocumentoVO documentoVO = new DocumentoVO();
|
||||
documentoVO.setCount(mainService.count());
|
||||
documentoVO.setDocumentos(mainService.findAllDocumento());
|
||||
model.addAttribute("documentoVO", documentoVO);
|
||||
return "index";
|
||||
}
|
||||
@ -50,22 +83,83 @@ public class MainController {
|
||||
public String uploadPage(final Model model, @RequestParam("archivos") MultipartFile[] archivos)
|
||||
throws IOException, TikaException, SAXException {
|
||||
DocumentoVO documentoVO = new DocumentoVO();
|
||||
documentoVO.setArchivos(new ArrayList<Archivo>());
|
||||
TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
|
||||
for (int i = 0; i < archivos.length; i++) {
|
||||
byte[] bytes = archivos[i].getBytes();
|
||||
Path path = Paths.get(uploadpath + archivos[i].getOriginalFilename());
|
||||
Files.write(path, bytes);
|
||||
Metadata metadata = new Metadata();
|
||||
AutoDetectParser parser = new AutoDetectParser(tikaConfig);
|
||||
ContentHandler handler = new BodyContentHandler(-1);
|
||||
TikaInputStream stream = TikaInputStream.get(bytes);
|
||||
parser.parse(stream, handler, metadata, new ParseContext());
|
||||
LanguageIdentifier identifier = new LanguageIdentifier(handler.toString());
|
||||
documentoVO.getArchivos().add(new Archivo(archivos[i].getOriginalFilename(), String.valueOf(archivos[i].getSize()),
|
||||
metadata.toString(), handler.toString(), identifier.getLanguage()));
|
||||
documentoVO.setCount(mainService.count());
|
||||
documentoVO.setDocumentos(mainService.findAllDocumento());
|
||||
if (archivos.length > 0) {
|
||||
// Recupera la configuración de Tika
|
||||
TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
|
||||
// Itera los archivos recibidos
|
||||
for (int i = 0; i < archivos.length; i++) {
|
||||
byte[] bytes = archivos[i].getBytes();
|
||||
// Normaliza el título de los archivos
|
||||
String normalized = Normalizer.normalize(archivos[i].getOriginalFilename(), Normalizer.Form.NFD),
|
||||
filename = normalized.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
|
||||
Path path = Paths.get(uploadpath + filename);
|
||||
// Instancias necesarias
|
||||
Metadata metadata = new Metadata();
|
||||
Parser parser = new AutoDetectParser(tikaConfig);
|
||||
PDFParserConfig pdfConfig = new PDFParserConfig();
|
||||
TesseractOCRConfig tesseractConfig = new TesseractOCRConfig();
|
||||
tesseractConfig.setTesseractPath(tesseractpath);
|
||||
tesseractConfig.setTessdataPath(tesseractdatapath);
|
||||
tesseractConfig.setLanguage("spa+eng");
|
||||
pdfConfig.setExtractInlineImages(true);
|
||||
ParseContext parseContext = new ParseContext();
|
||||
parseContext.set(TesseractOCRConfig.class, tesseractConfig);
|
||||
parseContext.set(PDFParserConfig.class, pdfConfig);
|
||||
// Usa -1 para no tener límite de 100000 chars
|
||||
ContentHandler handler = new BodyContentHandler(-1);
|
||||
// Castea los bytes al Stream de Tika
|
||||
TikaInputStream stream = TikaInputStream.get(bytes);
|
||||
// Parsea el contenido
|
||||
parser.parse(stream, handler, metadata, parseContext);
|
||||
// Identifica el idioma del archivo
|
||||
LanguageIdentifier identifier = new LanguageIdentifier(handler.toString());
|
||||
// Almacena en elasticsearch
|
||||
String[] names = metadata.names();
|
||||
Map<String, String> meta = new HashMap<String, String>();
|
||||
for (int j = 0; j < names.length; j++) {
|
||||
meta.put(names[j], metadata.get(names[j]));
|
||||
}
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
if (!mainService.save(new Documento(filename, Long.valueOf(archivos[i].getSize()).intValue(),
|
||||
mapper.valueToTree(meta), handler.toString(), identifier.getLanguage()))) {
|
||||
return "exists";
|
||||
} else {
|
||||
// Guarda el archivo en el directorio configurado en las properties
|
||||
Files.write(path, bytes);
|
||||
}
|
||||
// Añade los parámetros al VO para mostrar en la vista
|
||||
documentoVO.getArchivos().add(new Archivo(filename, Long.valueOf(archivos[i].getSize()).intValue(), meta,
|
||||
handler.toString(), identifier.getLanguage()));
|
||||
}
|
||||
}
|
||||
model.addAttribute("documentoVO", documentoVO);
|
||||
return "index";
|
||||
}
|
||||
}
|
||||
|
||||
@GetMapping(path = "/detail")
|
||||
public String detail(final Model model, @RequestParam(value = "nombre", required = true) String nombre) {
|
||||
DetailVO detailVO = new DetailVO();
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
Documento doc = mainService.findOne(nombre);
|
||||
detailVO.setArchivo(new Archivo(doc.getNombre(), doc.getTamano(),
|
||||
mapper.convertValue(doc.getMetadata(), Map.class), doc.getContenido(), doc.getLenguaje()));
|
||||
model.addAttribute("detailVO", detailVO);
|
||||
return "detail";
|
||||
}
|
||||
|
||||
@GetMapping(path = "/download")
|
||||
public ResponseEntity<ByteArrayResource> download(final HttpServletResponse response,
|
||||
@RequestParam(value = "filename", required = true) String filename)
|
||||
throws IOException, MalformedURLException {
|
||||
File file = new File(uploadpath + filename);
|
||||
Path path = Paths.get(file.getAbsolutePath());
|
||||
ByteArrayResource resource = new ByteArrayResource(Files.readAllBytes(path));
|
||||
String type = file.toURL().openConnection().guessContentTypeFromName(filename);
|
||||
HttpHeaders responseHeaders = new HttpHeaders();
|
||||
responseHeaders.add("Content-Disposition", "attachment; filename=" + URLEncoder.encode(filename, "UTF-8"));
|
||||
responseHeaders.add("Content-Type", type);
|
||||
return ResponseEntity.ok().contentLength(file.length()).headers(responseHeaders).body(resource);
|
||||
}
|
||||
}
|
@ -1,14 +1,16 @@
|
||||
package com.manalejandro.arjion.model;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class Archivo {
|
||||
|
||||
private String nombre;
|
||||
private String tamano;
|
||||
private String metadata;
|
||||
private Integer tamano;
|
||||
private Map metadata;
|
||||
private String contenido;
|
||||
private String lenguaje;
|
||||
|
||||
public Archivo(String nombre, String tamano, String metadata, String contenido, String lenguaje) {
|
||||
public Archivo(String nombre, Integer tamano, Map metadata, String contenido, String lenguaje) {
|
||||
this.nombre = nombre;
|
||||
this.tamano = tamano;
|
||||
this.metadata = metadata;
|
||||
@ -26,14 +28,14 @@ public class Archivo {
|
||||
/**
|
||||
* @return the tamano
|
||||
*/
|
||||
public String getTamano() {
|
||||
public Integer getTamano() {
|
||||
return tamano;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the metadata
|
||||
*/
|
||||
public String getMetadata() {
|
||||
public Map getMetadata() {
|
||||
return metadata;
|
||||
}
|
||||
|
||||
@ -54,14 +56,14 @@ public class Archivo {
|
||||
/**
|
||||
* @param tamano the tamano to set
|
||||
*/
|
||||
public void setTamano(String tamano) {
|
||||
public void setTamano(Integer tamano) {
|
||||
this.tamano = tamano;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param metadata the metadata to set
|
||||
*/
|
||||
public void setMetadata(String metadata) {
|
||||
public void setMetadata(Map metadata) {
|
||||
this.metadata = metadata;
|
||||
}
|
||||
|
||||
|
52
src/main/java/com/manalejandro/arjion/model/Consulta.java
Normal file
52
src/main/java/com/manalejandro/arjion/model/Consulta.java
Normal file
@ -0,0 +1,52 @@
|
||||
package com.manalejandro.arjion.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class Consulta {
|
||||
private List<Documento> documentos = new ArrayList<Documento>();
|
||||
private String suggest;
|
||||
private List<String> autocomplete = new ArrayList<String>();
|
||||
|
||||
/**
|
||||
* @return the documentos
|
||||
*/
|
||||
public List<Documento> getDocumentos() {
|
||||
return documentos;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the suggest
|
||||
*/
|
||||
public String getSuggest() {
|
||||
return suggest;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the autocomplete
|
||||
*/
|
||||
public List<String> getAutocomplete() {
|
||||
return autocomplete;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param documentos the documentos to set
|
||||
*/
|
||||
public void setDocumentos(List<Documento> documentos) {
|
||||
this.documentos = documentos;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param suggest the suggest to set
|
||||
*/
|
||||
public void setSuggest(String suggest) {
|
||||
this.suggest = suggest;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param autocomplete the autocomplete to set
|
||||
*/
|
||||
public void setAutocomplete(List<String> autocomplete) {
|
||||
this.autocomplete = autocomplete;
|
||||
}
|
||||
}
|
@ -1,32 +1,108 @@
|
||||
package com.manalejandro.arjion.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
|
||||
import org.springframework.data.annotation.Id;
|
||||
import org.springframework.data.elasticsearch.annotations.Document;
|
||||
import org.springframework.data.elasticsearch.annotations.Mapping;
|
||||
import org.springframework.data.elasticsearch.annotations.Setting;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
@Document(indexName = "#{@indexName}", type = "#{@documentType}")
|
||||
@Setting(settingPath = "/elasticsearch/settings.json")
|
||||
@Mapping(mappingPath = "/elasticsearch/mapping.json")
|
||||
public class Documento {
|
||||
@Id
|
||||
public Integer id;
|
||||
@Id
|
||||
public String nombre;
|
||||
public Integer tamano;
|
||||
public JsonNode metadata;
|
||||
public String contenido;
|
||||
public String lenguaje;
|
||||
|
||||
@JsonCreator
|
||||
public Documento(@JsonProperty("id") Integer id) {
|
||||
super();
|
||||
this.id = id;
|
||||
}
|
||||
@JsonCreator
|
||||
public Documento(@JsonProperty("nombre") String nombre, @JsonProperty("tamano") Integer tamano,
|
||||
@JsonProperty("metadata") JsonNode metadata, @JsonProperty("contenido") String contenido,
|
||||
@JsonProperty("lenguaje") String lenguaje) {
|
||||
this.nombre = nombre;
|
||||
this.tamano = tamano;
|
||||
this.metadata = metadata;
|
||||
this.contenido = contenido;
|
||||
this.lenguaje = lenguaje;
|
||||
}
|
||||
|
||||
@JsonProperty("id")
|
||||
public Integer getId() {
|
||||
return id;
|
||||
}
|
||||
/**
|
||||
* @return the nombre
|
||||
*/
|
||||
@JsonProperty("nombre")
|
||||
public String getNombre() {
|
||||
return nombre;
|
||||
}
|
||||
|
||||
public void setId(Integer id) {
|
||||
this.id = id;
|
||||
}
|
||||
/**
|
||||
* @param nombre the nombre to set
|
||||
*/
|
||||
public void setNombre(String nombre) {
|
||||
this.nombre = nombre;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the tamano
|
||||
*/
|
||||
@JsonProperty("tamano")
|
||||
public Integer getTamano() {
|
||||
return tamano;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param tamano the tamano to set
|
||||
*/
|
||||
public void setTamano(Integer tamano) {
|
||||
this.tamano = tamano;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the metadata
|
||||
*/
|
||||
@JsonProperty("metadata")
|
||||
public JsonNode getMetadata() {
|
||||
return metadata;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param metadata the metadata to set
|
||||
*/
|
||||
public void setMetadata(JsonNode metadata) {
|
||||
this.metadata = metadata;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the contenido
|
||||
*/
|
||||
@JsonProperty("contenido")
|
||||
public String getContenido() {
|
||||
return contenido;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param contenido the contenido to set
|
||||
*/
|
||||
public void setContenido(String contenido) {
|
||||
this.contenido = contenido;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the lenguaje
|
||||
*/
|
||||
@JsonProperty("lenguaje")
|
||||
public String getLenguaje() {
|
||||
return lenguaje;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param lenguaje the lenguaje to set
|
||||
*/
|
||||
public void setLenguaje(String lenguaje) {
|
||||
this.lenguaje = lenguaje;
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,23 @@
|
||||
package com.manalejandro.arjion.services;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
import java.util.List;
|
||||
|
||||
import com.manalejandro.arjion.model.Consulta;
|
||||
import com.manalejandro.arjion.model.Documento;
|
||||
|
||||
import org.springframework.data.domain.Pageable;
|
||||
|
||||
@Service
|
||||
public interface MainService {
|
||||
|
||||
public boolean save(Documento doc);
|
||||
|
||||
public long count();
|
||||
|
||||
public List<Documento> findAllDocumento();
|
||||
|
||||
public Documento findOne(String nombre);
|
||||
|
||||
public Integer maxTamano();
|
||||
|
||||
public Consulta search(String busqueda, String[] tipo, Integer tamano, Pageable pageable);
|
||||
}
|
||||
|
@ -1,5 +1,127 @@
|
||||
package com.manalejandro.arjion.services;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.manalejandro.arjion.model.Consulta;
|
||||
import com.manalejandro.arjion.model.Documento;
|
||||
import com.manalejandro.arjion.repositories.MainRepository;
|
||||
|
||||
import org.elasticsearch.action.search.SearchResponse;
|
||||
import org.elasticsearch.client.Client;
|
||||
import org.elasticsearch.index.query.BoolQueryBuilder;
|
||||
import org.elasticsearch.index.query.QueryBuilders;
|
||||
import org.elasticsearch.search.aggregations.AggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.AggregationBuilders;
|
||||
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
|
||||
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
|
||||
import org.elasticsearch.search.suggest.SuggestBuilder;
|
||||
import org.elasticsearch.search.suggest.SuggestBuilders;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
import org.springframework.data.domain.Sort;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
public class MainServiceImpl implements MainService {
|
||||
|
||||
private final ApplicationContext appContext;
|
||||
private final MainRepository mainRepository;
|
||||
|
||||
@Value("#{@indexName}")
|
||||
private String index;
|
||||
@Value("#{@documentType}")
|
||||
private String document;
|
||||
|
||||
@Autowired
|
||||
public MainServiceImpl(MainRepository mainRepository, ApplicationContext appContext) {
|
||||
this.mainRepository = mainRepository;
|
||||
this.appContext = appContext;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean save(Documento doc) {
|
||||
if (!mainRepository.existsById(doc.nombre)) {
|
||||
if (mainRepository.save(doc) != null)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long count() {
|
||||
return mainRepository.count();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Documento> findAllDocumento() {
|
||||
List<Documento> docs = new ArrayList<Documento>();
|
||||
mainRepository.findAll().forEach(doc -> {
|
||||
docs.add((Documento) doc);
|
||||
});
|
||||
return docs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Documento findOne(String nombre) {
|
||||
return mainRepository.findById(nombre).get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer maxTamano() {
|
||||
return mainRepository.findAll(new Sort(Sort.Direction.DESC, "tamano")).iterator().next().getTamano();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Consulta search(String busqueda, String[] tipo, Integer tamano, Pageable pageable) {
|
||||
Client client = (Client) appContext.getBean("client");
|
||||
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
|
||||
if (busqueda != null && !"null".equals(busqueda) && !busqueda.isEmpty()) {
|
||||
boolQueryBuilder.must(QueryBuilders.matchQuery("nombre", busqueda));
|
||||
boolQueryBuilder.should(QueryBuilders.matchQuery("contenido", busqueda));
|
||||
}
|
||||
if (tipo != null && tipo.length > 0)
|
||||
boolQueryBuilder.filter(QueryBuilders.termsQuery("tipo", tipo));
|
||||
if (tamano != null && tamano >= 0)
|
||||
boolQueryBuilder.must(QueryBuilders.rangeQuery("tamano").to(tamano).includeUpper(true));
|
||||
AggregationBuilder aggregation = AggregationBuilders.terms("by_xarchivo").field("x_archivo").size(10000);
|
||||
SuggestBuilder suggest = new SuggestBuilder()
|
||||
.addSuggestion("suggest", SuggestBuilders.completionSuggestion("nombre").text(busqueda).size(10))
|
||||
.addSuggestion("phrase", SuggestBuilders.phraseSuggestion("nombre").text(busqueda).size(1)
|
||||
.realWordErrorLikelihood((float) 0.95).maxErrors((float) 0.5).gramSize(2));
|
||||
System.out.println(boolQueryBuilder);
|
||||
SearchResponse response = client.prepareSearch(index).setQuery(boolQueryBuilder).addAggregation(aggregation)
|
||||
.suggest(suggest).setSize(pageable.getPageSize()).setFrom(pageable.getPageNumber()).execute()
|
||||
.actionGet();
|
||||
Consulta consulta = new Consulta();
|
||||
consulta.setSuggest(response.getSuggest().getSuggestion("phrase").getEntries().get(0).getOptions().size() > 0
|
||||
? response.getSuggest().getSuggestion("phrase").getEntries().get(0).getOptions().get(0).getText()
|
||||
.string()
|
||||
: "");
|
||||
for (Entry<? extends Option> entry : response.getSuggest().getSuggestion("suggest").getEntries()) {
|
||||
entry.getOptions().forEach(option -> {
|
||||
String suggestText = option.getText().string().trim(),
|
||||
autocompleteClean = busqueda.replaceAll("[^\\p{Alnum}\\p{IsAlphabetic} ]", "");
|
||||
for (String item : autocompleteClean.split(" ")) {
|
||||
if (item.length() > 0) {
|
||||
consulta.getAutocomplete().add(
|
||||
suggestText.replaceAll("(?i)((?!<)" + item + "(?![^<>]*>))", "<strong>$1</strong>"));
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
try {
|
||||
consulta.setDocumentos(mapper.readValue(response.getHits().getHits().toString(), List.class));
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return consulta;
|
||||
}
|
||||
}
|
||||
|
22
src/main/java/com/manalejandro/arjion/vo/DetailVO.java
Normal file
22
src/main/java/com/manalejandro/arjion/vo/DetailVO.java
Normal file
@ -0,0 +1,22 @@
|
||||
package com.manalejandro.arjion.vo;
|
||||
|
||||
import com.manalejandro.arjion.model.Archivo;
|
||||
|
||||
public class DetailVO {
|
||||
|
||||
private Archivo archivo;
|
||||
|
||||
/**
|
||||
* @return the archivo
|
||||
*/
|
||||
public Archivo getArchivo() {
|
||||
return archivo;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param archivo the archivo to set
|
||||
*/
|
||||
public void setArchivo(Archivo archivo) {
|
||||
this.archivo = archivo;
|
||||
}
|
||||
}
|
@ -1,11 +1,16 @@
|
||||
package com.manalejandro.arjion.vo;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.manalejandro.arjion.model.Archivo;
|
||||
import com.manalejandro.arjion.model.Documento;
|
||||
|
||||
public class DocumentoVO {
|
||||
private List<Archivo> archivos;
|
||||
|
||||
private List<Archivo> archivos = new ArrayList<Archivo>();
|
||||
private long count;
|
||||
private List<Documento> documentos = new ArrayList<Documento>();
|
||||
|
||||
/**
|
||||
* @return the archivos
|
||||
@ -20,4 +25,26 @@ public class DocumentoVO {
|
||||
public void setArchivos(List<Archivo> archivos) {
|
||||
this.archivos = archivos;
|
||||
}
|
||||
|
||||
public long getCount() {
|
||||
return count;
|
||||
}
|
||||
|
||||
public void setCount(long count) {
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the documentos
|
||||
*/
|
||||
public List<Documento> getDocumentos() {
|
||||
return documentos;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param documentos the documentos to set
|
||||
*/
|
||||
public void setDocumentos(List<Documento> documentos) {
|
||||
this.documentos = documentos;
|
||||
}
|
||||
}
|
||||
|
@ -7,10 +7,12 @@ elasticsearch.nodename=arjion
|
||||
arjion.indexName=documentos
|
||||
arjion.documentType=documento
|
||||
arjion.uploadpath=/upload/
|
||||
arjion.tesseractpath=/usr/bin
|
||||
arjion.tesseractdatapath=/usr/share/tesseract-ocr
|
||||
spring.main.allow-bean-definition-overriding=true
|
||||
spring.thymeleaf.enabled=true
|
||||
spring.thymeleaf.prefix=classpath:/templates/
|
||||
spring.thymeleaf.suffix=.html
|
||||
spring.thymeleaf.cache=false
|
||||
spring.servlet.multipart.max-file-size=10MB
|
||||
spring.servlet.multipart.max-request-size=20MB
|
||||
spring.servlet.multipart.max-file-size=20MB
|
||||
spring.servlet.multipart.max-request-size=100MB
|
@ -1,16 +1,38 @@
|
||||
{
|
||||
"documento": {
|
||||
"properties": {
|
||||
"@timestamp": {
|
||||
"type": "date",
|
||||
"format": "strict_date_optional_time||epoch_millis"
|
||||
},
|
||||
"@version": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"id": {
|
||||
"type": "long"
|
||||
}
|
||||
}
|
||||
}
|
||||
"documento": {
|
||||
"dynamic_templates": [
|
||||
{
|
||||
"metadata_as_keywords": {
|
||||
"path_match": "metadata.*",
|
||||
"mapping": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"@timestamp": {
|
||||
"type": "date",
|
||||
"format": "strict_date_optional_time||epoch_millis"
|
||||
},
|
||||
"@version": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"nombre": {
|
||||
"type": "text"
|
||||
},
|
||||
"tamano": {
|
||||
"type": "long"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object"
|
||||
},
|
||||
"contenido": {
|
||||
"type": "text"
|
||||
},
|
||||
"lenguaje": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,4 @@
|
||||
hr {
|
||||
width: 100%;
|
||||
text-align: center;
|
||||
}
|
46
src/main/resources/templates/detail.html
Normal file
46
src/main/resources/templates/detail.html
Normal file
@ -0,0 +1,46 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Arjion</title>
|
||||
<link rel="stylesheet" th:href="@{/webjars/bootstrap/3.3.7-1/css/bootstrap.min.css}">
|
||||
<link rel="stylesheet" th:href="@{/css/main.css}">
|
||||
<script th:src="@{/webjars/jquery/3.1.1-1/jquery.min.js}"></script>
|
||||
<script th:src="@{/webjars/bootstrap/3.3.7-1/js/bootstrap.min.js}"></script>
|
||||
<script th:src="@{/js/main.js}"></script>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<header class="text-center">
|
||||
<a th:href="@{/}">
|
||||
<h1 class="text-primary">Arjion</h1>
|
||||
</a>
|
||||
<h3 class="text-warning">[[${detailVO.archivo.nombre}]]</h3>
|
||||
</header>
|
||||
<section class="col-md-12">
|
||||
<hr>
|
||||
</section>
|
||||
<section>
<div class="col-md-12">
|
||||
<span class="col-md-1 text-muted">Tamaño</span>
|
||||
<span class="col-md-11 text-muted">[[${detailVO.archivo.tamano}]] bytes</span>
|
||||
<span class="col-md-1 text-muted">Lenguaje</span>
|
||||
<span class="col-md-11 text-muted">[[${detailVO.archivo.lenguaje}]]</span>
|
||||
<span class="col-md-1 text-success">Metadatos</span>
|
||||
<span class="col-md-11 text-success"><ul><li th:each="meta : ${detailVO.archivo.metadata}"><span th:text="${meta.key}"></span>: <span th:text="${meta.value}"></span></li></ul></span>
|
||||
<span class="col-md-1 text-warning">Contenido</span>
|
||||
<pre class="col-md-11 text-warning">[[${detailVO.archivo.contenido}]]</pre>
|
||||
</div>
|
||||
</section>
|
||||
<section class="col-md-12">
|
||||
<hr>
|
||||
</section>
|
||||
<footer class="col-md-12 text-center">
|
||||
<span class="col-md-12">
|
||||
<a class="btn btn-primary" th:href="@{/}">Volver</a>
|
||||
</span>
|
||||
<span>2018</span>
|
||||
</footer>
|
||||
</body>
|
||||
|
||||
</html>
|
18
src/main/resources/templates/exists.html
Normal file
18
src/main/resources/templates/exists.html
Normal file
@ -0,0 +1,18 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Error</title>
|
||||
<link rel="stylesheet" th:href="@{/webjars/bootstrap/3.3.7-1/css/bootstrap.min.css}">
|
||||
<script th:src="@{/webjars/jquery/3.1.1-1/jquery.min.js}"></script>
|
||||
<script th:src="@{/webjars/bootstrap/3.3.7-1/js/bootstrap.min.js}"></script>
|
||||
</head>
|
||||
|
||||
<body class="text-center text-danger">
|
||||
<h2>Error</h2>
|
||||
<h4>El archivo ya existe o hubo un error</h4>
|
||||
<button class="btn btn-primary" th:onclick="'window.location.pathname=\'' + @{/} + '\''">Volver</button>
|
||||
</body>
|
||||
|
||||
</html>
|
@ -2,57 +2,83 @@
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Arjion</title>
|
||||
<link rel="stylesheet" th:href="@{/webjars/bootstrap/3.3.7-1/css/bootstrap.min.css}">
|
||||
<link rel="stylesheet" th:href="@{/css/main.css}">
|
||||
<script th:src="@{/webjars/jquery/3.1.1-1/jquery.min.js}"></script>
|
||||
<script th:src="@{/webjars/bootstrap/3.3.7-1/js/bootstrap.min.js}"></script>
|
||||
<script th:src="@{/js/main.js}"></script>
|
||||
<meta charset="UTF-8">
|
||||
<title>Arjion</title>
|
||||
<link rel="stylesheet" th:href="@{/webjars/bootstrap/3.3.7-1/css/bootstrap.min.css}">
|
||||
<link rel="stylesheet" th:href="@{/css/main.css}">
|
||||
<script th:src="@{/webjars/jquery/3.1.1-1/jquery.min.js}"></script>
|
||||
<script th:src="@{/webjars/bootstrap/3.3.7-1/js/bootstrap.min.js}"></script>
|
||||
<script th:src="@{/js/main.js}"></script>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<header class="text-center">
|
||||
<a th:href="@{/}"><h1 class="text-primary">Arjion</h1></a>
|
||||
</header>
|
||||
<section class="text-center col-md-4 col-md-offset-4">
|
||||
<form class="form-horizontal form-label-left" method="post" enctype="multipart/form-data" novalidate="novalidate" th:action="@{/upload}"
|
||||
th:object="${documentForm}">
|
||||
<div class="input-group">
|
||||
<span class="input-group-addon">
|
||||
<span class="glyphicon glyphicon-file"></span>
|
||||
</span>
|
||||
<label class="input-group-addon custom-file">
|
||||
<input type="file" name="archivos" class="custom-file-input" multiple="multiple" />
|
||||
<span class="custom-file-control"></span>
|
||||
</label>
|
||||
<span class="input-group-addon">
|
||||
<button class="btn btn-primary" type="submit">
|
||||
<span class="glyphicon glyphicon-level-up"></span>
|
||||
</button>
|
||||
</span>
|
||||
</div>
|
||||
</form>
|
||||
</section>
|
||||
<hr class="col-md-12">
|
||||
<section>
|
||||
<div th:each="doc : ${documentoVO.archivos}" class="col-md-12">
|
||||
<span class="col-md-1 text-primary lead">Nombre</span>
|
||||
<span class="col-md-11 text-primary lead">[[${doc.nombre}]]</span>
|
||||
<span class="col-md-1 text-muted">Tamaño</span>
|
||||
<span class="col-md-11 text-muted">[[${doc.tamano}]] bytes</span>
|
||||
<span class="col-md-1 text-muted">Lenguaje</span>
|
||||
<span class="col-md-11 text-muted">[[${doc.lenguaje}]]</span>
|
||||
<span class="col-md-1 text-success">Metadatos</span>
|
||||
<span class="col-md-11 text-success">[[${doc.metadata}]]</span>
|
||||
<span class="col-md-1 text-warning">Contenido</span>
|
||||
<pre class="col-md-11 text-warning">[[${doc.contenido}]]</pre>
|
||||
<hr class="col-md-12">
|
||||
</div>
|
||||
</section>
|
||||
<footer class="col-md-12 text-center">
|
||||
<span>2018</span>
|
||||
</footer>
|
||||
<header class="text-center"> <!-- Page header: title linking to the application root, then documentoVO.count followed by "archivos" -->
|
||||
<a th:href="@{/}">
|
||||
<h1 class="text-primary">Arjion</h1>
|
||||
</a>
|
||||
<h3 class="text-warning">[[${documentoVO.count}]] archivos</h3>
|
||||
</header>
|
||||
<section class="text-center col-md-4 col-md-offset-4"> <!-- Upload form: multipart POST to /upload; the "archivos" file input accepts multiple files -->
|
||||
<form class="form-horizontal form-label-left" method="post" enctype="multipart/form-data" novalidate="novalidate" th:action="@{/upload}"
|
||||
th:object="${documentForm}">
|
||||
<div class="input-group">
|
||||
<span class="input-group-addon">
|
||||
<span class="glyphicon glyphicon-file"></span>
|
||||
</span>
|
||||
<label class="input-group-addon custom-file">
|
||||
<input type="file" name="archivos" class="custom-file-input" multiple="multiple" />
|
||||
<span class="custom-file-control"></span>
|
||||
</label>
|
||||
<span class="input-group-addon">
|
||||
<button class="btn btn-primary" type="submit">
|
||||
<span class="glyphicon glyphicon-level-up"></span>
|
||||
</button>
|
||||
</span>
|
||||
</div>
|
||||
</form>
|
||||
</section>
|
||||
<section class="col-md-12"> <!-- Full-width horizontal rule between the upload form and the uploaded-files listing -->
|
||||
<hr>
|
||||
</section>
|
||||
<section class="col-md-12"> <!-- One entry per item of documentoVO.archivos: name, size, language, metadata entries and content -->
|
||||
<div th:each="arc : ${documentoVO.archivos}" class="col-md-12">
|
||||
<span class="col-md-1 text-primary lead">Nombre</span>
|
||||
<span class="col-md-11 text-primary lead">[[${arc.nombre}]]</span>
|
||||
<span class="col-md-1 text-muted">Tamaño</span>
|
||||
<span class="col-md-11 text-muted">[[${arc.tamano}]] bytes</span>
|
||||
<span class="col-md-1 text-muted">Lenguaje</span>
|
||||
<span class="col-md-11 text-muted">[[${arc.lenguaje}]]</span>
|
||||
<span class="col-md-1 text-success">Metadatos</span>
|
||||
<div class="col-md-11 text-success"><ul><li th:each="meta : ${arc.metadata}"><span th:text="${meta.key}"></span>: <span th:text="${meta.value}"></span></li></ul></div> <!-- div, not span: a list is not allowed inside an inline span -->
|
||||
<span class="col-md-1 text-warning">Contenido</span>
|
||||
<pre class="col-md-11 text-warning">[[${arc.contenido}]]</pre>
|
||||
<div class="col-md-12"> <!-- div, not span: hr is not allowed inside an inline span -->
|
||||
<hr>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
<section class="col-md-12"> <!-- Full-width horizontal rule between the uploaded-files listing and the documents listing -->
|
||||
<hr>
|
||||
</section>
|
||||
<section class="col-md-12 text-center"> <!-- One entry per item of documentoVO.documentos: detail and download links, size, language, and metadata/content abbreviated to 200 characters -->
|
||||
<div th:each="doc : ${documentoVO.documentos}">
|
||||
<div>
|
||||
<span class="col-md-12 text-primary">
|
||||
<a th:href="@{'/detail'(nombre=${doc.nombre})}" class="lead">[[${doc.nombre}]]</a> -
|
||||
<a th:href="@{'/download'(filename=${doc.nombre})}">download</a>
|
||||
<br>[[${doc.tamano}]] bytes
|
||||
<br>[[${doc.lenguaje}]]</span>
|
||||
<span class="col-md-12 text-success">[[${#strings.abbreviate(doc.metadata,200)}]]</span>
|
||||
<span class="col-md-12 text-warning">[[${#strings.abbreviate(doc.contenido,200)}]]</span>
|
||||
</div>
|
||||
<div class="col-md-12"> <!-- div, not span: hr is not allowed inside an inline span -->
|
||||
<hr>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
<footer class="col-md-12 text-center"> <!-- Page footer showing the year -->
|
||||
<span>2018</span>
|
||||
</footer>
|
||||
</body>
|
||||
|
||||
</html>
|
Loading…
x
Reference in New Issue
Block a user