initial commit

Este commit está contenido en:
ale
2021-12-21 01:46:14 +01:00
commit 7573449466
Se han modificado 36 ficheros con 1817 adiciones y 0 borrados

34
.gitignore vendido Archivo normal
Ver fichero

@@ -0,0 +1,34 @@
HELP.md
target/
!.mvn/wrapper/maven-wrapper.jar
!**/src/main/**/target/
!**/src/test/**/target/
### STS ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
### IntelliJ IDEA ###
.idea
*.iws
*.iml
*.ipr
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
build/
!**/src/main/**/build/
!**/src/test/**/build/
### VS Code ###
.vscode/
esdata/

117
.mvn/wrapper/MavenWrapperDownloader.java vendido Archivo normal
Ver fichero

@@ -0,0 +1,117 @@
/*
* Copyright 2007-present the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.net.*;
import java.io.*;
import java.nio.channels.*;
import java.util.Properties;
/**
 * Stand-alone downloader for the maven-wrapper.jar.
 *
 * <p>It is invoked with the project base directory as its only argument, reads an optional
 * {@code wrapperUrl} override from {@code .mvn/wrapper/maven-wrapper.properties} and stores the
 * downloaded jar at {@code .mvn/wrapper/maven-wrapper.jar} below that base directory.
 */
public class MavenWrapperDownloader {

    private static final String WRAPPER_VERSION = "0.5.6";

    /**
     * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided.
     */
    private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/"
            + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar";

    /**
     * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to
     * use instead of the default one.
     */
    private static final String MAVEN_WRAPPER_PROPERTIES_PATH =
            ".mvn/wrapper/maven-wrapper.properties";

    /**
     * Path where the maven-wrapper.jar will be saved to.
     */
    private static final String MAVEN_WRAPPER_JAR_PATH =
            ".mvn/wrapper/maven-wrapper.jar";

    /**
     * Name of the property which should be used to override the default download url for the wrapper.
     */
    private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl";

    /**
     * Entry point. Terminates the JVM with exit code 0 after a successful download and with
     * exit code 1 when the base directory argument is missing or the download fails.
     *
     * @param args {@code args[0]} is the project base directory
     */
    public static void main(String[] args) {
        System.out.println("- Downloader started");
        if (args.length == 0) {
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
            System.out.println("- ERROR missing base directory argument");
            System.exit(1);
            return;
        }
        File baseDirectory = new File(args[0]);
        System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath());

        // If the maven-wrapper.properties exists, read it and check if it contains a custom
        // wrapperUrl parameter.
        File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH);
        String url = DEFAULT_DOWNLOAD_URL;
        if (mavenWrapperPropertyFile.exists()) {
            // try-with-resources closes the stream on every path.
            try (FileInputStream propertiesStream = new FileInputStream(mavenWrapperPropertyFile)) {
                Properties mavenWrapperProperties = new Properties();
                mavenWrapperProperties.load(propertiesStream);
                url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url);
            } catch (IOException e) {
                // Best effort: keep the default URL when the properties cannot be read.
                System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'");
            }
        }
        System.out.println("- Downloading from: " + url);

        File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH);
        File outputDirectory = outputFile.getParentFile();
        if (!outputDirectory.exists() && !outputDirectory.mkdirs()) {
            System.out.println(
                    "- ERROR creating output directory '" + outputDirectory.getAbsolutePath() + "'");
        }
        System.out.println("- Downloading to: " + outputFile.getAbsolutePath());
        try {
            downloadFileFromURL(url, outputFile);
            System.out.println("Done");
            System.exit(0);
        } catch (Throwable e) {
            System.out.println("- Error downloading");
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Copies the content behind {@code urlString} into {@code destination}.
     *
     * <p>When both the {@code MVNW_USERNAME} and {@code MVNW_PASSWORD} environment variables are
     * set, they are installed as the default {@link Authenticator} before the URL is opened.
     *
     * @param urlString   URL to read from
     * @param destination file to write to
     * @throws Exception if the URL is malformed or reading/writing fails
     */
    private static void downloadFileFromURL(String urlString, File destination) throws Exception {
        String username = System.getenv("MVNW_USERNAME");
        String passwordValue = System.getenv("MVNW_PASSWORD");
        if (username != null && passwordValue != null) {
            char[] password = passwordValue.toCharArray();
            Authenticator.setDefault(new Authenticator() {
                @Override
                protected PasswordAuthentication getPasswordAuthentication() {
                    return new PasswordAuthentication(username, password);
                }
            });
        }
        URL website = new URL(urlString);
        // Both resources are closed even when the transfer throws; the source is opened first
        // so a failing URL never creates or truncates the destination file.
        try (ReadableByteChannel rbc = Channels.newChannel(website.openStream());
             FileOutputStream fos = new FileOutputStream(destination)) {
            fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
        }
    }
}

BIN
.mvn/wrapper/maven-wrapper.jar vendido Archivo normal

Archivo binario no mostrado.

2
.mvn/wrapper/maven-wrapper.properties vendido Archivo normal
Ver fichero

@@ -0,0 +1,2 @@
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.3/apache-maven-3.8.3-bin.zip
wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar

9
Dockerfile Archivo normal
Ver fichero

@@ -0,0 +1,9 @@
# Image for running arjion2: JDK 11 on Debian bullseye plus Tesseract OCR with Spanish data.
FROM openjdk:11-jdk-bullseye
# Make every JVM started in the container default to UTF-8 file encoding.
# Uses the key=value form; the space-separated ENV form is legacy syntax.
ENV JAVA_TOOL_OPTIONS=-Dfile.encoding=UTF8
# apt-get is used instead of apt because apt has no stable command-line interface for scripts.
RUN apt-get update && apt-get -y upgrade && apt-get -y install tesseract-ocr tesseract-ocr-spa && apt-get clean
# Unprivileged account (uid/gid 1000) that owns the upload and application directories.
RUN addgroup --system --gid 1000 user
RUN adduser --system --uid 1000 --group user
RUN mkdir -p /upload /arjion2
# owner:group with a colon; the dot separator is a deprecated chown form.
RUN chown -R user:user /upload /arjion2
USER user
WORKDIR /arjion2

14
README.md Archivo normal
Ver fichero

@@ -0,0 +1,14 @@
## Arjion2
### del griego `archivo`
### Proof of Concept with [SpringBoot 2.6.1](https://start.spring.io/), [ElasticSearch](https://www.elastic.co/) and [Apache Tika](https://tika.apache.org/)
## Docker image
$ docker-compose build
$ docker-compose up -d
## License
MIT

43
docker-compose.yml Archivo normal
Ver fichero

@@ -0,0 +1,43 @@
version: "2.3"
services:
arjion2:
build: .
image: arjion2
hostname: arjion2
container_name: arjion2
restart: always
entrypoint:
- ./mvnw
- spring-boot:run
volumes:
- ./:/arjion2
ports:
- 8080:8080
networks:
arjion2:
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:7.16.2-amd64
hostname: elasticsearch
container_name: elasticsearch
restart: always
environment:
- node.name=arjion2
- cluster.name=elasticsearch
- discovery.type=single-node
- bootstrap.memory_lock=true
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
ulimits:
memlock:
soft: -1
hard: -1
volumes:
- ./esdata:/usr/share/elasticsearch/data
expose:
- 9200
networks:
arjion2:
networks:
arjion2:

310
mvnw vendido Archivo ejecutable
Ver fichero

@@ -0,0 +1,310 @@
#!/bin/sh
# ----------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# Maven Start Up Batch script
#
# Required ENV vars:
# ------------------
# JAVA_HOME - location of a JDK home dir
#
# Optional ENV vars
# -----------------
# M2_HOME - location of maven2's installed home dir
# MAVEN_OPTS - parameters passed to the Java VM when running Maven
# e.g. to debug Maven itself, use
# set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
# MAVEN_SKIP_RC - flag to disable loading of mavenrc files
# ----------------------------------------------------------------------------
# Source the optional system-wide and per-user mavenrc files unless MAVEN_SKIP_RC is set.
if [ -z "$MAVEN_SKIP_RC" ] ; then
if [ -f /etc/mavenrc ] ; then
. /etc/mavenrc
fi
if [ -f "$HOME/.mavenrc" ] ; then
. "$HOME/.mavenrc"
fi
fi
# OS specific support. $var _must_ be set to either true or false.
cygwin=false;
darwin=false;
mingw=false
case "`uname`" in
CYGWIN*) cygwin=true ;;
MINGW*) mingw=true;;
Darwin*) darwin=true
# Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home
# See https://developer.apple.com/library/mac/qa/qa1170/_index.html
if [ -z "$JAVA_HOME" ]; then
if [ -x "/usr/libexec/java_home" ]; then
export JAVA_HOME="`/usr/libexec/java_home`"
else
export JAVA_HOME="/Library/Java/Home"
fi
fi
;;
esac
# When /etc/gentoo-release is readable and JAVA_HOME is still empty, ask java-config for it.
if [ -z "$JAVA_HOME" ] ; then
if [ -r /etc/gentoo-release ] ; then
JAVA_HOME=`java-config --jre-home`
fi
fi
# Derive M2_HOME from the location of this script when it is not preset.
if [ -z "$M2_HOME" ] ; then
## resolve links - $0 may be a link to maven's home
PRG="$0"
# need this for relative symlinks
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG="`dirname "$PRG"`/$link"
fi
done
# Remember the working directory because the cd below changes it.
saveddir=`pwd`
M2_HOME=`dirname "$PRG"`/..
# make it fully qualified
M2_HOME=`cd "$M2_HOME" && pwd`
cd "$saveddir"
# echo Using m2 at $M2_HOME
fi
# For Cygwin, ensure paths are in UNIX format before anything is touched
if $cygwin ; then
[ -n "$M2_HOME" ] &&
M2_HOME=`cygpath --unix "$M2_HOME"`
[ -n "$JAVA_HOME" ] &&
JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
[ -n "$CLASSPATH" ] &&
CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
fi
# For Mingw, ensure paths are in UNIX format before anything is touched
if $mingw ; then
[ -n "$M2_HOME" ] &&
M2_HOME="`(cd "$M2_HOME"; pwd)`"
[ -n "$JAVA_HOME" ] &&
JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`"
fi
# JAVA_HOME is still empty: try to infer it from the javac found by `which`.
if [ -z "$JAVA_HOME" ]; then
javaExecutable="`which javac`"
# The expr test rejects output whose first word is "no" -- presumably the
# "no javac in ..." message some `which` implementations print; confirm.
if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then
# readlink(1) is not available as standard on Solaris 10.
readLink=`which readlink`
if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then
if $darwin ; then
javaHome="`dirname \"$javaExecutable\"`"
javaExecutable="`cd \"$javaHome\" && pwd -P`/javac"
else
javaExecutable="`readlink -f \"$javaExecutable\"`"
fi
# JAVA_HOME is the part of the resolved javac directory that precedes /bin.
javaHome="`dirname \"$javaExecutable\"`"
javaHome=`expr "$javaHome" : '\(.*\)/bin'`
JAVA_HOME="$javaHome"
export JAVA_HOME
fi
fi
fi
# Choose the java launcher: a preset JAVACMD wins, then JAVA_HOME, then `which java`.
if [ -z "$JAVACMD" ] ; then
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
else
JAVACMD="`which java`"
fi
fi
# Abort when the chosen launcher is not an executable file.
if [ ! -x "$JAVACMD" ] ; then
echo "Error: JAVA_HOME is not defined correctly." >&2
echo " We cannot execute $JAVACMD" >&2
exit 1
fi
if [ -z "$JAVA_HOME" ] ; then
echo "Warning: JAVA_HOME environment variable is not set."
fi
# NOTE(review): CLASSWORLDS_LAUNCHER is not referenced again in the visible part of this script.
CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher
# traverses directory structure from process work directory to filesystem root
# first directory with .mvn subdirectory is considered project base directory
#
# $1 - directory to start the search from (required)
# Prints the detected base directory on stdout; when no ancestor contains a
# .mvn directory the start directory itself is printed.
# Returns 1 (after printing a message) when $1 is empty.
find_maven_basedir() {
  if [ -z "$1" ]
  then
    echo "Path not specified to find_maven_basedir"
    return 1
  fi

  basedir="$1"
  wdir="$1"
  while [ "$wdir" != '/' ] ; do
    if [ -d "$wdir"/.mvn ] ; then
      basedir=$wdir
      break
    fi
    # workaround for JBEAP-8937 (on Solaris 10/Sparc)
    if [ -d "${wdir}" ]; then
      # Stop when the parent cannot be entered; otherwise wdir would never
      # change and the loop would not terminate.
      wdir=`cd "$wdir/.." && pwd` || break
    else
      # wdir is not a directory, so there is nothing to walk up from.
      break
    fi
    # end of workaround
  done
  echo "${basedir}"
}
# Prints the content of file $1 as a single line: newlines are turned into
# spaces and runs of spaces are squeezed. Prints nothing when $1 is not a
# regular file.
concat_lines() {
  [ -f "$1" ] || return 0
  echo "$(tr -s '\n' ' ' < "$1")"
}
BASE_DIR=`find_maven_basedir "$(pwd)"`
if [ -z "$BASE_DIR" ]; then
  exit 1;
fi

##########################################################################################
# Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
# This allows using the maven wrapper in projects that prohibit checking in binary data.
#
# Tools are tried in this order: wget, curl, then the bundled MavenWrapperDownloader.java.
# MVNW_REPOURL replaces the repository root, a wrapperUrl entry in
# maven-wrapper.properties replaces the whole URL, and MVNW_USERNAME plus
# MVNW_PASSWORD (both must be set) are passed to wget/curl as credentials.
##########################################################################################
if [ -r "$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" ]; then
  if [ "$MVNW_VERBOSE" = true ]; then
    echo "Found .mvn/wrapper/maven-wrapper.jar"
  fi
else
  if [ "$MVNW_VERBOSE" = true ]; then
    echo "Couldn't find .mvn/wrapper/maven-wrapper.jar, downloading it ..."
  fi
  if [ -n "$MVNW_REPOURL" ]; then
    jarUrl="$MVNW_REPOURL/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
  else
    jarUrl="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
  fi
  # A wrapperUrl entry in the properties file takes precedence over both defaults.
  while IFS="=" read key value; do
    case "$key" in (wrapperUrl) jarUrl="$value"; break ;;
    esac
  done < "$BASE_DIR/.mvn/wrapper/maven-wrapper.properties"
  if [ "$MVNW_VERBOSE" = true ]; then
    echo "Downloading from: $jarUrl"
  fi
  wrapperJarPath="$BASE_DIR/.mvn/wrapper/maven-wrapper.jar"
  if $cygwin; then
    wrapperJarPath=`cygpath --path --windows "$wrapperJarPath"`
  fi

  if command -v wget > /dev/null; then
    if [ "$MVNW_VERBOSE" = true ]; then
      echo "Found wget ... using wget"
    fi
    if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
      wget "$jarUrl" -O "$wrapperJarPath"
    else
      # Quoted so credentials containing spaces or glob characters stay one argument.
      wget --http-user="$MVNW_USERNAME" --http-password="$MVNW_PASSWORD" "$jarUrl" -O "$wrapperJarPath"
    fi
  elif command -v curl > /dev/null; then
    if [ "$MVNW_VERBOSE" = true ]; then
      echo "Found curl ... using curl"
    fi
    if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
      curl -o "$wrapperJarPath" "$jarUrl" -f
    else
      curl --user "$MVNW_USERNAME:$MVNW_PASSWORD" -o "$wrapperJarPath" "$jarUrl" -f
    fi
  else
    if [ "$MVNW_VERBOSE" = true ]; then
      echo "Falling back to using Java to download"
    fi
    javaClass="$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.java"
    # The compiled class is looked up next to the source, so the class path is
    # anchored at BASE_DIR instead of the current working directory.
    downloaderClassPath="$BASE_DIR/.mvn/wrapper"
    # MAVEN_PROJECTBASEDIR is only assigned after this extension, so the base
    # directory handed to the downloader is computed here the same way.
    downloaderBaseDir=${MAVEN_BASEDIR:-"$BASE_DIR"}
    # For Cygwin, switch paths to Windows format before running javac
    if $cygwin; then
      javaClass=`cygpath --path --windows "$javaClass"`
      downloaderClassPath=`cygpath --path --windows "$downloaderClassPath"`
      downloaderBaseDir=`cygpath --path --windows "$downloaderBaseDir"`
    fi
    if [ -e "$javaClass" ]; then
      if [ ! -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then
        if [ "$MVNW_VERBOSE" = true ]; then
          echo " - Compiling MavenWrapperDownloader.java ..."
        fi
        # Compiling the Java class
        ("$JAVA_HOME/bin/javac" "$javaClass")
      fi
      if [ -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then
        # Running the downloader
        if [ "$MVNW_VERBOSE" = true ]; then
          echo " - Running MavenWrapperDownloader.java ..."
        fi
        ("$JAVA_HOME/bin/java" -cp "$downloaderClassPath" MavenWrapperDownloader "$downloaderBaseDir")
      fi
    fi
  fi
fi
##########################################################################################
# End of extension
##########################################################################################
# The project base directory is MAVEN_BASEDIR when set, otherwise the detected BASE_DIR.
export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}
if [ "$MVNW_VERBOSE" = true ]; then
echo $MAVEN_PROJECTBASEDIR
fi
# Options from .mvn/jvm.config (joined onto one line) are placed before the caller's MAVEN_OPTS.
MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS"
# For Cygwin, switch paths to Windows format before running java
if $cygwin; then
[ -n "$M2_HOME" ] &&
M2_HOME=`cygpath --path --windows "$M2_HOME"`
[ -n "$JAVA_HOME" ] &&
JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
[ -n "$CLASSPATH" ] &&
CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
[ -n "$MAVEN_PROJECTBASEDIR" ] &&
MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"`
fi
# Provide a "standardized" way to retrieve the CLI args that will
# work with both Windows and non-Windows executions.
MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $@"
export MAVEN_CMD_LINE_ARGS
WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
# Replace this shell with the JVM running the wrapper main class from maven-wrapper.jar.
# $MAVEN_OPTS and $MAVEN_CONFIG are unquoted, so the shell splits them into separate
# arguments; "$@" keeps the caller's arguments intact.
exec "$JAVACMD" \
$MAVEN_OPTS \
-classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \
"-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \
${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@"

182
mvnw.cmd vendido Archivo normal
Ver fichero

@@ -0,0 +1,182 @@
@REM ----------------------------------------------------------------------------
@REM Licensed to the Apache Software Foundation (ASF) under one
@REM or more contributor license agreements. See the NOTICE file
@REM distributed with this work for additional information
@REM regarding copyright ownership. The ASF licenses this file
@REM to you under the Apache License, Version 2.0 (the
@REM "License"); you may not use this file except in compliance
@REM with the License. You may obtain a copy of the License at
@REM
@REM https://www.apache.org/licenses/LICENSE-2.0
@REM
@REM Unless required by applicable law or agreed to in writing,
@REM software distributed under the License is distributed on an
@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@REM KIND, either express or implied. See the License for the
@REM specific language governing permissions and limitations
@REM under the License.
@REM ----------------------------------------------------------------------------
@REM ----------------------------------------------------------------------------
@REM Maven Start Up Batch script
@REM
@REM Required ENV vars:
@REM JAVA_HOME - location of a JDK home dir
@REM
@REM Optional ENV vars
@REM M2_HOME - location of maven2's installed home dir
@REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands
@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending
@REM MAVEN_OPTS - parameters passed to the Java VM when running Maven
@REM e.g. to debug Maven itself, use
@REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
@REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files
@REM ----------------------------------------------------------------------------
@REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on'
@echo off
@REM set title of command window
title %0
@REM enable echoing by setting MAVEN_BATCH_ECHO to 'on'
@if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO%
@REM set %HOME% to equivalent of $HOME
if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%")
@REM Execute a user defined script before this one
if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre
@REM check for pre script, once with legacy .bat ending and once with .cmd ending
if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat"
if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd"
:skipRcPre
@setlocal
@REM ERROR_CODE is the script's exit code; it stays 0 unless the :error label is reached.
set ERROR_CODE=0
@REM To isolate internal variables from possible post scripts, we use another setlocal
@setlocal
@REM ==== START VALIDATION ====
@REM JAVA_HOME must be set and must contain bin\java.exe; otherwise jump to :error.
if not "%JAVA_HOME%" == "" goto OkJHome
echo.
echo Error: JAVA_HOME not found in your environment. >&2
echo Please set the JAVA_HOME variable in your environment to match the >&2
echo location of your Java installation. >&2
echo.
goto error
:OkJHome
if exist "%JAVA_HOME%\bin\java.exe" goto init
echo.
echo Error: JAVA_HOME is set to an invalid directory. >&2
echo JAVA_HOME = "%JAVA_HOME%" >&2
echo Please set the JAVA_HOME variable in your environment to match the >&2
echo location of your Java installation. >&2
echo.
goto error
@REM ==== END VALIDATION ====
:init
@REM Find the project base dir, i.e. the directory that contains the folder ".mvn".
@REM Fallback to current working directory if not found.
@REM A non-empty MAVEN_BASEDIR is used as-is and skips the search.
set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR%
IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir
set EXEC_DIR=%CD%
set WDIR=%EXEC_DIR%
@REM Walk up one directory per iteration until a .mvn folder is found.
:findBaseDir
IF EXIST "%WDIR%"\.mvn goto baseDirFound
cd ..
@REM If "cd .." did not change the directory there is no parent left to try.
IF "%WDIR%"=="%CD%" goto baseDirNotFound
set WDIR=%CD%
goto findBaseDir
:baseDirFound
set MAVEN_PROJECTBASEDIR=%WDIR%
@REM Return to the directory the script was started from.
cd "%EXEC_DIR%"
goto endDetectBaseDir
:baseDirNotFound
set MAVEN_PROJECTBASEDIR=%EXEC_DIR%
cd "%EXEC_DIR%"
:endDetectBaseDir
IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig
@REM Append every line of .mvn\jvm.config to JVM_CONFIG_MAVEN_PROPS, separated by spaces.
@REM Delayed expansion (!VAR!) is needed because the variable changes inside the FOR loop.
@setlocal EnableExtensions EnableDelayedExpansion
for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a
@REM Carry the collected value across the endlocal boundary.
@endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS%
:endReadAdditionalConfig
SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe"
set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar"
set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain

set DOWNLOAD_URL="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"

@REM Read an optional wrapperUrl override from maven-wrapper.properties.
@REM "usebackq" is required: without it FOR /F parses the double-quoted path as a
@REM literal string instead of opening the file, so wrapperUrl was never picked up.
FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO (
    IF "%%A"=="wrapperUrl" SET DOWNLOAD_URL=%%B
)

@REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
@REM This allows using the maven wrapper in projects that prohibit checking in binary data.
@REM NOTE(review): %DOWNLOAD_URL% inside the else-block below is expanded when the whole
@REM block is parsed, so the MVNW_REPOURL assignment made inside it may not be visible to
@REM the echo and powershell lines -- confirm before relying on MVNW_REPOURL.
if exist %WRAPPER_JAR% (
    if "%MVNW_VERBOSE%" == "true" (
        echo Found %WRAPPER_JAR%
    )
) else (
    if not "%MVNW_REPOURL%" == "" (
        SET DOWNLOAD_URL="%MVNW_REPOURL%/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
    )
    if "%MVNW_VERBOSE%" == "true" (
        echo Couldn't find %WRAPPER_JAR%, downloading it ...
        echo Downloading from: %DOWNLOAD_URL%
    )

    powershell -Command "&{"^
        "$webclient = new-object System.Net.WebClient;"^
        "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^
        "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^
        "}"^
        "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%DOWNLOAD_URL%', '%WRAPPER_JAR%')"^
        "}"
    if "%MVNW_VERBOSE%" == "true" (
        echo Finished downloading %WRAPPER_JAR%
    )
)
@REM End of extension
@REM Provide a "standardized" way to retrieve the CLI args that will
@REM work with both Windows and non-Windows executions.
set MAVEN_CMD_LINE_ARGS=%*
@REM Start the JVM with the wrapper jar on the class path and run the wrapper main class,
@REM forwarding MAVEN_CONFIG and all command-line arguments.
%MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %*
@REM A non-zero exit code from the JVM is reported as ERROR_CODE 1.
if ERRORLEVEL 1 goto error
goto end
:error
set ERROR_CODE=1
:end
@REM Carry ERROR_CODE out of the inner setlocal scope.
@endlocal & set ERROR_CODE=%ERROR_CODE%
if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost
@REM check for post script, once with legacy .bat ending and once with .cmd ending
if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat"
if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd"
:skipRcPost
@REM pause the script if MAVEN_BATCH_PAUSE is set to 'on'
if "%MAVEN_BATCH_PAUSE%" == "on" pause
@REM MAVEN_TERMINATE_CMD=on uses plain "exit"; otherwise "exit /B" ends only this script.
if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE%
exit /B %ERROR_CODE%

115
pom.xml Archivo normal
Ver fichero

@@ -0,0 +1,115 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.6.1</version>
<relativePath /> <!-- lookup parent from repository -->
</parent>
<groupId>com.manalejandro</groupId>
<artifactId>arjion2</artifactId>
<version>1.0.0</version>
<packaging>war</packaging>
<name>arjion2</name>
<description>Demo project for Arjion with Spring Boot</description>
<properties>
<java.version>11</java.version>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-webflux</artifactId>
</dependency>
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>tensorflow-core-platform</artifactId>
<version>0.4.0</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers-standard-package</artifactId>
<version>2.1.0</version>
<exclusions>
<exclusion>
<groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.optimaize.languagedetector</groupId>
<artifactId>language-detector</artifactId>
<version>0.6</version>
</dependency>
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-jpeg2000</artifactId>
<version>1.4.0</version>
</dependency>
<dependency>
<groupId>com.levigo.jbig2</groupId>
<artifactId>levigo-jbig2-imageio</artifactId>
<version>2.0</version>
</dependency>
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-core</artifactId>
<version>1.4.0</version>
</dependency>
<dependency>
<groupId>org.webjars</groupId>
<artifactId>bootstrap</artifactId>
<version>5.1.3</version>
</dependency>
<dependency>
<groupId>org.webjars</groupId>
<artifactId>jquery</artifactId>
<version>3.6.0</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-tomcat</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.projectreactor</groupId>
<artifactId>reactor-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<finalName>arjion2</finalName>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>

Ver fichero

@@ -0,0 +1,38 @@
package com.manalejandro.arjion2;
import java.util.concurrent.Executor;
import org.springframework.aop.interceptor.AsyncUncaughtExceptionHandler;
import org.springframework.aop.interceptor.SimpleAsyncUncaughtExceptionHandler;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.data.elasticsearch.repository.config.EnableReactiveElasticsearchRepositories;
import org.springframework.scheduling.annotation.AsyncConfigurer;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
/**
 * Spring Boot entry point of the application.
 *
 * <p>Besides bootstrapping the context it enables asynchronous method execution and reactive
 * Elasticsearch repositories, and, as an {@link AsyncConfigurer}, supplies the executor and the
 * uncaught-exception handler for asynchronous methods.
 */
@SpringBootApplication
@EnableAsync
@EnableReactiveElasticsearchRepositories
public class Arjion2Application implements AsyncConfigurer {

    private static final int CORE_POOL_SIZE = 7;
    private static final int MAX_POOL_SIZE = 42;
    private static final int QUEUE_CAPACITY = 11;
    private static final String THREAD_NAME_PREFIX = "Executor-";

    /**
     * Starts the Spring application.
     *
     * @param args command-line arguments forwarded to {@link SpringApplication#run}
     */
    public static void main(String[] args) {
        SpringApplication.run(Arjion2Application.class, args);
    }

    /**
     * Builds and initializes the thread pool used for asynchronous method execution.
     *
     * @return an initialized {@link ThreadPoolTaskExecutor}
     */
    @Override
    public Executor getAsyncExecutor() {
        final ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
        pool.setThreadNamePrefix(THREAD_NAME_PREFIX);
        pool.setCorePoolSize(CORE_POOL_SIZE);
        pool.setMaxPoolSize(MAX_POOL_SIZE);
        pool.setQueueCapacity(QUEUE_CAPACITY);
        pool.initialize();
        return pool;
    }

    /**
     * @return a new {@link SimpleAsyncUncaughtExceptionHandler}
     */
    @Override
    public AsyncUncaughtExceptionHandler getAsyncUncaughtExceptionHandler() {
        return new SimpleAsyncUncaughtExceptionHandler();
    }
}

Ver fichero

@@ -0,0 +1,43 @@
package com.manalejandro.arjion2;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.elasticsearch.client.ClientConfiguration;
import org.springframework.data.elasticsearch.client.reactive.ReactiveElasticsearchClient;
import org.springframework.data.elasticsearch.client.reactive.ReactiveRestClients;
import org.springframework.data.elasticsearch.config.AbstractReactiveElasticsearchConfiguration;
/**
 * Reactive Elasticsearch configuration.
 *
 * <p>Creates the reactive client from the {@code elasticsearch.host} and
 * {@code elasticsearch.port} properties and publishes the {@code arjion.indexName} and
 * {@code arjion.documentType} properties as {@link String} beans.
 */
@Configuration
public class ESConfig extends AbstractReactiveElasticsearchConfiguration {

    @Value("${elasticsearch.host}")
    private String esHost;

    @Value("${elasticsearch.port}")
    private int esPort;

    @Value("${arjion.indexName}")
    private String indexName;

    @Value("${arjion.documentType}")
    private String documentType;

    /**
     * @return a reactive client connected to {@code host:port} as configured
     */
    @Override
    @Bean
    public ReactiveElasticsearchClient reactiveElasticsearchClient() {
        final String hostAndPort = esHost + ":" + esPort;
        final ClientConfiguration configuration = ClientConfiguration.builder()
                .connectedTo(hostAndPort)
                .build();
        return ReactiveRestClients.create(configuration);
    }

    /**
     * @return the value of the {@code arjion.indexName} property
     */
    @Bean
    public String indexName() {
        return indexName;
    }

    /**
     * @return the value of the {@code arjion.documentType} property
     */
    @Bean
    public String documentType() {
        return documentType;
    }
}

Ver fichero

@@ -0,0 +1,12 @@
package com.manalejandro.arjion2;
import org.springframework.boot.builder.SpringApplicationBuilder;
import org.springframework.boot.web.servlet.support.SpringBootServletInitializer;
/**
 * Servlet initializer that registers {@link Arjion2Application} as the configuration source
 * of the application builder.
 */
public class ServletInitializer extends SpringBootServletInitializer {

    /**
     * @param application builder handed in by the framework
     * @return the builder returned by {@code application.sources(...)}
     */
    @Override
    protected SpringApplicationBuilder configure(SpringApplicationBuilder application) {
        final Class<?> primarySource = Arjion2Application.class;
        return application.sources(primarySource);
    }
}

Ver fichero

@@ -0,0 +1,20 @@
package com.manalejandro.arjion2;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.servlet.config.annotation.EnableWebMvc;
import org.springframework.web.servlet.config.annotation.ResourceHandlerRegistry;
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
/**
 * Web MVC configuration that registers the static resource handlers: one for WebJars and a
 * catch-all handler backed by the listed classpath locations.
 */
@Configuration
@EnableWebMvc
public class WebConfig implements WebMvcConfigurer {

    private static final String WEBJARS_PATTERN = "/webjars/**";
    private static final String WEBJARS_LOCATION = "/webjars/";
    private static final String CATCH_ALL_PATTERN = "/**";

    private static final String[] CLASSPATH_RESOURCE_LOCATIONS = {
            "classpath:/META-INF/resources/",
            "classpath:/resources/",
            "classpath:/static/",
            "classpath:/public/"
    };

    /**
     * Registers the WebJars handler first and the catch-all classpath handler second.
     *
     * @param registry registry the handlers are added to
     */
    @Override
    public void addResourceHandlers(ResourceHandlerRegistry registry) {
        registry.addResourceHandler(WEBJARS_PATTERN)
                .addResourceLocations(WEBJARS_LOCATION);
        registry.addResourceHandler(CATCH_ALL_PATTERN)
                .addResourceLocations(CLASSPATH_RESOURCE_LOCATIONS);
    }
}

Ver fichero

@@ -0,0 +1,179 @@
package com.manalejandro.arjion2.controllers;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.Normalizer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.manalejandro.arjion2.model.Archivo;
import com.manalejandro.arjion2.model.Documento;
import com.manalejandro.arjion2.services.MainService;
import com.manalejandro.arjion2.vo.DetailVO;
import com.manalejandro.arjion2.vo.DocumentoVO;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.CommonTextObjectFactories;
import com.optimaize.langdetect.text.TextObjectFactory;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BodyContentHandler;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.ByteArrayResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@Controller
public class MainController {
private final MainService mainService;
@Value("${arjion.uploadpath}")
private String uploadpath;
@Value("${arjion.tesseractpath}")
private String tesseractpath;
@Value("${arjion.tesseractdatapath}")
private String tesseractdatapath;
/**
 * Creates the controller with the service its handlers delegate to.
 *
 * @param mainService service stored in the {@code mainService} field
 */
@Autowired
public MainController(MainService mainService) {
this.mainService = mainService;
}
@RequestMapping(path = "/")
public String indexPage(final Model model) {
DocumentoVO documentoVO = new DocumentoVO();
documentoVO.setCount(mainService.count());
documentoVO.setDocumentos(mainService.findAllDocumentos());
model.addAttribute("documentoVO", documentoVO);
return "index";
}
@GetMapping(path = "/upload")
public String upload() {
return "redirect:/";
}
@PostMapping(path = "/upload")
public String uploadPage(@RequestParam(value = "archivos", required = true) MultipartFile[] archivos,
final Model model) throws IOException, TikaException, SAXException {
DocumentoVO documentoVO = new DocumentoVO();
documentoVO.setCount(mainService.count());
documentoVO.setDocumentos(mainService.findAllDocumentos());
if (archivos.length > 0) {
// Recupera la configuración de Tika
TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
// Itera los archivos recibidos
for (int i = 0; i < archivos.length; i++) {
byte[] bytes = archivos[i].getBytes();
// Normaliza el título de los archivos
String normalized = Normalizer.normalize(archivos[i].getOriginalFilename(), Normalizer.Form.NFD),
filename = normalized.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
Path path = Paths.get(uploadpath + filename);
// Instancias necesarias
Metadata metadata = new Metadata();
Parser parser = new AutoDetectParser(tikaConfig);
PDFParserConfig pdfConfig = new PDFParserConfig();
TesseractOCRConfig tesseractConfig = new TesseractOCRConfig();
tesseractConfig.addOtherTesseractConfig("tesseractPath", tesseractpath);
tesseractConfig.addOtherTesseractConfig("tessdataPath", tesseractdatapath);
tesseractConfig.setLanguage("spa+eng");
pdfConfig.setExtractInlineImages(true);
ParseContext parseContext = new ParseContext();
parseContext.set(TesseractOCRConfig.class, tesseractConfig);
parseContext.set(PDFParserConfig.class, pdfConfig);
// Usa -1 para no tener límite de 100000 chars
ContentHandler handler = new BodyContentHandler(-1);
// Castea los bytes al Stream de Tika
TikaInputStream stream = TikaInputStream.get(bytes);
// Parsea el contenido
parser.parse(stream, handler, metadata, parseContext);
// Identifica el idioma del archivo
List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAllBuiltIn();
LanguageDetector detector = LanguageDetectorBuilder.create(NgramExtractors.standard())
.withProfiles(languageProfiles).build();
TextObjectFactory textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
String language = detector.detect(textObjectFactory.forText(handler.toString())).isPresent()
? detector.detect(textObjectFactory.forText(handler.toString())).get().getLanguage()
: "";
// Almacena en elasticsearch
String[] names = metadata.names();
Map<String, String> meta = new HashMap<String, String>();
for (int j = 0; j < names.length; j++) {
for (int k = 0; k < metadata.getValues(names[j]).length; k++) {
meta.put(names[j], metadata.getValues(names[j])[k]);
}
}
if (!mainService.save(new Documento(filename, Long.valueOf(archivos[i].getSize()).intValue(), meta,
handler.toString(), language))) {
return "exists";
} else {
// Guarda el archivo en el directorio configurado en las properties
Files.write(path, bytes);
}
// Añade los parámetros al VO para mostrar en la vista
documentoVO.getArchivos().add(new Archivo(filename, Long.valueOf(archivos[i].getSize()).intValue(),
meta, handler.toString(), language));
}
}
model.addAttribute("documentoVO", documentoVO);
return "index";
}
@GetMapping(path = "/detail")
public String detail(final Model model, @RequestParam(value = "nombre", required = true) String nombre)
throws IOException {
DetailVO detailVO = new DetailVO();
Documento doc = mainService.findOne(nombre);
detailVO.setArchivo(new Archivo(doc.getNombre(), doc.getTamano(), doc.getMetadata(), doc.getContenido(),
doc.getLenguaje()));
model.addAttribute("detailVO", detailVO);
return "detail";
}
@GetMapping(path = "/download")
public ResponseEntity<ByteArrayResource> download(
@RequestParam(value = "filename", required = true) String filename)
throws IOException, MalformedURLException {
File file = new File(uploadpath + filename);
Path path = Paths.get(file.getAbsolutePath());
ByteArrayResource resource = new ByteArrayResource(Files.readAllBytes(path));
file.toURI().toURL().openConnection();
String type = URLConnection.guessContentTypeFromName(filename);
HttpHeaders responseHeaders = new HttpHeaders();
responseHeaders.add("Content-Disposition", "attachment; filename=" + URLEncoder.encode(filename, "UTF-8"));
return ResponseEntity.ok().contentType(MediaType.parseMediaType(type)).contentLength(file.length())
.headers(responseHeaders).body(resource);
}
}

Ver fichero

@@ -0,0 +1,5 @@
package com.manalejandro.arjion2.forms;
/**
 * Placeholder form-backing class; it currently declares no fields or methods.
 */
public class DocumentoForm {
}

Ver fichero

@@ -0,0 +1,90 @@
package com.manalejandro.arjion2.model;
import java.util.Map;
public class Archivo {
private String nombre;
private Integer tamano;
private Map<String, String> metadata;
private String contenido;
private String lenguaje;
public Archivo(String nombre, Integer tamano, Map<String, String> meta, String contenido, String lenguaje) {
this.nombre = nombre;
this.tamano = tamano;
this.metadata = meta;
this.contenido = contenido;
this.lenguaje = lenguaje;
}
/**
* @return the nombre
*/
public String getNombre() {
return nombre;
}
/**
* @return the tamano
*/
public Integer getTamano() {
return tamano;
}
/**
* @return the metadata
*/
public Map<String, String> getMetadata() {
return metadata;
}
/**
* @return the contenido
*/
public String getContenido() {
return contenido;
}
/**
* @param nombre the nombre to set
*/
public void setNombre(String nombre) {
this.nombre = nombre;
}
/**
* @param tamano the tamano to set
*/
public void setTamano(Integer tamano) {
this.tamano = tamano;
}
/**
* @param metadata the metadata to set
*/
public void setMetadata(Map<String, String> metadata) {
this.metadata = metadata;
}
/**
* @param contenido the contenido to set
*/
public void setContenido(String contenido) {
this.contenido = contenido;
}
/**
* @return the lenguaje
*/
public String getLenguaje() {
return lenguaje;
}
/**
* @param lenguaje the lenguaje to set
*/
public void setLenguaje(String lenguaje) {
this.lenguaje = lenguaje;
}
}

Ver fichero

@@ -0,0 +1,52 @@
package com.manalejandro.arjion2.model;
import java.util.ArrayList;
import java.util.List;
/**
 * Result holder of a search: the matching documents, a single suggested phrase and the
 * autocomplete candidates.
 */
public class Consulta {

    private List<Documento> documentos = new ArrayList<Documento>();
    private String suggest;
    private List<String> autocomplete = new ArrayList<String>();

    /** @return the matching documents */
    public List<Documento> getDocumentos() {
        return this.documentos;
    }

    /** @param documentos the matching documents to set */
    public void setDocumentos(List<Documento> documentos) {
        this.documentos = documentos;
    }

    /** @return the suggested phrase */
    public String getSuggest() {
        return this.suggest;
    }

    /** @param suggest the suggested phrase to set */
    public void setSuggest(String suggest) {
        this.suggest = suggest;
    }

    /** @return the autocomplete candidates */
    public List<String> getAutocomplete() {
        return this.autocomplete;
    }

    /** @param autocomplete the autocomplete candidates to set */
    public void setAutocomplete(List<String> autocomplete) {
        this.autocomplete = autocomplete;
    }
}

Ver fichero

@@ -0,0 +1,110 @@
package com.manalejandro.arjion2.model;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.Map;
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Mapping;
import org.springframework.data.elasticsearch.annotations.Setting;
import org.springframework.data.elasticsearch.annotations.WriteTypeHint;
/**
 * Elasticsearch entity for an archived document, identified by its file name.
 *
 * <p>The fields are public and are also exposed through JSON-annotated getters.
 * NOTE(review): {@code indexStoreType} is fed from the {@code documentType} bean; confirm that
 * this is the intended setting.
 */
@Document(indexName = "#{@indexName}", writeTypeHint = WriteTypeHint.FALSE)
@Setting(settingPath = "/elasticsearch/settings.json", indexStoreType = "#{@documentType}")
@Mapping(mappingPath = "/elasticsearch/mapping.json")
public class Documento {

    /** File name; doubles as the document id. */
    @Id
    public String nombre;
    /** File size. */
    public Integer tamano;
    /** Metadata entries. */
    public Map<String, String> metadata;
    /** Extracted text. */
    public String contenido;
    /** Language code. */
    public String lenguaje;

    /**
     * Creates a document; also used by Jackson to build instances from JSON properties.
     *
     * @param nombre    file name / id
     * @param tamano    file size
     * @param metadata  metadata entries
     * @param contenido extracted text
     * @param lenguaje  language code
     */
    @JsonCreator
    public Documento(@JsonProperty("nombre") String nombre, @JsonProperty("tamano") Integer tamano,
            @JsonProperty("metadata") Map<String, String> metadata, @JsonProperty("contenido") String contenido,
            @JsonProperty("lenguaje") String lenguaje) {
        this.lenguaje = lenguaje;
        this.contenido = contenido;
        this.metadata = metadata;
        this.tamano = tamano;
        this.nombre = nombre;
    }

    /** @return the file name */
    @JsonProperty("nombre")
    public String getNombre() {
        return this.nombre;
    }

    /** @return the file size */
    @JsonProperty("tamano")
    public Integer getTamano() {
        return this.tamano;
    }

    /** @return the metadata entries */
    @JsonProperty("metadata")
    public Map<String, String> getMetadata() {
        return this.metadata;
    }

    /** @return the extracted text */
    @JsonProperty("contenido")
    public String getContenido() {
        return this.contenido;
    }

    /** @return the language code */
    @JsonProperty("lenguaje")
    public String getLenguaje() {
        return this.lenguaje;
    }

    /** @param nombre the file name to set */
    public void setNombre(String nombre) {
        this.nombre = nombre;
    }

    /** @param tamano the file size to set */
    public void setTamano(Integer tamano) {
        this.tamano = tamano;
    }

    /** @param metadata the metadata entries to set */
    public void setMetadata(Map<String, String> metadata) {
        this.metadata = metadata;
    }

    /** @param contenido the extracted text to set */
    public void setContenido(String contenido) {
        this.contenido = contenido;
    }

    /** @param lenguaje the language code to set */
    public void setLenguaje(String lenguaje) {
        this.lenguaje = lenguaje;
    }
}

Ver fichero

@@ -0,0 +1,11 @@
package com.manalejandro.arjion2.repositories;
import org.springframework.data.elasticsearch.repository.ReactiveElasticsearchRepository;
import org.springframework.stereotype.Repository;
import com.manalejandro.arjion2.model.Documento;
/**
 * Reactive Spring Data repository for {@link Documento} entities with {@code String} ids.
 * It declares no custom queries; every operation is inherited from
 * {@link ReactiveElasticsearchRepository}.
 */
@Repository
public interface MainRepository extends ReactiveElasticsearchRepository<Documento, String> {
}

Ver fichero

@@ -0,0 +1,23 @@
package com.manalejandro.arjion2.services;
import java.util.List;
import org.springframework.data.domain.Pageable;
import com.manalejandro.arjion2.model.Consulta;
import com.manalejandro.arjion2.model.Documento;
/**
 * Operations offered on the document index.
 */
public interface MainService {

    /**
     * Stores a document.
     *
     * @param doc document to store
     * @return {@code true} when the document was stored, {@code false} otherwise
     */
    boolean save(Documento doc);

    /** @return the number of stored documents */
    long count();

    /** @return every stored document */
    List<Documento> findAllDocumentos();

    /**
     * @param nombre document name
     * @return the document with that name
     */
    Documento findOne(String nombre);

    /** @return the largest {@code tamano} among the stored documents */
    Integer maxTamano();

    /**
     * Runs a search.
     *
     * @param busqueda search text
     * @param tipo     type filter values
     * @param tamano   size bound
     * @param pageable paging information
     * @return the search result
     */
    Consulta search(String busqueda, String[] tipo, Integer tamano, Pageable pageable);
}

Ver fichero

@@ -0,0 +1,127 @@
package com.manalejandro.arjion2.services;
import java.util.ArrayList;
import java.util.List;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.manalejandro.arjion2.model.Consulta;
import com.manalejandro.arjion2.model.Documento;
import com.manalejandro.arjion2.repositories.MainRepository;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.SuggestBuilders;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.ApplicationContext;
import org.springframework.data.domain.Pageable;
import org.springframework.data.domain.Sort;
import org.springframework.stereotype.Service;
/**
 * {@link MainService} implementation backed by the reactive {@link MainRepository} for CRUD
 * operations (blocking on each result) and by the Elasticsearch {@code client} bean for
 * full-text search.
 */
@Service
public class MainServiceImpl implements MainService {

    /** Shared mapper used to turn search hits back into {@link Documento} instances. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    private final ApplicationContext appContext;
    private final MainRepository mainRepository;

    @Value("#{@indexName}")
    private String index;

    @Value("#{@documentType}")
    private String document;

    @Autowired
    public MainServiceImpl(MainRepository mainRepository, ApplicationContext appContext) {
        this.mainRepository = mainRepository;
        this.appContext = appContext;
    }

    /**
     * Stores the document unless one with the same name already exists.
     *
     * @param doc document to store
     * @return {@code true} when the document was stored; {@code false} when it already existed
     *         or the repository returned nothing
     */
    @Override
    public boolean save(Documento doc) {
        // Compare against Boolean.TRUE so a missing result cannot fail while unboxing.
        if (Boolean.TRUE.equals(mainRepository.existsById(doc.nombre).block())) {
            return false;
        }
        return mainRepository.save(doc).block() != null;
    }

    /** @return the number of documents in the repository */
    @Override
    public long count() {
        return mainRepository.count().block();
    }

    /** @return every document in the repository */
    @Override
    public List<Documento> findAllDocumentos() {
        return mainRepository.findAll().collectList().block();
    }

    /**
     * @param nombre document name (its id)
     * @return the document, or {@code null} when the repository yields none
     */
    @Override
    public Documento findOne(String nombre) {
        return mainRepository.findById(nombre).block();
    }

    /**
     * @return the {@code tamano} of the first document when sorted by {@code tamano}
     *         descending, or {@code 0} when the repository holds no documents
     */
    @Override
    public Integer maxTamano() {
        Documento largest = mainRepository.findAll(Sort.by(Sort.Direction.DESC, "tamano")).blockFirst();
        return largest != null ? largest.getTamano() : Integer.valueOf(0);
    }

    /**
     * Searches the index and collects hits, a phrase suggestion and autocomplete candidates.
     *
     * <p>Suggestions are only requested when {@code busqueda} carries text. Each autocomplete
     * candidate is added once, with every search term wrapped in {@code <strong>}.
     *
     * @param busqueda search text; {@code null}, empty or the literal {@code "null"} means no
     *                 text criteria
     * @param tipo     values for the {@code tipo} terms filter; ignored when null or empty
     * @param tamano   inclusive upper bound for {@code tamano}; ignored when null or negative
     * @param pageable page size and offset of the requested slice
     * @return the populated result; {@code suggest} is an empty string when nothing is suggested
     * @throws IllegalStateException if a hit cannot be mapped to {@link Documento}
     */
    @Override
    public Consulta search(String busqueda, String[] tipo, Integer tamano, Pageable pageable) {
        Client client = appContext.getBean("client", Client.class);
        boolean hasText = busqueda != null && !"null".equals(busqueda) && !busqueda.isEmpty();

        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        if (hasText) {
            boolQueryBuilder.must(QueryBuilders.matchQuery("nombre", busqueda));
            boolQueryBuilder.should(QueryBuilders.matchQuery("contenido", busqueda));
        }
        if (tipo != null && tipo.length > 0) {
            boolQueryBuilder.filter(QueryBuilders.termsQuery("tipo", tipo));
        }
        if (tamano != null && tamano >= 0) {
            boolQueryBuilder.must(QueryBuilders.rangeQuery("tamano").to(tamano).includeUpper(true));
        }
        AggregationBuilder aggregation = AggregationBuilders.terms("by_xarchivo").field("x_archivo").size(10000);

        // "from" is an offset in hits, not a page index.
        SearchRequestBuilder request = client.prepareSearch(index).setQuery(boolQueryBuilder)
                .addAggregation(aggregation).setSize(pageable.getPageSize()).setFrom((int) pageable.getOffset());
        if (hasText) {
            // The suggesters need a text, so they are only attached when there is one.
            request.suggest(new SuggestBuilder()
                    .addSuggestion("suggest",
                            SuggestBuilders.completionSuggestion("nombre").text(busqueda).size(10))
                    .addSuggestion("phrase", SuggestBuilders.phraseSuggestion("nombre").text(busqueda).size(1)
                            .realWordErrorLikelihood((float) 0.95).maxErrors((float) 0.5).gramSize(2)));
        }
        SearchResponse response = request.execute().actionGet();

        Consulta consulta = new Consulta();
        consulta.setSuggest("");
        if (hasText && response.getSuggest() != null) {
            List<? extends Entry<? extends Option>> phraseEntries = response.getSuggest().getSuggestion("phrase")
                    .getEntries();
            if (!phraseEntries.isEmpty() && !phraseEntries.get(0).getOptions().isEmpty()) {
                consulta.setSuggest(phraseEntries.get(0).getOptions().get(0).getText().string());
            }

            // Only letters, digits and spaces survive the clean-up, so the terms are safe to
            // embed in the highlighting regex below.
            List<String> terms = new ArrayList<String>();
            for (String item : busqueda.replaceAll("[^\\p{Alnum}\\p{IsAlphabetic} ]", "").split(" ")) {
                if (item.length() > 0) {
                    terms.add(item);
                }
            }
            if (!terms.isEmpty()) {
                for (Entry<? extends Option> entry : response.getSuggest().getSuggestion("suggest").getEntries()) {
                    for (Option option : entry.getOptions()) {
                        String highlighted = option.getText().string().trim();
                        for (String term : terms) {
                            // The trailing lookahead skips matches inside already inserted tags.
                            highlighted = highlighted.replaceAll("(?i)((?!<)" + term + "(?![^<>]*>))",
                                    "<strong>$1</strong>");
                        }
                        consulta.getAutocomplete().add(highlighted);
                    }
                }
            }
        }

        List<Documento> documentos = new ArrayList<Documento>();
        for (SearchHit hit : response.getHits().getHits()) {
            try {
                documentos.add(MAPPER.readValue(hit.getSourceAsString(), Documento.class));
            } catch (JsonProcessingException e) {
                throw new IllegalStateException("Could not map search hit '" + hit.getId() + "' to Documento", e);
            }
        }
        consulta.setDocumentos(documentos);
        return consulta;
    }
}

Ver fichero

@@ -0,0 +1,22 @@
package com.manalejandro.arjion2.vo;
import com.manalejandro.arjion2.model.Archivo;
/**
 * View object of the detail page; wraps the single {@link Archivo} being shown.
 */
public class DetailVO {

    private Archivo archivo;

    /** @param archivo the file to show */
    public void setArchivo(Archivo archivo) {
        this.archivo = archivo;
    }

    /** @return the file to show */
    public Archivo getArchivo() {
        return this.archivo;
    }
}

Ver fichero

@@ -0,0 +1,50 @@
package com.manalejandro.arjion2.vo;
import java.util.ArrayList;
import java.util.List;
import com.manalejandro.arjion2.model.Archivo;
import com.manalejandro.arjion2.model.Documento;
/**
 * View object of the listing page: files handled by the current request, the total document
 * count and the documents to list.
 */
public class DocumentoVO {

    private List<Archivo> archivos = new ArrayList<Archivo>();
    private long count;
    private List<Documento> documentos = new ArrayList<Documento>();

    /** @return the files handled by the current request */
    public List<Archivo> getArchivos() {
        return this.archivos;
    }

    /** @param archivos the files to set */
    public void setArchivos(List<Archivo> archivos) {
        this.archivos = archivos;
    }

    /** @return the document count */
    public long getCount() {
        return this.count;
    }

    /** @param count the document count to set */
    public void setCount(long count) {
        this.count = count;
    }

    /** @return the documents to list */
    public List<Documento> getDocumentos() {
        return this.documentos;
    }

    /** @param documentos the documents to set */
    public void setDocumentos(List<Documento> documentos) {
        this.documentos = documentos;
    }
}

Ver fichero

@@ -0,0 +1,19 @@
# Embedded server: the application is served under /arjion2 on port 8080.
server.servlet.context-path=/arjion2
server.port=8080
# Custom Elasticsearch connection keys (not Spring Boot's own spring.elasticsearch.* keys);
# presumably read by the project's Elasticsearch configuration class - confirm there.
elasticsearch.clustername=elasticsearch
elasticsearch.host=elasticsearch
elasticsearch.port=9200
elasticsearch.nodename=arjion2
# Index name and document type; presumably exposed as the "indexName" and "documentType"
# beans referenced through SpEL by the entity and the service - confirm in the configuration.
arjion.indexName=documentos
arjion.documentType=documento
# Directory where uploaded files are stored and read back for download; keep the trailing slash.
arjion.uploadpath=/upload/
# Tesseract locations handed to the OCR configuration during upload.
arjion.tesseractpath=/usr/bin
arjion.tesseractdatapath=/usr/share/tesseract-ocr
spring.main.allow-bean-definition-overriding=true
# Thymeleaf templates are loaded from classpath:/templates/*.html with caching disabled.
spring.thymeleaf.enabled=true
spring.thymeleaf.prefix=classpath:/templates/
spring.thymeleaf.suffix=.html
spring.thymeleaf.cache=false
# Upload limits: 20MB per file, 100MB per multipart request.
spring.servlet.multipart.max-file-size=20MB
spring.servlet.multipart.max-request-size=100MB
spring.codec.max-in-memory-size=20MB

Ver fichero

@@ -0,0 +1,30 @@
{
"properties": {
"documento": {
"properties": {
"@timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"@version": {
"type": "keyword"
},
"nombre": {
"type": "text"
},
"tamano": {
"type": "long"
},
"metadata": {
"type": "object"
},
"contenido": {
"type": "text"
},
"lenguaje": {
"type": "keyword"
}
}
}
}
}

Ver fichero

@@ -0,0 +1,6 @@
{
"index": {
"number_of_shards": "1",
"number_of_replicas": "1"
}
}

Ver fichero

@@ -0,0 +1,4 @@
/* Horizontal rules take the full width of their container. */
hr {
width: 100%;
text-align: center;
}

Ver fichero

Ver fichero

@@ -0,0 +1,37 @@
<!DOCTYPE html>
<!--/* Detail page of one stored document; reads the "detailVO" model attribute. */-->
<html>
<header th:insert="fragments/header.html :: header"> </header>
<body>
  <header class="text-center">
    <a th:href="@{/}" class="text-decoration-none">
      <h1 class="text-primary">Arjion2</h1>
    </a>
    <h3 class="text-warning">[[${detailVO.archivo.nombre}]]</h3>
  </header>
  <section class="col-md-12">
    <hr>
  </section>
  <!--/* Size, optional language, metadata entries and extracted text of the document. */-->
  <section class="col-md-6 offset-md-3 justify-content-center text-center">
    <span class="col-md-1 text-muted">Tamaño</span>
    <span class="col-md-11 text-muted">[[${detailVO.archivo.tamano}]] bytes</span>
    <span class="col-md-1 text-muted"
      th:if="${!#strings.isEmpty(detailVO.archivo.lenguaje)}">- Lenguaje</span>
    <span class="col-md-11 text-muted"
      th:if="${!#strings.isEmpty(detailVO.archivo.lenguaje)}">[[${detailVO.archivo.lenguaje}]]</span>
    <br /> <span class="col-md-12 text-success">Metadatos</span>
    <ul class="col-md-6 offset-md-3 text-success text-start">
      <li th:each="meta : ${detailVO.archivo.metadata}"><span
        th:text="${meta.key}"></span>: <span th:text="${meta.value}"></span></li>
    </ul>
    <span class="col-md-12 text-warning">Contenido</span>
    <pre class="col-md-12 text-warning">[[${detailVO.archivo.contenido}]]</pre>
  </section>
  <section class="col-md-12">
    <hr>
  </section>
  <footer th:insert="fragments/footer.html :: footer"> </footer>
</body>
</html>

Ver fichero

@@ -0,0 +1,9 @@
<!DOCTYPE html>
<!--/* Generic error page: an "Error" heading plus the shared header and footer fragments. */-->
<html>
<header th:insert="fragments/header.html :: header"> </header>
<body class="text-center">
<h2 class="h2 text-danger">Error</h2>
<footer th:insert="fragments/footer.html :: footer"> </footer>
</body>
</html>

Ver fichero

@@ -0,0 +1,10 @@
<!DOCTYPE html>
<!--/* Error page shown when an upload could not be stored (the file already exists or saving failed). */-->
<html>
<header th:insert="fragments/header.html :: header"> </header>
<body class="text-center">
<h2 class="h2 text-danger">Error</h2>
<h4 class="h4 text-danger">El archivo ya existe o hubo un error</h4>
<footer th:insert="fragments/footer.html :: footer"> </footer>
</body>
</html>
</html>

Ver fichero

@@ -0,0 +1,6 @@
<!--/* Shared "footer" fragment: a link back to the application root and the copyright line. */-->
<footer th:fragment="footer" class="col-md-12 text-center">
<span class="col-md-12"> <a class="btn btn-primary"
th:href="@{/}">Volver</a>
</span><br />
<span>&copy;2021</span>
</footer>

Ver fichero

@@ -0,0 +1,10 @@
<!--/* Shared "header" fragment: charset, title, Bootstrap and jQuery webjars, and the project's own CSS and JS. */-->
<head th:fragment="header">
<meta charset="UTF-8">
<title>Arjion2</title>
<link rel="stylesheet"
th:href="@{/webjars/bootstrap/5.1.3/css/bootstrap.min.css}">
<link rel="stylesheet" th:href="@{/css/main.css}">
<script th:src="@{/webjars/jquery/3.6.0/jquery.min.js}"></script>
<script th:src="@{/webjars/bootstrap/5.1.3/js/bootstrap.min.js}"></script>
<script th:src="@{/js/main.js}"></script>
</head>

Ver fichero

@@ -0,0 +1,65 @@
<!DOCTYPE html>
<!--/* Landing page: upload form, files processed by the current upload and the list of documents; reads the "documentoVO" model attribute. */-->
<html>
<header th:insert="fragments/header.html :: header"> </header>
<body class="container-fluid">
<header class="row justify-content-center text-center">
<a th:href="@{/}" class="text-decoration-none">
<h1 class="text-primary">Arjion2</h1>
</a>
<h3 class="text-warning">[[${documentoVO.count}]] archivos</h3>
</header>
<!--/* Multi-file upload form posted as multipart data to /upload under the field name "archivos". -->
<!-- NOTE(review): th:object points at "documentForm", but the controller only adds "documentoVO" to the model - confirm. */-->
<form method="post" enctype="multipart/form-data"
class="row justify-content-center text-center" th:action="@{/upload}"
th:object="${documentForm}">
<div class="col-md-4">
<label class="input-group-text"> <input type="file"
name="archivos" class="form-control" multiple="multiple" required />
&nbsp;&nbsp;&nbsp;
<button class="btn btn-primary" type="submit" value="Enviar">Enviar</button>
</label>
</div>
</form>
<hr class="col-md-12">
<!--/* One entry per file in documentoVO.archivos: name, size, optional language, metadata and full text. */-->
<section class="row">
<div th:each="arc : ${documentoVO.archivos}"
class="col-md-6 offset-md-3">
<span class="col-md-1 text-primary lead">Nombre</span> <span
class="col-md-11 text-primary lead">[[${arc.nombre}]]</span><br />
<span class="col-md-1 text-muted">Tamaño</span> <span
class="col-md-11 text-muted">[[${arc.tamano}]] bytes</span><br /> <span
class="col-md-1 text-muted"
th:if="${!#strings.isEmpty(arc.lenguaje)}">Lenguaje</span> <span
class="col-md-11 text-muted"
th:if="${!#strings.isEmpty(arc.lenguaje)}">[[${arc.lenguaje}]]</span><br />
<span class="col-md-1 text-success">Metadatos</span> <span
class="col-md-11 text-success"><ul>
<li th:each="meta : ${arc.metadata}"><span
th:text="${meta.key}"></span>: <span th:text="${meta.value}"></span></li>
</ul></span> <span class="col-md-1 text-warning">Contenido</span>
<pre class="col-md-11 text-warning">[[${arc.contenido}]]</pre>
<hr class="col-md-12">
</div>
</section>
<hr class="col-md-12">
<!--/* One entry per document in documentoVO.documentos, with detail and download links and metadata/content abbreviated to 200 characters. */-->
<section class="row">
<div th:each="doc : ${documentoVO.documentos}">
<div class="col-md-6 offset-md-3 text-center">
<span class="col-md-12 text-primary"> <a
th:href="@{'/detail'(nombre=${doc.nombre})}"
class="lead text-decoration-none">[[${doc.nombre}]]</a> - <a
th:href="@{'/download'(filename=${doc.nombre})}"
class="text-decoration-none">download</a> <br />[[${doc.tamano}]]
bytes <span th:if="${!#strings.isEmpty(doc.lenguaje)}">-
[[${doc.lenguaje}]]</span>
</span> <br /> <span class="col-md-12 text-success">[[${#strings.abbreviate(doc.metadata,200)}]]</span>
<span class="col-md-12 text-warning">[[${#strings.abbreviate(doc.contenido,200)}]]</span>
</div>
<span class="col-md-12">
<hr>
</span>
</div>
</section>
<footer th:insert="fragments/footer.html :: footer"> </footer>
</body>
</html>

Ver fichero

@@ -0,0 +1,13 @@
package com.manalejandro.arjion2;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
/**
 * Smoke test annotated with {@code @SpringBootTest}; its only test method has an empty body.
 */
@SpringBootTest
class Arjion2ApplicationTests {
// Intentionally empty: presumably the test passes when the application context starts - confirm.
@Test
void contextLoads() {
}
}