commit 75734494660d6b3898cb9f325da95a6f0bd9fc9d Author: ale Date: Tue Dec 21 01:46:14 2021 +0100 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..05814d8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,34 @@ +HELP.md +target/ +!.mvn/wrapper/maven-wrapper.jar +!**/src/main/**/target/ +!**/src/test/**/target/ + +### STS ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache + +### IntelliJ IDEA ### +.idea +*.iws +*.iml +*.ipr + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ +build/ +!**/src/main/**/build/ +!**/src/test/**/build/ + +### VS Code ### +.vscode/ +esdata/
diff --git a/.mvn/wrapper/MavenWrapperDownloader.java b/.mvn/wrapper/MavenWrapperDownloader.java
new file mode 100644
index 0000000..e76d1f3
--- /dev/null
+++ b/.mvn/wrapper/MavenWrapperDownloader.java
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2007-present the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.net.*;
+import java.io.*;
+import java.nio.channels.*;
+import java.util.Properties;
+
+/**
+ * Command-line helper that downloads maven-wrapper.jar into a project's .mvn/wrapper directory.
+ */
+public class MavenWrapperDownloader {
+
+    private static final String WRAPPER_VERSION = "0.5.6";
+    /**
+     * Default URL to download the maven-wrapper.jar from, if no 'wrapperUrl' is provided.
+     */
+    private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/"
+        + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar";
+
+    /**
+     * Path to the maven-wrapper.properties file, which might contain a wrapperUrl property to
+     * use instead of the default one.
+     */
+    private static final String MAVEN_WRAPPER_PROPERTIES_PATH =
+            ".mvn/wrapper/maven-wrapper.properties";
+
+    /**
+     * Path where the maven-wrapper.jar will be saved to.
+     */
+    private static final String MAVEN_WRAPPER_JAR_PATH =
+            ".mvn/wrapper/maven-wrapper.jar";
+
+    /**
+     * Name of the property which should be used to override the default download url for the wrapper.
+     */
+    private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl";
+
+    /**
+     * Entry point: resolves the download URL, downloads the jar and exits the JVM.
+     *
+     * @param args {@code args[0]} is the project base directory
+     */
+    public static void main(String args[]) {
+        System.out.println("- Downloader started");
+        File baseDirectory = new File(args[0]);
+        System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath());
+
+        // If the maven-wrapper.properties exists, read it and check if it contains a custom
+        // wrapperUrl parameter.
+        File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH);
+        String url = DEFAULT_DOWNLOAD_URL;
+        if(mavenWrapperPropertyFile.exists()) {
+            FileInputStream mavenWrapperPropertyFileInputStream = null;
+            try {
+                mavenWrapperPropertyFileInputStream = new FileInputStream(mavenWrapperPropertyFile);
+                Properties mavenWrapperProperties = new Properties();
+                mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream);
+                url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url);
+            } catch (IOException e) {
+                System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'");
+            } finally {
+                try {
+                    if(mavenWrapperPropertyFileInputStream != null) {
+                        mavenWrapperPropertyFileInputStream.close();
+                    }
+                } catch (IOException e) {
+                    // Ignore ...
+                }
+            }
+        }
+        System.out.println("- Downloading from: " + url);
+
+        File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH);
+        if(!outputFile.getParentFile().exists()) {
+            if(!outputFile.getParentFile().mkdirs()) {
+                System.out.println(
+                        "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'");
+            }
+        }
+        System.out.println("- Downloading to: " + outputFile.getAbsolutePath());
+        try {
+            downloadFileFromURL(url, outputFile);
+            System.out.println("Done");
+            System.exit(0);
+        } catch (Throwable e) {
+            System.out.println("- Error downloading");
+            e.printStackTrace();
+            System.exit(1);
+        }
+    }
+
+    /**
+     * Streams the content behind {@code urlString} into {@code destination}.
+     *
+     * If both MVNW_USERNAME and MVNW_PASSWORD are set in the environment they are installed as the
+     * default {@link Authenticator} before the connection is opened.
+     *
+     * @param urlString   URL to read from
+     * @param destination file to write to
+     * @throws Exception if the URL is malformed or reading/writing fails
+     */
+    private static void downloadFileFromURL(String urlString, File destination) throws Exception {
+        if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) {
+            String username = System.getenv("MVNW_USERNAME");
+            char[] password = System.getenv("MVNW_PASSWORD").toCharArray();
+            Authenticator.setDefault(new Authenticator() {
+                @Override
+                protected PasswordAuthentication getPasswordAuthentication() {
+                    return new PasswordAuthentication(username, password);
+                }
+            });
+        }
+        URL website = new URL(urlString);
+        // try-with-resources closes both ends even when the transfer fails part-way;
+        // resources are closed in reverse order (file first, then the channel).
+        try (ReadableByteChannel rbc = Channels.newChannel(website.openStream());
+                FileOutputStream fos = new FileOutputStream(destination)) {
+            fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
+        }
+    }
+
+}
diff --git a/.mvn/wrapper/maven-wrapper.jar b/.mvn/wrapper/maven-wrapper.jar new file mode 100644 index 0000000..2cc7d4a Binary files /dev/null and b/.mvn/wrapper/maven-wrapper.jar differ diff --git a/.mvn/wrapper/maven-wrapper.properties b/.mvn/wrapper/maven-wrapper.properties new file mode 100644 index 0000000..a9f1ef8 --- /dev/null +++ b/.mvn/wrapper/maven-wrapper.properties @@ -0,0 +1,2 @@ +distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.3/apache-maven-3.8.3-bin.zip
+wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..39b06d0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,9 @@ +FROM openjdk:11-jdk-bullseye +ENV JAVA_TOOL_OPTIONS -Dfile.encoding=UTF8 +RUN apt update && apt -y upgrade && apt -y install tesseract-ocr tesseract-ocr-spa && apt clean +RUN addgroup --system --gid 1000 user +RUN adduser --system --uid 1000 --group user +RUN mkdir -p /upload /arjion2 +RUN chown user.user -R /upload /arjion2 +USER user +WORKDIR /arjion2 \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..17719c5 --- /dev/null +++ b/README.md @@ -0,0 +1,14 @@ +## Arjion2 + +### del griego `archivo` + +### Proof of Concept with [SpringBoot 2.6.1](https://start.spring.io/), [ElasticSearch](https://www.elastic.co/) and [Apache Tika](https://tika.apache.org/) + +## Docker image + + $ docker-compose build + $ docker-compose up -d + +## License + +MIT diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..f5bad7d --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,43 @@ +version: "2.3" + +services: + arjion2: + build: . 
+ image: arjion2 + hostname: arjion2 + container_name: arjion2 + restart: always + entrypoint: + - ./mvnw + - spring-boot:run + volumes: + - ./:/arjion2 + ports: + - 8080:8080 + networks: + arjion2: + + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:7.16.2-amd64 + hostname: elasticsearch + container_name: elasticsearch + restart: always + environment: + - node.name=arjion2 + - cluster.name=elasticsearch + - discovery.type=single-node + - bootstrap.memory_lock=true + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" + ulimits: + memlock: + soft: -1 + hard: -1 + volumes: + - ./esdata:/usr/share/elasticsearch/data + expose: + - 9200 + networks: + arjion2: + +networks: + arjion2: diff --git a/mvnw b/mvnw new file mode 100755 index 0000000..a16b543 --- /dev/null +++ b/mvnw @@ -0,0 +1,310 @@ +#!/bin/sh +# ---------------------------------------------------------------------------- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Maven Start Up Batch script +# +# Required ENV vars: +# ------------------ +# JAVA_HOME - location of a JDK home dir +# +# Optional ENV vars +# ----------------- +# M2_HOME - location of maven2's installed home dir +# MAVEN_OPTS - parameters passed to the Java VM when running Maven +# e.g. to debug Maven itself, use +# set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 +# MAVEN_SKIP_RC - flag to disable loading of mavenrc files +# ---------------------------------------------------------------------------- + +if [ -z "$MAVEN_SKIP_RC" ] ; then + + if [ -f /etc/mavenrc ] ; then + . /etc/mavenrc + fi + + if [ -f "$HOME/.mavenrc" ] ; then + . "$HOME/.mavenrc" + fi + +fi + +# OS specific support. $var _must_ be set to either true or false. +cygwin=false; +darwin=false; +mingw=false +case "`uname`" in + CYGWIN*) cygwin=true ;; + MINGW*) mingw=true;; + Darwin*) darwin=true + # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home + # See https://developer.apple.com/library/mac/qa/qa1170/_index.html + if [ -z "$JAVA_HOME" ]; then + if [ -x "/usr/libexec/java_home" ]; then + export JAVA_HOME="`/usr/libexec/java_home`" + else + export JAVA_HOME="/Library/Java/Home" + fi + fi + ;; +esac + +if [ -z "$JAVA_HOME" ] ; then + if [ -r /etc/gentoo-release ] ; then + JAVA_HOME=`java-config --jre-home` + fi +fi + +if [ -z "$M2_HOME" ] ; then + ## resolve links - $0 may be a link to maven's home + PRG="$0" + + # need this for relative symlinks + while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG="`dirname "$PRG"`/$link" + fi + done + + saveddir=`pwd` + + M2_HOME=`dirname "$PRG"`/.. 
+ + # make it fully qualified + M2_HOME=`cd "$M2_HOME" && pwd` + + cd "$saveddir" + # echo Using m2 at $M2_HOME +fi + +# For Cygwin, ensure paths are in UNIX format before anything is touched +if $cygwin ; then + [ -n "$M2_HOME" ] && + M2_HOME=`cygpath --unix "$M2_HOME"` + [ -n "$JAVA_HOME" ] && + JAVA_HOME=`cygpath --unix "$JAVA_HOME"` + [ -n "$CLASSPATH" ] && + CLASSPATH=`cygpath --path --unix "$CLASSPATH"` +fi + +# For Mingw, ensure paths are in UNIX format before anything is touched +if $mingw ; then + [ -n "$M2_HOME" ] && + M2_HOME="`(cd "$M2_HOME"; pwd)`" + [ -n "$JAVA_HOME" ] && + JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`" +fi + +if [ -z "$JAVA_HOME" ]; then + javaExecutable="`which javac`" + if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then + # readlink(1) is not available as standard on Solaris 10. + readLink=`which readlink` + if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then + if $darwin ; then + javaHome="`dirname \"$javaExecutable\"`" + javaExecutable="`cd \"$javaHome\" && pwd -P`/javac" + else + javaExecutable="`readlink -f \"$javaExecutable\"`" + fi + javaHome="`dirname \"$javaExecutable\"`" + javaHome=`expr "$javaHome" : '\(.*\)/bin'` + JAVA_HOME="$javaHome" + export JAVA_HOME + fi + fi +fi + +if [ -z "$JAVACMD" ] ; then + if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + else + JAVACMD="`which java`" + fi +fi + +if [ ! -x "$JAVACMD" ] ; then + echo "Error: JAVA_HOME is not defined correctly." >&2 + echo " We cannot execute $JAVACMD" >&2 + exit 1 +fi + +if [ -z "$JAVA_HOME" ] ; then + echo "Warning: JAVA_HOME environment variable is not set." 
+fi + +CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher + +# traverses directory structure from process work directory to filesystem root +# first directory with .mvn subdirectory is considered project base directory +find_maven_basedir() { + + if [ -z "$1" ] + then + echo "Path not specified to find_maven_basedir" + return 1 + fi + + basedir="$1" + wdir="$1" + while [ "$wdir" != '/' ] ; do + if [ -d "$wdir"/.mvn ] ; then + basedir=$wdir + break + fi + # workaround for JBEAP-8937 (on Solaris 10/Sparc) + if [ -d "${wdir}" ]; then + wdir=`cd "$wdir/.."; pwd` + fi + # end of workaround + done + echo "${basedir}" +} + +# concatenates all lines of a file +concat_lines() { + if [ -f "$1" ]; then + echo "$(tr -s '\n' ' ' < "$1")" + fi +} + +BASE_DIR=`find_maven_basedir "$(pwd)"` +if [ -z "$BASE_DIR" ]; then + exit 1; +fi + +########################################################################################## +# Extension to allow automatically downloading the maven-wrapper.jar from Maven-central +# This allows using the maven wrapper in projects that prohibit checking in binary data. +########################################################################################## +if [ -r "$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" ]; then + if [ "$MVNW_VERBOSE" = true ]; then + echo "Found .mvn/wrapper/maven-wrapper.jar" + fi +else + if [ "$MVNW_VERBOSE" = true ]; then + echo "Couldn't find .mvn/wrapper/maven-wrapper.jar, downloading it ..." 
+ fi + if [ -n "$MVNW_REPOURL" ]; then + jarUrl="$MVNW_REPOURL/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar" + else + jarUrl="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar" + fi + while IFS="=" read key value; do + case "$key" in (wrapperUrl) jarUrl="$value"; break ;; + esac + done < "$BASE_DIR/.mvn/wrapper/maven-wrapper.properties" + if [ "$MVNW_VERBOSE" = true ]; then + echo "Downloading from: $jarUrl" + fi + wrapperJarPath="$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" + if $cygwin; then + wrapperJarPath=`cygpath --path --windows "$wrapperJarPath"` + fi + + if command -v wget > /dev/null; then + if [ "$MVNW_VERBOSE" = true ]; then + echo "Found wget ... using wget" + fi + if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then + wget "$jarUrl" -O "$wrapperJarPath" + else + wget --http-user=$MVNW_USERNAME --http-password=$MVNW_PASSWORD "$jarUrl" -O "$wrapperJarPath" + fi + elif command -v curl > /dev/null; then + if [ "$MVNW_VERBOSE" = true ]; then + echo "Found curl ... using curl" + fi + if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then + curl -o "$wrapperJarPath" "$jarUrl" -f + else + curl --user $MVNW_USERNAME:$MVNW_PASSWORD -o "$wrapperJarPath" "$jarUrl" -f + fi + + else + if [ "$MVNW_VERBOSE" = true ]; then + echo "Falling back to using Java to download" + fi + javaClass="$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.java" + # For Cygwin, switch paths to Windows format before running javac + if $cygwin; then + javaClass=`cygpath --path --windows "$javaClass"` + fi + if [ -e "$javaClass" ]; then + if [ ! -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then + if [ "$MVNW_VERBOSE" = true ]; then + echo " - Compiling MavenWrapperDownloader.java ..." 
+ fi + # Compiling the Java class + ("$JAVA_HOME/bin/javac" "$javaClass") + fi + if [ -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then + # Running the downloader + if [ "$MVNW_VERBOSE" = true ]; then + echo " - Running MavenWrapperDownloader.java ..." + fi + ("$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader "$MAVEN_PROJECTBASEDIR") + fi + fi + fi +fi +########################################################################################## +# End of extension +########################################################################################## + +export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"} +if [ "$MVNW_VERBOSE" = true ]; then + echo $MAVEN_PROJECTBASEDIR +fi +MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS" + +# For Cygwin, switch paths to Windows format before running java +if $cygwin; then + [ -n "$M2_HOME" ] && + M2_HOME=`cygpath --path --windows "$M2_HOME"` + [ -n "$JAVA_HOME" ] && + JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"` + [ -n "$CLASSPATH" ] && + CLASSPATH=`cygpath --path --windows "$CLASSPATH"` + [ -n "$MAVEN_PROJECTBASEDIR" ] && + MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"` +fi + +# Provide a "standardized" way to retrieve the CLI args that will +# work with both Windows and non-Windows executions. 
+MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $@" +export MAVEN_CMD_LINE_ARGS + +WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain + +exec "$JAVACMD" \ + $MAVEN_OPTS \ + -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \ + "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ + ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@" diff --git a/mvnw.cmd b/mvnw.cmd new file mode 100644 index 0000000..c8d4337 --- /dev/null +++ b/mvnw.cmd @@ -0,0 +1,182 @@ +@REM ---------------------------------------------------------------------------- +@REM Licensed to the Apache Software Foundation (ASF) under one +@REM or more contributor license agreements. See the NOTICE file +@REM distributed with this work for additional information +@REM regarding copyright ownership. The ASF licenses this file +@REM to you under the Apache License, Version 2.0 (the +@REM "License"); you may not use this file except in compliance +@REM with the License. You may obtain a copy of the License at +@REM +@REM https://www.apache.org/licenses/LICENSE-2.0 +@REM +@REM Unless required by applicable law or agreed to in writing, +@REM software distributed under the License is distributed on an +@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@REM KIND, either express or implied. See the License for the +@REM specific language governing permissions and limitations +@REM under the License. 
+@REM ---------------------------------------------------------------------------- + +@REM ---------------------------------------------------------------------------- +@REM Maven Start Up Batch script +@REM +@REM Required ENV vars: +@REM JAVA_HOME - location of a JDK home dir +@REM +@REM Optional ENV vars +@REM M2_HOME - location of maven2's installed home dir +@REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands +@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending +@REM MAVEN_OPTS - parameters passed to the Java VM when running Maven +@REM e.g. to debug Maven itself, use +@REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 +@REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files +@REM ---------------------------------------------------------------------------- + +@REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on' +@echo off +@REM set title of command window +title %0 +@REM enable echoing by setting MAVEN_BATCH_ECHO to 'on' +@if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO% + +@REM set %HOME% to equivalent of $HOME +if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%") + +@REM Execute a user defined script before this one +if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre +@REM check for pre script, once with legacy .bat ending and once with .cmd ending +if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat" +if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd" +:skipRcPre + +@setlocal + +set ERROR_CODE=0 + +@REM To isolate internal variables from possible post scripts, we use another setlocal +@setlocal + +@REM ==== START VALIDATION ==== +if not "%JAVA_HOME%" == "" goto OkJHome + +echo. +echo Error: JAVA_HOME not found in your environment. >&2 +echo Please set the JAVA_HOME variable in your environment to match the >&2 +echo location of your Java installation. >&2 +echo. 
+goto error + +:OkJHome +if exist "%JAVA_HOME%\bin\java.exe" goto init + +echo. +echo Error: JAVA_HOME is set to an invalid directory. >&2 +echo JAVA_HOME = "%JAVA_HOME%" >&2 +echo Please set the JAVA_HOME variable in your environment to match the >&2 +echo location of your Java installation. >&2 +echo. +goto error + +@REM ==== END VALIDATION ==== + +:init + +@REM Find the project base dir, i.e. the directory that contains the folder ".mvn". +@REM Fallback to current working directory if not found. + +set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR% +IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir + +set EXEC_DIR=%CD% +set WDIR=%EXEC_DIR% +:findBaseDir +IF EXIST "%WDIR%"\.mvn goto baseDirFound +cd .. +IF "%WDIR%"=="%CD%" goto baseDirNotFound +set WDIR=%CD% +goto findBaseDir + +:baseDirFound +set MAVEN_PROJECTBASEDIR=%WDIR% +cd "%EXEC_DIR%" +goto endDetectBaseDir + +:baseDirNotFound +set MAVEN_PROJECTBASEDIR=%EXEC_DIR% +cd "%EXEC_DIR%" + +:endDetectBaseDir + +IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig + +@setlocal EnableExtensions EnableDelayedExpansion +for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a +@endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS% + +:endReadAdditionalConfig + +SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe" +set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar" +set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain + +set DOWNLOAD_URL="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar" + +FOR /F "tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( + IF "%%A"=="wrapperUrl" SET DOWNLOAD_URL=%%B +) + +@REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central +@REM This allows using the maven wrapper in projects that prohibit checking in binary data. 
+if exist %WRAPPER_JAR% ( + if "%MVNW_VERBOSE%" == "true" ( + echo Found %WRAPPER_JAR% + ) +) else ( + if not "%MVNW_REPOURL%" == "" ( + SET DOWNLOAD_URL="%MVNW_REPOURL%/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar" + ) + if "%MVNW_VERBOSE%" == "true" ( + echo Couldn't find %WRAPPER_JAR%, downloading it ... + echo Downloading from: %DOWNLOAD_URL% + ) + + powershell -Command "&{"^ + "$webclient = new-object System.Net.WebClient;"^ + "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^ + "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^ + "}"^ + "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%DOWNLOAD_URL%', '%WRAPPER_JAR%')"^ + "}" + if "%MVNW_VERBOSE%" == "true" ( + echo Finished downloading %WRAPPER_JAR% + ) +) +@REM End of extension + +@REM Provide a "standardized" way to retrieve the CLI args that will +@REM work with both Windows and non-Windows executions. 
+set MAVEN_CMD_LINE_ARGS=%* + +%MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* +if ERRORLEVEL 1 goto error +goto end + +:error +set ERROR_CODE=1 + +:end +@endlocal & set ERROR_CODE=%ERROR_CODE% + +if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost +@REM check for post script, once with legacy .bat ending and once with .cmd ending +if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat" +if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd" +:skipRcPost + +@REM pause the script if MAVEN_BATCH_PAUSE is set to 'on' +if "%MAVEN_BATCH_PAUSE%" == "on" pause + +if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE% + +exit /B %ERROR_CODE% diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..bfe8e44 --- /dev/null +++ b/pom.xml @@ -0,0 +1,115 @@ + + + 4.0.0 + + org.springframework.boot + spring-boot-starter-parent + 2.6.1 + + + com.manalejandro + arjion2 + 1.0.0 + war + arjion2 + Demo project for Arjion with Spring Boot + + 11 + + + + org.springframework.boot + spring-boot-starter-data-elasticsearch + + + org.springframework.boot + spring-boot-starter-thymeleaf + + + org.springframework.boot + spring-boot-starter-web + + + org.springframework.boot + spring-boot-starter-webflux + + + org.tensorflow + tensorflow-core-platform + 0.4.0 + + + org.apache.tika + tika-core + 2.1.0 + + + org.apache.tika + tika-parsers-standard-package + 2.1.0 + + + xml-apis + xml-apis + + + + + com.optimaize.languagedetector + language-detector + 0.6 + + + com.github.jai-imageio + jai-imageio-jpeg2000 + 1.4.0 + + + com.levigo.jbig2 + levigo-jbig2-imageio + 2.0 + + + com.github.jai-imageio + jai-imageio-core + 1.4.0 + + + org.webjars + bootstrap + 5.1.3 + + + org.webjars + jquery + 3.6.0 + + + + org.springframework.boot + spring-boot-starter-tomcat + provided + + + org.springframework.boot + spring-boot-starter-test + test + 
+ + io.projectreactor + reactor-test + test + + + + + arjion2 + + + org.springframework.boot + spring-boot-maven-plugin + + + + + \ No newline at end of file
diff --git a/src/main/java/com/manalejandro/arjion2/Arjion2Application.java b/src/main/java/com/manalejandro/arjion2/Arjion2Application.java
new file mode 100644
index 0000000..b696a49
--- /dev/null
+++ b/src/main/java/com/manalejandro/arjion2/Arjion2Application.java
@@ -0,0 +1,48 @@
+package com.manalejandro.arjion2;
+
+import java.util.concurrent.Executor;
+
+import org.springframework.aop.interceptor.AsyncUncaughtExceptionHandler;
+import org.springframework.aop.interceptor.SimpleAsyncUncaughtExceptionHandler;
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.data.elasticsearch.repository.config.EnableReactiveElasticsearchRepositories;
+import org.springframework.scheduling.annotation.AsyncConfigurer;
+import org.springframework.scheduling.annotation.EnableAsync;
+import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
+
+/**
+ * Spring Boot entry point; as an {@link AsyncConfigurer} it also supplies the executor and the
+ * uncaught-exception handler for asynchronous execution.
+ */
+@SpringBootApplication
+@EnableAsync
+@EnableReactiveElasticsearchRepositories
+public class Arjion2Application implements AsyncConfigurer {
+
+    /** Starts the Spring application with the given command-line arguments. */
+    public static void main(String[] args) {
+        SpringApplication.run(Arjion2Application.class, args);
+    }
+
+    /**
+     * Builds and initializes the thread pool handed back as the async executor:
+     * 7 core threads, at most 42 threads, queue capacity 11, thread names prefixed "Executor-".
+     */
+    @Override
+    public Executor getAsyncExecutor() {
+        final ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
+        pool.setThreadNamePrefix("Executor-");
+        pool.setCorePoolSize(7);
+        pool.setMaxPoolSize(42);
+        pool.setQueueCapacity(11);
+        pool.initialize();
+        return pool;
+    }
+
+    /** Returns a new {@link SimpleAsyncUncaughtExceptionHandler}. */
+    @Override
+    public AsyncUncaughtExceptionHandler getAsyncUncaughtExceptionHandler() {
+        return new SimpleAsyncUncaughtExceptionHandler();
+    }
+}
diff --git a/src/main/java/com/manalejandro/arjion2/ESConfig.java b/src/main/java/com/manalejandro/arjion2/ESConfig.java new file mode 100644 index 0000000..21dcae8 --- /dev/null +++
b/src/main/java/com/manalejandro/arjion2/ESConfig.java
@@ -0,0 +1,49 @@
+package com.manalejandro.arjion2;
+
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.data.elasticsearch.client.ClientConfiguration;
+import org.springframework.data.elasticsearch.client.reactive.ReactiveElasticsearchClient;
+import org.springframework.data.elasticsearch.client.reactive.ReactiveRestClients;
+import org.springframework.data.elasticsearch.config.AbstractReactiveElasticsearchConfiguration;
+
+/**
+ * Elasticsearch wiring: builds the reactive client from the configured host and port and
+ * exposes the configured index name and document type as String beans.
+ */
+@Configuration
+public class ESConfig extends AbstractReactiveElasticsearchConfiguration {
+
+    @Value("${elasticsearch.host}")
+    private String esHost;
+
+    @Value("${elasticsearch.port}")
+    private int esPort;
+
+    @Value("${arjion.indexName}")
+    private String indexName;
+
+    @Value("${arjion.documentType}")
+    private String documentType;
+
+    /** Creates the reactive client connected to the configured {@code host:port}. */
+    @Override
+    @Bean
+    public ReactiveElasticsearchClient reactiveElasticsearchClient() {
+        final String hostAndPort = esHost + ":" + esPort;
+        return ReactiveRestClients.create(ClientConfiguration.builder().connectedTo(hostAndPort).build());
+    }
+
+    /** Exposes the value of {@code arjion.indexName} as a bean. */
+    @Bean
+    public String indexName() {
+        return indexName;
+    }
+
+    /** Exposes the value of {@code arjion.documentType} as a bean. */
+    @Bean
+    public String documentType() {
+        return documentType;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/manalejandro/arjion2/ServletInitializer.java b/src/main/java/com/manalejandro/arjion2/ServletInitializer.java
new file mode 100644
index 0000000..7425c74
--- /dev/null
+++ b/src/main/java/com/manalejandro/arjion2/ServletInitializer.java
@@ -0,0 +1,16 @@
+package com.manalejandro.arjion2;
+
+import org.springframework.boot.builder.SpringApplicationBuilder;
+import org.springframework.boot.web.servlet.support.SpringBootServletInitializer;
+
+/**
+ * Registers {@link Arjion2Application} as the application source when the application is
+ * started through {@link SpringBootServletInitializer}.
+ */
+public class ServletInitializer extends SpringBootServletInitializer {
+
+    @Override
+    protected SpringApplicationBuilder configure(SpringApplicationBuilder application) {
+        return application.sources(Arjion2Application.class);
+    }
+}
diff --git a/src/main/java/com/manalejandro/arjion2/WebConfig.java b/src/main/java/com/manalejandro/arjion2/WebConfig.java
new file mode 100644
index 0000000..d575619
--- /dev/null
+++ b/src/main/java/com/manalejandro/arjion2/WebConfig.java
@@ -0,0 +1,34 @@
+package com.manalejandro.arjion2;
+
+import org.springframework.context.annotation.Configuration;
+import org.springframework.web.servlet.config.annotation.EnableWebMvc;
+import org.springframework.web.servlet.config.annotation.ResourceHandlerRegistry;
+import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
+
+/**
+ * MVC configuration that maps static-resource URL patterns to their locations.
+ */
+@Configuration
+@EnableWebMvc
+public class WebConfig implements WebMvcConfigurer {
+
+    /** Locations registered for the catch-all resource pattern. */
+    private static final String[] CLASSPATH_RESOURCE_LOCATIONS = {
+            "classpath:/META-INF/resources/",
+            "classpath:/resources/",
+            "classpath:/static/",
+            "classpath:/public/"
+    };
+
+    /**
+     * Registers two handlers: the webjars pattern served from {@code /webjars/}, and the
+     * catch-all pattern served from {@link #CLASSPATH_RESOURCE_LOCATIONS}.
+     */
+    @Override
+    public void addResourceHandlers(ResourceHandlerRegistry registry) {
+        registry.addResourceHandler("/webjars/**")
+                .addResourceLocations("/webjars/");
+        registry.addResourceHandler("/**")
+                .addResourceLocations(CLASSPATH_RESOURCE_LOCATIONS);
+    }
+}
diff --git a/src/main/java/com/manalejandro/arjion2/controllers/MainController.java b/src/main/java/com/manalejandro/arjion2/controllers/MainController.java new file mode 100644 index 0000000..1336d22 --- /dev/null +++ b/src/main/java/com/manalejandro/arjion2/controllers/MainController.java @@ -0,0 +1,179 @@ +package com.manalejandro.arjion2.controllers; + +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URLConnection; +import java.net.URLEncoder; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.text.Normalizer; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.manalejandro.arjion2.model.Archivo;
+import com.manalejandro.arjion2.model.Documento; +import com.manalejandro.arjion2.services.MainService; +import com.manalejandro.arjion2.vo.DetailVO; +import com.manalejandro.arjion2.vo.DocumentoVO; +import com.optimaize.langdetect.LanguageDetector; +import com.optimaize.langdetect.LanguageDetectorBuilder; +import com.optimaize.langdetect.ngram.NgramExtractors; +import com.optimaize.langdetect.profiles.LanguageProfile; +import com.optimaize.langdetect.profiles.LanguageProfileReader; +import com.optimaize.langdetect.text.CommonTextObjectFactories; +import com.optimaize.langdetect.text.TextObjectFactory; + +import org.apache.tika.config.TikaConfig; +import org.apache.tika.exception.TikaException; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.ocr.TesseractOCRConfig; +import org.apache.tika.parser.pdf.PDFParserConfig; +import org.apache.tika.sax.BodyContentHandler; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.core.io.ByteArrayResource; +import org.springframework.http.HttpHeaders; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.stereotype.Controller; +import org.springframework.ui.Model; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.multipart.MultipartFile; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +@Controller +public class MainController { + + private final MainService mainService; + + @Value("${arjion.uploadpath}") + private 
String uploadpath; + + @Value("${arjion.tesseractpath}") + private String tesseractpath; + + @Value("${arjion.tesseractdatapath}") + private String tesseractdatapath; + + @Autowired + public MainController(MainService mainService) { + this.mainService = mainService; + } + + @RequestMapping(path = "/") + public String indexPage(final Model model) { + DocumentoVO documentoVO = new DocumentoVO(); + documentoVO.setCount(mainService.count()); + documentoVO.setDocumentos(mainService.findAllDocumentos()); + model.addAttribute("documentoVO", documentoVO); + return "index"; + } + + @GetMapping(path = "/upload") + public String upload() { + return "redirect:/"; + } + + @PostMapping(path = "/upload") + public String uploadPage(@RequestParam(value = "archivos", required = true) MultipartFile[] archivos, + final Model model) throws IOException, TikaException, SAXException { + DocumentoVO documentoVO = new DocumentoVO(); + documentoVO.setCount(mainService.count()); + documentoVO.setDocumentos(mainService.findAllDocumentos()); + if (archivos.length > 0) { + // Recupera la configuración de Tika + TikaConfig tikaConfig = TikaConfig.getDefaultConfig(); + // Itera los archivos recibidos + for (int i = 0; i < archivos.length; i++) { + byte[] bytes = archivos[i].getBytes(); + // Normaliza el título de los archivos + String normalized = Normalizer.normalize(archivos[i].getOriginalFilename(), Normalizer.Form.NFD), + filename = normalized.replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); + Path path = Paths.get(uploadpath + filename); + // Instancias necesarias + Metadata metadata = new Metadata(); + Parser parser = new AutoDetectParser(tikaConfig); + PDFParserConfig pdfConfig = new PDFParserConfig(); + TesseractOCRConfig tesseractConfig = new TesseractOCRConfig(); + tesseractConfig.addOtherTesseractConfig("tesseractPath", tesseractpath); + tesseractConfig.addOtherTesseractConfig("tessdataPath", tesseractdatapath); + tesseractConfig.setLanguage("spa+eng"); + 
pdfConfig.setExtractInlineImages(true); + ParseContext parseContext = new ParseContext(); + parseContext.set(TesseractOCRConfig.class, tesseractConfig); + parseContext.set(PDFParserConfig.class, pdfConfig); + // Usa -1 para no tener límite de 100000 chars + ContentHandler handler = new BodyContentHandler(-1); + // Castea los bytes al Stream de Tika + TikaInputStream stream = TikaInputStream.get(bytes); + // Parsea el contenido + parser.parse(stream, handler, metadata, parseContext); + // Identifica el idioma del archivo + List languageProfiles = new LanguageProfileReader().readAllBuiltIn(); + LanguageDetector detector = LanguageDetectorBuilder.create(NgramExtractors.standard()) + .withProfiles(languageProfiles).build(); + TextObjectFactory textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText(); + String language = detector.detect(textObjectFactory.forText(handler.toString())).isPresent() + ? detector.detect(textObjectFactory.forText(handler.toString())).get().getLanguage() + : ""; + // Almacena en elasticsearch + String[] names = metadata.names(); + Map meta = new HashMap(); + for (int j = 0; j < names.length; j++) { + for (int k = 0; k < metadata.getValues(names[j]).length; k++) { + meta.put(names[j], metadata.getValues(names[j])[k]); + } + } + if (!mainService.save(new Documento(filename, Long.valueOf(archivos[i].getSize()).intValue(), meta, + handler.toString(), language))) { + return "exists"; + } else { + // Guarda el archivo en el directorio configurado en las properties + Files.write(path, bytes); + } + // Añade los parámetros al VO para mostrar en la vista + documentoVO.getArchivos().add(new Archivo(filename, Long.valueOf(archivos[i].getSize()).intValue(), + meta, handler.toString(), language)); + } + } + model.addAttribute("documentoVO", documentoVO); + return "index"; + } + + @GetMapping(path = "/detail") + public String detail(final Model model, @RequestParam(value = "nombre", required = true) String nombre) + throws IOException { + 
DetailVO detailVO = new DetailVO(); + Documento doc = mainService.findOne(nombre); + detailVO.setArchivo(new Archivo(doc.getNombre(), doc.getTamano(), doc.getMetadata(), doc.getContenido(), + doc.getLenguaje())); + model.addAttribute("detailVO", detailVO); + return "detail"; + } + + @GetMapping(path = "/download") + public ResponseEntity download( + @RequestParam(value = "filename", required = true) String filename) + throws IOException, MalformedURLException { + File file = new File(uploadpath + filename); + Path path = Paths.get(file.getAbsolutePath()); + ByteArrayResource resource = new ByteArrayResource(Files.readAllBytes(path)); + file.toURI().toURL().openConnection(); + String type = URLConnection.guessContentTypeFromName(filename); + HttpHeaders responseHeaders = new HttpHeaders(); + responseHeaders.add("Content-Disposition", "attachment; filename=" + URLEncoder.encode(filename, "UTF-8")); + return ResponseEntity.ok().contentType(MediaType.parseMediaType(type)).contentLength(file.length()) + .headers(responseHeaders).body(resource); + } +} \ No newline at end of file diff --git a/src/main/java/com/manalejandro/arjion2/forms/DocumentoForm.java b/src/main/java/com/manalejandro/arjion2/forms/DocumentoForm.java new file mode 100644 index 0000000..92934cb --- /dev/null +++ b/src/main/java/com/manalejandro/arjion2/forms/DocumentoForm.java @@ -0,0 +1,5 @@ +package com.manalejandro.arjion2.forms; + +public class DocumentoForm { + +} diff --git a/src/main/java/com/manalejandro/arjion2/model/Archivo.java b/src/main/java/com/manalejandro/arjion2/model/Archivo.java new file mode 100644 index 0000000..40846a4 --- /dev/null +++ b/src/main/java/com/manalejandro/arjion2/model/Archivo.java @@ -0,0 +1,90 @@ +package com.manalejandro.arjion2.model; + +import java.util.Map; + +public class Archivo { + + private String nombre; + private Integer tamano; + private Map metadata; + private String contenido; + private String lenguaje; + + public Archivo(String nombre, Integer tamano, 
Map meta, String contenido, String lenguaje) { + this.nombre = nombre; + this.tamano = tamano; + this.metadata = meta; + this.contenido = contenido; + this.lenguaje = lenguaje; + } + + /** + * @return the nombre + */ + public String getNombre() { + return nombre; + } + + /** + * @return the tamano + */ + public Integer getTamano() { + return tamano; + } + + /** + * @return the metadata + */ + public Map getMetadata() { + return metadata; + } + + /** + * @return the contenido + */ + public String getContenido() { + return contenido; + } + + /** + * @param nombre the nombre to set + */ + public void setNombre(String nombre) { + this.nombre = nombre; + } + + /** + * @param tamano the tamano to set + */ + public void setTamano(Integer tamano) { + this.tamano = tamano; + } + + /** + * @param metadata the metadata to set + */ + public void setMetadata(Map metadata) { + this.metadata = metadata; + } + + /** + * @param contenido the contenido to set + */ + public void setContenido(String contenido) { + this.contenido = contenido; + } + + /** + * @return the lenguaje + */ + public String getLenguaje() { + return lenguaje; + } + + /** + * @param lenguaje the lenguaje to set + */ + public void setLenguaje(String lenguaje) { + this.lenguaje = lenguaje; + } +} \ No newline at end of file diff --git a/src/main/java/com/manalejandro/arjion2/model/Consulta.java b/src/main/java/com/manalejandro/arjion2/model/Consulta.java new file mode 100644 index 0000000..78b89b9 --- /dev/null +++ b/src/main/java/com/manalejandro/arjion2/model/Consulta.java @@ -0,0 +1,52 @@ +package com.manalejandro.arjion2.model; + +import java.util.ArrayList; +import java.util.List; + +public class Consulta { + private List documentos = new ArrayList(); + private String suggest; + private List autocomplete = new ArrayList(); + + /** + * @return the documentos + */ + public List getDocumentos() { + return documentos; + } + + /** + * @return the suggest + */ + public String getSuggest() { + return suggest; + } + 
+ /** + * @return the autocomplete + */ + public List getAutocomplete() { + return autocomplete; + } + + /** + * @param documentos the documentos to set + */ + public void setDocumentos(List documentos) { + this.documentos = documentos; + } + + /** + * @param suggest the suggest to set + */ + public void setSuggest(String suggest) { + this.suggest = suggest; + } + + /** + * @param autocomplete the autocomplete to set + */ + public void setAutocomplete(List autocomplete) { + this.autocomplete = autocomplete; + } +} \ No newline at end of file diff --git a/src/main/java/com/manalejandro/arjion2/model/Documento.java b/src/main/java/com/manalejandro/arjion2/model/Documento.java new file mode 100644 index 0000000..1f3a272 --- /dev/null +++ b/src/main/java/com/manalejandro/arjion2/model/Documento.java @@ -0,0 +1,110 @@ +package com.manalejandro.arjion2.model; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Map; + +import org.springframework.data.annotation.Id; +import org.springframework.data.elasticsearch.annotations.Document; +import org.springframework.data.elasticsearch.annotations.Mapping; +import org.springframework.data.elasticsearch.annotations.Setting; +import org.springframework.data.elasticsearch.annotations.WriteTypeHint; + +@Document(indexName = "#{@indexName}", writeTypeHint = WriteTypeHint.FALSE) +@Setting(settingPath = "/elasticsearch/settings.json", indexStoreType = "#{@documentType}") +@Mapping(mappingPath = "/elasticsearch/mapping.json") +public class Documento { + @Id + public String nombre; + public Integer tamano; + public Map metadata; + public String contenido; + public String lenguaje; + + @JsonCreator + public Documento(@JsonProperty("nombre") String nombre, @JsonProperty("tamano") Integer tamano, + @JsonProperty("metadata") Map metadata, @JsonProperty("contenido") String contenido, + @JsonProperty("lenguaje") String lenguaje) { + this.nombre = nombre; + 
this.tamano = tamano; + this.metadata = metadata; + this.contenido = contenido; + this.lenguaje = lenguaje; + } + + /** + * @return the nombre + */ + @JsonProperty("nombre") + public String getNombre() { + return nombre; + } + + /** + * @param nombre the nombre to set + */ + public void setNombre(String nombre) { + this.nombre = nombre; + } + + /** + * @return the tamano + */ + @JsonProperty("tamano") + public Integer getTamano() { + return tamano; + } + + /** + * @param tamano the tamano to set + */ + public void setTamano(Integer tamano) { + this.tamano = tamano; + } + + /** + * @return the metadata + */ + @JsonProperty("metadata") + public Map getMetadata() { + return metadata; + } + + /** + * @param metadata the metadata to set + */ + public void setMetadata(Map metadata) { + this.metadata = metadata; + } + + /** + * @return the contenido + */ + @JsonProperty("contenido") + public String getContenido() { + return contenido; + } + + /** + * @param contenido the contenido to set + */ + public void setContenido(String contenido) { + this.contenido = contenido; + } + + /** + * @return the lenguaje + */ + @JsonProperty("lenguaje") + public String getLenguaje() { + return lenguaje; + } + + /** + * @param lenguaje the lenguaje to set + */ + public void setLenguaje(String lenguaje) { + this.lenguaje = lenguaje; + } +} diff --git a/src/main/java/com/manalejandro/arjion2/repositories/MainRepository.java b/src/main/java/com/manalejandro/arjion2/repositories/MainRepository.java new file mode 100644 index 0000000..744528e --- /dev/null +++ b/src/main/java/com/manalejandro/arjion2/repositories/MainRepository.java @@ -0,0 +1,11 @@ +package com.manalejandro.arjion2.repositories; + +import org.springframework.data.elasticsearch.repository.ReactiveElasticsearchRepository; +import org.springframework.stereotype.Repository; + +import com.manalejandro.arjion2.model.Documento; + +@Repository +public interface MainRepository extends ReactiveElasticsearchRepository { + +} diff --git 
a/src/main/java/com/manalejandro/arjion2/services/MainService.java b/src/main/java/com/manalejandro/arjion2/services/MainService.java new file mode 100644 index 0000000..6f8c4cc --- /dev/null +++ b/src/main/java/com/manalejandro/arjion2/services/MainService.java @@ -0,0 +1,23 @@ +package com.manalejandro.arjion2.services; + +import java.util.List; + +import org.springframework.data.domain.Pageable; + +import com.manalejandro.arjion2.model.Consulta; +import com.manalejandro.arjion2.model.Documento; + +public interface MainService { + + public boolean save(Documento doc); + + public long count(); + + public List findAllDocumentos(); + + public Documento findOne(String nombre); + + public Integer maxTamano(); + + public Consulta search(String busqueda, String[] tipo, Integer tamano, Pageable pageable); +} diff --git a/src/main/java/com/manalejandro/arjion2/services/MainServiceImpl.java b/src/main/java/com/manalejandro/arjion2/services/MainServiceImpl.java new file mode 100644 index 0000000..3282af6 --- /dev/null +++ b/src/main/java/com/manalejandro/arjion2/services/MainServiceImpl.java @@ -0,0 +1,127 @@ +package com.manalejandro.arjion2.services; + +import java.util.ArrayList; +import java.util.List; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.manalejandro.arjion2.model.Consulta; +import com.manalejandro.arjion2.model.Documento; +import com.manalejandro.arjion2.repositories.MainRepository; + +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.aggregations.AggregationBuilder; +import org.elasticsearch.search.aggregations.AggregationBuilders; +import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry; +import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option; +import 
org.elasticsearch.search.suggest.SuggestBuilder; +import org.elasticsearch.search.suggest.SuggestBuilders; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.ApplicationContext; +import org.springframework.data.domain.Pageable; +import org.springframework.data.domain.Sort; +import org.springframework.stereotype.Service; + +@Service +public class MainServiceImpl implements MainService { + + private final ApplicationContext appContext; + private final MainRepository mainRepository; + + @Value("#{@indexName}") + private String index; + @Value("#{@documentType}") + private String document; + + @Autowired + public MainServiceImpl(MainRepository mainRepository, ApplicationContext appContext) { + this.mainRepository = mainRepository; + this.appContext = appContext; + } + + @Override + public boolean save(Documento doc) { + if (!mainRepository.existsById(doc.nombre).block()) { + if (mainRepository.save(doc).block() != null) + return true; + else + return false; + } else + return false; + } + + @Override + public long count() { + return mainRepository.count().block(); + } + + @Override + public List findAllDocumentos() { + return mainRepository.findAll().collectList().block(); + } + + @Override + public Documento findOne(String nombre) { + return mainRepository.findById(nombre).block(); + } + + @Override + public Integer maxTamano() { + return mainRepository.findAll(Sort.by(Sort.Direction.DESC, "tamano")).blockFirst().getTamano(); + } + + @Override + public Consulta search(String busqueda, String[] tipo, Integer tamano, Pageable pageable) { + Client client = (Client) appContext.getBean("client"); + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); + if (busqueda != null && !"null".equals(busqueda) && !busqueda.isEmpty()) { + boolQueryBuilder.must(QueryBuilders.matchQuery("nombre", busqueda)); + 
boolQueryBuilder.should(QueryBuilders.matchQuery("contenido", busqueda)); + } + if (tipo != null && tipo.length > 0) + boolQueryBuilder.filter(QueryBuilders.termsQuery("tipo", tipo)); + if (tamano != null && tamano >= 0) + boolQueryBuilder.must(QueryBuilders.rangeQuery("tamano").to(tamano).includeUpper(true)); + AggregationBuilder aggregation = AggregationBuilders.terms("by_xarchivo").field("x_archivo").size(10000); + SuggestBuilder suggest = new SuggestBuilder() + .addSuggestion("suggest", SuggestBuilders.completionSuggestion("nombre").text(busqueda).size(10)) + .addSuggestion("phrase", SuggestBuilders.phraseSuggestion("nombre").text(busqueda).size(1) + .realWordErrorLikelihood((float) 0.95).maxErrors((float) 0.5).gramSize(2)); + System.out.println(boolQueryBuilder); + SearchResponse response = client.prepareSearch(index).setQuery(boolQueryBuilder).addAggregation(aggregation) + .suggest(suggest).setSize(pageable.getPageSize()).setFrom(pageable.getPageNumber()).execute() + .actionGet(); + Consulta consulta = new Consulta(); + consulta.setSuggest(response.getSuggest().getSuggestion("phrase").getEntries().get(0).getOptions().size() > 0 + ? 
response.getSuggest().getSuggestion("phrase").getEntries().get(0).getOptions().get(0).getText() + .string() + : ""); + for (Entry entry : response.getSuggest().getSuggestion("suggest").getEntries()) { + entry.getOptions().forEach(option -> { + String suggestText = option.getText().string().trim(), + autocompleteClean = busqueda.replaceAll("[^\\p{Alnum}\\p{IsAlphabetic} ]", ""); + for (String item : autocompleteClean.split(" ")) { + if (item.length() > 0) { + consulta.getAutocomplete().add( + suggestText.replaceAll("(?i)((?!<)" + item + "(?![^<>]*>))", "$1")); + } + } + }); + } + ObjectMapper mapper = new ObjectMapper(); + List documentos = new ArrayList(); + if (response.getHits().getHits().length > 0) { + try { + documentos = mapper.reader().readValue(response.getHits().getHits().toString()); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } + consulta.setDocumentos(documentos); + return consulta; + } +} diff --git a/src/main/java/com/manalejandro/arjion2/vo/DetailVO.java b/src/main/java/com/manalejandro/arjion2/vo/DetailVO.java new file mode 100644 index 0000000..58ca1c4 --- /dev/null +++ b/src/main/java/com/manalejandro/arjion2/vo/DetailVO.java @@ -0,0 +1,22 @@ +package com.manalejandro.arjion2.vo; + +import com.manalejandro.arjion2.model.Archivo; + +public class DetailVO { + + private Archivo archivo; + + /** + * @return the archivo + */ + public Archivo getArchivo() { + return archivo; + } + + /** + * @param archivo the archivo to set + */ + public void setArchivo(Archivo archivo) { + this.archivo = archivo; + } +} \ No newline at end of file diff --git a/src/main/java/com/manalejandro/arjion2/vo/DocumentoVO.java b/src/main/java/com/manalejandro/arjion2/vo/DocumentoVO.java new file mode 100644 index 0000000..d2cd2ae --- /dev/null +++ b/src/main/java/com/manalejandro/arjion2/vo/DocumentoVO.java @@ -0,0 +1,50 @@ +package com.manalejandro.arjion2.vo; + +import java.util.ArrayList; +import java.util.List; + +import 
com.manalejandro.arjion2.model.Archivo; +import com.manalejandro.arjion2.model.Documento; + +public class DocumentoVO { + + private List archivos = new ArrayList(); + private long count; + private List documentos = new ArrayList(); + + /** + * @return the archivos + */ + public List getArchivos() { + return archivos; + } + + /** + * @param archivos the archivos to set + */ + public void setArchivos(List archivos) { + this.archivos = archivos; + } + + public long getCount() { + return count; + } + + public void setCount(long count) { + this.count = count; + } + + /** + * @return the documentos + */ + public List getDocumentos() { + return documentos; + } + + /** + * @param documentos the documentos to set + */ + public void setDocumentos(List documentos) { + this.documentos = documentos; + } +} diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties new file mode 100644 index 0000000..9535c8b --- /dev/null +++ b/src/main/resources/application.properties @@ -0,0 +1,19 @@ +server.servlet.context-path=/arjion2 +server.port=8080 +elasticsearch.clustername=elasticsearch +elasticsearch.host=elasticsearch +elasticsearch.port=9200 +elasticsearch.nodename=arjion2 +arjion.indexName=documentos +arjion.documentType=documento +arjion.uploadpath=/upload/ +arjion.tesseractpath=/usr/bin +arjion.tesseractdatapath=/usr/share/tesseract-ocr +spring.main.allow-bean-definition-overriding=true +spring.thymeleaf.enabled=true +spring.thymeleaf.prefix=classpath:/templates/ +spring.thymeleaf.suffix=.html +spring.thymeleaf.cache=false +spring.servlet.multipart.max-file-size=20MB +spring.servlet.multipart.max-request-size=100MB +spring.codec.max-in-memory-size=20MB \ No newline at end of file diff --git a/src/main/resources/elasticsearch/mapping.json b/src/main/resources/elasticsearch/mapping.json new file mode 100644 index 0000000..0ec927a --- /dev/null +++ b/src/main/resources/elasticsearch/mapping.json @@ -0,0 +1,30 @@ +{ + "properties": { + 
"documento": { + "properties": { + "@timestamp": { + "type": "date", + "format": "strict_date_optional_time||epoch_millis" + }, + "@version": { + "type": "keyword" + }, + "nombre": { + "type": "text" + }, + "tamano": { + "type": "long" + }, + "metadata": { + "type": "object" + }, + "contenido": { + "type": "text" + }, + "lenguaje": { + "type": "keyword" + } + } + } + } +} \ No newline at end of file diff --git a/src/main/resources/elasticsearch/settings.json b/src/main/resources/elasticsearch/settings.json new file mode 100644 index 0000000..2536b8a --- /dev/null +++ b/src/main/resources/elasticsearch/settings.json @@ -0,0 +1,6 @@ +{ + "index": { + "number_of_shards": "1", + "number_of_replicas": "1" + } +} \ No newline at end of file diff --git a/src/main/resources/static/css/main.css b/src/main/resources/static/css/main.css new file mode 100644 index 0000000..99e9a97 --- /dev/null +++ b/src/main/resources/static/css/main.css @@ -0,0 +1,4 @@ +hr { + width: 100%; + text-align: center; +} \ No newline at end of file diff --git a/src/main/resources/static/js/main.js b/src/main/resources/static/js/main.js new file mode 100644 index 0000000..e69de29 diff --git a/src/main/resources/templates/detail.html b/src/main/resources/templates/detail.html new file mode 100644 index 0000000..dfb37dc --- /dev/null +++ b/src/main/resources/templates/detail.html @@ -0,0 +1,37 @@ + + +
+ +
+ +

Arjion2

+
+

[[${detailVO.archivo.nombre}]]

+
+
+
+
+
+ Tamaño [[${detailVO.archivo.tamano}]] + bytes - + Lenguaje [[${detailVO.archivo.lenguaje}]] +
Metadatos +
    +
  • :
  • +
+ Contenido +
[[${detailVO.archivo.contenido}]]
+ +
+
+
+
+
+ + + \ No newline at end of file diff --git a/src/main/resources/templates/error.html b/src/main/resources/templates/error.html new file mode 100644 index 0000000..8fae11c --- /dev/null +++ b/src/main/resources/templates/error.html @@ -0,0 +1,9 @@ + + +
+ +

Error

+
+ + + \ No newline at end of file diff --git a/src/main/resources/templates/exists.html b/src/main/resources/templates/exists.html new file mode 100644 index 0000000..a8518a8 --- /dev/null +++ b/src/main/resources/templates/exists.html @@ -0,0 +1,10 @@ + + +
+ +

Error

+

El archivo ya existe o hubo un error

+
+ + + \ No newline at end of file diff --git a/src/main/resources/templates/fragments/footer.html b/src/main/resources/templates/fragments/footer.html new file mode 100644 index 0000000..7ca92be --- /dev/null +++ b/src/main/resources/templates/fragments/footer.html @@ -0,0 +1,6 @@ + \ No newline at end of file diff --git a/src/main/resources/templates/fragments/header.html b/src/main/resources/templates/fragments/header.html new file mode 100644 index 0000000..b04d06e --- /dev/null +++ b/src/main/resources/templates/fragments/header.html @@ -0,0 +1,10 @@ + + +Arjion2 + + + + + + \ No newline at end of file diff --git a/src/main/resources/templates/index.html b/src/main/resources/templates/index.html new file mode 100644 index 0000000..1e48e2a --- /dev/null +++ b/src/main/resources/templates/index.html @@ -0,0 +1,65 @@ + + +
+ +
+ +

Arjion2

+
+

[[${documentoVO.count}]] archivos

+
+
+
+ +
+
+
+
+
+ Nombre [[${arc.nombre}]]
+ Tamaño [[${arc.tamano}]] bytes
Lenguaje [[${arc.lenguaje}]]
+ Metadatos
    +
  • :
  • +
Contenido +
[[${arc.contenido}]]
+
+
+
+
+
+
+
+ [[${doc.nombre}]] - download
[[${doc.tamano}]] + bytes - + [[${doc.lenguaje}]] +

[[${#strings.abbreviate(doc.metadata,200)}]] + [[${#strings.abbreviate(doc.contenido,200)}]] +
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/src/test/java/com/manalejandro/arjion2/Arjion2ApplicationTests.java b/src/test/java/com/manalejandro/arjion2/Arjion2ApplicationTests.java new file mode 100644 index 0000000..f6598c3 --- /dev/null +++ b/src/test/java/com/manalejandro/arjion2/Arjion2ApplicationTests.java @@ -0,0 +1,13 @@ +package com.manalejandro.arjion2; + +import org.junit.jupiter.api.Test; +import org.springframework.boot.test.context.SpringBootTest; + +@SpringBootTest +class Arjion2ApplicationTests { + + @Test + void contextLoads() { + } + +}