Skip to content
Snippets Groups Projects
Commit 2e3954c5 authored by dre0059's avatar dre0059
Browse files

Save notes & display information about document

parents
No related merge requests found
Showing
with 1191 additions and 0 deletions
/mvnw text eol=lf
*.cmd text eol=crlf
HELP.md
target/
!.mvn/wrapper/maven-wrapper.jar
!**/src/main/**/target/
!**/src/test/**/target/
### STS ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
### IntelliJ IDEA ###
.idea
*.iws
*.iml
*.ipr
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
build/
!**/src/main/**/build/
!**/src/test/**/build/
### VS Code ###
.vscode/
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
wrapperVersion=3.3.2
distributionType=only-script
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip
# Articleprocessor GROBID version 1.0
- The application runs at *http://localhost:8080/upload* or *http://localhost:8080*.
- The H2 database console is available at *http://localhost:8080/h2-console* (the `jsessionid` parameter is generated per session and does not need to be part of the URL).
- The database password can be found in `resources/application.properties`.
- The GROBID server is available at *http://158.196.98.65:8080/* on the university network.
---------
---------
File added
This source diff could not be displayed because it is too large. You can view the blob instead.
mvnw 0 → 100644
#!/bin/sh
# ----------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# Apache Maven Wrapper startup batch script, version 3.3.2
#
# Optional ENV vars
# -----------------
# JAVA_HOME - location of a JDK home dir, required when download maven via java source
# MVNW_REPOURL - repo url base for downloading maven distribution
# MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
# MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output
# ----------------------------------------------------------------------------
set -euf
[ "${MVNW_VERBOSE-}" != debug ] || set -x
# OS specific support.
# native_path prints the path given as $1 in the form expected by native tools.
# The default implementation echoes the argument unchanged.
native_path() { printf %s\\n "$1"; }
case "$(uname)" in
CYGWIN* | MINGW*)
# On Cygwin/MinGW: convert a non-empty JAVA_HOME to Unix form via cygpath, and
# redefine native_path so that it converts its argument to Windows form.
[ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")"
native_path() { cygpath --path --windows "$1"; }
;;
esac
# set JAVACMD and JAVACCMD
# Locates the java and javac executables and stores their paths in the
# JAVACMD and JAVACCMD variables.
# - When JAVA_HOME is non-empty, only locations below JAVA_HOME are considered.
# - Otherwise both commands are looked up on PATH with `command -v`.
# Returns 1 (after printing an explanation to stderr) when either executable
# cannot be found or is not executable.
set_java_home() {
# For Cygwin and MinGW, ensure paths are in Unix format before anything is touched
if [ -n "${JAVA_HOME-}" ]; then
if [ -x "$JAVA_HOME/jre/sh/java" ]; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
JAVACCMD="$JAVA_HOME/jre/sh/javac"
else
JAVACMD="$JAVA_HOME/bin/java"
JAVACCMD="$JAVA_HOME/bin/javac"
if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then
echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2
echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2
return 1
fi
fi
else
# The lookup runs in a subshell with errexit disabled; `|| :` keeps a failed
# lookup from aborting the script, so the -x test below can report the error.
# NOTE(review): the builtins are quoted and any `command` function is unset,
# presumably to bypass user-defined aliases/functions - confirm.
JAVACMD="$(
'set' +e
'unset' -f command 2>/dev/null
'command' -v java
)" || :
JAVACCMD="$(
'set' +e
'unset' -f command 2>/dev/null
'command' -v javac
)" || :
if [ ! -x "${JAVACMD-}" ] || [ ! -x "${JAVACCMD-}" ]; then
echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2
return 1
fi
fi
}
# Prints a hash of $1 as lowercase hex, following the recurrence used by Java's
# String::hashCode (h = h * 31 + character code), reduced modulo 2^32.
# An empty or missing argument hashes to 0.
hash_string() {
  str="${1:-}"
  h=0
  until [ -z "$str" ]; do
    # Peel off the first character, then drop it from the remaining input.
    char="${str%"${str#?}"}"
    str="${str#?}"
    # printf %d "'c" yields the numeric code of c; LC_CTYPE=C is set for that call only.
    h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296))
  done
  printf '%x\n' "$h"
}
# Logging helper: a no-op by default; when MVNW_VERBOSE is exactly "true" it is
# redefined to print its first argument followed by a newline.
verbose() { :; }
if [ "${MVNW_VERBOSE-}" = true ]; then
  verbose() { printf '%s\n' "${1-}"; }
fi
# Prints the message given as $1 to stderr and terminates the script with exit status 1.
die() {
  printf '%s\n' "$1" 1>&2
  exit 1
}
trim() {
  # MWRAPPER-139:
  # Prints $1 with every whitespace character removed (spaces, tabs, carriage
  # returns, linefeeds) - including whitespace in the middle of the value, not
  # only at its ends. Needed for removing poorly interpreted newline sequences
  # when running in more exotic environments such as mingw bash on Windows.
  printf '%s' "$1" | tr -d '[:space:]'
}
# parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties
# Each line is split at the first "=" into key and value; other keys are ignored.
# `read` returns non-zero when the final line has no trailing newline even though
# it still fills in key/value, so the `|| [ -n "${key-}" ]` guard makes sure that
# last line is processed instead of being silently dropped.
while IFS="=" read -r key value || [ -n "${key-}" ]; do
  case "${key-}" in
  distributionUrl) distributionUrl=$(trim "${value-}") ;;
  distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;;
  esac
done <"${0%/*}/.mvn/wrapper/maven-wrapper.properties"
# distributionUrl is mandatory; abort with a pointer to the properties file otherwise.
[ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in ${0%/*}/.mvn/wrapper/maven-wrapper.properties"
case "${distributionUrl##*/}" in
maven-mvnd-*bin.*)
MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/
case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in
*AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;;
:Darwin*x86_64) distributionPlatform=darwin-amd64 ;;
:Darwin*arm64) distributionPlatform=darwin-aarch64 ;;
:Linux*x86_64*) distributionPlatform=linux-amd64 ;;
*)
echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2
distributionPlatform=linux-amd64
;;
esac
distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip"
;;
maven-mvnd-*) MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ ;;
*) MVN_CMD="mvn${0##*/mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;;
esac
# apply MVNW_REPOURL and calculate MAVEN_HOME
# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash>
[ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}"
distributionUrlName="${distributionUrl##*/}"
distributionUrlNameMain="${distributionUrlName%.*}"
distributionUrlNameMain="${distributionUrlNameMain%-bin}"
MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}"
MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")"
# Replaces the current shell process with the Maven launcher found in
# $MAVEN_HOME/bin, forwarding all arguments unchanged.
exec_maven() {
# Remove the wrapper-only variables from the environment before Maven starts;
# `|| :` ignores a failing unset.
unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || :
# die is only reached when exec itself fails.
exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD"
}
if [ -d "$MAVEN_HOME" ]; then
verbose "found existing MAVEN_HOME at $MAVEN_HOME"
exec_maven "$@"
fi
case "${distributionUrl-}" in
*?-bin.zip | *?maven-mvnd-?*-?*.zip) ;;
*) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;;
esac
# prepare tmp dir
if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then
clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; }
trap clean HUP INT TERM EXIT
else
die "cannot create temp dir"
fi
mkdir -p -- "${MAVEN_HOME%/*}"
# Download and Install Apache Maven
verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
verbose "Downloading from: $distributionUrl"
verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
# select .zip or .tar.gz
if ! command -v unzip >/dev/null; then
distributionUrl="${distributionUrl%.zip}.tar.gz"
distributionUrlName="${distributionUrl##*/}"
fi
# verbose opt
__MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR=''
[ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v
# normalize http auth
case "${MVNW_PASSWORD:+has-password}" in
'') MVNW_USERNAME='' MVNW_PASSWORD='' ;;
has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;;
esac
if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then
verbose "Found wget ... using wget"
wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl"
elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then
verbose "Found curl ... using curl"
curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl"
elif set_java_home; then
verbose "Falling back to use Java to download"
javaSource="$TMP_DOWNLOAD_DIR/Downloader.java"
targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName"
cat >"$javaSource" <<-END
public class Downloader extends java.net.Authenticator
{
protected java.net.PasswordAuthentication getPasswordAuthentication()
{
return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() );
}
public static void main( String[] args ) throws Exception
{
setDefault( new Downloader() );
java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() );
}
}
END
# For Cygwin/MinGW, switch paths to Windows format before running javac and java
verbose " - Compiling Downloader.java ..."
"$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java"
verbose " - Running Downloader.java ..."
"$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")"
fi
# If specified, validate the SHA-256 sum of the Maven distribution zip file
if [ -n "${distributionSha256Sum-}" ]; then
distributionSha256Result=false
if [ "$MVN_CMD" = mvnd.sh ]; then
echo "Checksum validation is not supported for maven-mvnd." >&2
echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
exit 1
elif command -v sha256sum >/dev/null; then
if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c >/dev/null 2>&1; then
distributionSha256Result=true
fi
elif command -v shasum >/dev/null; then
if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then
distributionSha256Result=true
fi
else
echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2
echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
exit 1
fi
if [ $distributionSha256Result = false ]; then
echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2
echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." >&2
exit 1
fi
fi
# unzip and move
if command -v unzip >/dev/null; then
unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip"
else
tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar"
fi
printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/mvnw.url"
mv -- "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME"
clean || :
exec_maven "$@"
mvnw.cmd 0 → 100644
<# : batch portion
@REM ----------------------------------------------------------------------------
@REM Licensed to the Apache Software Foundation (ASF) under one
@REM or more contributor license agreements. See the NOTICE file
@REM distributed with this work for additional information
@REM regarding copyright ownership. The ASF licenses this file
@REM to you under the Apache License, Version 2.0 (the
@REM "License"); you may not use this file except in compliance
@REM with the License. You may obtain a copy of the License at
@REM
@REM http://www.apache.org/licenses/LICENSE-2.0
@REM
@REM Unless required by applicable law or agreed to in writing,
@REM software distributed under the License is distributed on an
@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@REM KIND, either express or implied. See the License for the
@REM specific language governing permissions and limitations
@REM under the License.
@REM ----------------------------------------------------------------------------
@REM ----------------------------------------------------------------------------
@REM Apache Maven Wrapper startup batch script, version 3.3.2
@REM
@REM Optional ENV vars
@REM MVNW_REPOURL - repo url base for downloading maven distribution
@REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
@REM MVNW_VERBOSE - true: enable verbose log; others: silence the output
@REM ----------------------------------------------------------------------------
@IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0)
@SET __MVNW_CMD__=
@SET __MVNW_ERROR__=
@SET __MVNW_PSMODULEP_SAVE=%PSModulePath%
@SET PSModulePath=
@FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @(
IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B)
)
@SET PSModulePath=%__MVNW_PSMODULEP_SAVE%
@SET __MVNW_PSMODULEP_SAVE=
@SET __MVNW_ARG0_NAME__=
@SET MVNW_USERNAME=
@SET MVNW_PASSWORD=
@IF NOT "%__MVNW_CMD__%"=="" (%__MVNW_CMD__% %*)
@echo Cannot start maven from wrapper >&2 && exit /b 1
@GOTO :EOF
: end batch / begin powershell #>
$ErrorActionPreference = "Stop"
if ($env:MVNW_VERBOSE -eq "true") {
$VerbosePreference = "Continue"
}
# calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties
$distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl
if (!$distributionUrl) {
Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties"
}
switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) {
"maven-mvnd-*" {
$USE_MVND = $true
$distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip"
$MVN_CMD = "mvnd.cmd"
break
}
default {
$USE_MVND = $false
$MVN_CMD = $script -replace '^mvnw','mvn'
break
}
}
# apply MVNW_REPOURL and calculate MAVEN_HOME
# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash>
if ($env:MVNW_REPOURL) {
  # mvnd distributions live under /maven/mvnd/, regular Maven distributions under
  # /org/apache/maven/ - the same mapping the mvnw shell script uses.
  $MVNW_REPO_PATTERN = if ($USE_MVND) { "/maven/mvnd/" } else { "/org/apache/maven/" }
  # The regex must be parenthesised: in PowerShell the comma operator binds tighter
  # than "+", so an unparenthesised '^.*'+$pattern,'' concatenates '^.*' with the
  # array ($pattern,'') and produces a pattern that never matches.
  $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace ('^.*' + $MVNW_REPO_PATTERN), '')"
}
$distributionUrlName = $distributionUrl -replace '^.*/',''
$distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$',''
$MAVEN_HOME_PARENT = "$HOME/.m2/wrapper/dists/$distributionUrlNameMain"
if ($env:MAVEN_USER_HOME) {
$MAVEN_HOME_PARENT = "$env:MAVEN_USER_HOME/wrapper/dists/$distributionUrlNameMain"
}
$MAVEN_HOME_NAME = ([System.Security.Cryptography.MD5]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join ''
$MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME"
if (Test-Path -Path "$MAVEN_HOME" -PathType Container) {
Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME"
Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
exit $?
}
if (! $distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) {
Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl"
}
# prepare tmp dir
$TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile
$TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir"
$TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null
trap {
if ($TMP_DOWNLOAD_DIR.Exists) {
try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
}
}
New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null
# Download and Install Apache Maven
Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
Write-Verbose "Downloading from: $distributionUrl"
Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
$webclient = New-Object System.Net.WebClient
if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) {
$webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD)
}
[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
$webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null
# If specified, validate the SHA-256 sum of the Maven distribution zip file
$distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum
if ($distributionSha256Sum) {
if ($USE_MVND) {
Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties."
}
Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash
if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) {
Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property."
}
}
# unzip and move
Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null
Rename-Item -Path "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" -NewName $MAVEN_HOME_NAME | Out-Null
try {
Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null
} catch {
if (! (Test-Path -Path "$MAVEN_HOME" -PathType Container)) {
Write-Error "fail to move MAVEN_HOME"
}
} finally {
try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
}
Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
pom.xml 0 → 100644
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.4.0</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com.dre0059</groupId>
<artifactId>articleProcessor</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>articleProcessor</name>
<description>articleProcessor</description>
<url/>
<licenses>
<license/>
</licenses>
<developers>
<developer/>
</developers>
<scm>
<connection/>
<developerConnection/>
<tag/>
<url/>
</scm>
<properties>
<java.version>17</java.version>
<lombok.version>1.18.36</lombok.version>
</properties>
<!-- -->
<repositories>
<repository>
<id>grobid</id>
<name>GROBID DIY repo</name>
<url>https://grobid.s3.eu-west-1.amazonaws.com/repo/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>${lombok.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-webflux</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- APACHE TIKA -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>2.9.2</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>2.9.2</version>
<type>pom</type>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parser-pdf-module</artifactId>
<version>2.9.2</version>
</dependency>
<!-- PDF BOX -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.27</version> <!-- Môžete skontrolovať najnovšiu verziu -->
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.23.1</version> <!-- zadejte konkrétní verzi, např. 2.19.0 -->
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.23.1</version>
</dependency>
<!-- OCR TESSERACT -->
<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>4.3.0</version>
</dependency>
<!-- OCR - optical character recognition -->
<!-- Kept on the same version as the other org.apache.tika artifacts in this POM;
     mixing Tika releases on one classpath risks incompatible classes at runtime. -->
<dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-parsers-standard-package</artifactId>
    <version>2.9.2</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parser-ocr-module</artifactId>
<version>2.9.2</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<!-- Apache Commons IO (pre prácu so súbormi) -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.11.0</version>
</dependency>
<dependency>
<groupId>org.mapstruct</groupId>
<artifactId>mapstruct</artifactId>
<version>1.6.3</version>
</dependency>
<!-- Bootstrap WebJar. Declared exactly once: the POM previously listed
     org.webjars:bootstrap twice (5.2.3 and 5.1.3), which Maven reports as a
     duplicate declaration and resolves to a single version anyway.
     NOTE(review): 5.1.3 was the later declaration and is kept here; confirm it
     matches the /webjars/bootstrap/... paths used by the templates. -->
<dependency>
    <groupId>org.webjars</groupId>
    <artifactId>bootstrap</artifactId>
    <version>5.1.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.webjars.npm/bootstrap-icons -->
<dependency>
    <groupId>org.webjars.npm</groupId>
    <artifactId>bootstrap-icons</artifactId>
    <version>1.10.3</version>
</dependency>
<dependency>
    <groupId>org.webjars</groupId>
    <artifactId>jquery</artifactId>
    <version>3.6.0</version> <!-- or the newest version -->
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
<plugin>
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-compiler-plugin</artifactId>
    <version>3.11.0</version>
    <configuration>
        <!-- failOnError is intentionally left at its default (true): with
             failOnError=false the build continues after compilation errors and
             can package an application with missing or stale classes. -->
        <!-- Annotation processors: MapStruct generates the mappers, Lombok the
             accessors, and lombok-mapstruct-binding makes the two cooperate. -->
        <annotationProcessorPaths>
            <path>
                <groupId>org.mapstruct</groupId>
                <artifactId>mapstruct-processor</artifactId>
                <version>1.6.3</version>
            </path>
            <path>
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok</artifactId>
                <version>${lombok.version}</version>
            </path>
            <path>
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok-mapstruct-binding</artifactId>
                <version>0.2.0</version>
            </path>
        </annotationProcessorPaths>
    </configuration>
</plugin>
</plugins>
</build>
</project>
services:
  # GROBID server used for PDF metadata/reference extraction.
  grobid:
    image: grobid/grobid:0.8.1
    ports:
      - "8070:8070"
  # MySQL database for the application (development credentials).
  db:
    image: mysql
    restart: always
    environment:
      MYSQL_USER: admin
      # The official mysql image only creates MYSQL_USER when MYSQL_PASSWORD is
      # set as well; without it the "admin" account is never created.
      MYSQL_PASSWORD: admin
      MYSQL_ROOT_PASSWORD: admin
      MYSQL_DATABASE: article-processor
    volumes:
      - ~/volumes/tmp/mysql-data:/var/lib/mysql
    ports:
      - "3306:3306"
docker run --rm --gpus all --init --ulimit core=0 -p 8070:8070 grobid/grobid:0.8.1
\ No newline at end of file
package com.dre0059.articleprocessor;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Entry point of the article processor Spring Boot application.
 */
@SpringBootApplication
public class ArticleProcessorApplication {

    /**
     * Starts the Spring application using this class as the primary source.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(ArticleProcessorApplication.class);
        application.run(args);
    }
}
package com.dre0059.articleprocessor;
import com.dre0059.articleprocessor.config.GrobidProperties;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Service;
import org.springframework.web.reactive.function.BodyInserters;
import org.springframework.web.reactive.function.client.WebClient;
import org.springframework.core.io.FileSystemResource;
import reactor.core.publisher.Mono;
import java.io.File;
@Service
public class GrobidClient {
private final WebClient webClient;
public GrobidClient(GrobidProperties grobidProperties) {
this.webClient = WebClient.builder()
.baseUrl(grobidProperties.getHost()) // URL kde beží GROBID server
.build();
}
// get METADATA of the file
public String processHeader(File pdfFile){ // Mono - vráti jeden string, výsledok je JSON
return webClient.post()
.uri("/api/processHeaderDocument")
.contentType(MediaType.MULTIPART_FORM_DATA)
.body(BodyInserters.fromMultipartData("input", new FileSystemResource(pdfFile)))
.attribute("consolidateHeader", 1) // Možnosť na zjednotenie hlavičky
.attribute("includeRawAffiliations", 1) // Prípadne pridať ďalšie parametre, ak Grobid podporuje takéto rozšírenie
.attribute("includeRawCopyrights", 1) // Prípadne pridať ďalšie parametre, ak Grobid podporuje takéto rozšírenie
//.attribute("includeReferences", 1) // Možnosť pridať aj referencie priamo do hlavičky
.retrieve()
.bodyToMono(String.class)
.block(); // returns String instead of Mono<String>
}
public String processFullMetadata(File pdfFile) {
return webClient.post()
.uri("/api/processFullMetadata")
.contentType(MediaType.MULTIPART_FORM_DATA)
.body(BodyInserters.fromMultipartData("input", new FileSystemResource(pdfFile)))
.retrieve()
.bodyToMono(String.class)
.block();
}
// spracuje REFERENCIE z PDF
public String processReferences(File pdfFile){
return webClient.post()
.uri("/api/processReferences")
.contentType(MediaType.MULTIPART_FORM_DATA)
.body(BodyInserters.fromMultipartData("input", new FileSystemResource(pdfFile)))
.retrieve()
.bodyToMono(String.class)
.block();
}
}
\ No newline at end of file
package com.dre0059.articleprocessor.config;
import com.dre0059.articleprocessor.model.Category;
import com.dre0059.articleprocessor.repository.CategoryRepository;
import org.springframework.boot.CommandLineRunner;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.util.List;
/**
 * Seeds the category table with a fixed list of categories.
 */
@Configuration
public class DataInitializer {

    /**
     * Registers a {@link CommandLineRunner}; it is executed automatically, so it
     * does not have to be invoked manually. The runner stores the default
     * categories only when the repository reports zero existing rows.
     *
     * @param categoryRepository repository used to count and store categories
     * @return the runner performing the one-time seeding
     */
    @Bean
    public CommandLineRunner init(CategoryRepository categoryRepository) {
        return args -> {
            if (categoryRepository.count() != 0) {
                return;
            }
            categoryRepository.saveAll(defaultCategories());
        };
    }

    /** Returns the built-in categories as (code, name) pairs. */
    private static List<Category> defaultCategories() {
        return List.of(
            // 1.x
            new Category("1.4", "Chemical sciences"),
            new Category("1.5", "Earth and related environmental sciences"),
            new Category("1.6", "Biological sciences"),
            new Category("1.7", "Other natural sciences"),
            // 2.x
            new Category("2.1", "Civil engineering"),
            new Category("2.2", "Electrical engineering, electronic engineering, information engineering"),
            new Category("2.3", "Mechanical engineering"),
            new Category("2.4", "Chemical engineering"),
            new Category("2.5", "Materials engineering"),
            new Category("2.6", "Medical engineering"),
            new Category("2.7", "Environmental engineering"),
            new Category("2.8", "Environmental biotechnology"),
            new Category("2.9", "Industrial biotechnology"),
            new Category("2.10", "Nano-technology"),
            new Category("2.11", "Other engineering and technologies"),
            // 3.x
            new Category("3.2", "Clinical medicine"),
            new Category("3.3", "Health sciences"),
            // 4.x
            new Category("4.1", "Agriculture, forestry, and fisheries"),
            new Category("4.2", "Animal and dairy science"),
            new Category("4.3", "Veterinary science"),
            new Category("4.5", "Other agricultural sciences"),
            // 5.x
            new Category("5.1", "Psychology and cognitive sciences"),
            new Category("5.2", "Economics and business"),
            new Category("5.3", "Education"),
            new Category("5.4", "Sociology"),
            new Category("5.5", "Law"),
            new Category("5.6", "Political science"),
            new Category("5.7", "Social and economic geography"),
            new Category("5.8", "Media and communication"),
            new Category("5.9", "Other social sciences"),
            // 6.x
            new Category("6.1", "History and archaeology"),
            new Category("6.2", "Languages and literature"),
            new Category("6.4", "Arts"),
            new Category("6.5", "Other Humanities and the Arts")
        );
    }
}
package com.dre0059.articleprocessor.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
/**
 * Configuration properties bound from keys with the {@code grobid} prefix.
 */
@Configuration
@ConfigurationProperties(prefix = "grobid")
public class GrobidProperties {

    /** Value of the {@code grobid.host} property. */
    private String host;

    /**
     * @return the configured GROBID host, or {@code null} when it was never set
     */
    public String getHost() {
        return this.host;
    }

    /**
     * @param host the GROBID host to use
     */
    public void setHost(String host) {
        this.host = host;
    }
}
package com.dre0059.articleprocessor.controller;
import com.dre0059.articleprocessor.dto.DocumentDto;
import com.dre0059.articleprocessor.dto.SimpleDocumentDto;
import com.dre0059.articleprocessor.model.Dokument;
import com.dre0059.articleprocessor.repository.DocumentRepository;
import com.dre0059.articleprocessor.service.DocumentService;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.*;
/**
 * Web controller exposing stored documents: JSON endpoints below
 * {@code /api/documents}, the view pages {@code /view} and {@code /view/{id}},
 * and a delete endpoint.
 */
@Controller
@RequestMapping
public class DocumentController {

    private final DocumentService documentService;

    // Field-injected; kept this way so the public constructor signature stays unchanged.
    @Autowired
    private DocumentRepository dokumentRepository;

    /**
     * @param documentService service used for document lookups and deletion
     */
    public DocumentController(DocumentService documentService) {
        this.documentService = documentService;
    }

    /**
     * Returns the document with the given ID as a DTO.
     *
     * @param id document ID
     * @return 200 response wrapping whatever the service returns for this ID
     */
    @GetMapping("/api/documents/{id}")
    public ResponseEntity<DocumentDto> getDocumentById(@PathVariable Long id) {
        return ResponseEntity.ok(documentService.getDocumentById(id));
    }

    /**
     * Returns the stored content of a document, served as {@code application/pdf}.
     *
     * @param id document ID
     * @return the content bytes
     */
    @GetMapping(
        value = "/api/documents/{id}/content",
        produces = MediaType.APPLICATION_PDF_VALUE
    )
    public @ResponseBody byte[] getDocumentContentById(@PathVariable Long id) {
        return documentService.getDocumentContentById(id).getContent();
    }

    /**
     * Returns the documents referenced by the given document.
     *
     * @param id document ID
     * @return 200 response wrapping the referenced documents
     */
    @GetMapping("/api/documents/{id}/references")
    public ResponseEntity<List<SimpleDocumentDto>> getReferencesFromDocument(@PathVariable Long id) {
        return ResponseEntity.ok(documentService.getReferencedDocumentsById(id));
    }

    /**
     * Fills the model for the single-document page.
     *
     * @param model view model to populate
     * @param id document ID
     * @return the view name {@code view-pdf}
     * @throws IllegalArgumentException when no document with this ID exists
     */
    @GetMapping("/view/{id}")
    public String viewDocument(Model model, @PathVariable("id") Long id) {
        // Check existence before anything else, so a missing document fails with
        // the intended message instead of NoSuchElementException from Optional.get().
        Dokument document = dokumentRepository.findById(id)
            .orElseThrow(() -> new IllegalArgumentException("Document with ID " + id + " not found."));
        DocumentDto documentDto = documentService.getDocumentById(id);
        if (documentDto == null) {
            throw new IllegalArgumentException("Document with ID " + id + " not found.");
        }
        List<SimpleDocumentDto> references = documentService.getReferencedDocumentsById(id);

        // The category is only exposed for documents whose status is not "Referenced".
        // Constant-first equals and the null check avoid a NullPointerException when
        // the status or the category is missing.
        if (!"Referenced".equals(document.getStatus()) && document.getCategory() != null) {
            model.addAttribute("category", document.getCategory().getName());
        }

        model.addAttribute("author", document.authorsToString());
        model.addAttribute("year", document.getYear());
        model.addAttribute("doi", document.getDoi());
        model.addAttribute("link", document.getTarget());
        model.addAttribute("tags", document.tagsToString());
        model.addAttribute("notes", document.getNotes());
        model.addAttribute("documentId", id);
        model.addAttribute("references", references);
        model.addAttribute("docTitle", documentDto.getTitle());
        return "view-pdf";
    }

    /**
     * Fills the model for the document overview page.
     *
     * @param model view model to populate
     * @return the view name {@code view-all}
     */
    @GetMapping("/view")
    public String viewAllDocuments(Model model) {
        model.addAttribute("documents", documentService.getAllDocuments());
        return "view-all";
    }

    /**
     * Deletes a document through the service.
     *
     * @param id document ID
     * @return 200 response with a confirmation message
     */
    @PostMapping("/delete/{id}")
    public ResponseEntity<String> deleteDocument(@PathVariable Long id) {
        documentService.deleteDocument(id);
        return ResponseEntity.ok("References deleted successfully.");
    }

    /**
     * Stores the notes of a document.
     *
     * @param documentId document ID
     * @param payload request body; the value under the key {@code notes} becomes the new notes
     * @return 200 response with {@code {"success": true}}
     * @throws IllegalArgumentException when no document with this ID exists
     */
    @PostMapping("/api/documents/{documentId}/setNotes")
    @ResponseBody
    public ResponseEntity<Map<String, Object>> setNotes(@PathVariable Long documentId, @RequestBody Map<String, String> payload) {
        Dokument document = dokumentRepository.findById(documentId)
            .orElseThrow(() -> new IllegalArgumentException("Document not found"));

        document.setNotes(payload.get("notes"));
        dokumentRepository.save(document);

        Map<String, Object> response = new HashMap<>();
        response.put("success", true);
        return ResponseEntity.ok(response);
    }
}
package com.dre0059.articleprocessor.controller;
import com.dre0059.articleprocessor.GrobidClient;
import com.dre0059.articleprocessor.model.Dokument;
import com.dre0059.articleprocessor.model.Tag;
import com.dre0059.articleprocessor.repository.TagRepository;
import com.dre0059.articleprocessor.service.CategoryService;
import com.dre0059.articleprocessor.service.HeaderService;
import com.dre0059.articleprocessor.service.ReferenceService;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.multipart.MultipartFile;
/**
 * Controller serving the upload form and handling PDF uploads that are sent
 * through GROBID and then persisted by the header and reference services.
 */
@Controller
@RequestMapping
public class FileUploadController {

  private final GrobidClient grobidClient;
  private final HeaderService headerService;
  private final ReferenceService referenceService;
  private final CategoryService categoryService;

  /**
   * Creates the controller with all of its collaborators.
   *
   * @param grobidClient client used to process the header and references of a PDF
   * @param headerService service that processes and stores the extracted header
   * @param referenceService service that extracts references from the GROBID output
   * @param categoryService service providing the category list for the form
   */
  public FileUploadController(GrobidClient grobidClient, HeaderService headerService, ReferenceService referenceService, CategoryService categoryService) {
    this.grobidClient = grobidClient;
    this.headerService = headerService;
    this.referenceService = referenceService;
    this.categoryService = categoryService;
  }

  /**
   * Shows the upload form with the available categories.
   *
   * @param model view model that receives the {@code categories} attribute
   * @return the {@code upload} view name
   */
  @GetMapping(value = {"/upload", "/"})
  public String showUploadForm(Model model) {
    var categories = categoryService.getAll();
    model.addAttribute("categories", categories);
    return "upload"; // the original note says this resolves to upload.html
  }

  /**
   * Handles an uploaded file: copies it to a temporary file, runs GROBID header
   * and reference processing on it, and hands the results to the services.
   *
   * <p>The temporary file is removed on every exit path, including failures.
   *
   * @param file uploaded multipart file
   * @param categoryId category identifier passed on to the header service
   * @param tags tag values passed on to the header service
   * @return 200 with the saved document id and a message, 400 when the file is
   *     empty, or 500 when an I/O error occurs
   */
  @PostMapping("/api/upload")
  @ResponseBody
  public ResponseEntity<?> handleFileUpload(
      @RequestParam("file") MultipartFile file,
      @RequestParam("categoryId") String categoryId,
      @RequestParam("tags") List<String> tags) {
    if (file.isEmpty()) {
      return ResponseEntity.badRequest().body("No file uploaded!");
    }

    File tmpFile = null;
    try {
      System.out.println("Processing file " + file.getOriginalFilename());
      tmpFile = File.createTempFile("article-", ".pdf");

      // Copy the upload into the temporary file handed to GROBID.
      try (FileOutputStream stream = new FileOutputStream(tmpFile)) {
        stream.write(file.getBytes());
      } catch (IOException e) {
        return ResponseEntity.internalServerError().body("FAILURE - cannot process file : " + e.getMessage());
      }

      String header = grobidClient.processHeader(tmpFile);
      String references = grobidClient.processReferences(tmpFile);

      Dokument savedDocument = headerService.processHeader(header, categoryId, tags, tmpFile);
      referenceService.extractReferences(references);

      Map<String, Object> response = new HashMap<>();
      response.put("id", savedDocument.getId());
      response.put("message", "Upload successful");
      return ResponseEntity.ok(response);
    } catch (IOException e) {
      return ResponseEntity.status(500).body("Chyba pri vytváraní dočasného súboru.");
    } finally {
      // Previously the file was deleted only on the success path and leaked
      // after any early return or exception. Fall back to deletion at JVM exit
      // when immediate deletion fails.
      if (tmpFile != null && !tmpFile.delete()) {
        tmpFile.deleteOnExit();
      }
    }
  }
}
package com.dre0059.articleprocessor.controller;
import com.dre0059.articleprocessor.model.Dokument;
import com.dre0059.articleprocessor.repository.DocumentRepository;
import com.dre0059.articleprocessor.repository.ReferenceRepository;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestParam;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Controller that prepares the aggregated numbers shown on the statistics page.
 */
@Controller
public class StatisticsController {

  private final DocumentRepository documentRepository;
  private final ReferenceRepository referenceRepository;

  /**
   * Creates the controller.
   *
   * @param documentRepository repository used to load all documents
   * @param referenceRepository repository used for per-year reference counts
   */
  public StatisticsController(DocumentRepository documentRepository,
      ReferenceRepository referenceRepository) {
    this.documentRepository = documentRepository;
    this.referenceRepository = referenceRepository;
  }

  /**
   * Builds the statistics model: document counts per status and per category,
   * plus reference counts per year for one selected category.
   *
   * @param category optional category name; when blank, the first category
   *     encountered among the loaded documents is used
   * @param model view model that receives {@code statusCount},
   *     {@code categoryCount}, {@code referenceCounts} and {@code selectedCategory}
   * @return the {@code statistics} view name
   */
  @GetMapping("/statistics")
  public String statistics(
      @RequestParam(value = "category", required = false, defaultValue = "") String category,
      Model model
  ) {
    // 1) All documents.
    List<Dokument> documents = documentRepository.findAll();

    // 2) Number of documents per status.
    Map<String, Long> statusCount = documents.stream()
        .collect(Collectors.groupingBy(Dokument::getStatus, Collectors.counting()));

    // 3) Number of documents per category (only documents that have one).
    //    A LinkedHashMap keeps the keys in first-encounter order; the default
    //    HashMap would make the "first category" below depend on hash order.
    Map<String, Long> categoryCount = documents.stream()
        .filter(d -> d.getCategory() != null)
        .collect(Collectors.groupingBy(
            d -> d.getCategory().getName(),
            LinkedHashMap::new,
            Collectors.counting()));

    // 4) All category names, in the order they were first seen.
    List<String> categories = new ArrayList<>(categoryCount.keySet());

    // 5) Selected category: the request parameter when given, otherwise the
    //    first known category (or empty when there is none).
    String requested = category.trim();
    String selectedCategory;
    if (!requested.isEmpty()) {
      selectedCategory = requested;
    } else {
      selectedCategory = categories.isEmpty() ? "" : categories.get(0);
    }

    // 6) Reference counts per year for the selected category.
    Map<Integer, Long> referenceCounts = new LinkedHashMap<>();
    if (!selectedCategory.isEmpty()) {
      List<Object[]> rows = referenceRepository
          .countReferencesByYearForCategory(selectedCategory);
      // Each row is [year, count]; per the original note the query already
      // orders rows by year, and the LinkedHashMap preserves that order.
      for (Object[] row : rows) {
        Integer year = (Integer) row[0];
        Long count = (Long) row[1];
        referenceCounts.put(year, count);
      }
    }

    // 7) Expose everything to the view.
    model.addAttribute("statusCount", statusCount);
    model.addAttribute("categoryCount", categoryCount);
    model.addAttribute("referenceCounts", referenceCounts);
    model.addAttribute("selectedCategory", selectedCategory);
    return "statistics";
  }
}
package com.dre0059.articleprocessor.controller;
import com.dre0059.articleprocessor.repository.TagRepository;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * REST controller exposing tag lookups under {@code /api}.
 */
@RestController
@RequestMapping("/api")
public class TagController {

  private final TagRepository tagRepository;

  /**
   * Creates the controller.
   *
   * @param tagRepository repository used to search tags by title
   */
  public TagController(TagRepository tagRepository) {
    this.tagRepository = tagRepository;
  }

  /**
   * Returns the tags whose title contains the given term, ignoring case.
   *
   * <p>The term is optional: a request without it is treated as an empty
   * search instead of being rejected for a missing required parameter.
   *
   * @param term search text; defaults to the empty string when absent
   * @return one map per tag with the keys {@code id} and {@code text}, both
   *     holding the tag title
   */
  @GetMapping("/tags")
  public List<Map<String, String>> getTags(
      @RequestParam(value = "term", required = false, defaultValue = "") String term) {
    return tagRepository.findByTitleContainingIgnoreCase(term).stream()
        // The title doubles as the ID expected by select2.
        .map(tag -> Map.of("id", tag.getTitle(), "text", tag.getTitle()))
        .collect(Collectors.toList());
  }
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment