Skip to content
Snippets Groups Projects
Commit ebe1559a authored by dre0059's avatar dre0059
Browse files

The very first commit

parent c0eb02ef
No related merge requests found
Pipeline #2469 failed with stages
Showing
with 1222 additions and 0 deletions
/mvnw text eol=lf
*.cmd text eol=crlf
HELP.md
target/
!.mvn/wrapper/maven-wrapper.jar
!**/src/main/**/target/
!**/src/test/**/target/
### STS ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
### IntelliJ IDEA ###
.idea
*.iws
*.iml
*.ipr
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
build/
!**/src/main/**/build/
!**/src/test/**/build/
### VS Code ###
.vscode/
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
wrapperVersion=3.3.2
distributionType=only-script
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip
"hahahahahahaha"
\ No newline at end of file
"heeeeeeeeeeeeeeeeeeej"
\ No newline at end of file
mvnw 0 → 100644
#!/bin/sh
# ----------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# Apache Maven Wrapper startup batch script, version 3.3.2
#
# Optional ENV vars
# -----------------
# JAVA_HOME - location of a JDK home dir, required when download maven via java source
# MVNW_REPOURL - repo url base for downloading maven distribution
# MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
# MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output
# ----------------------------------------------------------------------------
set -euf
[ "${MVNW_VERBOSE-}" != debug ] || set -x
# OS specific support.
# native_path PATH
# Prints PATH in the form expected by native executables.
# Default implementation: print the argument unchanged, followed by a newline.
native_path() { printf %s\\n "$1"; }
case "$(uname)" in
CYGWIN* | MINGW*)
# On Cygwin/MinGW: convert a non-empty JAVA_HOME to Unix form for use inside this script ...
[ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")"
# ... and override native_path so it converts its argument to Windows form via cygpath.
native_path() { cygpath --path --windows "$1"; }
;;
esac
# set JAVACMD and JAVACCMD
# set_java_home
# Sets JAVACMD and JAVACCMD to the java and javac executables.
# Uses JAVA_HOME when it is non-empty, otherwise looks both commands up on PATH.
# Returns 1 (after printing a message to stderr) when the executables cannot be found.
set_java_home() {
# For Cygwin and MinGW, ensure paths are in Unix format before anything is touched
if [ -n "${JAVA_HOME-}" ]; then
if [ -x "$JAVA_HOME/jre/sh/java" ]; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
JAVACCMD="$JAVA_HOME/jre/sh/javac"
else
JAVACMD="$JAVA_HOME/bin/java"
JAVACCMD="$JAVA_HOME/bin/javac"
# Both java and javac must be executable; otherwise JAVA_HOME is reported as invalid.
if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then
echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2
echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2
return 1
fi
fi
else
# No JAVA_HOME: resolve java/javac through `command -v` inside a subshell.
# The subshell turns off errexit and removes any shell function named `command`
# before the lookup; `|| :` keeps a failed lookup from aborting the script.
JAVACMD="$(
'set' +e
'unset' -f command 2>/dev/null
'command' -v java
)" || :
JAVACCMD="$(
'set' +e
'unset' -f command 2>/dev/null
'command' -v javac
)" || :
if [ ! -x "${JAVACMD-}" ] || [ ! -x "${JAVACCMD-}" ]; then
echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2
return 1
fi
fi
}
# hash string like Java String::hashCode
hash_string() {
  # hash_string STRING
  # Prints, in lowercase hex, the hash h = (h * 31 + code) mod 2^32 folded over
  # every character of STRING (the recurrence of Java's String::hashCode).
  # An empty or missing argument prints 0.
  str="${1:-}"
  h=0
  until [ -z "$str" ]; do
    # Peel off the first character, then drop it from the remaining input.
    char="${str%"${str#?}"}"
    str="${str#?}"
    # "'$char" makes printf emit the numeric code of the character.
    h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296))
  done
  printf '%x\n' "$h"
}
# verbose MESSAGE
# No-op by default; when MVNW_VERBOSE is exactly "true" it is redefined to print MESSAGE on stdout.
verbose() { :; }
[ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; }
die() {
  # die MESSAGE
  # Writes MESSAGE plus a newline to stderr, then terminates the shell with status 1.
  printf '%s\n' "$1" 1>&2
  exit 1
}
# trim VALUE
# Prints VALUE with every whitespace character removed and no trailing newline.
trim() {
# MWRAPPER-139:
# Deletes ALL whitespace (spaces, carriage returns, tabs, linefeeds) from the value:
# `tr -d '[:space:]'` removes these characters anywhere in the string, not only at
# the leading and trailing ends.
# Needed for removing poorly interpreted newline sequences when running in more
# exotic environments such as mingw bash on Windows.
printf "%s" "${1}" | tr -d '[:space:]'
}
# parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties
while IFS="=" read -r key value; do
case "${key-}" in
distributionUrl) distributionUrl=$(trim "${value-}") ;;
distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;;
esac
done <"${0%/*}/.mvn/wrapper/maven-wrapper.properties"
[ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in ${0%/*}/.mvn/wrapper/maven-wrapper.properties"
case "${distributionUrl##*/}" in
maven-mvnd-*bin.*)
MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/
case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in
*AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;;
:Darwin*x86_64) distributionPlatform=darwin-amd64 ;;
:Darwin*arm64) distributionPlatform=darwin-aarch64 ;;
:Linux*x86_64*) distributionPlatform=linux-amd64 ;;
*)
echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2
distributionPlatform=linux-amd64
;;
esac
distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip"
;;
maven-mvnd-*) MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ ;;
*) MVN_CMD="mvn${0##*/mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;;
esac
# apply MVNW_REPOURL and calculate MAVEN_HOME
# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash>
[ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}"
distributionUrlName="${distributionUrl##*/}"
distributionUrlNameMain="${distributionUrlName%.*}"
distributionUrlNameMain="${distributionUrlNameMain%-bin}"
MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}"
MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")"
# exec_maven ARGS...
# Replaces the current shell process with $MAVEN_HOME/bin/$MVN_CMD, forwarding all arguments.
# Calls die if the exec itself fails.
exec_maven() {
# Remove the wrapper-only variables (including the download credentials) from the
# environment before Maven starts; `|| :` ignores a failing unset.
unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || :
exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD"
}
if [ -d "$MAVEN_HOME" ]; then
verbose "found existing MAVEN_HOME at $MAVEN_HOME"
exec_maven "$@"
fi
case "${distributionUrl-}" in
*?-bin.zip | *?maven-mvnd-?*-?*.zip) ;;
*) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;;
esac
# prepare tmp dir
if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then
clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; }
trap clean HUP INT TERM EXIT
else
die "cannot create temp dir"
fi
mkdir -p -- "${MAVEN_HOME%/*}"
# Download and Install Apache Maven
verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
verbose "Downloading from: $distributionUrl"
verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
# select .zip or .tar.gz
if ! command -v unzip >/dev/null; then
distributionUrl="${distributionUrl%.zip}.tar.gz"
distributionUrlName="${distributionUrl##*/}"
fi
# verbose opt
__MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR=''
[ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v
# normalize http auth
case "${MVNW_PASSWORD:+has-password}" in
'') MVNW_USERNAME='' MVNW_PASSWORD='' ;;
has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;;
esac
if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then
verbose "Found wget ... using wget"
wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl"
elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then
verbose "Found curl ... using curl"
curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl"
elif set_java_home; then
verbose "Falling back to use Java to download"
javaSource="$TMP_DOWNLOAD_DIR/Downloader.java"
targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName"
cat >"$javaSource" <<-END
public class Downloader extends java.net.Authenticator
{
protected java.net.PasswordAuthentication getPasswordAuthentication()
{
return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() );
}
public static void main( String[] args ) throws Exception
{
setDefault( new Downloader() );
java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() );
}
}
END
# For Cygwin/MinGW, switch paths to Windows format before running javac and java
verbose " - Compiling Downloader.java ..."
"$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java"
verbose " - Running Downloader.java ..."
"$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")"
fi
# If specified, validate the SHA-256 sum of the Maven distribution zip file
if [ -n "${distributionSha256Sum-}" ]; then
distributionSha256Result=false
if [ "$MVN_CMD" = mvnd.sh ]; then
echo "Checksum validation is not supported for maven-mvnd." >&2
echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
exit 1
elif command -v sha256sum >/dev/null; then
if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c >/dev/null 2>&1; then
distributionSha256Result=true
fi
elif command -v shasum >/dev/null; then
if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then
distributionSha256Result=true
fi
else
echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2
echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
exit 1
fi
if [ $distributionSha256Result = false ]; then
echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2
echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." >&2
exit 1
fi
fi
# unzip and move
if command -v unzip >/dev/null; then
unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip"
else
tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar"
fi
printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/mvnw.url"
mv -- "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME"
clean || :
exec_maven "$@"
mvnw.cmd 0 → 100644
<# : batch portion
@REM ----------------------------------------------------------------------------
@REM Licensed to the Apache Software Foundation (ASF) under one
@REM or more contributor license agreements. See the NOTICE file
@REM distributed with this work for additional information
@REM regarding copyright ownership. The ASF licenses this file
@REM to you under the Apache License, Version 2.0 (the
@REM "License"); you may not use this file except in compliance
@REM with the License. You may obtain a copy of the License at
@REM
@REM http://www.apache.org/licenses/LICENSE-2.0
@REM
@REM Unless required by applicable law or agreed to in writing,
@REM software distributed under the License is distributed on an
@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@REM KIND, either express or implied. See the License for the
@REM specific language governing permissions and limitations
@REM under the License.
@REM ----------------------------------------------------------------------------
@REM ----------------------------------------------------------------------------
@REM Apache Maven Wrapper startup batch script, version 3.3.2
@REM
@REM Optional ENV vars
@REM MVNW_REPOURL - repo url base for downloading maven distribution
@REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
@REM MVNW_VERBOSE - true: enable verbose log; others: silence the output
@REM ----------------------------------------------------------------------------
@IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0)
@SET __MVNW_CMD__=
@SET __MVNW_ERROR__=
@SET __MVNW_PSMODULEP_SAVE=%PSModulePath%
@SET PSModulePath=
@FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @(
IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B)
)
@SET PSModulePath=%__MVNW_PSMODULEP_SAVE%
@SET __MVNW_PSMODULEP_SAVE=
@SET __MVNW_ARG0_NAME__=
@SET MVNW_USERNAME=
@SET MVNW_PASSWORD=
@IF NOT "%__MVNW_CMD__%"=="" (%__MVNW_CMD__% %*)
@echo Cannot start maven from wrapper >&2 && exit /b 1
@GOTO :EOF
: end batch / begin powershell #>
$ErrorActionPreference = "Stop"
if ($env:MVNW_VERBOSE -eq "true") {
$VerbosePreference = "Continue"
}
# calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties
$distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl
if (!$distributionUrl) {
Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties"
}
switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) {
"maven-mvnd-*" {
$USE_MVND = $true
$distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip"
$MVN_CMD = "mvnd.cmd"
break
}
default {
$USE_MVND = $false
$MVN_CMD = $script -replace '^mvnw','mvn'
break
}
}
# apply MVNW_REPOURL and calculate MAVEN_HOME
# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash>
if ($env:MVNW_REPOURL) {
  # Re-root the distribution URL onto the mirror given in MVNW_REPOURL.
  # mvnd distributions are located under /maven/mvnd/ and plain Maven under
  # /org/apache/maven/ -- the same mapping the POSIX mvnw script uses. The two
  # branches were previously swapped, so the pattern never matched the URL.
  $MVNW_REPO_PATTERN = if ($USE_MVND) { "/maven/mvnd/" } else { "/org/apache/maven/" }
  # Build the regex with string interpolation. The former '^.*'+$MVNW_REPO_PATTERN,''
  # form relied on operator precedence between '+' and ',' and did not reliably
  # yield the intended (pattern, replacement) pair.
  $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace "^.*$MVNW_REPO_PATTERN",'')"
}
$distributionUrlName = $distributionUrl -replace '^.*/',''
$distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$',''
$MAVEN_HOME_PARENT = "$HOME/.m2/wrapper/dists/$distributionUrlNameMain"
if ($env:MAVEN_USER_HOME) {
$MAVEN_HOME_PARENT = "$env:MAVEN_USER_HOME/wrapper/dists/$distributionUrlNameMain"
}
$MAVEN_HOME_NAME = ([System.Security.Cryptography.MD5]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join ''
$MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME"
if (Test-Path -Path "$MAVEN_HOME" -PathType Container) {
Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME"
Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
exit $?
}
if (! $distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) {
Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl"
}
# prepare tmp dir
$TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile
$TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir"
$TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null
trap {
if ($TMP_DOWNLOAD_DIR.Exists) {
try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
}
}
New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null
# Download and Install Apache Maven
Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
Write-Verbose "Downloading from: $distributionUrl"
Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
$webclient = New-Object System.Net.WebClient
if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) {
$webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD)
}
[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
$webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null
# If specified, validate the SHA-256 sum of the Maven distribution zip file
$distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum
if ($distributionSha256Sum) {
if ($USE_MVND) {
Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties."
}
Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash
if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) {
Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property."
}
}
# unzip and move
Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null
Rename-Item -Path "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" -NewName $MAVEN_HOME_NAME | Out-Null
try {
Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null
} catch {
if (! (Test-Path -Path "$MAVEN_HOME" -PathType Container)) {
Write-Error "fail to move MAVEN_HOME"
}
} finally {
try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
}
Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
pom.xml 0 → 100644
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.4.0</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com.dre0059</groupId>
<artifactId>articleProcessor</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>articleProcessor</name>
<description>articleProcessor</description>
<url/>
<licenses>
<license/>
</licenses>
<developers>
<developer/>
</developers>
<scm>
<connection/>
<developerConnection/>
<tag/>
<url/>
</scm>
<properties>
<java.version>17</java.version>
</properties>
<!-- -->
<repositories>
<repository>
<id>grobid</id>
<name>GROBID DIY repo</name>
<url>https://grobid.s3.eu-west-1.amazonaws.com/repo/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!--<dependency>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.xerial</groupId>
<artifactId>sqlite-jdbc</artifactId>
<version>3.43.2.1</version>
</dependency>
<dependency>
<groupId>org.hibernate.orm</groupId>
<artifactId>hibernate-community-dialects</artifactId>
<version>6.2.12.Final</version>
</dependency>
-->
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-webflux</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- APACHE TIKA -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>2.9.2</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>2.9.2</version>
<type>pom</type>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parser-pdf-module</artifactId>
<version>2.9.2</version>
</dependency>
<!-- PDF BOX -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.27</version> <!-- Môžete skontrolovať najnovšiu verziu -->
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.23.1</version> <!-- zadejte konkrétní verzi, např. 2.19.0 -->
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.23.1</version>
</dependency>
<!-- OCR TESSERACT -->
<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>4.3.0</version>
</dependency>
<!-- OCR - optical character recognition -->
<!-- Version aligned with the other org.apache.tika artifacts in this POM (2.9.2);
     it was pinned to 2.4.1, which mixes parser modules from two Tika releases. -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers-standard-package</artifactId>
<version>2.9.2</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parser-ocr-module</artifactId>
<version>2.9.2</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<!-- GROBID Dependencies - stará verzia
<dependency>
<groupId>org.grobid</groupId>
<artifactId>grobid-core</artifactId>
<version>0.8.1</version>
</dependency>
-->
<!-- Apache Commons IO (pre prácu so súbormi) -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.11.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
package com.dre0059.articleprocessor;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Entry point of the articleProcessor Spring Boot application.
 */
@SpringBootApplication
public class ArticleProcessorApplication {

    /**
     * Boots the Spring application with this class as its primary source.
     *
     * @param args command-line arguments, passed on to Spring Boot unchanged
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(ArticleProcessorApplication.class);
        application.run(args);
    }
}
package com.dre0059.articleprocessor;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Service;
import org.springframework.web.reactive.function.BodyInserters;
import org.springframework.web.reactive.function.client.WebClient;
import org.springframework.core.io.FileSystemResource;
import reactor.core.publisher.Mono;
import java.io.File;
@Service
public class GrobidClient {
private final WebClient webClient;
public GrobidClient() {
this.webClient = WebClient.builder()
.baseUrl("http://158.196.98.65:8080") // URL kde beží GROBID server
.build();
}
// get METADATA of the file
public Mono<String> processHeader(File pdfFile){ // Mono - vráti jeden string, výsledok je JSON
return webClient.post()
.uri("/api/processHeaderDocument")
.contentType(MediaType.MULTIPART_FORM_DATA)
.body(BodyInserters.fromMultipartData("input", new FileSystemResource(pdfFile)))
.attribute("consolidateHeader", 1)
.retrieve()
.bodyToMono(String.class);
}
// spracuje REFERENCIE z PDF
public Mono<String> processReferences(File pdfFile){
return webClient.post()
.uri("/api/processReferences")
.contentType(MediaType.MULTIPART_FORM_DATA)
.body(BodyInserters.fromMultipartData("input", new FileSystemResource(pdfFile)))
.retrieve()
.bodyToMono(String.class);
}
}
\ No newline at end of file
package com.dre0059.articleprocessor.apacheTika;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.springframework.stereotype.Component;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import static com.lowagie.text.html.HtmlTagMap.isTitle;
/**
 * Extracts the plain text of a PDF with Apache PDFBox and writes it to a text file.
 */
@Component
public class PDFbox {

    // Number of text files this instance has written successfully.
    private int countTXT = 0;

    /** @return how many text files have been written successfully so far */
    public int getCountTXT() {
        return this.countTXT;
    }

    /**
     * Reads the PDF at {@code inputPDFpath}, extracts its text and writes it to {@code outputTXTpath}.
     * Failures are reported on {@code System.err} and are not propagated to the caller;
     * the counter is incremented only after a successful write.
     *
     * @param inputPDFpath  path of the PDF file to read
     * @param outputTXTpath path of the text file to create or overwrite
     */
    public void toTXT(String inputPDFpath, String outputTXTpath) {
        File pdfFile = new File(inputPDFpath);

        try (PDDocument document = PDDocument.load(pdfFile)) {
            String extractedText = new PDFTextStripper().getText(document);

            // The text is written with the platform default charset (FileWriter default).
            try (FileWriter writer = new FileWriter(outputTXTpath)) {
                writer.write(extractedText);
                System.out.println("File was successfully saved to : " + outputTXTpath);
                countTXT++;
            } catch (IOException e) {
                System.err.println("FAILURE - file was not saved : " + e.getMessage());
            }
        } catch (IOException ex) {
            System.err.println("FAILURE - Problem reading file : " + ex.getMessage());
        }
    }
}
package com.dre0059.articleprocessor.apacheTika;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import javax.sound.midi.Soundbank;
import java.io.*;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Locates the references section of a document and splits it into individual references.
 * Keeps simple counters of documents where this failed (used for testing).
 */
public class ReferencesScanner {

    // Heading that opens the references section: "Reference(s)" or
    // "Note(s) and Reference(s)", in any letter case.
    private static final Pattern REFERENCES_HEADING = Pattern.compile(
            "\\b[Rr][Ee][Ff][Ee][Rr][Ee][Nn][Cc][Ee][Ss]?\\b|" +
            "\\b[Nn][Oo][Tt][Ee][Ss]?\\s+[Aa][Nn][Dd]\\s+[Rr][Ee][Ff][Ee][Rr][Ee][Nn][Cc][Ee][Ss]?\\b");

    // notFound is just for TESTING: number of documents without a references heading.
    private int notFound;
    // Number of reference files from which no reference could be parsed.
    private int notParsed;

    public int getNotFound() {
        return this.notFound;
    }

    public int getNotParsed() {
        return this.notParsed;
    }

    /**
     * Extracts the text of {@code filepath} with Tika, finds the first line matching the
     * references heading and writes that line and everything after it to {@code outputPath}.
     * Copying stops at a line containing "APPENDIX"/"Appendix" or at a "FURTHER READING" heading.
     *
     * @param filepath   document to scan
     * @param outputPath text file that receives the references section
     * @return {@code true} if a references heading was found (even if writing the output file
     *         failed; that failure is only printed), {@code false} if none was found or the
     *         document could not be parsed
     */
    public boolean findReferences(String filepath, String outputPath) {
        String content;
        try {
            Tika tika = new Tika();
            // Lift Tika's default cap on the returned string: references sit at the end of
            // the document and would be cut off in long papers.
            tika.setMaxStringLength(-1);
            content = tika.parseToString(new File(filepath));
        } catch (IOException | TikaException e) {
            e.printStackTrace();
            return false; // Return false if an exception occurs
        }

        String[] lines = content.split("\\r?\\n"); // split text to lines
        StringBuilder referencesData = new StringBuilder();
        boolean found = false;

        for (int i = 0; i < lines.length; i++) {
            String line = lines[i];
            if (found) {
                String nextLine = (i + 1 < lines.length) ? lines[i + 1] : null;
                if (endsReferences(line, nextLine)) {
                    break;
                }
                referencesData.append(line).append(System.lineSeparator());
            } else if (REFERENCES_HEADING.matcher(line).find()) {
                found = true;
                referencesData.append(line).append(System.lineSeparator());
            }
        }

        if (!found) {
            System.out.println("References NOT found in file: " + filepath);
            notFound++;
            return false;
        }

        // Save references to TXT file
        try (FileWriter writer = new FileWriter(outputPath)) {
            writer.write(referencesData.toString());
            System.out.println("References found and saved to " + outputPath);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return true;
    }

    /**
     * Tells whether {@code line} starts a section that follows the references.
     * Lines never contain a newline (the text is split on line breaks), so a
     * "FURTHER READING" heading is recognised either on one line or split across
     * the end of {@code line} and the start of {@code nextLine}.
     */
    private static boolean endsReferences(String line, String nextLine) {
        if (line.contains("APPENDIX") || line.contains("Appendix") || line.contains("FURTHER READING")) {
            return true;
        }
        return nextLine != null
                && line.trim().endsWith("FURTHER")
                && nextLine.trim().startsWith("READING");
    }

    /**
     * Builds the pattern recognising the marker of reference number {@code index}:
     * "(n)", "[n]", "n." or the bare number.
     *
     * @param index                   reference number to look for
     * @param bareNeedsTrailingSpace  when {@code true}, a bare number only counts if it is
     *                                followed by whitespace
     */
    private static Pattern referenceMarker(int index, boolean bareNeedsTrailingSpace) {
        String bareNumber = bareNeedsTrailingSpace
                ? "|(\\b" + index + "\\b\\s+)"
                : "|(\\b" + index + "\\b)";
        return Pattern.compile(
                "(\\(\\s*" + index + "\\s*\\))"       // format (index)
                + "|(\\[\\s*" + index + "\\s*\\])"    // format [index]
                + "|(\\b" + index + "\\b\\s*\\.)"     // format index.
                + bareNumber);                         // the number on its own
    }

    /**
     * Splits a references file into individual references by looking for consecutive
     * numeric markers (1, 2, 3, ...). Lines between two markers are joined to the earlier
     * reference with single spaces. The result is also written to {@code outputPath},
     * one numbered reference per line.
     *
     * @param inputReferencesPath text file containing the references section
     * @param outputPath          text file that receives the numbered references
     * @return the parsed references, possibly empty
     * @throws RuntimeException if {@code inputReferencesPath} does not exist
     * @throws IOException      declared by the signature; failures while writing the output
     *                          are caught and printed
     */
    public Vector<String> parseReferences(String inputReferencesPath, String outputPath) throws IOException {
        Vector<String> parsedReferences = new Vector<>();

        FileInputStream stream;
        try {
            stream = new FileInputStream(inputReferencesPath);
        } catch (FileNotFoundException e) {
            throw new RuntimeException("File not found " + inputReferencesPath, e);
        }

        // Closing the scanner also closes the underlying stream, even on exceptions.
        try (Scanner scanner = new Scanner(stream)) {
            int index = 1;          // number of the reference currently searched for
            String pending = null;  // line already recognised as the start of reference `index`

            // Continue while a recognised start line is still pending, so that a final
            // reference whose first line is the last line of the file is not lost.
            while (pending != null || scanner.hasNextLine()) {
                String line;
                if (pending != null) {
                    line = pending;
                    pending = null;
                } else {
                    line = scanner.nextLine();
                    if (!referenceMarker(index, false).matcher(line).find()) {
                        continue; // still before the first reference
                    }
                }

                StringBuilder currReference = new StringBuilder(line);
                index++;
                Pattern nextMarker = referenceMarker(index, true);

                // Lines up to the next marker belong to the current reference.
                while (scanner.hasNextLine()) {
                    String following = scanner.nextLine();
                    if (nextMarker.matcher(following).find()) {
                        pending = following;
                        break;
                    }
                    currReference.append(" ").append(following);
                }

                parsedReferences.add(currReference.toString());
            }
        }

        int i = 0;
        try (FileWriter writer = new FileWriter(outputPath)) {
            for (String ref : parsedReferences) {
                i++;
                writer.write(i + ". " + ref + "\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        if (parsedReferences.isEmpty()) {
            notParsed++;
            System.out.println("References NOT parsed in file: " + inputReferencesPath);
        }

        return parsedReferences;
    }

    /**
     * Placeholder for splitting references into name and year; currently it only prints
     * every reference followed by an empty line.
     *
     * @param oneDocumentReferences references of a single document
     */
    public void splitReferences(Vector<String> oneDocumentReferences) {
        for (String ref : oneDocumentReferences) {
            System.out.println(ref + "\n");
        }
    }
}
package com.dre0059.articleprocessor.controller;
import com.dre0059.articleprocessor.GrobidClient;
import com.dre0059.articleprocessor.model.DocumentMetadata;
import com.dre0059.articleprocessor.service.MetadataParser;
import com.dre0059.articleprocessor.repository.DocumentRepository;
import com.dre0059.articleprocessor.repository.ReferenceRepository;
import com.dre0059.articleprocessor.service.TEIparser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import reactor.core.publisher.Mono;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
// TODO:
// 1. ✅ saving an article does not work correctly when the article is already in the DB; currently it only reports an ERROR that it cannot be processed
// 2. ✅ !!! save the references to the database
// 3. link each reference with the stored articles ???
// 4. make the program more USER-FRIENDLY - report that the document is being processed, that the document is already stored, that the document was saved, and print the metadata for verification
// 5. incorrect reference extraction - references that point to a website are not processed
/**
 * Controller for uploading a PDF article, running it through the GROBID client and
 * storing the extracted metadata and references.
 *
 * <p>All endpoints live under {@code /api/grobid}.
 */
@Controller
@RequestMapping("/api/grobid")
public class FileUploadController {

    private static final Logger logger = LoggerFactory.getLogger(FileUploadController.class);

    private final GrobidClient grobidClient;
    private final DocumentRepository metadataRepository;
    private final ReferenceRepository referenceRepository;

    public FileUploadController(GrobidClient grobidClient, DocumentRepository metadataRepository, ReferenceRepository referenceRepository) {
        this.grobidClient = grobidClient;
        this.metadataRepository = metadataRepository;
        this.referenceRepository = referenceRepository;
    }

    /**
     * Shows the upload form.
     *
     * @param model view model (not used by this handler)
     * @return the view name {@code "upload"}
     */
    @GetMapping("/upload")
    public String showUploadForm(Model model) {
        return "upload"; // returns upload.html
    }

    /**
     * Handles an uploaded PDF: copies it to a temporary file, asks the GROBID client for the
     * header and the references, and stores the article unless one with the same title exists.
     *
     * @param file the uploaded PDF (request part {@code file})
     * @return 200 with the keys {@code metadata} and {@code references} when the article was stored,
     *         409 with an {@code error} key when the title is already stored,
     *         500 with an {@code error} key when any step of the processing fails
     */
    @PostMapping("/upload")
    @ResponseBody
    public Mono<ResponseEntity<Map<String, String>>> handleFileUpload(@RequestParam("file") MultipartFile file) {
        logger.info("Received file: {}", file.getOriginalFilename());

        return Mono.fromCallable(() -> copyToTempFile(file))
                // The temporary copy is only needed while the file is being processed;
                // remove it on success, error and cancellation alike.
                .flatMap(tempFile -> processPdf(tempFile)
                        .doFinally(signal -> deleteTempFile(tempFile)))
                // Attached to the whole pipeline so that failures while creating/copying the
                // temporary file are reported the same way as processing failures.
                .onErrorResume(e -> {
                    logger.error("Error processing PDF", e);
                    return Mono.just(ResponseEntity.internalServerError().body(Map.of("error", "Failed to process PDF")));
                });
    }

    /** Copies the upload into a fresh temporary file; the file is removed again if the copy fails. */
    private Path copyToTempFile(MultipartFile file) throws java.io.IOException {
        Path tempFile = Files.createTempFile("upload-", ".pdf");
        try {
            file.transferTo(tempFile.toFile());
            return tempFile;
        } catch (java.io.IOException | RuntimeException e) {
            deleteTempFile(tempFile);
            throw e;
        }
    }

    /** Requests header and references for the PDF and stores the result once both have arrived. */
    private Mono<ResponseEntity<Map<String, String>>> processPdf(Path tempFile) {
        Mono<String> metadataMono = grobidClient.processHeader(tempFile.toFile());
        Mono<String> referencesMono = grobidClient.processReferences(tempFile.toFile());

        return Mono.zip(metadataMono, referencesMono)
                .flatMap(result -> storeArticle(result.getT1(), result.getT2()));
    }

    /** Stores the article and its references, or answers 409 when the title is already stored. */
    private Mono<ResponseEntity<Map<String, String>>> storeArticle(String metadataJson, String referencesXml) {
        String title = MetadataParser.extractTitle(metadataJson);
        List<String> authors = MetadataParser.extractAuthors(metadataJson);

        return Mono.justOrEmpty(metadataRepository.findByTitle(title))
                .map(existing -> {
                    logger.warn("Article with title '{}' already exists!", title);
                    return ResponseEntity.status(HttpStatus.CONFLICT)
                            .body(Map.of("error", "Article is already in database."));
                })
                .switchIfEmpty(Mono.fromCallable(() -> {
                    DocumentMetadata doc = new DocumentMetadata(title, authors);
                    metadataRepository.save(doc);

                    // Process the references via TEIparser
                    TEIparser teiParser = new TEIparser(referenceRepository);
                    teiParser.parseAndSaveToDB(referencesXml, doc);

                    Map<String, String> response = new HashMap<>();
                    response.put("metadata", metadataJson);
                    response.put("references", referencesXml);
                    return ResponseEntity.ok(response);
                }));
    }

    /** Best-effort removal of the temporary upload copy; a failure is logged, never propagated. */
    private void deleteTempFile(Path tempFile) {
        try {
            Files.deleteIfExists(tempFile);
        } catch (java.io.IOException e) {
            logger.warn("Could not delete temporary file {}", tempFile, e);
        }
    }
}
package com.dre0059.articleprocessor.model;
import jakarta.persistence.*;
import java.util.ArrayList;
import java.util.List;
/**
 * JPA entity mapped to the {@code authors} table: a name, a surname and the
 * documents the author is linked to.
 */
@Entity
@Table(name = "authors")
public class Author {

    /** Primary key, generated with the IDENTITY strategy. */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    private String name;

    private String surname;

    /** Inverse side of the many-to-many link; the mapping is defined by the {@code authors} property on the other side. */
    @ManyToMany(mappedBy = "authors")
    private List<Document> documents = new ArrayList<>();

    /** No-argument constructor. */
    public Author() {
    }

    /**
     * @param name    the author's name
     * @param surname the author's surname
     */
    public Author(String name, String surname) {
        this.name = name;
        this.surname = surname;
    }

    public Long getId() {
        return id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getSurname() {
        return surname;
    }

    public void setSurname(String surname) {
        this.surname = surname;
    }

    public List<Document> getDocuments() {
        return documents;
    }

    public void setDocuments(List<Document> documents) {
        this.documents = documents;
    }
}
package com.dre0059.articleprocessor.model;
import jakarta.persistence.*;
import org.hibernate.annotations.CollectionId;
import com.dre0059.articleprocessor.model.*;
import java.util.ArrayList;
import java.util.List;
/**
 * JPA entity mapped to the {@code documents} table: bibliographic data of one document,
 * the references that start from it and its authors.
 */
@Entity
@Table(name = "documents")
public class Document {

    /** Primary key, generated with the IDENTITY strategy. */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    private String title;

    private Integer year;

    private String doi;

    // NOTE(review): no column length is given here, so the persistence provider's default
    // for String columns applies - confirm that is large enough for a whole abstract.
    @Column(name = "abstractText")
    private String abstractText;

    private Integer pages;

    private String publisher;

    /** References whose {@code fromDocument} is this document; every operation is cascaded to them. */
    @OneToMany(mappedBy = "fromDocument", cascade = CascadeType.ALL)
    private List<Reference> references = new ArrayList<>();

    /** Authors of the document, linked through the {@code document_author} join table. */
    @ManyToMany
    @JoinTable(
            name = "document_author",
            joinColumns = @JoinColumn(name = "ID_document"),
            inverseJoinColumns = @JoinColumn(name = "ID_author")
    )
    private List<Author> authors = new ArrayList<>();

    /** No-argument constructor. */
    public Document() {
    }

    /**
     * Creates a document with its bibliographic fields; references and authors start empty.
     */
    public Document(String title, Integer year, String doi, String abstractText, Integer pages, String publisher) {
        this.title = title;
        this.year = year;
        this.doi = doi;
        this.abstractText = abstractText;
        this.pages = pages;
        this.publisher = publisher;
    }

    public Long getId() {
        return id;
    }

    public String getTitle() {
        return title;
    }

    public Integer getYear() {
        return year;
    }

    public String getDoi() {
        return doi;
    }

    public String getAbstractText() {
        return abstractText;
    }

    public Integer getPages() {
        return pages;
    }

    public String getPublisher() {
        return publisher;
    }

    public List<Reference> getReferences() {
        return references;
    }

    public List<Author> getAuthors() {
        return authors;
    }

    public void setAuthors(List<Author> authors) {
        this.authors = authors;
    }
}
package com.dre0059.articleprocessor.model;
import jakarta.persistence.*;
import java.util.ArrayList;
import java.util.List;
/**
 * JPA entity mapped to the {@code DOCUMENT_METADATA} table: the title of a document
 * (declared unique) and the names of its authors.
 */
@Entity // database table
@Table (name = "DOCUMENT_METADATA", uniqueConstraints = @UniqueConstraint(columnNames = "title"))
public class DocumentMetadata {

    /** Primary key; the ID is generated automatically (IDENTITY strategy). */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    private String title;

    /** Author names, kept in an auxiliary collection table. */
    @ElementCollection
    private List<String> authors = new ArrayList<>();

    /** No-argument constructor, needed by Hibernate to instantiate the entity. */
    public DocumentMetadata() {
    }

    /**
     * @param title   the document title
     * @param authors the author names; the given list is stored as-is
     */
    public DocumentMetadata(String title, List<String> authors) {
        this.title = title;
        this.authors = authors;
    }

    public Long getId() {
        return id;
    }

    public String getTitle() {
        return title;
    }

    public List<String> getAuthors() {
        return authors;
    }
}
package com.dre0059.articleprocessor.model;
import jakarta.persistence.*;
import javax.print.Doc;
import java.util.List;
/**
 * JPA entity for one entry of a document's reference list: a link from the citing
 * document ({@code fromDocument}) to the cited document ({@code toDocument}).
 */
// NOTE(review): REFERENCES is a reserved word in standard SQL - confirm that the target
// database accepts it as an unquoted table name, otherwise quote or rename the table.
@Entity
@Table(name = "references")
public class Reference {

    /** Primary key, generated with the IDENTITY strategy. */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    // number or letters in reference list
    private String orderNumber;

    // Only PERSIST and MERGE are cascaded to the linked documents. Cascading REMOVE
    // (part of CascadeType.ALL) from this side would delete a whole document as soon as
    // a single reference pointing to it is deleted.
    @ManyToOne(cascade = {CascadeType.PERSIST, CascadeType.MERGE})
    @JoinColumn(name = "ID_fromDocument")
    private Document fromDocument;

    @ManyToOne(cascade = {CascadeType.PERSIST, CascadeType.MERGE})
    @JoinColumn(name = "ID_toDocument")
    private Document toDocument;

    /** No-argument constructor. */
    public Reference() {}

    /**
     * @param orderNumber  label of the entry in the reference list (number or letters)
     * @param fromDocument the document that contains the reference
     * @param toDocument   the document the reference points to
     */
    public Reference(String orderNumber, Document fromDocument, Document toDocument) {
        this.orderNumber = orderNumber;
        this.fromDocument = fromDocument;
        this.toDocument = toDocument;
    }

    public String getOrderNumber() { return orderNumber; }
    public Document getFromDocument() { return fromDocument; }
    public Document getToDocument() { return toDocument; }
    public Long getId() { return id; }

    public void setFromDocument(Document fromDocument) { this.fromDocument = fromDocument; }
    public void setToDocument(Document toDocument) { this.toDocument = toDocument; }
    public void setOrderNumber(String orderNumber) { this.orderNumber = orderNumber; }
}
package com.dre0059.articleprocessor.repository;
import com.dre0059.articleprocessor.model.Author;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import java.util.Optional;
/**
 * Spring Data JPA repository for {@link Author} entities.
 */
@Repository
public interface AuthorRepository extends JpaRepository<Author, Long> {

    /**
     * Finds an author by name and surname. The method name follows the entity's
     * {@code name} and {@code surname} properties so the query can be derived from it.
     *
     * @param name    the author's name
     * @param surname the author's surname
     * @return the matching author, or an empty Optional when there is none
     */
    Optional<Author> findByNameAndSurname(String name, String surname);

    /**
     * Kept for existing callers. {@code Author} has no {@code fullname} property, so no
     * query can be derived from this method name; it delegates to
     * {@link #findByNameAndSurname(String, String)} instead.
     *
     * @param name    the author's name
     * @param surname the author's surname
     * @return the matching author, or an empty Optional when there is none
     */
    default Optional<Author> findByFullname(String name, String surname) {
        return findByNameAndSurname(name, surname);
    }
}
package com.dre0059.articleprocessor.repository;
import com.dre0059.articleprocessor.model.DocumentMetadata;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import org.w3c.dom.Document;
import java.util.Optional;
// uklada extrahovane data
@Repository
public interface DocumentRepository extends JpaRepository<Document, Long> {
Optional<Document> findByTitleAndAuthorsSurname(String title, String surname);
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment