From dc1846d774cf694430a3b987fa8bacd2f688e6d0 Mon Sep 17 00:00:00 2001
From: dre0059 <eliska.dreveniakova@vsb.cz>
Date: Wed, 30 Apr 2025 19:26:38 +0200
Subject: [PATCH] =?UTF-8?q?Dokument=C3=A1cia=20update?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                                     | 10 ++--
 .../articleprocessor/GrobidClient.java        | 53 ++++++++++++-------
 .../service/ReferenceService.java             | 40 ++++++++++++--
 .../service/TEINamespaceContext.java          | 16 ++++++
 .../resources/templates/about-project.html    | 10 ++++
 .../templates/citation-timeline.html          | 11 +++-
 .../resources/templates/more-citations.html   | 10 ++++
 src/main/resources/templates/statistics.html  |  9 ++++
 src/main/resources/templates/upload.html      | 11 ++++
 src/main/resources/templates/view-all.html    |  9 ++++
 src/main/resources/templates/view-pdf.html    | 10 ++++
 11 files changed, 163 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index 3e72bff..9a4162f 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,15 @@
-# Articleprocessor GROBID version 1.0 
+# Articleprocessor GROBID version
 
 - The main program runs at : *http://localhost:8080/upload* or  *http://localhost:8080*
 - H2 database can be found on : *http://localhost:8080/h2-console/login.jsp?jsessionid=9af0ea4b83284ff0a4574769b0336943*
 - Password for the DBS can be found in : `resources/application.properties`
-- GROBID server available on address *http://158.196.98.65:8080/* at university network
+- GROBID server is available at *http://158.196.98.65:8080/* on the university network
+
+
+_VŠB-TUO 2025_
+
+_Eliška Kozáčiková - DRE0059_
 
----------
 ---------
 
 
diff --git a/src/main/java/com/dre0059/articleprocessor/GrobidClient.java b/src/main/java/com/dre0059/articleprocessor/GrobidClient.java
index ccb56ec..93da170 100644
--- a/src/main/java/com/dre0059/articleprocessor/GrobidClient.java
+++ b/src/main/java/com/dre0059/articleprocessor/GrobidClient.java
@@ -20,19 +20,39 @@ import reactor.core.publisher.Mono;
 import java.io.File;
 import java.net.ConnectException;
 
-
+/**
+ *  Tento klient používa WebClient na odosielanie požiadaviek
+ *         na server GROBID a získavanie spracovaných metadát,
+ *         referencií a hlavičiek z PDF dokumentov.
+ * *
+ * Implementuje metódy na komunikáciu s API Grobid servera pre rôzne
+ *         typy analýz PDF dokumentov.
+ */
 @Service
 public class GrobidClient {
     private final WebClient webClient;
 
-  public GrobidClient(GrobidProperties grobidProperties) {
+    /**
+     * Konštruktor, ktorý inicializuje WebClient pre komunikáciu s GROBID serverom
+     *
+     * @param grobidProperties Konfigurácia obsahujúca URL hostiteľa GROBID servera.
+     */
+     public GrobidClient(GrobidProperties grobidProperties) {
 
-    this.webClient = WebClient.builder()
-                .baseUrl(grobidProperties.getHost())   // URL kde beží GROBID server
-                .build();
-    }
+        this.webClient = WebClient.builder()
+                    .baseUrl(grobidProperties.getHost())   // URL kde beží GROBID server
+                    .build();
+        }
 
-    // get METADATA of the file
+    /**
+     * Posiela požiadavku na server GROBID na spracovanie hlavičky PDF dokumentu.
+     * Metóda získa základné informácie o dokumente
+     *  (názov dokumentu, autorov a ďalšie metadáta z hlavičky)
+     *
+     * @param pdfFile PDF súbor, ktorý sa má spracovať.
+     * @return JSON reťazec obsahujúci extrahované metadáta o dokumente.
+     * @throws RuntimeException ak nastane problém pri komunikácii so serverom GROBID alebo pri spracovaní odpovede.
+     */
     public String processHeader(File pdfFile){    // Mono - vráti jeden string, výsledok je JSON
       try {
 
@@ -59,18 +79,15 @@ public class GrobidClient {
       }
   }
 
-    public String processFullMetadata(File pdfFile) {
-        return webClient.post()
-                .uri("/api/processFullMetadata")
-                .contentType(MediaType.MULTIPART_FORM_DATA)
-                .body(BodyInserters.fromMultipartData("input", new FileSystemResource(pdfFile)))
-                .retrieve()
-                .bodyToMono(String.class)
-                .block();
-    }
-
 
-    // spracuje REFERENCIE z PDF
+    /**
+     * Posiela požiadavku na server GROBID
+     *  na SPRACOVANIE REFERENCIÍ v PDF dokumente.
+     *
+     * @param pdfFile PDF súbor, ktorý sa má spracovať.
+     * @return reťazec s odpoveďou servera GROBID (TEI XML) obsahujúci extrahované bibliografické referencie.
+     * @throws RuntimeException ak nastane problém pri komunikácii so serverom GROBID alebo pri spracovaní odpovede.
+     */
     public String processReferences(File pdfFile){
       try {
           return webClient.post()
diff --git a/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java b/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java
index 875c65c..1ac5685 100644
--- a/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java
+++ b/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java
@@ -13,6 +13,7 @@ import com.dre0059.articleprocessor.model.Reference;
 import com.dre0059.articleprocessor.repository.AuthorRepository;
 import com.dre0059.articleprocessor.repository.DocumentRepository;
 import com.dre0059.articleprocessor.repository.ReferenceRepository;
+import lombok.Setter;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
 import org.w3c.dom.Document;
@@ -31,7 +32,17 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+/**
+ *
+ *  Trieda na extrahovanie a ukladanie referencií medzi dokumentmi.
+ *  *
+ *  Táto trieda :
+ *      - spracováva XML dáta s referenciami (získané z GROBID servera)
+ *      - vyhľadáva v databáze existujúce dokumenty a autorov
+ *      - vytvára nové referencie medzi dokumentami
+ */
 @Service
+@Setter
 public class ReferenceService {
 
     private final DocumentRepository documentRepository;
@@ -48,15 +59,31 @@ public class ReferenceService {
         this.referenceRepository = referenceRepository;
     }
 
+    /**
+     * Nastaví dokument, z ktorého sa cituje
+     *      (dokument, v ktorom sa zoznam referencií nachádzal = PDF dokument)
+     *
+     * @param fromDocument Dokument, z ktorého sa referencuje
+     */
     public void setFromDocument(Dokument fromDocument) {
         this.fromDocument = fromDocument;
         System.out.println("From document: " + fromDocument.getTitle());
     }
 
+    /**
+     * Nastaví dokument, ktorý je citovaný
+     *
+     * @param doc Dokument, na ktorý sa referencuje
+     */
     public void setToDocument(Dokument doc){
         this.toDocument = doc;
     }
 
+    /**
+     * Extrahuje referencie z XML TEI dokumentu a uloží ich do databázy.
+     *
+     * @param xmlTeiReferences XML reťazec obsahujúci referencie
+     */
     public void extractReferences(String xmlTeiReferences) {
         List<Author> databaseAuthors = this.authorRepository.findAll();
         Map<String, Author> authorMap = new HashMap<>();
@@ -67,6 +94,7 @@ public class ReferenceService {
         }
 
         try {
+            // Vytvorenie DocumentBuilder pre načítanie XML dokumentu
             DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
             factory.setNamespaceAware(true);
             DocumentBuilder builder = factory.newDocumentBuilder();
@@ -74,10 +102,12 @@ public class ReferenceService {
             InputSource inputSource = new InputSource(new StringReader(xmlTeiReferences));
             Document doc = builder.parse(inputSource);
 
+            // Nastavenie XPath pre vyhľadávanie v XML
             XPathFactory xpathFactory = XPathFactory.newInstance();
             XPath xpath = xpathFactory.newXPath();
             xpath.setNamespaceContext(new TEINamespaceContext());
 
+            // Výber všetkých biblioštruktúr (referencií) v XML dokumente
             NodeList biblNodes = (NodeList) xpath.evaluate("//tei:biblStruct", doc, XPathConstants.NODESET);
 
             // for each reference
@@ -121,8 +151,10 @@ public class ReferenceService {
                     String firstName = xpath.evaluate(".//tei:forename", authorNode);
                     String lastName = xpath.evaluate(".//tei:surname", authorNode);
 
+                    // Vytvorenie kľúča pre autora
                     String authorKey = lastName.toLowerCase() + "," + firstName.toLowerCase();
 
+                    // Ak autor existuje v databáze, pridá sa do zoznamu, inak sa vytvorí nový
                     if (authorMap.containsKey(authorKey)) {
                         authors.add(authorMap.get(authorKey));
                         System.out.println("Author: " + authorMap.get(authorKey) + " already exists in database.");
@@ -135,15 +167,14 @@ public class ReferenceService {
                 referencedDocument.setAuthors(authors);
 
                 List<String> authorLastNames= authors.stream().map(Author::getLastname).toList();
-                // check if document exists in dbs
-                boolean exists = documentRepository.existsByTitleAndAuthorsIn(title, authorLastNames);
 
                 // check whether the document is already saved in DBS
+                boolean exists = documentRepository.existsByTitleAndAuthorsIn(title, authorLastNames);
+
                 if(exists){
                     System.out.println("Document with this title and authors already exist");
 
                     // vyhÄľadaj dokument podÄľa TITLE alebo AUTORA a nastav ho ako toDokument
-
                     referencedDocument = documentRepository.findByTitleAndAuthorsIn(title, authorLastNames)
                             .orElseThrow(() -> new IllegalStateException("Document should exist but was NOT FOUND."));
 
@@ -157,7 +188,8 @@ public class ReferenceService {
                     this.authorRepository.saveAll(authors);
                 }
 
-                Reference reference = new Reference(/*referenceID,*/ fromDocument, toDocument);
+                // Vytvorenie a uloženie referencie
+                Reference reference = new Reference(fromDocument, toDocument);
 
                 // extract ID from the document
                 String referenceID = xpath.evaluate("@*[local-name()='id']", biblNode);
diff --git a/src/main/java/com/dre0059/articleprocessor/service/TEINamespaceContext.java b/src/main/java/com/dre0059/articleprocessor/service/TEINamespaceContext.java
index 001f791..784132f 100644
--- a/src/main/java/com/dre0059/articleprocessor/service/TEINamespaceContext.java
+++ b/src/main/java/com/dre0059/articleprocessor/service/TEINamespaceContext.java
@@ -10,7 +10,19 @@ package com.dre0059.articleprocessor.service;
 import javax.xml.namespace.NamespaceContext;
 import java.util.Iterator;
 
+/**
+ * Implementácia rozhrania NamespaceContext pre XPath dotazy na TEI XML dokumenty.
+ *  *
+ * Táto trieda poskytuje správne názvové priestory pre XPath dotazy.
+ */
 public class TEINamespaceContext implements NamespaceContext {
+
+    /**
+     * Kontroluje, či je prefix "tei"
+     *
+     * @param prefix prefix to check
+     * @return  URL, ktoré je názvovým priestorom pre TEI (resp. null)
+     */
     @Override
     public String getNamespaceURI(String prefix) {
         if ("tei".equals(prefix)) {
@@ -19,6 +31,10 @@ public class TEINamespaceContext implements NamespaceContext {
         return null;
     }
 
+    /**
+     * povinné metódy z rozhrania
+     * nevyužívame tieto dve metódy
+     */
     @Override
     public String getPrefix(String namespaceURI) { return null; }
     @Override
diff --git a/src/main/resources/templates/about-project.html b/src/main/resources/templates/about-project.html
index f335acf..02fbd4d 100644
--- a/src/main/resources/templates/about-project.html
+++ b/src/main/resources/templates/about-project.html
@@ -1,3 +1,13 @@
+<!--
+    Autor: Eliška Kozáčiková
+    Škola: VŠB-TUO
+    Fakulta: Fakulta Elektrotechniky a informatiky
+    Dátum: 30.04.2025
+
+    Tento HTML dokument obsahuje informácie o projekte
+
+-->
+
 <!DOCTYPE html>
 <html lang="en" xmlns:th="http://www.thymeleaf.org">
 <head>
diff --git a/src/main/resources/templates/citation-timeline.html b/src/main/resources/templates/citation-timeline.html
index 6cdff9b..13783a5 100644
--- a/src/main/resources/templates/citation-timeline.html
+++ b/src/main/resources/templates/citation-timeline.html
@@ -1,4 +1,13 @@
-    <!DOCTYPE html>
+<!--
+    Autor: Eliška Kozáčiková
+    Škola: VŠB-TUO
+    Fakulta: Fakulta Elektrotechniky a informatiky
+    Dátum: 30.04.2025
+
+    Tento HTML dokument zobrazuje graf dokumentu a jeho referencií.
+
+-->
+<!DOCTYPE html>
     <html xmlns:th="http://www.thymeleaf.org">
     <head>
         <!-- zobrazuje graf článku na /statistics/citation-timeline?documentId={id} -->
diff --git a/src/main/resources/templates/more-citations.html b/src/main/resources/templates/more-citations.html
index b59b664..635abea 100644
--- a/src/main/resources/templates/more-citations.html
+++ b/src/main/resources/templates/more-citations.html
@@ -1,3 +1,13 @@
+<!--
+    Autor: Eliška Kozáčiková
+    Škola: VŠB-TUO
+    Fakulta: Fakulta Elektrotechniky a informatiky
+    Dátum: 30.04.2025
+
+    Tento HTML dokument zobrazuje graf viacerých dokumentov a ich citácií
+
+-->
+
 <!DOCTYPE html>
 <html xmlns:th="http://www.thymeleaf.org">
 <head>
diff --git a/src/main/resources/templates/statistics.html b/src/main/resources/templates/statistics.html
index 54868a1..b52459d 100644
--- a/src/main/resources/templates/statistics.html
+++ b/src/main/resources/templates/statistics.html
@@ -1,3 +1,12 @@
+<!--
+    Autor: Eliška Kozáčiková
+    Škola: VŠB-TUO
+    Fakulta: Fakulta Elektrotechniky a informatiky
+    Dátum: 30.04.2025
+
+    Tento HTML dokument obsahuje všetobecné štatistiky o dokumentoch v celej aplikácii
+
+-->
 <!DOCTYPE html>
 <html xmlns:th="http://www.thymeleaf.org">
 <head>
diff --git a/src/main/resources/templates/upload.html b/src/main/resources/templates/upload.html
index 82e70a2..97538c3 100644
--- a/src/main/resources/templates/upload.html
+++ b/src/main/resources/templates/upload.html
@@ -1,3 +1,14 @@
+<!--
+    Autor: Eliška Kozáčiková
+    Škola: VŠB-TUO
+    Fakulta: Fakulta Elektrotechniky a informatiky
+    Dátum: 30.04.2025
+
+    Tento HTML dokument zobrazuje úvodnú stránku,
+    umožňuje užívateľovi nahrať PDF dokument,
+    priradiť mu kategóriu a tagy a odoslať ho na spracovanie
+
+-->
 <!DOCTYPE html>
 <html lang="en" xmlns:th="http://www.thymeleaf.org">
 <head>
diff --git a/src/main/resources/templates/view-all.html b/src/main/resources/templates/view-all.html
index 5150e3b..433dca4 100644
--- a/src/main/resources/templates/view-all.html
+++ b/src/main/resources/templates/view-all.html
@@ -1,3 +1,12 @@
+<!--
+    Autor: Eliška Kozáčiková
+    Škola: VŠB-TUO
+    Fakulta: Fakulta Elektrotechniky a informatiky
+    Dátum: 30.04.2025
+
+    Tento HTML dokument zobrazuje zoznam všetkých existujúcich článkov v databáze
+
+-->
 <!DOCTYPE html>
 <html lang="en" xmlns:th="http://www.thymeleaf.org">
 <head>
diff --git a/src/main/resources/templates/view-pdf.html b/src/main/resources/templates/view-pdf.html
index e13be45..8cd5e87 100644
--- a/src/main/resources/templates/view-pdf.html
+++ b/src/main/resources/templates/view-pdf.html
@@ -1,3 +1,13 @@
+<!--
+    Autor: Eliška Kozáčiková
+    Škola: VŠB-TUO
+    Fakulta: Fakulta Elektrotechniky a informatiky
+    Dátum: 30.04.2025
+
+    Tento HTML dokument zobrazuje jeden článok, informácie o ňom a zoznam jeho referencií
+    (referencie sú klikateľné odkazy na iné články v DBS)
+
+-->
 <!DOCTYPE html>
 <html xmlns:th="http://www.thymeleaf.org">
 <head>
-- 
GitLab