From 046b0bf6c8423bb77f28b24e17770551629337d4 Mon Sep 17 00:00:00 2001
From: dre0059 <eliska.dreveniakova@vsb.cz>
Date: Fri, 21 Feb 2025 09:18:57 +0100
Subject: [PATCH] Fulfill REFERENCES table

---
 .../controller/FileUploadController.java      |  2 +-
 .../articleprocessor/model/Dokument.java      |  3 +
 .../repository/DocumentRepository.java        | 10 +++
 .../service/HeaderService.java                |  9 +-
 .../service/ReferenceService.java             | 84 +++++++++++--------
 5 files changed, 70 insertions(+), 38 deletions(-)

diff --git a/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java b/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java
index af0c62e..dc7f29a 100644
--- a/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java
+++ b/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java
@@ -59,7 +59,7 @@ public class FileUploadController {
             referenceService.extractReferences(references);
 
             System.out.println(header);
-            System.out.println(references);
+            //System.out.println(references);
 
             tmpFile.delete();
 
diff --git a/src/main/java/com/dre0059/articleprocessor/model/Dokument.java b/src/main/java/com/dre0059/articleprocessor/model/Dokument.java
index 7dfcd4b..086720d 100644
--- a/src/main/java/com/dre0059/articleprocessor/model/Dokument.java
+++ b/src/main/java/com/dre0059/articleprocessor/model/Dokument.java
@@ -6,6 +6,9 @@ import org.hibernate.annotations.Cascade;
 import java.util.ArrayList;
 import java.util.List;
 
+// TODO : int / boolean - či je PDF nahraté alebo je to dokument len z referencie
+//    1. references -
+
 @Entity
 @Table(name = "documents")
 public class Dokument {
diff --git a/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java b/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java
index d1bafc4..6a10273 100644
--- a/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java
+++ b/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java
@@ -22,6 +22,16 @@ public interface DocumentRepository extends JpaRepository<Dokument, Long> {
             "AND a.lastName IN :lastNames"
     )
     boolean existsByTitleAndAuthorsIn(@Param("title") String title, @Param("lastNames") List<String> lastNames);
+    // Returns the document whose title equals :title and that has at least one author whose lastName is in :lastNames.
+    // DISTINCT is required: the author join yields one row per matching author, and a single-result Optional must not see duplicates.
+    @Query(
+            "SELECT DISTINCT d FROM Dokument d " +
+                    "JOIN d.authors a " +
+                    "WHERE d.title = :title " +
+                    "AND a.lastName IN :lastNames"
+    )
+    Optional<Dokument> findByTitleAndAuthorsIn(@Param("title") String title, @Param("lastNames") List<String> lastNames);
+
 }
 
 /*
diff --git a/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java b/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java
index 9b24e59..ff5a66a 100644
--- a/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java
+++ b/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java
@@ -15,7 +15,8 @@ import java.util.regex.Pattern;
 // TODO :
 //  1. VALIDATE author based on surname and first INITIAL of the firstName.
 //      SOLUTION : change keys of the map on surname and first initial and compare it with surname and first initial of author
-//  2.
+//  2. dve mená autora nesprávne ukladá (priezvisko neuloží, zistiť teda formát aby sa správne ukladalo)
+//  3. ukladá viac krát meno toho istého autora, zistiť prečo !!!
 
 @Service
 public class HeaderService {
@@ -121,12 +122,12 @@ public class HeaderService {
             String[] nameParts = fullName.split(",");
 
             String firstName;
-            String lastName = nameParts[1].trim();
+            String lastName = nameParts[0].trim();
             if(nameParts.length > 2){
                 // have two names
-                firstName = nameParts[0].trim() + " " + nameParts[2].trim();
+                firstName = nameParts[1].trim() + " " + nameParts[2].trim();
             } else {
-                firstName = nameParts[0].trim();
+                firstName = nameParts[1].trim();
             }
 
             String authorKey = lastName.toLowerCase() + "," + firstName.toLowerCase();
diff --git a/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java b/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java
index 03a20e4..fd03a23 100644
--- a/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java
+++ b/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java
@@ -21,44 +21,40 @@ import javax.xml.xpath.XPathFactory;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 
 // TODO :
 //  1. uloĹľiĹĄ prepojenie toDocument a fromDocument do tabuÄľky referencie
 //  2. vytiahnuť orderNumber z referencie (toto riešiť cez GROBID)
 //  3. aktuálne sa mi toDocument ukladá vždy ako nový.. ja ho potrebujem vyhľadať a na základe toho uložiť alebo prepojiť
+//  4. uložiť záznam do tabuľky references
+//  5. ak už bolo PDF raz uložené, uloží sa mi "null" článok, prepojený s autormi - VYRIESIT
+
 
 @Service
 public class ReferenceService {
 
     private final DocumentRepository documentRepository;
     private final AuthorRepository authorRepository;
-
-    //private String title;
-    private Integer year;
-    private String doi;
-    private String abstractText;
-    private Integer pages;
-    private String publisher;
-
-    private String author;
-    private List<Author> authorList = new ArrayList<>();
+    private final ReferenceRepository referenceRepository;
 
     private Dokument fromDocument;
     private Dokument toDocument;
 
-
     @Autowired
-    public ReferenceService(DocumentRepository documentRepository, AuthorRepository authorRepository) {
+    public ReferenceService(DocumentRepository documentRepository, AuthorRepository authorRepository, ReferenceRepository referenceRepository) {
         this.documentRepository = documentRepository;
         this.authorRepository = authorRepository;
+        this.referenceRepository = referenceRepository;
     }
 
     public void setFromDocument(Dokument fromDocument) {
         this.fromDocument = fromDocument;
+        System.out.println("From document: " + fromDocument.getTitle());
+    }
+
+    public void setToDocument(Dokument doc){
+        this.toDocument = doc;
     }
 
     public void extractReferences(String xmlTeiReferences) {
@@ -84,18 +80,34 @@ public class ReferenceService {
 
             NodeList biblNodes = (NodeList) xpath.evaluate("//tei:biblStruct", doc, XPathConstants.NODESET);
 
+            // for each reference
             for (int i = 0; i < biblNodes.getLength(); i++) {
                 Node biblNode = biblNodes.item(i);
-                Dokument toDokument = new Dokument();
+                Dokument referencedDocument = new Dokument();
 
                 // Extract title - toDocument
                 String title = xpath.evaluate(".//tei:title[@level='m' or @level='a']", biblNode);
-                toDokument.setTitle(title);
+                referencedDocument.setTitle(title);
+
+                // Extract year of publication
+                String yearStr = xpath.evaluate(".//tei:date[@type='published']/@when", biblNode);
+                if (yearStr != null && !yearStr.isEmpty()) {
+                    try {
+                        referencedDocument.setPublicationYear(Integer.valueOf(yearStr));
+                    } catch (NumberFormatException e) {
+                        System.out.println("Error during converting year." + yearStr);
+                    }
+                }
+
+                // Extract publisher
+                String publisher = xpath.evaluate(".//tei:publisher", biblNode);
+                referencedDocument.setPublisher(publisher);
 
                 // Extract authors
                 NodeList authorNodes = (NodeList) xpath.evaluate(".//tei:author/tei:persName", biblNode, XPathConstants.NODESET);
                 List<Author> authors = new ArrayList<>();
 
+                // each author in a reference
                 for (int j = 0; j < authorNodes.getLength(); j++) {
                     Node authorNode = authorNodes.item(j);
 
@@ -112,28 +124,34 @@ public class ReferenceService {
                         authors.add(newAuthor);
                         authorMap.put(authorKey, newAuthor);
                     }
+                }
+                referencedDocument.setAuthors(authors);
 
-                    toDokument.setAuthors(authors);
+                List<String> authorLastNames= authors.stream().map(Author::getLastname).toList();
 
-                    // Extract year of publication
-                    String yearStr = xpath.evaluate(".//tei:date[@type='published']/@when", biblNode);
-                    if (yearStr != null && !yearStr.isEmpty()) {
-                        try {
-                            toDokument.setPublicationYear(Integer.valueOf(yearStr));
-                        } catch (NumberFormatException e) {
-                            System.out.println("Error during converting year." + yearStr);
-                        }
-                    }
+                // check if document exists in dbs
+                boolean exists = documentRepository.existsByTitleAndAuthorsIn(title, authorLastNames);
 
-                    // Extract publisher
-                    String publisher = xpath.evaluate(".//tei:publisher", biblNode);
-                    toDokument.setPublisher(publisher);
+                // check whether the document is already saved in DBS
+                if(exists){
+                    System.out.println("Document with this title and authors already exist");
 
-                    this.documentRepository.save(toDokument);
-                    this.authorRepository.saveAll(authors);
+                    // vyhľadaj dokument podľa TITLE alebo AUTORA a nastav ho ako toDokument
+
+                    referencedDocument = documentRepository.findByTitleAndAuthorsIn(title, authorLastNames)
+                            .orElseThrow(() -> new IllegalStateException("Document should exist but was NOT FOUND."));
 
+                    this.toDocument = referencedDocument;
+                    System.out.println("Document already exists in database : " + referencedDocument.getTitle() + " with ID : " + referencedDocument.getId());
+                } else {
+                    // create new dokument
+                    this.setToDocument(referencedDocument);
+                    this.documentRepository.save(toDocument);
+                    this.authorRepository.saveAll(authors);
                 }
 
+                Reference reference = new Reference("[i]", fromDocument, toDocument);
+                referenceRepository.save(reference);
             }
         } catch (Exception e) {
             e.printStackTrace();
-- 
GitLab