From 046b0bf6c8423bb77f28b24e17770551629337d4 Mon Sep 17 00:00:00 2001 From: dre0059 <eliska.dreveniakova@vsb.cz> Date: Fri, 21 Feb 2025 09:18:57 +0100 Subject: [PATCH] Fulfill REFERENCES table --- .../controller/FileUploadController.java | 2 +- .../articleprocessor/model/Dokument.java | 3 + .../repository/DocumentRepository.java | 10 +++ .../service/HeaderService.java | 9 +- .../service/ReferenceService.java | 84 +++++++++++-------- 5 files changed, 70 insertions(+), 38 deletions(-) diff --git a/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java b/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java index af0c62e..dc7f29a 100644 --- a/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java +++ b/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java @@ -59,7 +59,7 @@ public class FileUploadController { referenceService.extractReferences(references); System.out.println(header); - System.out.println(references); + //System.out.println(references); tmpFile.delete(); diff --git a/src/main/java/com/dre0059/articleprocessor/model/Dokument.java b/src/main/java/com/dre0059/articleprocessor/model/Dokument.java index 7dfcd4b..086720d 100644 --- a/src/main/java/com/dre0059/articleprocessor/model/Dokument.java +++ b/src/main/java/com/dre0059/articleprocessor/model/Dokument.java @@ -6,6 +6,9 @@ import org.hibernate.annotations.Cascade; import java.util.ArrayList; import java.util.List; +// TODO : int / boolean - či je PDF nahraté alebo je to dokument len z referencie +// 1.
references - + @Entity @Table(name = "documents") public class Dokument { diff --git a/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java b/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java index d1bafc4..6a10273 100644 --- a/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java +++ b/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java @@ -22,6 +22,16 @@ public interface DocumentRepository extends JpaRepository<Dokument, Long> { "AND a.lastName IN :lastNames" ) boolean existsByTitleAndAuthorsIn(@Param("title") String title, @Param("lastNames") List<String> lastNames); + + + @Query( + "SELECT d FROM Dokument d " + + "JOIN d.authors a " + + "WHERE d.title = :title " + + "AND a.lastName IN :lastNames" + ) + Optional<Dokument> findByTitleAndAuthorsIn(@Param("title") String title, @Param("lastNames") List<String> lastNames); + } /* diff --git a/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java b/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java index 9b24e59..ff5a66a 100644 --- a/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java +++ b/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java @@ -15,7 +15,8 @@ import java.util.regex.Pattern; // TODO : // 1. VALIDATE author based on surname and first INITIAL of the firstName. // SOLUTION : change keys of the map on surname and first initial and compare it with surname and first initial of author -// 2. +// 2. dve mená autora nesprávne ukladá (priezvisko neuloží, zistiť teda formát aby sa správne ukladalo) +// 3. ukladá viac krát meno toho istého autora, zistiť prečo !!!
@Service public class HeaderService { @@ -121,12 +122,12 @@ public class HeaderService { String[] nameParts = fullName.split(","); String firstName; - String lastName = nameParts[1].trim(); + String lastName = nameParts[0].trim(); if(nameParts.length > 2){ // have two names - firstName = nameParts[0].trim() + " " + nameParts[2].trim(); + firstName = nameParts[1].trim() + " " + nameParts[2].trim(); } else { - firstName = nameParts[0].trim(); + firstName = nameParts[1].trim(); } String authorKey = lastName.toLowerCase() + "," + firstName.toLowerCase(); diff --git a/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java b/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java index 03a20e4..fd03a23 100644 --- a/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java +++ b/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java @@ -21,44 +21,40 @@ import javax.xml.xpath.XPathFactory; import java.io.IOException; import java.io.InputStream; import java.io.StringReader; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; // TODO : // 1. uložiť prepojenie toDocument a fromDocument do tabuľky referencie // 2. vytiahnuť orderNumber z referencie (toto riešiť cez GROBID) // 3. aktuálne sa mi toDocument ukladá vždy ako nový.. ja ho potrebujem vyhľadať a na základe toho uložiť alebo prepojiť +// 4. uložiť záznam do tabuľky references +// 5.
ak už bolo PDF raz uložené, uloží sa mi "null" článok, prepojený s autormi - VYRIESIT + @Service public class ReferenceService { private final DocumentRepository documentRepository; private final AuthorRepository authorRepository; - - //private String title; - private Integer year; - private String doi; - private String abstractText; - private Integer pages; - private String publisher; - - private String author; - private List<Author> authorList = new ArrayList<>(); + private final ReferenceRepository referenceRepository; private Dokument fromDocument; private Dokument toDocument; - @Autowired - public ReferenceService(DocumentRepository documentRepository, AuthorRepository authorRepository) { + public ReferenceService(DocumentRepository documentRepository, AuthorRepository authorRepository, ReferenceRepository referenceRepository) { this.documentRepository = documentRepository; this.authorRepository = authorRepository; + this.referenceRepository = referenceRepository; } public void setFromDocument(Dokument fromDocument) { this.fromDocument = fromDocument; + System.out.println("From document: " + fromDocument.getTitle()); + } + + public void setToDocument(Dokument doc){ + this.toDocument = doc; } public void extractReferences(String xmlTeiReferences) { @@ -84,18 +80,34 @@ public class ReferenceService { NodeList biblNodes = (NodeList) xpath.evaluate("//tei:biblStruct", doc, XPathConstants.NODESET); + // for each reference for (int i = 0; i < biblNodes.getLength(); i++) { Node biblNode = biblNodes.item(i); - Dokument toDokument = new Dokument(); + Dokument referencedDocument = new Dokument(); // Extract title - toDocument String title = xpath.evaluate(".//tei:title[@level='m' or @level='a']", biblNode); - toDokument.setTitle(title); + referencedDocument.setTitle(title); + + // Extract year of publication + String yearStr = xpath.evaluate(".//tei:date[@type='published']/@when", biblNode); + if (yearStr != null && !yearStr.isEmpty()) { + try { +
referencedDocument.setPublicationYear(Integer.valueOf(yearStr)); + } catch (NumberFormatException e) { + System.out.println("Error during converting year." + yearStr); + } + } + + // Extract publisher + String publisher = xpath.evaluate(".//tei:publisher", biblNode); + referencedDocument.setPublisher(publisher); // Extract authors NodeList authorNodes = (NodeList) xpath.evaluate(".//tei:author/tei:persName", biblNode, XPathConstants.NODESET); List<Author> authors = new ArrayList<>(); + // each author in a reference for (int j = 0; j < authorNodes.getLength(); j++) { Node authorNode = authorNodes.item(j); @@ -112,28 +124,34 @@ public class ReferenceService { authors.add(newAuthor); authorMap.put(authorKey, newAuthor); } + } + referencedDocument.setAuthors(authors); - toDokument.setAuthors(authors); + List<String> authorLastNames= authors.stream().map(Author::getLastname).toList(); - // Extract year of publication - String yearStr = xpath.evaluate(".//tei:date[@type='published']/@when", biblNode); - if (yearStr != null && !yearStr.isEmpty()) { - try { - toDokument.setPublicationYear(Integer.valueOf(yearStr)); - } catch (NumberFormatException e) { - System.out.println("Error during converting year." 
+ yearStr); - } - } + // check if document exists in dbs + boolean exists = documentRepository.existsByTitleAndAuthorsIn(title, authorLastNames); - // Extract publisher - String publisher = xpath.evaluate(".//tei:publisher", biblNode); - toDokument.setPublisher(publisher); + // check whether the document is already saved in DBS + if(exists){ + System.out.println("Document with this title and authors already exist"); - this.documentRepository.save(toDokument); - this.authorRepository.saveAll(authors); + // vyhľadaj dokument podľa TITLE alebo AUTORA a nastav ho ako toDokument + + referencedDocument = documentRepository.findByTitleAndAuthorsIn(title, authorLastNames) + .orElseThrow(() -> new IllegalStateException("Document should exist but was NOT FOUND.")); + this.toDocument = referencedDocument; + System.out.println("Document already exists in database : " + referencedDocument.getTitle() + " with ID : " + referencedDocument.getId()); + } else { + // create new dokument + this.setToDocument(referencedDocument); + this.documentRepository.save(toDocument); + this.authorRepository.saveAll(authors); } + Reference reference = new Reference("[i]", fromDocument, toDocument); + referenceRepository.save(reference); } } catch (Exception e) { e.printStackTrace(); -- GitLab