diff --git a/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java b/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java index b2b785f7e06f65232671c8e07bf8bbcc0c1b8c05..78fa9a561db0c001a9a3855e0ca7219ca04ceea1 100644 --- a/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java +++ b/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java @@ -2,36 +2,16 @@ package com.dre0059.articleprocessor.controller; import com.dre0059.articleprocessor.GrobidClient; import com.dre0059.articleprocessor.service.HeaderService; -import com.dre0059.articleprocessor.service.MetadataParser; -import com.dre0059.articleprocessor.repository.DocumentRepository; -import com.dre0059.articleprocessor.repository.ReferenceRepository; -import com.dre0059.articleprocessor.service.TEIparser; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.http.HttpStatus; + import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Controller; import org.springframework.ui.Model; import org.springframework.web.bind.annotation.*; import org.springframework.web.multipart.MultipartFile; -import reactor.core.publisher.Mono; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -// TODO : -// 1. âś… nefunguje mi správne uloĹľenie ÄŤlánku, pokiaÄľ uĹľ ÄŤlánok v DBS je, aktuálne mi vyhodĂ len ERROR Ĺľe nemoĹľno správne spracovaĹĄ -// 2. âś… !!! uloĹľenie referenciĂ do databázy -// 3. prepojĂm referenciu s uloĹľenĂ˝mi ÄŤlánkami ??? -// 4. viac spraviĹĄ program USER-FRIENDLY - vĂ˝pis Ĺľe spracovávam document, vĂ˝pis Ĺľe dokument uĹľ je uloĹľenĂ˝, vĂ˝pis Ĺľe dokument sa uloĹľil a vypĂšem metadata pre overenie -// 5. nesprávne vyĹĄahovanie referenciĂ - referencie ktorĂ© sa odkazujĂş na nejakĂ˝ web, nie sĂş spracovanĂ© @Controller @RequestMapping("/api/grobid") diff --git a/src/main/java/com/dre0059/articleprocessor/model/DocumentMetadata.java b/src/main/java/com/dre0059/articleprocessor/model/DocumentMetadata.java deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java b/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java index f3004c7e1622ea7da270ef20603336afe6e77493..d1bafc441e21f16337bc9e9b75ed1e20416537e6 100644 --- a/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java +++ b/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java @@ -14,6 +14,28 @@ import java.util.Optional; @Repository public interface DocumentRepository extends JpaRepository<Dokument, Long> { - @Query("SELECT COUNT(d) > 0 FROM Dokument d JOIN d.authors a WHERE d.title = :title AND a IN :authors") - boolean existsByTitleAndAuthorsIn(@Param("title") String title, @Param("authors") List<Author> authors); + @Query( + "SELECT COUNT(d) > 0 " + + "FROM Dokument d " + + "JOIN d.authors a " + + "WHERE d.title = :title " + + "AND a.lastName IN :lastNames" + ) + boolean existsByTitleAndAuthorsIn(@Param("title") String title, @Param("lastNames") List<String> lastNames); } + +/* + // save only if all authors are the same + @Query(""" + SELECT COUNT(d) > 0 + FROM Dokument d + WHERE d.title = :title + AND SIZE(d.authors) = :authorCount + AND EXISTS ( + SELECT 1 FROM Dokument d2 JOIN d2.authors a2 + WHERE d2.id = d.id AND a2 IN :authors + ) + """) + boolean existsByTitleAndAuthors(@Param("title") String title, @Param("authors") List<Author> authors, @Param("authorCount") int authorCount); + + */ \ No newline at end of file diff --git a/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java b/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java index 81597b584fd22620985400c5c39adfe3c9d79523..352f6f78b29fcc5652fd004b9061bfdb39730e0d 100644 --- a/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java +++ b/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java @@ -40,31 +40,47 @@ public class HeaderService { public void processHeader(String header){ this.title = this.parseHeaderFields(header, "title"); - this.doi = this.parseHeaderFields(header, "doi"); - this.abstractText = this.parseHeaderFields(header, "abstract"); - this.publisher = this.parseHeaderFields(header, "publisher"); - if(this.parseHeaderFields(header, "year").equals("Not found")){ - this.year = 0; + if(!this.parseHeaderFields(header, "doi").equals("Not found")){ + this.doi = this.parseHeaderFields(header, "doi"); } - if(this.parseHeaderFields(header, "pages").equals("Not found")){ - this.pages = 0; + if(!this.parseHeaderFields(header, "abstract").equals("Not found")){ + this.abstractText = this.parseHeaderFields(header, "abstract"); + } + if(!this.parseHeaderFields(header, "publisher").equals("Not found")){ + this.publisher = this.parseHeaderFields(header, "publisher"); } + if(!this.parseHeaderFields(header, "year").equals("Not found")){ + String yearString = this.parseHeaderFields(header, "year"); + this.year = Integer.parseInt(yearString); + } + if(!this.parseHeaderFields(header, "pages").equals("Not found")){ + String pagesString = this.parseHeaderFields(header, "pages"); + this.pages = Integer.parseInt(pagesString); + } this.author = this.parseHeaderFields(header, "author"); if(!this.author.equals("Not found")){ authorList = this.saveAuthorNameAndSurname(this.author); } - authorRepository.saveAll(authorList); + List<String> authorLastNames= authorList.stream().map(Author::getLastname).toList(); + System.out.println("Author list before checking duplicity: " + authorList); + System.out.println("Author last names before checking duplicity: " + authorLastNames); + + // tu dostávam error : + boolean headerDuplicity = documentRepository.existsByTitleAndAuthorsIn(title, authorLastNames); // check duplicity of the document - if(documentRepository.existsByTitleAndAuthorsIn(title, authorList)){ + if(headerDuplicity){ System.out.println("Document with this title and authors already exist"); return; } + + authorRepository.saveAll(authorList); + Dokument dokument = new Dokument(title, year, doi, pages, publisher); dokument.setAuthors(authorList); diff --git a/src/main/java/com/dre0059/articleprocessor/service/MetadataParser.java b/src/main/java/com/dre0059/articleprocessor/service/MetadataParser.java deleted file mode 100644 index 1ea90a9e7ffe877efb161fd3afcb19170b557c09..0000000000000000000000000000000000000000 --- a/src/main/java/com/dre0059/articleprocessor/service/MetadataParser.java +++ /dev/null @@ -1,67 +0,0 @@ -package com.dre0059.articleprocessor.service; - -import com.dre0059.articleprocessor.model.*; -import com.dre0059.articleprocessor.repository.*; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Service; - -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -@Service -public class MetadataParser { - - @Autowired - private DocumentRepository documentRepository; - @Autowired - private AuthorRepository authorRepository; - - @Autowired - public MetadataParser(DocumentRepository documentRepository, AuthorRepository authorRepository) { - this.documentRepository = documentRepository; - this.authorRepository = authorRepository; - } - - public Dokument parseBibTeX(String bibtexString) { - // Regular expression pre zĂskanie hodnĂ´t z BibTeX formátu - Pattern pattern = Pattern.compile("@.*?\\{.*?,\\s*author\\s*=\\s*\\{(.*?)\\},\\s*title\\s*=\\s*\\{(.*?)\\},\\s*doi\\s*=\\s*\\{(.*?)\\},\\s*abstract\\s*=\\s*\\{(.*?)\\}"); - Matcher matcher = pattern.matcher(bibtexString); - - if (matcher.find()) { - String authorsRaw = matcher.group(1); - String title = matcher.group(2); - String doi = matcher.group(3); - String abstractText = matcher.group(4); - - List<Author> authors = parseAuthors(authorsRaw); - - Dokument document = new Dokument(title, null, doi, null, null); - document.setAuthors(authors); - - //documentRepository.save(document); - - for (Author author : authors) { - authorRepository.save(author); - } - - return document; - } - return null; - } - - private List<Author> parseAuthors(String authorsRaw) { - List<Author> authors = new ArrayList<>(); - String[] authorNames = authorsRaw.split(" and "); - for (String fullName : authorNames) { - String[] nameParts = fullName.trim().split("\\s+", 2); - if (nameParts.length == 2) { - authors.add(new Author(nameParts[1], nameParts[0])); // Priezvisko, Meno - } else { - authors.add(new Author(nameParts[0], "")); // Ak meno nemá priezvisko - } - } - return authors; - } -}