diff --git a/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java b/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java index 3105cec2c99784647a87d1273921e7abf7ce3059..b2b785f7e06f65232671c8e07bf8bbcc0c1b8c05 100644 --- a/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java +++ b/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java @@ -1,7 +1,6 @@ package com.dre0059.articleprocessor.controller; import com.dre0059.articleprocessor.GrobidClient; -import com.dre0059.articleprocessor.model.DocumentMetadata; import com.dre0059.articleprocessor.service.HeaderService; import com.dre0059.articleprocessor.service.MetadataParser; import com.dre0059.articleprocessor.repository.DocumentRepository; diff --git a/src/main/java/com/dre0059/articleprocessor/model/Author.java b/src/main/java/com/dre0059/articleprocessor/model/Author.java index 61636fcbc6e9f9e9a4f609327e1a2ca5cf118449..6d86454462ba7b9bbaf472530cd0ecba3db7e950 100644 --- a/src/main/java/com/dre0059/articleprocessor/model/Author.java +++ b/src/main/java/com/dre0059/articleprocessor/model/Author.java @@ -13,24 +13,24 @@ public class Author { @GeneratedValue(strategy = GenerationType.IDENTITY) private Long id; - private String name; - private String surname; + private String firstName; + private String lastName; @ManyToMany(mappedBy = "authors") - private List<Dokument> documents = new ArrayList<Dokument>(); + private List<Dokument> documents = new ArrayList<>(); public Author(){} - public Author(String name, String surname) { - this.name = name; - this.surname = surname; + public Author(String firstName, String lastName) { + this.firstName = firstName; + this.lastName = lastName; } public Long getId() { return id; } - public String getName() { return name; } - public String getSurname() { return surname; } + public String getFirstname() { return firstName; } + public String getLastname() { return lastName; } public List<Dokument> getDocuments() { return documents; } - public void setName(String name) { this.name = name; } - public void setSurname(String surname) { this.surname = surname; } + public void setFirstname(String name) { this.firstName = name; } + public void setLastname(String surname) { this.lastName = surname; } public void setDocuments(List<Dokument> documents) { this.documents = documents; } } diff --git a/src/main/java/com/dre0059/articleprocessor/model/DocumentMetadata.java b/src/main/java/com/dre0059/articleprocessor/model/DocumentMetadata.java index 1aa86f973ac00e3f334216a3780a780e83231c34..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/src/main/java/com/dre0059/articleprocessor/model/DocumentMetadata.java +++ b/src/main/java/com/dre0059/articleprocessor/model/DocumentMetadata.java @@ -1,37 +0,0 @@ -package com.dre0059.articleprocessor.model; - -import jakarta.persistence.*; - -import java.util.ArrayList; -import java.util.List; - -@Entity // DBS table -@Table (name = "DOCUMENT_METADATA", uniqueConstraints = @UniqueConstraint(columnNames = "title")) -public class DocumentMetadata { - @Id - @GeneratedValue(strategy = GenerationType.IDENTITY) // ID is generated automatically - private Long id; - - private String title; - - @ElementCollection // pomocna tabulka authors - private List<String> authors = new ArrayList<>(); - - // needed for Hibernate for right instances in DBS - public DocumentMetadata() {} - - public DocumentMetadata(String title, List<String> authors) { - this.title = title; - this.authors = authors; - } - - public Long getId(){ - return id; - } - public String getTitle(){ - return title; - } - public List<String> getAuthors(){ - return authors; - } -} diff --git a/src/main/java/com/dre0059/articleprocessor/model/Dokument.java b/src/main/java/com/dre0059/articleprocessor/model/Dokument.java index ec852f0bcce504320b1fbfbb770667deafd58c2c..7dfcd4bee4bd0911807e872fa953d317bb2caf0a 100644 --- a/src/main/java/com/dre0059/articleprocessor/model/Dokument.java +++ b/src/main/java/com/dre0059/articleprocessor/model/Dokument.java @@ -1,6 +1,7 @@ package com.dre0059.articleprocessor.model; import jakarta.persistence.*; +import org.hibernate.annotations.Cascade; import java.util.ArrayList; import java.util.List; @@ -20,6 +21,7 @@ public class Dokument { private Integer publicationYear; private String doi; + // @Lob for huge text //@Column(name = "abstractText", columnDefinition = "TEXT") //private String abstractText; @@ -36,6 +38,7 @@ public class Dokument { inverseJoinColumns = @JoinColumn(name = "ID_author") ) + @Cascade(org.hibernate.annotations.CascadeType.ALL) private List<Author> authors = new ArrayList<>(); public Dokument() {} @@ -62,4 +65,24 @@ public class Dokument { public void setAuthors(List<Author> authors) { this.authors = authors; } public void setTitle(String title) { this.title = title; } + + public void setPublicationYear(Integer publicationYear) { + this.publicationYear = publicationYear; + } + + public void setDoi(String doi) { + this.doi = doi; + } + + public void setPages(Integer pages) { + this.pages = pages; + } + + public void setPublisher(String publisher) { + this.publisher = publisher; + } + + public void setReferences(List<Reference> references) { + this.references = references; + } } diff --git a/src/main/java/com/dre0059/articleprocessor/repository/AuthorRepository.java b/src/main/java/com/dre0059/articleprocessor/repository/AuthorRepository.java index 6815efaac8b76d05b88ed48cf25aa41271fb5b1f..42aca26c7b60273a0e49e803cc725620dc02454a 100644 --- a/src/main/java/com/dre0059/articleprocessor/repository/AuthorRepository.java +++ b/src/main/java/com/dre0059/articleprocessor/repository/AuthorRepository.java @@ -2,11 +2,16 @@ package com.dre0059.articleprocessor.repository; import com.dre0059.articleprocessor.model.Author; import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; import org.springframework.stereotype.Repository; import java.util.Optional; @Repository public interface AuthorRepository extends JpaRepository<Author, Long> { - Optional<Author> findByNameAndSurname(String name, String surname); + + @Query("SELECT a FROM Author a WHERE a.lastName = :lastName AND SUBSTRING(a.firstName, 1, 1) = SUBSTRING(:firstName, 1, 1)") + Optional<Author> findByFullName(@Param("lastName") String lastName, @Param("firstName") String firstName); + } diff --git a/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java b/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java index fd6a2a9c68af121185ca93b8ba11b1565ed18fff..f3004c7e1622ea7da270ef20603336afe6e77493 100644 --- a/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java +++ b/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java @@ -3,12 +3,17 @@ package com.dre0059.articleprocessor.repository; import com.dre0059.articleprocessor.model.*; import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; import org.springframework.stereotype.Repository; +import java.util.List; import java.util.Optional; // uklada extrahovane data @Repository public interface DocumentRepository extends JpaRepository<Dokument, Long> { - //Optional<Dokument> findByTitleAndAuthorsSurname(String title, String surname); + + @Query("SELECT COUNT(d) > 0 FROM Dokument d JOIN d.authors a WHERE d.title = :title AND a IN :authors") + boolean existsByTitleAndAuthorsIn(@Param("title") String title, @Param("authors") List<Author> authors); } diff --git a/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java b/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java index 1dc1ff4c4d7dbb7d7f6e2767c9fd410246ccfede..81597b584fd22620985400c5c39adfe3c9d79523 100644 --- a/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java +++ b/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java @@ -7,8 +7,10 @@ import com.dre0059.articleprocessor.repository.DocumentRepository; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; +import javax.swing.text.html.Option; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -43,7 +45,7 @@ public class HeaderService { this.publisher = this.parseHeaderFields(header, "publisher"); if(this.parseHeaderFields(header, "year").equals("Not found")){ - this.year = -1; + this.year = 0; } if(this.parseHeaderFields(header, "pages").equals("Not found")){ this.pages = 0; @@ -54,10 +56,15 @@ public class HeaderService { authorList = this.saveAuthorNameAndSurname(this.author); } - for(Author author : authorList){ - authorRepository.save(author); + authorRepository.saveAll(authorList); + + // check duplicity of the document + if(documentRepository.existsByTitleAndAuthorsIn(title, authorList)){ + System.out.println("Document with this title and authors already exist"); + return; } + Dokument dokument = new Dokument(title, year, doi, pages, publisher); dokument.setAuthors(authorList); @@ -88,6 +95,7 @@ public class HeaderService { String[] nameParts = fullName.split(","); String firstName; + String lastName = nameParts[1]; if(nameParts.length > 2){ // have two names firstName = nameParts[0] + " " + nameParts[2]; @@ -95,7 +103,16 @@ public class HeaderService { firstName = nameParts[0]; } - authors.add(new Author(nameParts[1], firstName)); + // check if author already exists + Optional<Author> existingAuthor = authorRepository.findByFullName(lastName, firstName); + if (existingAuthor.isPresent()) { + authors.add(existingAuthor.get()); + } else { + Author newAuthor = new Author(lastName, firstName); + authors.add(newAuthor); + } + + //authors.add(new Author(lastName, firstName)); } return authors; diff --git a/src/main/java/com/dre0059/articleprocessor/service/TEIparser.java b/src/main/java/com/dre0059/articleprocessor/service/TEIparser.java index ccbdbb8612f939fa1c036107f046463199cada58..dbe9378219094cd9786e6aa26c2f0b3ea58c3a06 100644 --- a/src/main/java/com/dre0059/articleprocessor/service/TEIparser.java +++ b/src/main/java/com/dre0059/articleprocessor/service/TEIparser.java @@ -40,7 +40,7 @@ public class TEIparser { referencedDocument.setTitle(title); Author author = new Author(); - author.setSurname(authorSurname); + author.setLastname(authorSurname); referencedDocument.setAuthors(List.of(author)); Dokument savedReferencedDocument = documentService.saveDocument(referencedDocument);