Skip to content
Snippets Groups Projects
Commit 046b0bf6 authored by dre0059's avatar dre0059
Browse files

Fulfill REFERENCES table

parent 6b367d34
Branches
No related merge requests found
...@@ -59,7 +59,7 @@ public class FileUploadController { ...@@ -59,7 +59,7 @@ public class FileUploadController {
referenceService.extractReferences(references); referenceService.extractReferences(references);
System.out.println(header); System.out.println(header);
System.out.println(references); //System.out.println(references);
tmpFile.delete(); tmpFile.delete();
......
...@@ -6,6 +6,9 @@ import org.hibernate.annotations.Cascade; ...@@ -6,6 +6,9 @@ import org.hibernate.annotations.Cascade;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
// TODO : int / boolean - či je PDF nahraté alebo je to dokument len z referencie
// 1. references -
@Entity @Entity
@Table(name = "documents") @Table(name = "documents")
public class Dokument { public class Dokument {
......
...@@ -22,6 +22,16 @@ public interface DocumentRepository extends JpaRepository<Dokument, Long> { ...@@ -22,6 +22,16 @@ public interface DocumentRepository extends JpaRepository<Dokument, Long> {
"AND a.lastName IN :lastNames" "AND a.lastName IN :lastNames"
) )
boolean existsByTitleAndAuthorsIn(@Param("title") String title, @Param("lastNames") List<String> lastNames); boolean existsByTitleAndAuthorsIn(@Param("title") String title, @Param("lastNames") List<String> lastNames);
@Query(
"SELECT d FROM Dokument d " +
"JOIN d.authors a " +
"WHERE d.title = :title " +
"AND a.lastName IN :lastNames"
)
Optional<Dokument> findByTitleAndAuthorsIn(@Param("title") String title, @Param("lastNames") List<String> lastNames);
} }
/* /*
......
...@@ -15,7 +15,8 @@ import java.util.regex.Pattern; ...@@ -15,7 +15,8 @@ import java.util.regex.Pattern;
// TODO : // TODO :
// 1. VALIDATE author based on surname and first INITIAL of the firstName. // 1. VALIDATE author based on surname and first INITIAL of the firstName.
// SOLUTION : change keys of the map on surname and first initial and compare it with surname and first initial of author // SOLUTION : change keys of the map on surname and first initial and compare it with surname and first initial of author
// 2. // 2. dve mená autora nesprávne ukladá (priezvisko neuloží, zistiť teda formát aby sa správne ukladalo)
// 3. ukladá viackrát meno toho istého autora, zistiť prečo !!!
@Service @Service
public class HeaderService { public class HeaderService {
...@@ -121,12 +122,12 @@ public class HeaderService { ...@@ -121,12 +122,12 @@ public class HeaderService {
String[] nameParts = fullName.split(","); String[] nameParts = fullName.split(",");
String firstName; String firstName;
String lastName = nameParts[1].trim(); String lastName = nameParts[0].trim();
if(nameParts.length > 2){ if(nameParts.length > 2){
// have two names // have two names
firstName = nameParts[0].trim() + " " + nameParts[2].trim(); firstName = nameParts[1].trim() + " " + nameParts[2].trim();
} else { } else {
firstName = nameParts[0].trim(); firstName = nameParts[1].trim();
} }
String authorKey = lastName.toLowerCase() + "," + firstName.toLowerCase(); String authorKey = lastName.toLowerCase() + "," + firstName.toLowerCase();
......
...@@ -21,44 +21,40 @@ import javax.xml.xpath.XPathFactory; ...@@ -21,44 +21,40 @@ import javax.xml.xpath.XPathFactory;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList; import java.util.*;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
// TODO : // TODO :
// 1. uložiť prepojenie toDocument a fromDocument do tabuľky referencie // 1. uložiť prepojenie toDocument a fromDocument do tabuľky referencie
// 2. vytiahnuť orderNumber z referencie (toto riešiť cez GROBID) // 2. vytiahnuť orderNumber z referencie (toto riešiť cez GROBID)
// 3. aktuálne sa mi toDocument ukladá vždy ako nový.. ja ho potrebujem vyhľadať a na základe toho uložiť alebo prepojiť // 3. aktuálne sa mi toDocument ukladá vždy ako nový.. ja ho potrebujem vyhľadať a na základe toho uložiť alebo prepojiť
// 4. uložiť záznam do tabuľky references
// 5. ak už bolo PDF raz uložené, uloží sa mi "null" článok, prepojený s autormi - VYRIESIT
@Service @Service
public class ReferenceService { public class ReferenceService {
private final DocumentRepository documentRepository; private final DocumentRepository documentRepository;
private final AuthorRepository authorRepository; private final AuthorRepository authorRepository;
private final ReferenceRepository referenceRepository;
//private String title;
private Integer year;
private String doi;
private String abstractText;
private Integer pages;
private String publisher;
private String author;
private List<Author> authorList = new ArrayList<>();
private Dokument fromDocument; private Dokument fromDocument;
private Dokument toDocument; private Dokument toDocument;
@Autowired @Autowired
public ReferenceService(DocumentRepository documentRepository, AuthorRepository authorRepository) { public ReferenceService(DocumentRepository documentRepository, AuthorRepository authorRepository, ReferenceRepository referenceRepository) {
this.documentRepository = documentRepository; this.documentRepository = documentRepository;
this.authorRepository = authorRepository; this.authorRepository = authorRepository;
this.referenceRepository = referenceRepository;
} }
public void setFromDocument(Dokument fromDocument) { public void setFromDocument(Dokument fromDocument) {
this.fromDocument = fromDocument; this.fromDocument = fromDocument;
System.out.println("From document: " + fromDocument.getTitle());
}
public void setToDocument(Dokument doc){
this.toDocument = doc;
} }
public void extractReferences(String xmlTeiReferences) { public void extractReferences(String xmlTeiReferences) {
...@@ -84,18 +80,34 @@ public class ReferenceService { ...@@ -84,18 +80,34 @@ public class ReferenceService {
NodeList biblNodes = (NodeList) xpath.evaluate("//tei:biblStruct", doc, XPathConstants.NODESET); NodeList biblNodes = (NodeList) xpath.evaluate("//tei:biblStruct", doc, XPathConstants.NODESET);
// for each reference
for (int i = 0; i < biblNodes.getLength(); i++) { for (int i = 0; i < biblNodes.getLength(); i++) {
Node biblNode = biblNodes.item(i); Node biblNode = biblNodes.item(i);
Dokument toDokument = new Dokument(); Dokument referencedDocument = new Dokument();
// Extract title - toDocument // Extract title - toDocument
String title = xpath.evaluate(".//tei:title[@level='m' or @level='a']", biblNode); String title = xpath.evaluate(".//tei:title[@level='m' or @level='a']", biblNode);
toDokument.setTitle(title); referencedDocument.setTitle(title);
// Extract year of publication
String yearStr = xpath.evaluate(".//tei:date[@type='published']/@when", biblNode);
if (yearStr != null && !yearStr.isEmpty()) {
try {
referencedDocument.setPublicationYear(Integer.valueOf(yearStr));
} catch (NumberFormatException e) {
System.out.println("Error during converting year." + yearStr);
}
}
// Extract publisher
String publisher = xpath.evaluate(".//tei:publisher", biblNode);
referencedDocument.setPublisher(publisher);
// Extract authors // Extract authors
NodeList authorNodes = (NodeList) xpath.evaluate(".//tei:author/tei:persName", biblNode, XPathConstants.NODESET); NodeList authorNodes = (NodeList) xpath.evaluate(".//tei:author/tei:persName", biblNode, XPathConstants.NODESET);
List<Author> authors = new ArrayList<>(); List<Author> authors = new ArrayList<>();
// each author in a reference
for (int j = 0; j < authorNodes.getLength(); j++) { for (int j = 0; j < authorNodes.getLength(); j++) {
Node authorNode = authorNodes.item(j); Node authorNode = authorNodes.item(j);
...@@ -112,28 +124,34 @@ public class ReferenceService { ...@@ -112,28 +124,34 @@ public class ReferenceService {
authors.add(newAuthor); authors.add(newAuthor);
authorMap.put(authorKey, newAuthor); authorMap.put(authorKey, newAuthor);
} }
}
referencedDocument.setAuthors(authors);
toDokument.setAuthors(authors); List<String> authorLastNames= authors.stream().map(Author::getLastname).toList();
// Extract year of publication // check if document exists in dbs
String yearStr = xpath.evaluate(".//tei:date[@type='published']/@when", biblNode); boolean exists = documentRepository.existsByTitleAndAuthorsIn(title, authorLastNames);
if (yearStr != null && !yearStr.isEmpty()) {
try {
toDokument.setPublicationYear(Integer.valueOf(yearStr));
} catch (NumberFormatException e) {
System.out.println("Error during converting year." + yearStr);
}
}
// Extract publisher // check whether the document is already saved in DBS
String publisher = xpath.evaluate(".//tei:publisher", biblNode); if(exists){
toDokument.setPublisher(publisher); System.out.println("Document with this title and authors already exist");
this.documentRepository.save(toDokument); // vyhľadaj dokument podľa TITLE alebo AUTORA a nastav ho ako toDokument
this.authorRepository.saveAll(authors);
referencedDocument = documentRepository.findByTitleAndAuthorsIn(title, authorLastNames)
.orElseThrow(() -> new IllegalStateException("Document should exist but was NOT FOUND."));
this.toDocument = referencedDocument;
System.out.println("Document already exists in database : " + referencedDocument.getTitle() + " with ID : " + referencedDocument.getId());
} else {
// create new dokument
this.setToDocument(referencedDocument);
this.documentRepository.save(toDocument);
this.authorRepository.saveAll(authors);
} }
Reference reference = new Reference("[i]", fromDocument, toDocument);
referenceRepository.save(reference);
} }
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment