diff --git a/src/main/java/com/dre0059/articleprocessor/GrobidClient.java b/src/main/java/com/dre0059/articleprocessor/GrobidClient.java index 91ff7869dbdad1e3183f38e5693f897b8c43fe94..4b52d743885498178c707895bd0d54918dee2100 100644 --- a/src/main/java/com/dre0059/articleprocessor/GrobidClient.java +++ b/src/main/java/com/dre0059/articleprocessor/GrobidClient.java @@ -21,24 +21,40 @@ public class GrobidClient { } // get METADATA of the file - public Mono<String> processHeader(File pdfFile){ // Mono - vráti jeden string, vĂ˝sledok je JSON + public String processHeader(File pdfFile){ // Mono - vráti jeden string, vĂ˝sledok je JSON return webClient.post() .uri("/api/processHeaderDocument") .contentType(MediaType.MULTIPART_FORM_DATA) .body(BodyInserters.fromMultipartData("input", new FileSystemResource(pdfFile))) - .attribute("consolidateHeader", 1) + .attribute("consolidateHeader", 1) // MoĹľnosĹĄ na zjednotenie hlaviÄŤky + .attribute("includeRawAffiliations", 1) // PrĂpadne pridaĹĄ ÄŹalšie parametre, ak Grobid podporuje takĂ©to rozšĂrenie + .attribute("includeRawCopyrights", 1) // PrĂpadne pridaĹĄ ÄŹalšie parametre, ak Grobid podporuje takĂ©to rozšĂrenie + //.attribute("includeReferences", 1) // MoĹľnosĹĄ pridaĹĄ aj referencie priamo do hlaviÄŤky .retrieve() - .bodyToMono(String.class); + .bodyToMono(String.class) + .block(); // returns String instead of Mono<String> } + public String processFullMetadata(File pdfFile) { + return webClient.post() + .uri("/api/processFullMetadata") + .contentType(MediaType.MULTIPART_FORM_DATA) + .body(BodyInserters.fromMultipartData("input", new FileSystemResource(pdfFile))) + .retrieve() + .bodyToMono(String.class) + .block(); + } + + // spracuje REFERENCIE z PDF - public Mono<String> processReferences(File pdfFile){ + public String processReferences(File pdfFile){ return webClient.post() .uri("/api/processReferences") .contentType(MediaType.MULTIPART_FORM_DATA) .body(BodyInserters.fromMultipartData("input", new FileSystemResource(pdfFile))) .retrieve() - .bodyToMono(String.class); + .bodyToMono(String.class) + .block(); } } \ No newline at end of file diff --git a/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java b/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java index 91bfab51f108b78a6a1e55565462e7ec38a97a28..3105cec2c99784647a87d1273921e7abf7ce3059 100644 --- a/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java +++ b/src/main/java/com/dre0059/articleprocessor/controller/FileUploadController.java @@ -2,6 +2,7 @@ package com.dre0059.articleprocessor.controller; import com.dre0059.articleprocessor.GrobidClient; import com.dre0059.articleprocessor.model.DocumentMetadata; +import com.dre0059.articleprocessor.service.HeaderService; import com.dre0059.articleprocessor.service.MetadataParser; import com.dre0059.articleprocessor.repository.DocumentRepository; import com.dre0059.articleprocessor.repository.ReferenceRepository; @@ -17,6 +18,9 @@ import org.springframework.web.bind.annotation.*; import org.springframework.web.multipart.MultipartFile; import reactor.core.publisher.Mono; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -33,15 +37,20 @@ import java.util.Map; @Controller @RequestMapping("/api/grobid") public class FileUploadController { - private static final Logger logger = LoggerFactory.getLogger(FileUploadController.class); private final GrobidClient grobidClient; - private final DocumentRepository metadataRepository; - private final ReferenceRepository referenceRepository; + private final HeaderService headerService; - public FileUploadController(GrobidClient grobidClient, DocumentRepository metadataRepository, ReferenceRepository referenceRepository) { + //private final DocumentRepository metadataRepository; + //private final ReferenceRepository referenceRepository; + //private final MetadataParser metadataParser; + //private static final Logger logger = LoggerFactory.getLogger(FileUploadController.class); + + public FileUploadController(GrobidClient grobidClient, HeaderService headerService/*, DocumentRepository metadataRepository, ReferenceRepository referenceRepository, MetadataParser metadataParser*/) { this.grobidClient = grobidClient; - this.metadataRepository = metadataRepository; - this.referenceRepository = referenceRepository; + this.headerService = headerService; + // this.metadataRepository = metadataRepository; + //this.referenceRepository = referenceRepository; + //this.metadataParser = metadataParser; } @GetMapping("/upload") @@ -51,51 +60,41 @@ public class FileUploadController { @PostMapping("/upload") @ResponseBody - public Mono<ResponseEntity<Map<String, String>>> handleFileUpload(@RequestParam("file") MultipartFile file) { - logger.info("Received file: {}", file.getOriginalFilename()); - - return Mono.fromCallable(() -> { - Path tempFile = Files.createTempFile("upload-", ".pdf"); - file.transferTo(tempFile.toFile()); - return tempFile.toFile(); - }).flatMap(pdfFile -> { - Mono<String> metadataMono = grobidClient.processHeader(pdfFile); - Mono<String> referencesMono = grobidClient.processReferences(pdfFile); - - return Mono.zip(metadataMono, referencesMono) - .flatMap(result -> { - String metadataJson = result.getT1(); - String referencesXml = result.getT2(); - - String title = MetadataParser.extractTitle(metadataJson); - List<String> authors = MetadataParser.extractAuthors(metadataJson); - - return Mono.justOrEmpty(metadataRepository.findByTitle(title)) - .map(existing -> { - logger.warn("Article with title '{}' already exists!", title); - return ResponseEntity.status(HttpStatus.CONFLICT) - .body(Map.of("error", "Article is already in database.")); - }) - .switchIfEmpty(Mono.fromCallable(() -> { - DocumentMetadata doc = new DocumentMetadata(title, authors); - metadataRepository.save(doc); - - // Spracovanie referenciĂ cez TEIparser - TEIparser teiParser = new TEIparser(referenceRepository); - teiParser.parseAndSaveToDB(referencesXml, doc); - - Map<String, String> response = new HashMap<>(); - response.put("metadata", metadataJson); - response.put("references", referencesXml); - - return ResponseEntity.ok(response); - })); - }) - .onErrorResume(e -> { - logger.error("Error processing PDF", e); - return Mono.just(ResponseEntity.internalServerError().body(Map.of("error", "Failed to process PDF"))); - }); - }); + public ResponseEntity<String> handleFileUpload(@RequestParam("file") MultipartFile file) { + if (file.isEmpty()) { + return ResponseEntity.badRequest().body("No file uploaded!"); + } + + System.out.println("Received file: " + file.getOriginalFilename()); + + try { + // Vytvorenie doÄŤasnĂ©ho sĂşboru + File tmpFile = File.createTempFile("article-", ".pdf"); + + // save data from file to tmpFile + try(FileOutputStream stream = new FileOutputStream(tmpFile)) { + stream.write(file.getBytes()); + } catch (IOException e) { + return ResponseEntity.internalServerError().body("FAILURE - cannot process file : " + e.getMessage()); + } + + String header = grobidClient.processHeader(tmpFile); + String references = grobidClient.processReferences(tmpFile); + + headerService.processHeader(header); + + System.out.println(header); + //System.out.println(references); + + tmpFile.delete(); + + return ResponseEntity.ok(header); + + + } catch (IOException e) { + System.out.println("Chyba pri vytváranĂ doÄŤasnĂ©ho sĂşboru" + e); + return ResponseEntity.status(500).body("Chyba pri vytváranĂ doÄŤasnĂ©ho sĂşboru."); + } } diff --git a/src/main/java/com/dre0059/articleprocessor/model/Author.java b/src/main/java/com/dre0059/articleprocessor/model/Author.java index 69329b9267732f59f2770c3d3c9cabdfbe8a0b16..61636fcbc6e9f9e9a4f609327e1a2ca5cf118449 100644 --- a/src/main/java/com/dre0059/articleprocessor/model/Author.java +++ b/src/main/java/com/dre0059/articleprocessor/model/Author.java @@ -17,7 +17,7 @@ public class Author { private String surname; @ManyToMany(mappedBy = "authors") - private List<Document> documents = new ArrayList<Document>(); + private List<Dokument> documents = new ArrayList<Dokument>(); public Author(){} public Author(String name, String surname) { @@ -28,9 +28,9 @@ public class Author { public Long getId() { return id; } public String getName() { return name; } public String getSurname() { return surname; } - public List<Document> getDocuments() { return documents; } + public List<Dokument> getDocuments() { return documents; } public void setName(String name) { this.name = name; } public void setSurname(String surname) { this.surname = surname; } - public void setDocuments(List<Document> documents) { this.documents = documents; } + public void setDocuments(List<Dokument> documents) { this.documents = documents; } } diff --git a/src/main/java/com/dre0059/articleprocessor/model/Document.java b/src/main/java/com/dre0059/articleprocessor/model/Dokument.java similarity index 67% rename from src/main/java/com/dre0059/articleprocessor/model/Document.java rename to src/main/java/com/dre0059/articleprocessor/model/Dokument.java index a0170291c384ae8d9af94cf13d6ad7ac95ec5ce1..ec852f0bcce504320b1fbfbb770667deafd58c2c 100644 --- a/src/main/java/com/dre0059/articleprocessor/model/Document.java +++ b/src/main/java/com/dre0059/articleprocessor/model/Dokument.java @@ -1,26 +1,27 @@ package com.dre0059.articleprocessor.model; import jakarta.persistence.*; -import org.hibernate.annotations.CollectionId; -import com.dre0059.articleprocessor.model.*; import java.util.ArrayList; import java.util.List; @Entity @Table(name = "documents") -public class Document { +public class Dokument { @Id @GeneratedValue(strategy = GenerationType.IDENTITY) private Long id; + @Column(name = "title") private String title; - private Integer year; + + @Column(name = "publication_year") + private Integer publicationYear; private String doi; - @Column(name = "abstractText") - private String abstractText; + //@Column(name = "abstractText", columnDefinition = "TEXT") + //private String abstractText; private Integer pages; private String publisher; @@ -34,29 +35,31 @@ public class Document { joinColumns = @JoinColumn(name = "ID_document"), inverseJoinColumns = @JoinColumn(name = "ID_author") ) + private List<Author> authors = new ArrayList<>(); - public Document() {} + public Dokument() {} - public Document(String title, Integer year, String doi, String abstractText, Integer pages, String publisher) { + public Dokument(String title, Integer year, String doi, /*String abstractText,*/ Integer pages, String publisher) { this.title = title; - this.year = year; + this.publicationYear = year; this.doi = doi; - this.abstractText = abstractText; + //this.abstractText = abstractText; this.pages = pages; this.publisher = publisher; } public Long getId() { return id; } public String getTitle() { return title; } - public Integer getYear() { return year; } + public Integer getYear() { return publicationYear; } public String getDoi() { return doi; } - public String getAbstractText() { return abstractText; } + //public String getAbstractText() { return abstractText; } public Integer getPages() { return pages; } public String getPublisher() { return publisher; } public List<Reference> getReferences() { return references; } public List<Author> getAuthors() { return authors; } - public void setAuthors(List<Author> authors) { this.authors = authors; } + public void setAuthors(List<Author> authors) { this.authors = authors; } + public void setTitle(String title) { this.title = title; } } diff --git a/src/main/java/com/dre0059/articleprocessor/model/Reference.java b/src/main/java/com/dre0059/articleprocessor/model/Reference.java index 5aae7ce8ab3e11e2a2e40336f107740a94321190..6bb6af9288fdd0460b06f8adbe39b2db079e061c 100644 --- a/src/main/java/com/dre0059/articleprocessor/model/Reference.java +++ b/src/main/java/com/dre0059/articleprocessor/model/Reference.java @@ -2,9 +2,6 @@ package com.dre0059.articleprocessor.model; import jakarta.persistence.*; -import javax.print.Doc; -import java.util.List; - @Entity @Table(name = "references") public class Reference { @@ -16,26 +13,26 @@ public class Reference { private String orderNumber; @ManyToOne(cascade = CascadeType.ALL) - @JoinColumn(name = "ID_fromDocument") - private Document fromDocument; + @JoinColumn(name = "fromDocument") + private Dokument fromDocument; @ManyToOne(cascade = CascadeType.ALL) - @JoinColumn(name = "ID_toDocument") - private Document toDocument; + @JoinColumn(name = "toDocument") + private Dokument toDocument; public Reference() {} - public Reference(String orderNumber, Document fromDocument, Document toDocument) { + public Reference(String orderNumber, Dokument fromDocument, Dokument toDocument) { this.orderNumber = orderNumber; this.fromDocument = fromDocument; this.toDocument = toDocument; } public String getOrderNumber() { return orderNumber; } - public Document getFromDocument() { return fromDocument; } - public Document getToDocument() { return toDocument; } + public Dokument getFromDocument() { return fromDocument; } + public Dokument getToDocument() { return toDocument; } public Long getId() { return id; } - public void setFromDocument(Document fromDocument) { this.fromDocument = fromDocument; } - public void setToDocument(Document toDocument) { this.toDocument = toDocument; } + public void setFromDocument(Dokument fromDocument) { this.fromDocument = fromDocument; } + public void setToDocument(Dokument toDocument) { this.toDocument = toDocument; } public void setOrderNumber(String orderNumber) { this.orderNumber = orderNumber; } } diff --git a/src/main/java/com/dre0059/articleprocessor/repository/AuthorRepository.java b/src/main/java/com/dre0059/articleprocessor/repository/AuthorRepository.java index 846a3fa78e981ac0815c58bd8f1728b7fe6756b8..6815efaac8b76d05b88ed48cf25aa41271fb5b1f 100644 --- a/src/main/java/com/dre0059/articleprocessor/repository/AuthorRepository.java +++ b/src/main/java/com/dre0059/articleprocessor/repository/AuthorRepository.java @@ -8,5 +8,5 @@ import java.util.Optional; @Repository public interface AuthorRepository extends JpaRepository<Author, Long> { - Optional<Author> findByFullname(String name, String surname); + Optional<Author> findByNameAndSurname(String name, String surname); } diff --git a/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java b/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java index d68a201afc812d15c1175fcaa247b84231f5880a..fd6a2a9c68af121185ca93b8ba11b1565ed18fff 100644 --- a/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java +++ b/src/main/java/com/dre0059/articleprocessor/repository/DocumentRepository.java @@ -1,15 +1,14 @@ package com.dre0059.articleprocessor.repository; -import com.dre0059.articleprocessor.model.DocumentMetadata; +import com.dre0059.articleprocessor.model.*; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.stereotype.Repository; -import org.w3c.dom.Document; import java.util.Optional; // uklada extrahovane data @Repository -public interface DocumentRepository extends JpaRepository<Document, Long> { - Optional<Document> findByTitleAndAuthorsSurname(String title, String surname); +public interface DocumentRepository extends JpaRepository<Dokument, Long> { + //Optional<Dokument> findByTitleAndAuthorsSurname(String title, String surname); } diff --git a/src/main/java/com/dre0059/articleprocessor/service/DocumentService.java b/src/main/java/com/dre0059/articleprocessor/service/DocumentService.java index 9b3c6cadf1a959705787e94808612185bec8f34f..48f8a1f6c895252a2c23a5d3c0e3968af8553825 100644 --- a/src/main/java/com/dre0059/articleprocessor/service/DocumentService.java +++ b/src/main/java/com/dre0059/articleprocessor/service/DocumentService.java @@ -1,4 +1,26 @@ package com.dre0059.articleprocessor.service; +import com.dre0059.articleprocessor.repository.*; +import com.dre0059.articleprocessor.model.*; + +import jakarta.transaction.Transactional; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +@Service public class DocumentService { + @Autowired + private DocumentRepository documentRepository; + @Autowired + private AuthorRepository authorRepository; + + @Transactional + public Dokument saveDocument(Dokument document) { + Dokument dok = new Dokument(); + return dok; + } } diff --git a/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java b/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java new file mode 100644 index 0000000000000000000000000000000000000000..1dc1ff4c4d7dbb7d7f6e2767c9fd410246ccfede --- /dev/null +++ b/src/main/java/com/dre0059/articleprocessor/service/HeaderService.java @@ -0,0 +1,103 @@ +package com.dre0059.articleprocessor.service; + +import com.dre0059.articleprocessor.model.Author; +import com.dre0059.articleprocessor.model.Dokument; +import com.dre0059.articleprocessor.repository.AuthorRepository; +import com.dre0059.articleprocessor.repository.DocumentRepository; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +@Service +public class HeaderService { + + private final DocumentRepository documentRepository; + private final AuthorRepository authorRepository; + + //public Dokument(String title, Integer year, String doi, String abstractText, Integer pages, String publisher) { + + private String title; + private Integer year; + private String doi; + private String abstractText; + private Integer pages; + private String publisher; + private List<Author> authorList = new ArrayList<>(); + + private String author; + + @Autowired + public HeaderService(DocumentRepository documentRepository, AuthorRepository authorRepository) { + this.documentRepository = documentRepository; + this.authorRepository = authorRepository; + } + + public void processHeader(String header){ + this.title = this.parseHeaderFields(header, "title"); + this.doi = this.parseHeaderFields(header, "doi"); + this.abstractText = this.parseHeaderFields(header, "abstract"); + this.publisher = this.parseHeaderFields(header, "publisher"); + + if(this.parseHeaderFields(header, "year").equals("Not found")){ + this.year = -1; + } + if(this.parseHeaderFields(header, "pages").equals("Not found")){ + this.pages = 0; + } + + this.author = this.parseHeaderFields(header, "author"); + if(!this.author.equals("Not found")){ + authorList = this.saveAuthorNameAndSurname(this.author); + } + + for(Author author : authorList){ + authorRepository.save(author); + } + + Dokument dokument = new Dokument(title, year, doi, pages, publisher); + dokument.setAuthors(authorList); + + this.documentRepository.save(dokument); + } + + private String parseHeaderFields(String header, String field){ + String regex = field + "\\s*=\\s*\\{([^}]*)\\}"; + Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE); + Matcher matcher = pattern.matcher(header); + + if(matcher.find()){ + return matcher.group(1).trim(); + } else + return "Not found"; // should replace for NULL ? + + } + + private List<Author> saveAuthorNameAndSurname(String author){ + // "and" divide our authors + String[] authorNames = author.split(" and "); + + List<Author> authors = new ArrayList<>(); + + System.out.println("Author is : \n" + author); + + for(String fullName : authorNames){ + String[] nameParts = fullName.split(","); + + String firstName; + if(nameParts.length > 2){ + // have two names + firstName = nameParts[0] + " " + nameParts[2]; + } else { + firstName = nameParts[0]; + } + + authors.add(new Author(nameParts[1], firstName)); + } + + return authors; + } +} diff --git a/src/main/java/com/dre0059/articleprocessor/service/MetadataParser.java b/src/main/java/com/dre0059/articleprocessor/service/MetadataParser.java index 2cfc04bca09983da276d285b2dc06dad49492cc5..1ea90a9e7ffe877efb161fd3afcb19170b557c09 100644 --- a/src/main/java/com/dre0059/articleprocessor/service/MetadataParser.java +++ b/src/main/java/com/dre0059/articleprocessor/service/MetadataParser.java @@ -1,10 +1,7 @@ package com.dre0059.articleprocessor.service; -import com.dre0059.articleprocessor.model.Author; -import com.dre0059.articleprocessor.model.Document; -import com.dre0059.articleprocessor.repository.AuthorRepository; -import com.dre0059.articleprocessor.repository.DocumentRepository; -import com.dre0059.articleprocessor.repository.ReferenceRepository; +import com.dre0059.articleprocessor.model.*; +import com.dre0059.articleprocessor.repository.*; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -27,7 +24,7 @@ public class MetadataParser { this.authorRepository = authorRepository; } - public Document parseBibTeX(String bibtexString) { + public Dokument parseBibTeX(String bibtexString) { // Regular expression pre zĂskanie hodnĂ´t z BibTeX formátu Pattern pattern = Pattern.compile("@.*?\\{.*?,\\s*author\\s*=\\s*\\{(.*?)\\},\\s*title\\s*=\\s*\\{(.*?)\\},\\s*doi\\s*=\\s*\\{(.*?)\\},\\s*abstract\\s*=\\s*\\{(.*?)\\}"); Matcher matcher = pattern.matcher(bibtexString); @@ -40,10 +37,10 @@ public class MetadataParser { List<Author> authors = parseAuthors(authorsRaw); - Document document = new Document(title, null, doi, abstractText, null, null); + Dokument document = new Dokument(title, null, doi, null, null); document.setAuthors(authors); - documentRepository.save(document); + //documentRepository.save(document); for (Author author : authors) { authorRepository.save(author); diff --git a/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java b/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java index 4428ebb5a1cd4df41b09f601031e137d22b34d72..836614114263c7adfd35239d4399afc3203c9b81 100644 --- a/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java +++ b/src/main/java/com/dre0059/articleprocessor/service/ReferenceService.java @@ -1,4 +1,18 @@ package com.dre0059.articleprocessor.service; +import com.dre0059.articleprocessor.model.*; +import com.dre0059.articleprocessor.repository.*; +import jakarta.transaction.Transactional; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +@Service public class ReferenceService { + @Autowired + private ReferenceRepository referenceRepository; + + @Transactional + public Reference saveReference(Reference reference) { + return referenceRepository.save(reference); + } } diff --git a/src/main/java/com/dre0059/articleprocessor/service/TEIparser.java b/src/main/java/com/dre0059/articleprocessor/service/TEIparser.java index b39d4bdcf6f174d850ac4982de367e4c4fbb615f..ccbdbb8612f939fa1c036107f046463199cada58 100644 --- a/src/main/java/com/dre0059/articleprocessor/service/TEIparser.java +++ b/src/main/java/com/dre0059/articleprocessor/service/TEIparser.java @@ -1,9 +1,10 @@ package com.dre0059.articleprocessor.service; -import com.dre0059.articleprocessor.model.DocumentMetadata; +import com.dre0059.articleprocessor.model.Author; +import com.dre0059.articleprocessor.model.Dokument; import com.dre0059.articleprocessor.model.Reference; -import com.dre0059.articleprocessor.repository.DocumentRepository; -import com.dre0059.articleprocessor.repository.ReferenceRepository; +import com.dre0059.articleprocessor.service.*; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import org.w3c.dom.*; import org.xml.sax.InputSource; @@ -16,80 +17,47 @@ import java.util.List; @Service public class TEIparser { + @Autowired + private ReferenceService referenceService; + @Autowired + private DocumentService documentService; - private final ReferenceRepository referenceRepository; - private final DocumentRepository documentRepository; - - public TEIparser(ReferenceRepository referenceRepository, DocumentRepository documentRepository) { - this.referenceRepository = referenceRepository; - this.documentRepository = documentRepository; - } - - public void parseAndSaveToDB(String xmlContent, DocumentMetadata document) { - try { - List<Reference> references = parseReferencesFromXML(xmlContent, document); - - if (!references.isEmpty()) { - referenceRepository.saveAll(references); - System.out.println("References successfully saved to DB"); - } else { - System.out.println("No valid references found in XML."); - } - } catch (Exception e) { - System.err.println("Error parsing references: " + e.getMessage()); - e.printStackTrace(); - } - } - - private List<Reference> parseReferencesFromXML(String xmlContent, DocumentMetadata document) { - List<Reference> references = new ArrayList<>(); - - try { + public void processReferences(String xmlContent, Dokument parentDocument){ + try{ DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); - dbFactory.setNamespaceAware(true); - DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); - Document doc = dBuilder.parse(new InputSource(new StringReader(xmlContent))); + DocumentBuilder builder = dbFactory.newDocumentBuilder(); + Document doc = builder.parse(new InputSource(new StringReader(xmlContent))); + + NodeList biblioEntries = doc.getElementsByTagName("biblStruct"); - // <biblStruct> - bibliographical information (consists of all needed information) - NodeList biblStructs = doc.getElementsByTagNameNS("*", "biblStruct"); + for(int i = 0; i < biblioEntries.getLength(); i++){ + Element biblEntry = (Element) biblioEntries.item(i); + String title = biblEntry.getElementsByTagName("title").item(0).getTextContent(); + String authorSurname = biblEntry.getElementsByTagName("surname").item(0).getTextContent(); - for (int i = 0; i < biblStructs.getLength(); i++) { - Element bibl = (Element) biblStructs.item(i); + Dokument referencedDocument = new Dokument(); + referencedDocument.setTitle(title); - String title = getTagValueNS("title", bibl, "Unknown Title"); - String publisher = getTagValueNS("publisher", bibl, "Unknown Publisher"); - String year = getTagValueNS("year", bibl, "Unknown Year"); + Author author = new Author(); + author.setSurname(authorSurname); + referencedDocument.setAuthors(List.of(author)); - List<String> authors = new ArrayList<>(); - NodeList authorNodes = bibl.getElementsByTagNameNS("*", "author"); + Dokument savedReferencedDocument = documentService.saveDocument(referencedDocument); - for (int j = 0; j < authorNodes.getLength(); j++) { - Element authorElement = (Element) authorNodes.item(j); - Element persName = (Element) authorElement.getElementsByTagNameNS("*", "persName"); + // save Dokument + Reference reference = new Reference(); + reference.setFromDocument(parentDocument); + reference.setToDocument(savedReferencedDocument); + referenceService.saveReference(reference); - if (persName != null) { - String forename = getTagValueNS("forename", persName, ""); - String surname = getTagValueNS("surname", persName, ""); - if (!forename.isEmpty() || !surname.isEmpty()) { - authors.add(forename + " " + surname); - } - } - } - references.add(new Document(title, year, doi, abstractText, pages, publisher)); } - } catch (Exception e) { - System.err.println("Failed to parse references XML: " + e.getMessage()); + + } catch (Exception e){ e.printStackTrace(); } - - return references; } - private static String getTagValueNS(String tagName, Element element, String defaultValue) { - NodeList nodeList = element.getElementsByTagNameNS("*", tagName); - return (nodeList.getLength() > 0) ? nodeList.item(0).getTextContent().trim() : defaultValue; - } -} +} \ No newline at end of file