Select Git revision
image.component.spec.ts
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
citavi_export.R 2.68 KiB
# Attach a package, installing it from CRAN first when it is not available.
#
# The package may be given as a bare name (`library2(dplyr)`) or as a string
# (`library2("dplyr")`); the argument is captured unevaluated via substitute().
#
# @param x Package name, bare or quoted.
# @return NULL, invisibly. Called for its side effect of attaching the package.
#   An error from library() is raised if the package is still unavailable
#   after the installation attempt.
library2 <- function(x) {
  pkg <- toString(substitute(x))
  # Probe quietly for the package instead of using require() as a test, which
  # emits a warning for every package that still has to be installed.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    # Download over HTTPS rather than plain HTTP.
    install.packages(pkg, repos = "https://cloud.r-project.org")
  }
  base::library(pkg, character.only = TRUE)
  invisible(NULL)
}
# Attach the packages used by this script, installing any that are missing.
# library2() captures its argument unevaluated, so every package name has to
# be written out literally here.
library2(RSQLite)
library2(readtext)
library2(purrr)
library2(stringr)
library2(tibble)
library2(readr)
library2(dplyr)
# Print tibbles verbosely for debugging purposes: row limits of 5 and a very
# large output width (8000).
options(tibble.print_max = 5, tibble.print_min = 5,tibble.width=8000)
# Ask the user for the Citavi project file that serves as the data source.
# `filepath` ends up as a single path, or as character(0) when nothing was
# selected, which is the case the `length(filepath) > 0` check that follows
# is able to detect (an empty string "" would have length 1 and slip through).
filepath <- character(0)
if (.Platform$OS.type == "windows") {
  # multi = FALSE: the rest of the script works on exactly one project file.
  filepath <- choose.files(
    default = "",
    caption = "Select the Citavi Project file as data source...",
    multi = FALSE,
    filters = cbind("Citavi Project V6 (*.ctv6)", "*.ctv6")
  )
} else {
  # choose.files() exists only on Windows; file.choose() is the portable
  # fallback. It signals an error when the user cancels, which is mapped to
  # "no selection" here.
  filepath <- tryCatch(file.choose(), error = function(e) character(0))
}
# if a valid input is given
if (length(filepath) > 0 ){
# Folder with the PDFs attached to the project. It is derived from the
# location of the project file and carries a trailing slash because the query
# below uses it as a plain string prefix.
citavi_attachments_dir <- paste0(dirname(filepath), "/Citavi Attachments/")
# Open the selected Citavi project file as an SQLite database.
con <- dbConnect(RSQLite::SQLite(), dbname = filepath)
# One row per reference (left outer join, so a reference with no matching
# Location row is still returned):
#   pdf_filepath - attachment prefix concatenated with the UriString taken
#                  from the JSON in Location.Address
#   wos_doi      - the part of Reference.Notes starting 4 characters after
#                  the position of 'WOS:'
#   doi          - Reference.DOI
# NOTE(review): instr() yields 0 when 'WOS:' is absent, so wos_doi would then
# be the Notes text from its 4th character on - confirm this is acceptable.
attachment_query <- "SELECT :citavi_attachments || json_extract(l.Address, '$.UriString') as pdf_filepath, substr( r.Notes,instr( r.Notes,'WOS:')+4) as wos_doi, r.DOI as doi
FROM Reference r LEFT OUTER JOIN Location l ON r.ID = l.ReferenceID and json_extract(l.Address, '$.LinkedResourceType') LIKE 1;"
# NOTE(review): `res` is not cleared and `con` is not closed in this part of
# the script - confirm that this happens further down.
res <- dbSendQuery(
  con,
  attachment_query,
  params = list(citavi_attachments = citavi_attachments_dir)
)
fetched <- dbFetch(res)
print("Fetched the Citavi Attachments and the corresponding doi.")
# Make sure the "result" directory next to the project file exists.
result_dir <- file.path(dirname(filepath), "result")
if (!dir.exists(result_dir)) {
  dir.create(result_dir)
}
# Retrieve the text of one PDF and store it in result/<id>.txt.
#
# The extraction itself is currently disabled (see the commented-out lines
# below), so at present the function only reports whether a usable path was
# supplied.
#
# @param id Identifier intended to name the output file (result/<id>.txt).
# @param file_path Path of the PDF; may be NA, empty or missing altogether.
# @return FALSE when `file_path` is zero-length (including NULL), NA or an
#   empty string, TRUE otherwise.
pdf_to_text_file <- function(id, file_path) {
  # The length check comes first: is.na() on a zero-length value gives
  # logical(0), which cannot be used as an `if` condition.
  if (length(file_path) == 0L || is.na(file_path) || !nzchar(file_path)) {
    return(FALSE)
  }
  # read pdf into string
  #readtext_result <- readtext::readtext(file_path, encoding = "UTF-8")
  # save string into file with filepath result/<id>.txt
  #file_name <- paste0(dirname(filepath), "/result/", id, ".txt")
  #readr::write_file(readtext_result$text, file_name)
  TRUE
}
# Run pdf_to_text_file() for every fetched row, keep its outcome in the `text`
# column and write the resulting table to result.csv next to the project file.
# write_csv2() passes its input through, so `tfetched` holds the tibble.
tfetched <- as_tibble(fetched) %>%
  # top_n(50,wos_doi) %>% # testing only!!
  dplyr::mutate(
    # pdf_to_text_file() returns a logical. It is converted explicitly because
    # purrr 1.0.0 deprecated the implicit logical-to-character coercion in
    # the *_chr mappers; the stored values ("TRUE"/"FALSE") stay the same.
    text = purrr::map2_chr(
      wos_doi, pdf_filepath,
      ~ as.character(pdf_to_text_file(.x, .y))
    )
  ) %>%
  readr::write_csv2(
    paste0(dirname(filepath), "/result.csv"),
    append = FALSE,
    col_names = TRUE
  )