Skip to content
Snippets Groups Projects
Select Git revision
  • 53026f15e7cfab48e4f75acb75750fa0f4f2de18
  • main default protected
2 results

README.md

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    citavi_export.R 2.68 KiB
    # Attach an R package, installing it from CRAN first when it is missing.
    #
    # x: the package name, either unquoted (library2(dplyr)) or as a string.
    # Returns NULL invisibly. Signals an error (from library()) if the package
    # is still unavailable after the installation attempt.
    library2 <- function(x) {
      # Capture the argument unevaluated so that bare package names work
      pkg <- toString(substitute(x))
      # requireNamespace() only probes for the package and, unlike require(),
      # does not emit a warning when it is missing
      if (!requireNamespace(pkg, quietly = TRUE)) {
        # Use an HTTPS mirror so packages are not downloaded over plain HTTP
        install.packages(pkg, repos = "https://cloud.r-project.org")
      }
      base::library(pkg, character.only = TRUE)
      invisible(NULL)
    }
    
    # Automatically install R-libraries
    library2(RSQLite)
    library2(readtext)
    library2(purrr)
    library2(stringr)
    library2(tibble)
    library2(readr)
    library2(dplyr)
    
    # Print tibbles verbosely for debugging purposes
    # (print limits of 5 rows, output width of 8000 characters)
    options(tibble.print_max = 5, tibble.print_min = 5,tibble.width=8000)
    
    # Get the Citavi project file path as user input.
    # Start from an empty selection so that "no file chosen" (dialog cancelled
    # or no dialog available) fails the length check that follows, instead of
    # being passed on as the path "".
    filepath <- character(0)
    if (.Platform$OS.type == "windows") {
      # multi = FALSE: the rest of the script expects exactly one project file
      filepath <- choose.files(
        default = "",
        caption = "Select the Citavi Project file as data source...",
        filters = cbind("Citavi Project V6  (*.ctv6)", "*.ctv6"),
        multi = FALSE
      )
    } else if (interactive()) {
      # choose.files() is Windows-only; file.choose() signals an error when the
      # dialog is cancelled, which is mapped to an empty selection here
      filepath <- tryCatch(file.choose(), error = function(e) character(0))
    }
    
    # if a valid input is given
    if (length(filepath) > 0 ){
    
    # Folder "Citavi Attachments" next to the project file; used as the prefix
    # for every attachment path returned by the query below
    citavi_attachments_dir <- sprintf("%s/Citavi Attachments/", dirname(filepath))
    
    # Open the selected Citavi project file as an SQLite database
    con <- dbConnect(RSQLite::SQLite(), dbname = filepath)
    
    # One row per reference (joined with its matching Location entry, if any):
    #   pdf_filepath - attachment prefix + UriString taken from the Location JSON
    #   wos_doi      - the part of the reference notes that follows 'WOS:'
    #   doi          - the DOI stored on the reference
    attachment_query <- "SELECT :citavi_attachments || json_extract(l.Address, '$.UriString') as pdf_filepath, substr( r.Notes,instr( r.Notes,'WOS:')+4) as wos_doi, r.DOI as doi
    FROM Reference r LEFT OUTER JOIN Location l ON r.ID = l.ReferenceID and json_extract(l.Address, '$.LinkedResourceType') LIKE 1;"
    res <- dbSendQuery(
      con,
      attachment_query,
      params = list(citavi_attachments = citavi_attachments_dir)
    )
    fetched <- dbFetch(res)
    print("Fetched the Citavi Attachments and the corresponding doi.")
    
    # Make sure the output folder "result" exists beside the project file
    result_dir <- file.path(dirname(filepath), "result")
    if (!dir.exists(result_dir)) dir.create(result_dir)
    
    # Extraction step for a single reference: intended to read the PDF at
    # `file_path` and store its text in result/<id>.txt. The read/write calls
    # are currently commented out, so only the path check is performed.
    #
    # id:        identifier meant for the output file name (currently unused)
    # file_path: path of the attached PDF; may be NA or empty
    # Returns FALSE when file_path is NA or an empty string, TRUE otherwise.
    pdf_to_text_file <- function(id, file_path) {
      path_unusable <- is.na(file_path) || str_length(file_path) <= 0
      if (!path_unusable) {
        # (disabled) read pdf into string
        #readtext_result <- readtext::readtext(file_path, encoding = "UTF-8")
    
        # (disabled) save string into file with filepath result/<id>.txt
        #file_name <- paste0(dirname(filepath), "/result/", id, ".txt")
        #readr::write_file(readtext_result$text, file_name)
        return(TRUE)
      }
      FALSE
    }
    
    # Run the extraction step for every fetched row and write the overview
    # table to result.csv next to the project file
    tfetched <- as_tibble(fetched) %>%
    #  top_n(50,wos_doi) %>% # testing only!!
      dplyr::mutate(
        text = purrr::map2_chr(
          wos_doi, pdf_filepath,
          # pdf_to_text_file() returns a logical; convert it explicitly because
          # purrr >= 1.0 deprecates implicit logical -> character coercion
          ~ as.character(pdf_to_text_file(.x, .y))
        )
      ) %>%
      readr::write_csv2(
        file.path(dirname(filepath), "result.csv"),
        append = FALSE,
        col_names = TRUE
      )
    
    # Separators added so the directory no longer runs into the surrounding text
    print(paste0("Exported extracted text in ", dirname(filepath), ": 'result/<id>.txt' and 'result.csv'."))
    
    # Release the query result before closing the connection
    dbClearResult(res)
    # Disconnect from the database
    dbDisconnect(con)
    }