diff --git a/Output/README.md b/Output/README.md deleted file mode 100644 index 418fdbecc82cdc61979325e9ca7a5e87604a3a7a..0000000000000000000000000000000000000000 --- a/Output/README.md +++ /dev/null @@ -1,46 +0,0 @@ -# Projekt CiS-Projekt 2021/22 - -Citation network made with **d3.js** - -## Usage -### Input -Json file **json\_text.json** in directory -```json -{ - "nodes": [ - { - "name": <title: string>, - "author": [<author1: string>, <author2: string>, ...], - "year": <date: tring>, - "journal": <journal: string>, - "doi": <doi: string>, - "group": <"input"/"height"/"depth">, - "citations": <citation: int> - }, ... - ], - "links": [ - { - "source": <doi: string>, - "target": <doi: string> - }, ... - ] -} -``` - -### Display the Citation Network -Starting a python web server: -```sh - cd <path to file> &&python3 -m http.server <port> -``` -Access to the server: -[http://0.0.0.0/:\<port\>](http://0.0.0.0/:<port>) - -## Files in Directory -- **index.html**: webpage -- **cn.js**: javascript code for force-directed graph, text elements and legend -- **json_text.json**: example data - - -## Authors -- Katja Ehlers -- Merle Stahl \ No newline at end of file diff --git a/Output/graph/cn.js b/Output/graph/cn.js deleted file mode 100644 index 309678b42520cec11d77cf6a3b4be2925cb8bede..0000000000000000000000000000000000000000 --- a/Output/graph/cn.js +++ /dev/null @@ -1,548 +0,0 @@ -/** -* creates a new zoom behavior -*/ -var zoom = d3.zoom().on("zoom", handle_zoom); - -/** -* creates svg object and associated attributes -* applies the zoom behavior to svg -*/ -var svg = d3.select("svg.graph") - .call(zoom), -width = svg.attr("width"), -height = svg.attr("height"), -perc; - -/** -* scale functions that return y coordinate/color of node depending on group -*/ -var color = d3.scaleOrdinal() - .domain(["height", "input", "depth"]) - .range([' #01d7c0', ' #8b90fe ', ' #a15eb2 ']), -y_scale = d3.scaleOrdinal() - .domain(["height", "input", "depth"]) - .range([0, 200, 400]), 
-to_remove; - -/** -* creates node object and (default) radius -*/ -var node, -r = 10; - -/** -* creates link object -*/ -var link; - -/** -* creates a background with a click functionality -*/ -var rect = svg.append("rect") - .attr("x", 0) - .attr("y", 0) - .attr("height", height) - .attr("width", width) - .style("fill", 'white') - .on('click', click_rect); - -/** -* creates svg object (legend) and associated attributes -*/ -var svg_legend = d3.select("svg.legendsvg"), -legend_position = [65,95,125], -arrow_legend_position = [0,25], -arrow_group_names = ["citation","self-citation"], -group_names = ["cited by","input","reference"], -line_type = d3.scaleOrdinal() - .domain(["line","dotted"]) - .range([("8,0"),("8,8")]), -text_info = '', -text_abstract = ''; - -var legend = svg_legend.selectAll(".legend") - .data(legend_position) - .enter() - .append("g") - .attr("class","legend") - .attr("transform", function(d,i) {return "translate(0," + d + ")"; }); - -legend.append("text") - .attr("x", 80) - .attr("y", 0) - .attr("dy", ".35em") - .style("text-anchor", "start") - .text(function(d,i) {return group_names[i]}); - -legend.append("circle") - .attr("r", r) - .attr("cx",30-r) - .style("fill", color); - -var legend_arrow = svg_legend.selectAll(".legendarr") - .data(arrow_legend_position) - .enter() - .append("g") - .attr("class","legendarr") - .attr("transform", function(d) { return "translate(0," + d + ")"; }); - -legend_arrow.append("line") - .attr("x1", 10) - .attr("x2", 50) - .attr("y1", 10) - .attr("y2", 10) - .style("stroke-dasharray",line_type) - .style("stroke", '#999') - .style("stroke-width", "1px") - .style('pointer-events', 'none') - .attr('marker-end',update_marker('#999',this)); - -legend_arrow.append("text") - .attr("x", 80) - .attr("y", 10) - .attr("dy", ".35em") - .style("text-anchor", "start") - .text(function(d,i){return arrow_group_names[i]}); - -/** -* creates a new simulation -* updates the positions of the links and nodes when the - state of the 
layout has changed (simulation has advanced by a tick) -*/ -var simulation = d3.forceSimulation() - .force("link", d3.forceLink().id(function(d) {return d.doi;}).distance(50).strength(function(d) { - if (d.group == "input") {return 0;} - else {return 5;} - })) - .force("collide", d3.forceCollide(function(d) { - if (d.group == "input") {return 100;} - else {return 65;} - }).strength(0.5)) - .force("charge", d3.forceManyBody().strength(0.001)) - .force("center", d3.forceCenter(width/2, height/2+20)) - .force("yscale", d3.forceY().strength(function(d) { - if (d.group == "input") {return 1000;} - else {return 50;} - }).y(function(d) {return y_scale(d.group)})) - .alpha(0.005) - .on("end", zoom_to); - -/** -* creates group element -*/ -var g = svg.append("g") - .attr("class", "everything") - -/** -* loads JSON data and calls the update function -*/ -d3.json("json_text.json").then(function(graph) { - update(graph.links, graph.nodes); -}) - -/** -* calls update functions for links and nodes -* adds the nodes, links and tick functionality to the simulation -* @param {object} nodes - nodes -* @param {object} links - links -*/ -function update(links, nodes) { - update_links(links); - update_nodes(nodes); - - simulation - .nodes(nodes) - .on("tick", handle_tick); - simulation.force("link") - .links(links); - - link.attr('marker-end', function(d) {return update_marker("#999", d.target);}) - .style("stroke-dasharray",function(d){return self_citation(d.source,d.target)? 
("8,8"): ("1,0")}); -} - -/** -* initializes and shows links -* @param {object} links - links -*/ -function update_links(links) { - link = g.append("g") - .selectAll(".link") - .data(links) - .enter() - .append("line") - .style("stroke-width", "1px") - .style("stroke", "#999") - .attr("class", "link"); -} - -/** -* initializes and shows nodes with circles, texts and a click functionality -* creates a new drag behavior and applies it to the circles -* @param {object} nodes - nodes -*/ -function update_nodes(nodes) { - node = g.selectAll(".node") - .data(nodes) - .enter() - .append("g") - .attr("class", "node") - .call(d3.drag() - .on("start", start_drag) - .on("drag", dragged) - ); - - node.append("circle") - .attr("class", "circle") - .attr("r", function(d) {return 1.5*r+d.citations*0.05}) - .style("fill", function(d){ return color(d.group)}) - .on('click', click_node); - - node.append("text") - .attr("class", "text") - .style("font-size", "15px") - .style('pointer-events', 'auto') - .text(function (d) {const first_author=d.author[0].split(" ") - return first_author[first_author.length-1];}) - .on('click', click_node); -} - -/** -* creates arrowhead and returns its url -* @param {string} color - color of arrowhead -* @param {string} target - target node -*/ -function update_marker(color, target) { - var radius = 1.5*r+target.citations*0.05; - svg.append('defs').append('marker') - .attr('id',color.replace("#", "")+radius) - .attr('viewBox','-0 -5 10 10') - .attr('refX',radius+9.5) - .attr('refY',0) - .attr('orient','auto') - .attr('markerWidth',10) - .attr('markerHeight',15) - .attr('xoverflow','visible') - .append('svg:path') - .attr('d', 'M 0,-5 L 10 ,0 L 0,5') - .attr('fill', color) - .style('stroke','none'); - return "url(" + color + radius + ")"; -}; - -/** -* sets color of circle and its links to black and removes the previous highlights -* displays overview info of node in textbox -* @param {object} node - node -*/ -function click_node(node) { - 
d3.select(this.parentNode).raise(); - fix_nodes(node); - if(to_remove){ - d3.select(to_remove).selectAll(".circle").style("stroke","none") - } - to_remove = this.parentNode; - d3.select(this.parentNode).selectAll(".circle").style("stroke","black") - mark_link(node) - textbox_content(node) - reset_button_highlight() - highlight_button("overview") -} - -/** -* removes the highlights of the circles and their links -*/ -function click_rect() { - fix_nodes(node); - d3.selectAll(".circle").style("stroke", "none") - d3.selectAll(".link") - .style("stroke", "#999") - .attr('marker-end', function(d) {return update_marker('#999', d.target);}) - text_abstract=''; - text_info=''; - reset_button_highlight() - document.getElementById('textbox').innerHTML = "Click node"; -} - -/** -* returns true if journals have a common author (self-citation) -* @param {object} source - node -* @param {object} target - node -*/ -function self_citation(source,target) { - return source.author.some(item=>target.author.includes(item)) -} - -/** -* sets color of link (line and arrowhead) to black if it is directly connected to node -* and to grey otherwise -* @param {object} node - node -*/ -function mark_link(node) { - d3.selectAll(".link") - .style("stroke", function(o) { - return is_link_for_node(node, o) ? "black" : "#999";}) - .attr('marker-end', function(o) { - return is_link_for_node(node, o) ? 
update_marker('#000000', o.target) : update_marker('#999', o.target);}) -} - -/** -* returns true if link is directly connected to node and false if it is not -* @param {object} node - node -* @param {object} link - link -*/ -function is_link_for_node(node, link) { - return link.source.index == node.index || link.target.index == node.index; -} - -/** -* saves text for overview and abstract of node -* outputs node info to textbox -* @param {object} node - node -*/ -function textbox_content(node) { - text_info = "Title:" + '</br>' + node.name + - '</br>' +'</br>'+"Author:"+ '</br>' +node.author+'</br>'+'</br>'+"Date:"+'</br>' - +node.year+'</br>'+'</br>'+"Journal:"+'</br>'+node.journal+'</br>'+'</br>'+"doi:" - +'</br>'+'<a href="'+node.doi+ '">'+node.doi+'</a>'+'</br>'+'</br>'+"Citations:" - +'</br>'+node.citations; - text_abstract = node.abstract; - document.getElementById('textbox').innerHTML = text_info; -} - -/** -* sets color of btn to dark gray -* @param {object} btn - button -*/ -function highlight_button(btn) { - reset_button_highlight(); - document.getElementById(btn).style.background = "#CACACA"; -} - -/** -* sets color of all buttons to default light gray -*/ -function reset_button_highlight() { - document.getElementById("overview").style.background = ''; - document.getElementById("abstract").style.background = ''; -} - -/** -* displays abstract in textbox if a is true, overview text otherwise -* @param {bool} a- bool -*/ -function display_abstract(a) { - if (text_abstract == '' && text_info == '') { - document.getElementById('textbox').innerHTML="Click node"; - } - else { - if (a == true) { - document.getElementById('textbox').innerHTML = text_abstract; - } - else { - document.getElementById('textbox').innerHTML = text_info; - } - } -} - -/** -* updates the positions of the links and nodes -*/ -function handle_tick() { - link.attr("x1", function (d) {return d.source.x;}) - .attr("y1", function (d) {return d.source.y;}) - .attr("x2", function (d) {return 
d.target.x;}) - .attr("y2", function (d) {return d.target.y;}); - node.attr("transform", function (d) {return "translate(" + d.x + ", " + d.y + ")";}); -} - -/** -* initializes the dragging of the node -* @param {object} node - node -*/ -function start_drag(node) { - d3.select(this).raise(); - if (!d3.event.active) - simulation.alphaTarget(0.3).restart() - node.fx = node.x; - node.fy = node.y; - fix_nodes(node); -} - -/** -* applies the dragging to the node -* @param {object} node - node -*/ -function dragged(node) { - node.fx = d3.event.x; - node.fy = d3.event.y; - fix_nodes(node); -} - -/** -* fix positions of all nodes except for the current node -* @param {object} this_node - node -*/ -function fix_nodes(this_node) { - node.each(function(d) { - if (this_node != d) { - d.fx = d.x; - d.fy = d.y; - } - }); -} - -/** -* applies the transformation (zooming or dragging) to the g element -*/ -function handle_zoom() { - d3.select('g').attr("transform", d3.event.transform); -} - -/** -* transforms svg so that the zoom is adapted to the size of the graph -*/ -function zoom_to() { - node_bounds = d3.selectAll("svg.graph").node().getBBox(); - svg_bounds = d3.select("rect").node().getBBox(); - - perc_x = width/(node_bounds.width+100); - perc_y = height/(node_bounds.height+100); - perc = d3.min([perc_x, perc_y]) - - d3.select('svg') - .call(zoom.scaleBy, perc); -} - -/** -* transforms svg so that the zoom and drag is reset -*/ -function reset_view() { - d3.select('svg') - .call(zoom.scaleTo, 1) - d3.select('svg') - .call(zoom.translateTo, 0.5 * width, 0.5 * height); - d3.select('svg') - .call(zoom.scaleBy, perc); -} - -/** -* save svg as png -*/ -function save_svg(){ - var svgString = get_svg_string(svg.node()); - svg_string_to_image(svgString, 2*width, 2*height, 'png', save); // passes Blob and filesize String to the callback - - function save( dataBlob, filesize ){ - saveAs(dataBlob, 'D3 vis exported to PNG.png'); // FileSaver.js function - } -}; - -/** -* generate 
svgString -* @param {object} svgNode - node -*/ -function get_svg_string(svgNode) { - svgNode.setAttribute('xlink', 'http://www.w3.org/1999/xlink'); - var cssStyleText = get_css_styles(svgNode); - append_css(cssStyleText, svgNode); - - var serializer = new XMLSerializer(); - var svgString = serializer.serializeToString(svgNode); - svgString = svgString.replace(/(\w+)?:?xlink=/g, 'xmlns:xlink='); // Fix root xlink without namespace - svgString = svgString.replace(/NS\d+:href/g, 'xlink:href'); // Safari NS namespace fix - - return svgString; - - function get_css_styles(parentElement) { - var selectorTextArr = []; - - // Add Parent element Id and Classes to the list - selectorTextArr.push('#' + parentElement.id); - for (var c = 0; c < parentElement.classList.length; c++) - if (!contains('.'+parentElement.classList[c], selectorTextArr)) - selectorTextArr.push('.'+parentElement.classList[c]); - - // Add Children element Ids and Classes to the list - var nodes = parentElement.getElementsByTagName("*"); - for (var i = 0; i < nodes.length; i++) { - var id = nodes[i].id; - if (!contains('#'+id, selectorTextArr)) - selectorTextArr.push('#' + id); - - var classes = nodes[i].classList; - for (var c = 0; c < classes.length; c++) - if (!contains('.'+classes[c], selectorTextArr)) - selectorTextArr.push('.'+classes[c]); - } - - // Extract CSS Rules - var extractedCSSText = ""; - for (var i = 0; i < document.styleSheets.length; i++) { - var s = document.styleSheets[i]; - - try { - if(!s.cssRules) continue; - } catch(e) { - if(e.name !== 'SecurityError') throw e; // for Firefox - continue; - } - - var cssRules = s.cssRules; - for (var r = 0; r < cssRules.length; r++) { - if (contains(cssRules[r].selectorText, selectorTextArr)) - extractedCSSText += cssRules[r].cssText; - } - } - - - return extractedCSSText; - - function contains(str,arr) { - return arr.indexOf(str) === -1 ? 
false : true; - } - - } - - function append_css(cssText, element) { - var styleElement = document.createElement("style"); - styleElement.setAttribute("type","text/css"); - styleElement.innerHTML = cssText; - var refNode = element.hasChildNodes() ? element.children[0] : null; - element.insertBefore(styleElement, refNode); - } -} - -/** -* convert svgString to image and export it -* @param {object} svgString - svgString -* @param {object} width - width of image -* @param {object} height - height of image -* @param {object} format - format to save image in -* @param {object} callback - callback function -*/ -function svg_string_to_image( svgString, width, height, format, callback ) { - var format = format ? format : 'png'; - - var imgsrc = 'data:image/svg+xml;base64,'+ btoa(unescape(encodeURIComponent(svgString))); // Convert SVG string to data URL - - var canvas = document.createElement("canvas"); - var context = canvas.getContext("2d"); - - canvas.width = width; - canvas.height = height; - - var image = new Image(); - image.onload = function() { - context.clearRect(0, 0, width, height); - context.drawImage(image, 0, 0, width, height); - - canvas.toBlob(function(blob) { - var filesize = Math.round(blob.length/1024) + ' KB'; - if (callback) callback(blob, filesize); - }); - - }; - - image.src = imgsrc; -} - diff --git a/Output/graph/index.html b/Output/graph/index.html deleted file mode 100644 index 78560da48c01f39debc7a810b58a60672c0995dc..0000000000000000000000000000000000000000 --- a/Output/graph/index.html +++ /dev/null @@ -1,107 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> -<head> - <meta charset="utf-8"> - - <!-- style specifications for button and div elements --> - <style type="text/css"> - button { - width: 100px; - height:20px; - display: flex; - justify-content: center; - position: absolute; - left: 455px; - top: 575px; - transition-duration: 0.4s; - border-radius:3px; - border:1px solid #909090; - } - - .button:hover { - background-color: #CACACA; - } - - 
button.resetZoom { - margin-left: 110px; - } - - button.save { - margin-left: 220px; - } - - button.abstract { - width:146px; - position:absolute; - top: 181px; - left: 1114px; - border-radius:0; - border:1px solid #909090; - } - - button.overview { - width:147px; - position:absolute; - display:inline-block; - top: 181px; - left: 968px; - border-radius:0; - border:1px solid #909090; - } - - div.legendbox { - width:270px; - height:170px; - padding: 10px; - /*border: 1px solid #999;*/ - position: absolute; - top: 10px; - left: 968px; - display: inline-block; - margin: 0; - } - - div.textbox { - width:270px; - min-height:200px; - max-height:370px; - padding: 10px; - border: 1px solid #999; - position: absolute; - top: 200px; - left: 968px; - display: inline-block; - overflow-y: scroll; - margin: 0; - } - </style> -</head> - -<body> - <!-- graph --> - <svg class="graph" width="960" height="560"></svg> - - <!-- legend --> - <div class="legendbox"> <svg class="legendsvg"></svg></div> - - <!-- textbox --> - <div class="textbox" id = "textbox">Click node</div> - <button id="overview" class="overview" onclick='display_abstract(false), highlight_button("overview")'>Overview</button> - <button id="abstract" class="abstract" onclick='display_abstract(true), highlight_button("abstract")'>Abstract</button> - - <!-- buttons --> - <button onclick="location.reload()">Reload Graph</button> - <button class="resetZoom" onclick="reset_view()">Reset View</button> - <button class="save" onclick="save_svg()">Save</button> - - <!-- D3 (version 5) --> - <script src="https://d3js.org/d3.v5.min.js"></script> - - <!-- scripts to save svg element as png --> - <script src="https://cdn.rawgit.com/eligrey/canvas-toBlob.js/f1a01896135ab378aa5c0118eadd81da55e698d8/canvas-toBlob.js"></script> - <script src="https://cdn.rawgit.com/eligrey/FileSaver.js/e9d941381475b5df8b7d7691013401e171014e89/FileSaver.min.js"></script> - - <!-- javascript for force-directed graph --> - <script type="text/javascript" 
id="cn" src="cn.js"></script> -</body> -</html> \ No newline at end of file diff --git a/Output/graph/json_text.json b/Output/graph/json_text.json deleted file mode 100644 index eb3c1d3d7ab6d4d770686f9170bfae697b7d2ca7..0000000000000000000000000000000000000000 --- a/Output/graph/json_text.json +++ /dev/null @@ -1,819 +0,0 @@ -{ - "nodes": [ - { - "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", - "author": [ - "Emanuel S. R. Ehmki", - "Robert Schmidt", - "Farina Ohm", - "Matthias Rarey" - ], - "year": "May 24, 2019", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/acs.jcim.9b00249", - "group": "input", - "citations": 5 - }, - { - "name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", - "author": [ - "John A. Keith", - "Valentin Vassilev-Galindo", - "Bingqing Cheng", - "Stefan Chmiela", - "Michael Gastegger", - "Klaus-Robert M\u00fcller", - "Alexandre Tkatchenko" - ], - "year": "July 7, 2021", - "journal": "Chem. Rev.", - "doi": "https://doi.org/10.1021/acs.chemrev.1c00107", - "group": "height", - "citations": 1 - }, - { - "name": "Disconnected Maximum Common Substructures under Constraints", - "author": [ - "Robert Schmidt", - "Florian Krull", - "Anna Lina Heinzke", - "Matthias Rarey" - ], - "year": "December 16, 2020", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/acs.jcim.0c00741", - "group": "height", - "citations": 0 - }, - { - "name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", - "author": [ - "Ansgar Schuffenhauer", - "Nadine Schneider", - "Samuel Hintermann", - "Douglas Auld", - "Jutta Blank", - "Simona Cotesta", - "Caroline Engeloch", - "Nikolas Fechner", - "Christoph Gaul", - "Jerome Giovannoni", - "Johanna Jansen", - "John Joslin", - "Philipp Krastel", - "Eugen Lounkine", - "John Manchester", - "Lauren G. 
Monovich", - "Anna Paola Pelliccioli", - "Manuel Schwarze", - "Michael D. Shultz", - "Nikolaus Stiefl", - "Daniel K. Baeschlin" - ], - "year": "November 3, 2020", - "journal": "Journal of Medicinal Chemistry", - "doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", - "group": "height", - "citations": 8 - }, - { - "name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", - "author": [ - "Robert Schmidt", - "Emanuel S. R. Ehmki", - "Farina Ohm", - "Hans-Christian Ehrlich", - "Andriy Mashychev", - "Matthias Rarey" - ], - "year": "May 23, 2019", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/acs.jcim.9b00250", - "group": "height", - "citations": 12 - }, - { - "name": "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", - "author": [ - "Jerome Eberhardt", - "Diogo Santos-Martins", - "Andreas F. Tillack", - "Stefano Forli" - ], - "year": "July 19, 2021", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/acs.jcim.1c00203", - "group": "input", - "citations": 1 - }, - { - "name": "Accelerating AutoDock4 with GPUs and Gradient-Based Local Search", - "author": [ - "Diogo Santos-Martins", - "Leonardo Solis-Vasquez", - "Andreas F Tillack", - "Michel F Sanner", - "Andreas Koch", - "Stefano Forli" - ], - "year": "January 6, 2021", - "journal": "Journal of Chemical Theory and Computation", - "doi": "https://doi.org/10.1021/acs.jctc.0c01006", - "group": "depth", - "citations": 14 - }, - { - "name": "Docking Flexible Cyclic Peptides with AutoDock CrankPep", - "author": [ - "Yuqi Zhang", - "Michel F. 
Sanner" - ], - "year": "September 11, 2019", - "journal": "Journal of Chemical Theory and Computation", - "doi": "https://doi.org/10.1021/acs.jctc.9b00557", - "group": "depth", - "citations": 9 - }, - { - "name": "Lessons Learned in Empirical Scoring with smina from the CSAR 2011 Benchmarking Exercise", - "author": [ - "David Ryan Koes", - "Matthew P. Baumgartner", - "Carlos J. Camacho" - ], - "year": "February 4, 2013", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/ci300604z", - "group": "depth", - "citations": 100 - }, - { - "name": "Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking", - "author": [ - "Anita K. Nivedha", - "David F. Thieker", - "Spandana Makeneni", - "Huimin Hu", - "Robert J. Woods" - ], - "year": "January 8, 2016", - "journal": "Journal of Chemical Theory and Computation", - "doi": "https://doi.org/10.1021/acs.jctc.5b00834", - "group": "depth", - "citations": 48 - }, - { - "name": "Lennard-Jones Potential and Dummy Atom Settings to Overcome the AUTODOCK Limitation in Treating Flexible Ring Systems", - "author": [ - "Stefano Forli", - "Maurizio Botta" - ], - "year": "June 22, 2007", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/ci700036j", - "group": "depth", - "citations": 32 - }, - { - "name": "AutoDock4Zn: An Improved AutoDock Force Field for Small-Molecule Docking to Zinc Metalloproteins", - "author": [ - "Diogo Santos-Martins", - "Stefano Forli", - "Maria Jo\u00e3o Ramos", - "Arthur J. Olson" - ], - "year": "June 15, 2014", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/ci500209e", - "group": "depth", - "citations": 100 - }, - { - "name": "A Force Field with Discrete Displaceable Waters and Desolvation Entropy for Hydrated Ligand Docking", - "author": [ - "Stefano Forli", - "Arthur J. 
Olson" - ], - "year": "December 9, 2011", - "journal": "Journal of Medicinal Chemistry", - "doi": "https://doi.org/10.1021/jm2005145", - "group": "depth", - "citations": 100 - }, - { - "name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", - "author": [ - "Douglas R. Houston", - "Malcolm D. Walkinshaw" - ], - "year": "January 27, 2013", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/ci300399w", - "group": "depth", - "citations": 100 - }, - { - "name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory", - "author": [ - "Themis Lazaridis" - ], - "year": "April 14, 1998", - "journal": "Journal of Physical Chemistry B", - "doi": "https://doi.org/10.1021/jp9723574", - "group": "depth", - "citations": 100 - }, - { - "name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 2. Applications to Simple Fluids", - "author": [ - "Themis Lazaridis" - ], - "year": "April 14, 1998", - "journal": "Journal of Physical Chemistry B", - "doi": "https://doi.org/10.1021/jp972358w", - "group": "depth", - "citations": 100 - }, - { - "name": "ZINC20\u2014A Free Ultralarge-Scale Chemical Database for Ligand Discovery", - "author": [ - "John J. Irwin", - "Khanh G. Tang", - "Jennifer Young", - "Chinzorig Dandarchuluun", - "Benjamin R. Wong", - "Munkhzul Khurelbaatar", - "Yurii S. Moroz", - "John Mayfield", - "Roger A. 
Sayle" - ], - "year": "October 29, 2020", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/acs.jcim.0c00675", - "group": "depth", - "citations": 25 - }, - { - "name": "Structural Biology-Inspired Discovery of Novel KRAS\u2013PDE\u03b4 Inhibitors", - "author": [ - "Yan Jiang", - "Chunlin Zhuang", - "Long Chen", - "Junjie Lu", - "Guoqiang Dong", - "Zhenyuan Miao", - "Wannian Zhang", - "Jian Li", - "Chunquan Sheng" - ], - "year": "September 20, 2017", - "journal": "Journal of Medicinal Chemistry", - "doi": "https://doi.org/10.1021/acs.jmedchem.7b01243", - "group": "depth", - "citations": 12 - }, - { - "name": "Directory of Useful Decoys, Enhanced (DUD-E): Better Ligands and Decoys for Better Benchmarking", - "author": [ - "Michael M. Mysinger", - "Michael Carchia", - "John. J. Irwin", - "Brian K. Shoichet" - ], - "year": "June 20, 2012", - "journal": "Journal of Medicinal Chemistry", - "doi": "https://doi.org/10.1021/jm300687e", - "group": "depth", - "citations": 100 - }, - { - "name": "Evaluation of AutoDock and AutoDock Vina on the CASF-2013 Benchmark", - "author": [ - "Thomas Gaillard" - ], - "year": "July 10, 2018", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/acs.jcim.8b00312", - "group": "depth", - "citations": 74 - }, - { - "name": "Autodock Vina Adopts More Accurate Binding Poses but Autodock4 Forms Better Binding Affinity", - "author": [ - "Nguyen Thanh Nguyen", - "Trung Hai Nguyen", - "T. Ngoc Han Pham", - "Nguyen Truong Huy", - "Mai Van Bay", - "Minh Quan Pham", - "Pham Cam Nam", - "Van V. Vu", - "Son Tung Ngo" - ], - "year": "December 30, 2019", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/acs.jcim.9b00778", - "group": "depth", - "citations": 65 - }, - { - "name": "Glide:\u2009 A New Approach for Rapid, Accurate Docking and Scoring. 1. Method and Assessment of Docking Accuracy", - "author": [ - "Richard A. 
Friesner", - "Jay L. Banks", - "Robert B. Murphy", - "Thomas A. Halgren", - "Jasna J. Klicic", - "Daniel T. Mainz", - "Matthew P. Repasky", - "Eric H. Knoll", - "Mee Shelley", - "Jason K. Perry", - "David E. Shaw", - "Perry Francis", - "Peter S. Shenkin" - ], - "year": "February 27, 2004", - "journal": "Journal of Medicinal Chemistry", - "doi": "https://doi.org/10.1021/jm0306430", - "group": "depth", - "citations": 97 - }, - { - "name": "Surflex:\u2009 Fully Automatic Flexible Molecular Docking Using a Molecular Similarity-Based Search Engine", - "author": [ - "Ajay N. Jain" - ], - "year": "January 21, 2003", - "journal": "Journal of Medicinal Chemistry", - "doi": "https://doi.org/10.1021/jm020406h", - "group": "depth", - "citations": 100 - }, - { - "name": "ID-Score: A New Empirical Scoring Function Based on a Comprehensive Set of Descriptors Related to Protein\u2013Ligand Interactions", - "author": [ - "Guo-Bo Li", - "Ling-Ling Yang", - "Wen-Jing Wang", - "Lin-Li Li", - "Sheng-Yong Yang" - ], - "year": "February 9, 2013", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/ci300493w", - "group": "depth", - "citations": 99 - }, - { - "name": "A Knowledge-Based Energy Function for Protein\u2212Ligand, Protein\u2212Protein, and Protein\u2212DNA Complexes", - "author": [ - "Chi Zhang", - "Song Liu", - "Qianqian Zhu", - "Yaoqi Zhou" - ], - "year": "February 16, 2005", - "journal": "Journal of Medicinal Chemistry", - "doi": "https://doi.org/10.1021/jm049314d", - "group": "depth", - "citations": 100 - }, - { - "name": "Novel Anti-Hepatitis B Virus Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives", - "author": [ - "Mohammad K. Parvez", - "Sarfaraz Ahmed", - "Mohammed S. Al-Dosari", - "Mazin A. S. Abdelwahid", - "Ahmed H. Arbab", - "Adnan J. Al-Rehaily", - "Mai M. 
Al-Oqail" - ], - "year": "October 21, 2021", - "journal": "ACS Omega", - "doi": "https://doi.org/10.1021/acsomega.1c04320", - "group": "height", - "citations": 0 - }, - { - "name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID", - "author": [ - "Ilaria Proietti Silvestri", - "Paul J. J. Colbon" - ], - "year": "July 16, 2021", - "journal": "ACS Med. Chem. Lett.", - "doi": "https://doi.org/10.1021/acsmedchemlett.1c00251", - "group": "height", - "citations": 0 - }, - { - "name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies", - "author": [ - "Paul D. Leeson", - "A. Patricia Bento", - "Anna Gaulton", - "Anne Hersey", - "Emma J. Manners", - "Chris J. Radoux", - "Andrew R. Leach" - ], - "year": "May 13, 2021", - "journal": "Journal of Medicinal Chemistry", - "doi": "https://doi.org/10.1021/acs.jmedchem.1c00416", - "group": "height", - "citations": 0 - }, - { - "name": "Topological Characterization and Graph Entropies of Tessellations of Kekulene Structures: Existence of Isentropic Structures and Applications to Thermochemistry, Nuclear Magnetic Resonance, and Electron Spin Resonance", - "author": [ - "S. Ruth Julie Kavitha", - "Jessie Abraham", - "Micheal Arockiaraj", - "Joseph Jency", - "Krishnan Balasubramanian" - ], - "year": "September 1, 2021", - "journal": "J. Phys. Chem. 
A", - "doi": "https://doi.org/10.1021/acs.jpca.1c06264", - "group": "height", - "citations": 0 - }, - { - "name": "Automatic Identification of Lansoprazole Degradants under Stress Conditions by LC-HRMS with MassChemSite and WebChembase", - "author": [ - "Stefano Bonciarelli", - "Jenny Desantis", - "Laura Goracci", - "Lydia Siragusa", - "Ismael Zamora", - "Elisabeth Ortega-Carrasco" - ], - "year": "June 1, 2021", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/acs.jcim.1c00226", - "group": "height", - "citations": 0 - }, - { - "name": "Computational Approaches to Identify Structural Alerts and Their Applications in Environmental Toxicology and Drug Discovery", - "author": [ - "Hongbin Yang", - "Chaofeng Lou", - "Weihua Li", - "Guixia Liu", - "Yun Tang" - ], - "year": "February 24, 2020", - "journal": "Chem. Res. Toxicol.", - "doi": "https://doi.org/10.1021/acs.chemrestox.0c00006", - "group": "height", - "citations": 11 - }, - { - "name": "Toward a Global Understanding of Chemical Pollution: A First Comprehensive Analysis of National and Regional Chemical Inventories", - "author": [ - "Zhanyun Wang", - "Glen W. Walker", - "Derek C. G. Muir", - "Kakuko Nagatani-Yoshida" - ], - "year": "January 22, 2020", - "journal": "Environ. Sci. Technol.", - "doi": "https://doi.org/10.1021/acs.est.9b06379", - "group": "height", - "citations": 100 - }, - { - "name": "ZINC \u2212 A Free Database of Commercially Available Compounds for Virtual Screening", - "author": [ - "John J. Irwin", - "Brian K. Shoichet" - ], - "year": "December 14, 2004", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/ci049714+", - "group": "depth", - "citations": 99 - }, - { - "name": "ZINC: A Free Tool to Discover Chemistry for Biology", - "author": [ - "John J. Irwin", - "Teague Sterling", - "Michael M. Mysinger", - "Erin S. Bolstad", - "Ryan G. 
Coleman" - ], - "year": "May 15, 2012", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/ci3001277", - "group": "depth", - "citations": 100 - }, - { - "name": "ZINC 15 \u2013 Ligand Discovery for Everyone", - "author": [ - "Teague Sterling", - "John J. Irwin" - ], - "year": "October 19, 2015", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/acs.jcim.5b00559", - "group": "depth", - "citations": 98 - }, - { - "name": "Application of Belief Theory to Similarity Data Fusion for Use in Analog Searching and Lead Hopping", - "author": [ - "Steven W. Muchmore", - "Derek A. Debe", - "James T. Metz", - "Scott P. Brown", - "Yvonne C. Martin", - "Philip J. Hajduk" - ], - "year": "April 17, 2008", - "journal": "Journal of Chemical Information and Modeling", - "doi": "https://doi.org/10.1021/ci7004498", - "group": "depth", - "citations": 100 - }, - { - "name": "Do Structurally Similar Molecules Have Similar Biological Activity?", - "author": [ - "Yvonne C. Martin", - "James L. Kofron", - "Linda M. Traphagen" - ], - "year": "August 13, 2002", - "journal": "Journal of Medicinal Chemistry", - "doi": "https://doi.org/10.1021/jm020155c", - "group": "depth", - "citations": 100 - }, - { - "name": "The Properties of Known Drugs. 1. Molecular Frameworks", - "author": [ - "Guy W. Bemis", - "Mark A. Murcko" - ], - "year": "July 19, 1996", - "journal": "Journal of Medicinal Chemistry", - "doi": "https://doi.org/10.1021/jm9602928", - "group": "depth", - "citations": 100 - }, - { - "name": "Molecular Shape Diversity of Combinatorial Libraries:\u2009 A Prerequisite for Broad Bioactivity\u2020", - "author": [ - "Wolfgang H. B. Sauer", - "Matthias K. Schwarz" - ], - "year": "March 14, 2003", - "journal": "J. Chem. Inf. Comput. 
Sci.", - "doi": "https://doi.org/10.1021/ci025599w", - "group": "depth", - "citations": 99 - } - ], - "links": [ - { - "source": "https://doi.org/10.1021/acs.chemrev.1c00107", - "target": "https://doi.org/10.1021/acs.jcim.9b00249" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.0c00741", - "target": "https://doi.org/10.1021/acs.jcim.9b00249" - }, - { - "source": "https://doi.org/10.1021/acs.jmedchem.0c01332", - "target": "https://doi.org/10.1021/acs.jcim.9b00249" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.9b00250", - "target": "https://doi.org/10.1021/acs.jcim.9b00249" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/acs.jctc.0c01006" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/acs.jctc.9b00557" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/ci300604z" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/acs.jctc.5b00834" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/ci700036j" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/ci500209e" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/jm2005145" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/ci300399w" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/jp9723574" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/jp972358w" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/acs.jcim.0c00675" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/acs.jmedchem.7b01243" - }, - { 
- "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/jm300687e" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/acs.jcim.8b00312" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/acs.jcim.9b00778" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/jm0306430" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/jm020406h" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/ci300493w" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00203", - "target": "https://doi.org/10.1021/jm049314d" - }, - { - "source": "https://doi.org/10.1021/acsomega.1c04320", - "target": "https://doi.org/10.1021/acs.jcim.1c00203" - }, - { - "source": "https://doi.org/10.1021/acsmedchemlett.1c00251", - "target": "https://doi.org/10.1021/acs.jmedchem.0c01332" - }, - { - "source": "https://doi.org/10.1021/acs.jmedchem.1c00416", - "target": "https://doi.org/10.1021/acs.jmedchem.0c01332" - }, - { - "source": "https://doi.org/10.1021/acs.jpca.1c06264", - "target": "https://doi.org/10.1021/acs.chemrev.1c00107" - }, - { - "source": "https://doi.org/10.1021/acs.chemrev.1c00107", - "target": "https://doi.org/10.1021/acs.jcim.9b00250" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.1c00226", - "target": "https://doi.org/10.1021/acs.jcim.9b00250" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.0c00741", - "target": "https://doi.org/10.1021/acs.jcim.9b00250" - }, - { - "source": "https://doi.org/10.1021/acs.jmedchem.0c01332", - "target": "https://doi.org/10.1021/acs.jcim.9b00250" - }, - { - "source": "https://doi.org/10.1021/acs.chemrestox.0c00006", - "target": "https://doi.org/10.1021/acs.jcim.9b00250" - }, - { - "source": "https://doi.org/10.1021/acs.est.9b06379", - "target": 
"https://doi.org/10.1021/acs.jcim.9b00250" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.9b00249", - "target": "https://doi.org/10.1021/acs.jcim.9b00250" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.0c00675", - "target": "https://doi.org/10.1021/ci049714+" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.0c00675", - "target": "https://doi.org/10.1021/ci3001277" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.0c00675", - "target": "https://doi.org/10.1021/acs.jcim.5b00559" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.0c00675", - "target": "https://doi.org/10.1021/ci7004498" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.0c00675", - "target": "https://doi.org/10.1021/jm020155c" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.0c00675", - "target": "https://doi.org/10.1021/jm9602928" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.0c00675", - "target": "https://doi.org/10.1021/ci025599w" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.5b00559", - "target": "https://doi.org/10.1021/ci049714+" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.5b00559", - "target": "https://doi.org/10.1021/ci3001277" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.5b00559", - "target": "https://doi.org/10.1021/jm300687e" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.8b00312", - "target": "https://doi.org/10.1021/ci300604z" - }, - { - "source": "https://doi.org/10.1021/acs.jctc.0c01006", - "target": "https://doi.org/10.1021/ci700036j" - }, - { - "source": "https://doi.org/10.1021/acs.jctc.9b00557", - "target": "https://doi.org/10.1021/ci700036j" - }, - { - "source": "https://doi.org/10.1021/ci500209e", - "target": "https://doi.org/10.1021/ci700036j" - }, - { - "source": "https://doi.org/10.1021/acs.jctc.0c01006", - "target": "https://doi.org/10.1021/ci500209e" - }, - { - "source": "https://doi.org/10.1021/acs.jctc.0c01006", - "target": "https://doi.org/10.1021/jm2005145" - }, - { - "source": 
"https://doi.org/10.1021/acs.jcim.8b00312", - "target": "https://doi.org/10.1021/jm2005145" - }, - { - "source": "https://doi.org/10.1021/ci500209e", - "target": "https://doi.org/10.1021/jm2005145" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.0c00741", - "target": "https://doi.org/10.1021/jm300687e" - }, - { - "source": "https://doi.org/10.1021/acs.jcim.9b00778", - "target": "https://doi.org/10.1021/acs.jcim.8b00312" - }, - { - "source": "https://doi.org/10.1021/acs.jctc.0c01006", - "target": "https://doi.org/10.1021/acs.jcim.9b00778" - }, - { - "source": "https://doi.org/10.1021/acs.jctc.0c01006", - "target": "https://doi.org/10.1021/ci049714+" - }, - { - "source": "https://doi.org/10.1021/ci7004498", - "target": "https://doi.org/10.1021/jm020155c" - }, - { - "source": "https://doi.org/10.1021/acsmedchemlett.1c00251", - "target": "https://doi.org/10.1021/ci025599w" - }, - { - "source": "https://doi.org/10.1021/acs.jmedchem.0c01332", - "target": "https://doi.org/10.1021/ci025599w" - } - ] -} \ No newline at end of file diff --git a/citation_parser_ui.py b/citation_parser_ui.py new file mode 100644 index 0000000000000000000000000000000000000000..453ead98cdb0543e35c58a3e0adfd734e34067bc --- /dev/null +++ b/citation_parser_ui.py @@ -0,0 +1,255 @@ +import base64 +import re +import dash +from dash import dcc +from dash import html +from dash import callback_context +from dash.dependencies import Input, Output, State +from dash.exceptions import PreventUpdate +from input.interface import InputInterface +import input.publication + +app = dash.Dash(__name__) + +# List of options when inputting data and generating the graph +additional_options = ['Update Automatically','Smart Input'] + +# Reads the contents of info_box.txt. +# They can later be displayed by pressing the corresponding button. 
+f = open('info_box.txt', 'r') +boxcontent = f.read() +f.close() + +app.layout = html.Div([ + # Layer 0: For the Header and Help Function(s) + html.Div([ + html.Button(id='show-info',children='Show Info',n_clicks=0), + html.Div(id='info-box') + ]), + # Layer 1: For all mandatory Inputs + html.Div([ + "Input: ", + # A simple box for inputting a string. + # Value is transmitted upon pressing return or clicking out of the box. + dcc.Input(id='input-string', value='', type='text',debounce=True), + # Forward recursion. Values between 1 and 10 can be entered. + dcc.Input(id='forward-depth',value='1',type='number',min='1',max='10'), + # Backward recursion. Values between 1 and 10 can be entered. + dcc.Input(id='backward-depth',value='1',type='number',min='1',max='10'), + # Upload box. Can be used via drag-and-drop or byclicking on it to open a file viewer. + dcc.Upload( + id="upload-data", + children=html.Div( + ["Drag and drop or click to select a file to upload."]), + style={ + "width": "30%", + "height": "60px", + "lineHeight": "60px", + "borderWidth": "1px", + "borderStyle": "dashed", + "borderRadius": "5px", + "textAlign": "center", + "margin": "10px", + }) + ]), + # Layer 2: For the checklist, Remove-/Start-Buttons and input-error-message + html.Div([ + # All input DOIs are collected in this checklist. + # It is initialized to avoid error messages. + dcc.Checklist(id='input-checklist',options=[], + labelStyle = dict(display='block'),value=[]), + # Displays error message if 'Smart Input' is active. + html.Div(id='input-err',style={'color':'red'}), + # Clears the entire list. + html.Button(id='clear-all-button',children='Clear All'), + # Clear all selected elements. + html.Button(id='clear-selected-button',children='Clear Selected'), + # Starts the process that generates a graph. + html.Button(id='start-button',children='Generate Graph') + ]), + # Layer 3: For additional Options (e.g. 
Topological Sort) + html.Div([ + html.H4('Additional Options'), + # A checklist of all additional options that are listed above. + dcc.Checklist(id='additional-options', + options=[{'label':k,'value':k} for k in additional_options], + value=[]) + ]), + # Layer 4: For the Graph + html.Div([ + html.Div(id='test-output') + ]) +]) + +@app.callback( + Output('input-checklist','options'), + Output('input-checklist','value'), + Output('input-string','value'), + Output('input-err','children'), + Input('input-string','value'), + Input('clear-all-button','n_clicks'), + Input('clear-selected-button','n_clicks'), + Input('upload-data','contents'), + State('input-checklist','options'), + State('input-checklist','value'), + State('additional-options','value') +) +def update_input_checklist(input_value,btn1,btn2,filecontents,all_inputs, + selected_inputs,additional_options): + ''' + Most important callback function. Updates the checklist that holds all inputs. + State of the checklist as input is needed so that previews entries are readded. + input-string is required as Output to clear the input box after each input. + Different actions are performed depending on which input triggered the callback. + The value-attribute of input-checklist must be updates so that the values + of deleted elements no longer appear in the list of selected elements. 
+ + :param input_value: given by dcc.Input + :type input_value: string + :param btn1: signals pressing of clear-all-button + :type btn1: int + :param btn2: signals pressing of clear-selected-button + :type btn2: int + :param filecontents: the contents of an uploaded file + :type filecontents: bit-string + :param all_inputs: all labels and values from the checklist, + regardless if they have been checked or not + :type all_inputs: list of dictionaries with 2 entries each + :param selected_inputs: values of all checked elements + :type selected_inputs: list of strings + :param addtitional_options: all checked additional options + :type additional_options: list of strings + ''' + # changed_id is used to determine which Input has triggered the callback + changed_id = [p['prop_id'] for p in callback_context.triggered][0] + + # if clear-all-button was pressed: + if 'clear-all-button' in changed_id: + return list(),list(),'','' + + # if clear-selected-button was pressed: + if 'clear-selected-button' in changed_id: + all_inputs = [i for i in all_inputs if i['value'] not in selected_inputs] + return all_inputs,list(),'','' + + # when a new element is added via dcc.Input + if 'input-string' in changed_id: + # Creates a list of previously added inputs to make sure nothing is added twice + currValues = [x['value'] for x in all_inputs] + if input_value not in currValues: + + # if 'Smart Input' is selected, the input will be checked for validity + # and a more readable string will be returned + if 'Smart Input' in additional_options: + try: + # Attempts to call get_publication. 
If unsuccesful, + # the DOI is not added and an error message is returned + i = InputInterface() + pub = i.get_pub_light(input_value) + except Exception as err: + return options,selected_inputs,'','{}'.format(err) + # Creates a more readable string to display in the checklist + rep_str = pub.contributors[0] + ',' + pub.journal + \ + ',' + pub.publication_date + all_inputs.append({'label':rep_str, 'value':input_value}) + + # if 'Smart Input' is not selected, the input value is added as is, + # without checking for validity. + else: + all_inputs.append({'label':input_value,'value':input_value}) + return all_inputs,selected_inputs,'','' + + # when a txt-file is uploaded + if 'upload-data.contents' in changed_id: + if filecontents: + # Skips the info portion that is added when a file is uploaded + found = base64.b64decode(re.search(',(.+?)$', filecontents).group(1)) + # Returns the binary string into a proper text + text = found.decode('utf-8') + # Creates a list of inputs by splitting the lines + list_of_inputs = (text.strip().split('\n')) + CurrValues = [x['value'] for x in all_inputs] + # For every line the same actions as for a single input are performed + for input_value in list_of_inputs: + if input_value not in CurrValues: + if 'Smart Input' in additional_options: + try: + i = InputInterface() + pub = i.get_pub_light(input_value) + except Exception as err: + return all_inputs,selected_inputs,'','{}'.format(err) + rep_str = pub.contributors[0] + ',' + pub.journal + \ + ',' + pub.publication_date + all_inputs.append({'label':rep_str, 'value':input_value}) + else: + all_inputs.append({'label':input_value,'value':input_value}) + return all_inputs,selected_inputs,'','' + # when the programm is first started: + # if this is not done, the input_checklist will be generated + # with one element that contains an empty string + if input_value == '': + return list(),list(),'','' + +@app.callback( + Output('info-box','children'), + Input('show-info','n_clicks') +) +def 
show_hide_info_box(n_clicks): + ''' + This callback shows and hides the (first) info-box by, checking how often + the button has been pressed. The text was loaded at the top. + :param n_clicks: number of times show-info has been clicked. + 'type n_clicks: int + ''' + if n_clicks % 2 == 0: + return '' + else: + return html.Div(boxcontent, style={'whiteSpace': 'pre-line'}) + +@app.callback( + Output('test-output','children'), + Input('start-button','n_clicks'), + Input('input-checklist','options'), + Input('input-checklist','value'), + Input('forward-depth','value'), + Input('backward-depth','value'), + State('additional-options','value') +) +def generate_output(n_clicks,all_inputs,selected_inputs, + forward_depth,backward_depth,additional_options): + ''' + Basic structure for a callback that generates an output. This is only a + proof of concept and has noting to do with the intended output yet. + + :param n_clicks: how often has Generate Graph been clicked + :type n_clicks: int + :param all_inputs: all labels and values from the checklist, + regardless if they have been checked or not + :type all_inputs: list of dictionaries with 2 entries each + :param selected_inputs: values of all checked elements + :type selected_inputs: list of strings + :param forward_depth: forward recursion depth + :type forward_depth: unsigned int + :param backward_depth: backward recursion depth + :type backward_depth: unsigned int + :param additional_options: value of all selected additional options + :type additional_options: list of strings + ''' + changed_id = [p['prop_id'] for p in callback_context.triggered][0] + if n_clicks is None: + raise PreventUpdate + elif 'Update Automatically' in additional_options \ + or 'start-button' in changed_id: + s = '' + for i in range(len(all_inputs)): + x = all_inputs[i]['value'] + if x in selected_inputs: + s += x*(abs(int(forward_depth)-int(backward_depth))) + else: + s += x*(int(forward_depth)+int(backward_depth)) + return s + else: + raise 
PreventUpdate + +if __name__ == '__main__': + app.run_server(debug=True) diff --git a/info_box.txt b/info_box.txt new file mode 100644 index 0000000000000000000000000000000000000000..3cb826b85b7336e0083f35a235a197e88f77ee5d --- /dev/null +++ b/info_box.txt @@ -0,0 +1,43 @@ +English + +Show Info: The info box can be shown and hidden by clicking on the button. + +Input: Enter a DOI ("Digital Object Identifier"). + +Drag and drop or click to select a file to upload: Multiple DOIs can be entered via a txt file, provided that every DOI is on its own line. + +Recursion: The two unlabeled fields to the right of Input set the recursion depths in both directions. + +Clear All: clears all inputs + +Clear Selected: clears all selected inputs + +Generate Graph: generates the graph + +Update Automatically: automatically updates the graph for every new input + +Smart Input: checks the correctness of the entered DOI and shows a more readable label: author, journal, publication date. + + + +German + +Show Info: Durch wiederholtes Klicken kann das Fenster ein- und ausgeblendet werden. + +Input: Die Eingabe erfolgt in Form eines DOI ("Digital Object Identifier") + +Drag and drop or click to select a file to upload: Mehrere DOI in einem txt-Dokument müssen untereinander angeordnet sein. + +Recursion: Die beiden unbeschrifteten Felder rechts neben Input legen die Rekursionstiefen in beide Richtungen fest. + +Clear All: alle Eingaben werden gelöscht + +Clear Selected: alle markierten Eingaben werden gelöscht + +Generate Graph: generiert den zugehörigen Graphen + +Update Automatically: automatische Aktualisierung des Graphen nach neuer Eingabe + +Smart Input: direkte Überprüfung der Eingabe auf Richtigkeit; zudem wird nicht mehr der DOI angezeigt, sondern: der Autor, das Journal, das Veröffentlichungsdatum. 
+ + diff --git a/input/get/__pycache__/__init__.cpython-38.pyc b/input/get/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a1e24ad908499dfeb45afebf60601d0704dbbbcb Binary files /dev/null and b/input/get/__pycache__/__init__.cpython-38.pyc differ diff --git a/input/get/__pycache__/acs.cpython-38.pyc b/input/get/__pycache__/acs.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3585b798ebbeab22676f00cc409ef48cd6b6019 Binary files /dev/null and b/input/get/__pycache__/acs.cpython-38.pyc differ diff --git a/input/get/__pycache__/journal_fetcher.cpython-38.pyc b/input/get/__pycache__/journal_fetcher.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3be8ddceae79b7f027f194a2c4cec6c1fe9575a5 Binary files /dev/null and b/input/get/__pycache__/journal_fetcher.cpython-38.pyc differ diff --git a/input/get/__pycache__/nature.cpython-38.pyc b/input/get/__pycache__/nature.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6008e587da3dab81c3c79bb42345b63f0226ebbd Binary files /dev/null and b/input/get/__pycache__/nature.cpython-38.pyc differ diff --git a/ui_programm_fragmente/README.md b/ui_programm_fragmente/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d3a7e438ee56b341db5d7a1eabcb996e154f103f --- /dev/null +++ b/ui_programm_fragmente/README.md @@ -0,0 +1,38 @@ +# Projekt CiS-Biochemie 2021-22 UI + +# Benötigt: +- Dash +- Pandas +- beautifulsoup4 +- requests + +# Starten des Programms: + +Ausführen von citation_parser_ui.py und Einfügen des entstandenen Links in einen Browser. +Danach müsste sich die Benutzeroberfläche im Browser öffnen. + + +# Übersicht der Benutzeroberfläche: + +- Show Info: Durch wiederholtes Klicken kann das Fenster ein- und ausgeblendet werden. 
+ +- Input: Die Eingabe erfolgt in Form eines DOI ("Digital Object Identifier") + +- Drag and drop or click to select a file to upload: Mehrere DOI in einem txt-Dokument (genau ein DOI pro Zeile). + +- Recursion: die beiden noch unbeschrifteten Felder rechts neben Input sind für die Rekursionstiefen in beide Richtungen + +- Clear All: alle Eingaben werden gelöscht + +- Clear Selected: alle markierten Eingaben werden gelöscht + +- Generate Graph: generiert den zugehörigen Graphen (generiert momentan nur einen string) + +- Update Automatically: automatische Aktualisierung des Graphen bei jeder neuen Eingabe + +- Smart Input: direkte Überprüfung der Eingabe auf Richtigkeit zudem wird nicht mehr der DOI angezeigt sondern: + Der Autor, Das Journal, Das Veröffentlichungsdatum. (muss vor Hinzufügen aktiviert worden sein) + +## Autoren +- Isabelle Siebels +- Sebastian David diff --git a/ui_programm_fragmente/input_to_checklist.py b/ui_programm_fragmente/input_to_checklist.py new file mode 100644 index 0000000000000000000000000000000000000000..3c00ed4c23c7acf914c02af576fec41d8ba2efc7 --- /dev/null +++ b/ui_programm_fragmente/input_to_checklist.py @@ -0,0 +1,160 @@ +import dash +from dash import dcc +from dash import html +from dash import callback_context +from dash.dependencies import Input, Output, State +from dash.exceptions import PreventUpdate +from input.interface import InputInterface +import input.publication + +app = dash.Dash(__name__) + +additional_options = ['Update Automatically'] + +app.layout = html.Div([ + # Layer 0: For the Header and Help Function(s) + html.Div([ + html.Button(id='show-info',children='Show Info',n_clicks=0), + html.Div(id='info-box') + ]), + # Layer 1: For all mandatory Inputs + html.Div([ + "Input: ", + dcc.Input(id='input-string', value='', type='text',debounce=True), + dcc.Input(id='forward-depth',value='1',type='number',min='1',max='10'), + dcc.Input(id='backward-depth',value='1',type='number',min='1',max='10') + ]), + # Layer 2: For 
the checklist, Remove-/Start-Buttons and input-error-message + html.Div([ + dcc.Checklist(id='input-checklist',options=[],labelStyle = dict(display='block'),value=[]), + html.Div(id='input-err',style={'color':'red'}), + html.Button(id='clear-all-button',children='Clear All'), + html.Button(id='clear-selected-button',children='Clear Selected'), + html.Button(id='start-button',children='Generate Graph') + ]), + # Layer 3: For additional Options (e.g. Topological Sort) + html.Div([ + html.H4('Additional Options'), + dcc.Checklist(id='additional-options', + options=[{'label':k,'value':k} for k in additional_options], + value=[]) + ]), + # Layer 4: For the Graph + html.Div([ + html.Div(id='test-output') + ]) +]) + +''' +Most important callback function. Updates the checklist that holds all inputs. +input-string is required as Output to clear the input box after each input +''' +@app.callback( + Output('input-checklist','options'), + Output('input-checklist','value'), + Output('input-string','value'), + Output('input-err','children'), + Input('input-string','value'), + Input('clear-all-button','n_clicks'), + Input('clear-selected-button','n_clicks'), + State('input-checklist','options'), + State('input-checklist','value') +) +def update_input_checklist(input_value,btn1,btn2,all_inputs,selected_inputs): + ''' + :param input_value: given by dcc.Input + :type input_value: string + :param btn1: signals pressing of clear-all-button + :param btn2: signals pressing of clear-selected-button + :param all_inputs: all labels and values from the checklist, + regardless if they have been checked or not + :type all_inputs: list of dictionaries with 2 entries each + :param selected_inputs: values of all checked elements + :type selected_inputs: list of strings + ''' + changed_id = [p['prop_id'] for p in callback_context.triggered][0] + # if clear-all-button was pressed: + if 'clear-all-button' in changed_id: + return list(),list(),'','' + # if clear-selected-button was pressed: + if 
'clear-selected-button' in changed_id: + all_inputs = [i for i in all_inputs if i['value'] not in selected_inputs] + return all_inputs,list(),'','' + # when the programm is first started: + if input_value == '': + app.layout['input-checklist'].options.clear() + return list(),list(),'','' + # when a new element is added via dcc.Input + if 'input-string' in changed_id: + options = all_inputs + currValues = [x['value'] for x in options] + if input_value not in currValues: + try: + i = InputInterface() + pub = i.get_pub_light(input_value) + except Exception as err: + return options,selected_inputs,'','{}'.format(err) + rep_str = pub.contributors[0] + ',' + pub.journal + ',' + pub.publication_date + options.append({'label':rep_str, 'value':input_value}) + return options,selected_inputs,'','' + +''' +This callback shows and hides the (first) help-box +''' +@app.callback( + Output('info-box','children'), + Input('show-info','n_clicks') +) +def show_hide_info_box(n_clicks): + if n_clicks % 2 == 0: + return '' + else: + return 'Hier koennte Ihre Werbung stehen' + +''' +Basic structure for a callback that generates an output +''' +@app.callback( + Output('test-output','children'), + Input('start-button','n_clicks'), + Input('input-checklist','options'), + Input('input-checklist','value'), + Input('forward-depth','value'), + Input('backward-depth','value'), + State('additional-options','value') +) +def generate_output(n_clicks,all_inputs,selected_inputs, + forward_depth,backward_depth,additional_options): + ''' + :param n_clicks: how often has Generate Graph been clicked + :type n_clicks: int + :param all_inputs: all labels and values from the checklist, + regardless if they have been checked or not + :type all_inputs: list of dictionaries with 2 entries each + :param selected_inputs: values of all checked elements + :type selected_inputs: list of strings + :param forward_depth: forward recursion depth + :type forward_depth: unsigned int + :param backward_depth: backward 
recursion depth + :type backward_depth: unsigned int + :param additional_options: value of all selected additional options + :type additional_options: list of strings + ''' + changed_id = [p['prop_id'] for p in callback_context.triggered][0] + if n_clicks is None: + raise PreventUpdate + elif 'Update Automatically' in additional_options \ + or 'start-button' in changed_id: + s = '' + for i in range(len(all_inputs)): + x = all_inputs[i]['value'] + if x in selected_inputs: + s += x*(abs(int(forward_depth)-int(backward_depth))) + else: + s += x*(int(forward_depth)+int(backward_depth)) + return s + else: + raise PreventUpdate + +if __name__ == '__main__': + app.run_server(debug=True) diff --git a/ui_programm_fragmente/upload_to_checklist.py b/ui_programm_fragmente/upload_to_checklist.py new file mode 100644 index 0000000000000000000000000000000000000000..9a094f213901a808ad924f4b1ffa87fb87f2f75d --- /dev/null +++ b/ui_programm_fragmente/upload_to_checklist.py @@ -0,0 +1,78 @@ +import dash +from dash import dcc +from dash import html +from dash.dependencies import Input, Output, State +import base64 +import re + +app = dash.Dash(__name__) + +list_of_inputs = dict() + +app.layout = html.Div([ + html.H4("Add all lines in a file to a list"), + html.Div([ + dcc.Upload( + id="upload-data", + children=html.Div( + ["Drag and drop or click to select a file to upload."] + ), + + style={ + "width": "30%", + "height": "60px", + "lineHeight": "60px", + "borderWidth": "1px", + "borderStyle": "dashed", + "borderRadius": "5px", + "textAlign": "center", + "margin": "10px", + }), + + ]), + dcc.Checklist(id='input-checklist',options=list(),labelStyle = dict(display='block'),value=[]), + +]) + +@app.callback( + Output('input-checklist','options'), + Input('upload-data','filename'), + Input('upload-data','contents'), + State('input-checklist','options') +) +def update_input_list(uploaded_filenames,uploaded_file_contents,all_inputs): + if uploaded_file_contents is not None: + + + string = 
uploaded_file_contents + + #cutting the first part of the String away to decode + found = base64.b64decode(re.search(',(.+?)$', string).group(1)) + print(found.decode('utf-8')) + + uploaded_file_contents = found.decode('utf-8') + + + list_of_inputs = (uploaded_file_contents.split()) + #das hier sollte es untereinander anzeigen, bekomme ich allerdings nicht auf die Seite... + #return (*list_of_inputs, sep="\n") + + options = all_inputs + if not options: + options = list() + CurrValues = [x['value'] for x in options] + + + # würde auch funktionieren + # return (found.decode('utf-8')) + for i in list_of_inputs: + if i not in CurrValues: + options.append({'label':i, 'value':i}) + + + return options + +if __name__ == '__main__': + app.run_server(debug=True) + + diff --git a/verarbeitung/.gitignore b/verarbeitung/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..b604f4fdf854363614dd5eb73cb2ab92a941ac64 --- /dev/null +++ b/verarbeitung/.gitignore @@ -0,0 +1,61 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +#CodeCounter + +.VSCodeCounter/ \ No newline at end of file diff --git a/verarbeitung/Processing.py b/verarbeitung/Processing.py deleted file mode 100644 index 0dcc7391bd5a633a86841f6097f486017ae94dfa..0000000000000000000000000000000000000000 --- a/verarbeitung/Processing.py +++ /dev/null @@ -1,247 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Functions to generate a graph representing citations between multiple ACS/Nature journals - -""" - -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" -__email__ = "cis-project2021@zbh.uni-hamburg.de" -__status__ = "Production" -#__copyright__ = "" -#__credits__ = ["", "", "", ""] -#__license__ = "" -#__version__ = "" -#__maintainer__ = "" - -from bs4 import BeautifulSoup as bs -import requests as req -import sys -from pathlib import Path -from input_fj import input -from input_test import input_test_func -from json_demo import output_to_json - -# adds every publication from input list to graph structure -# doi_input_list: list of publication dois from user -def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var): - references_pub_obj_list = [] - citations_pub_obj_list = [] - - for pub_doi in doi_input_list: - - #checks if its a test and chooses input function accordingly - if(test_var): - pub = input_test_func(pub_doi) - else: - pub = input(pub_doi) - - # checks if publication already exists in nodes - not_in_nodes = True - for node in nodes: # checks if a pub is already in nodes - if (pub.doi_url == node.doi_url): - not_in_nodes = False - break - if (not_in_nodes): - nodes.append(pub) - pub.group = "input" - else: - 
doi_input_list.remove(pub_doi) - - # inserts references as publication objects into list and - # inserts first depth references into nodes/edges if maximum search depth > 0 - for reference in create_graph_structure_references(pub, 0, search_depth_max, test_var): - references_pub_obj_list.append(reference) - - # inserts citations as publication objects into list and - # inserts first height citations into nodes if maximum search height > 0 - for citation in create_graph_structure_citations(pub, 0, search_height_max, test_var): - citations_pub_obj_list.append(citation) - - return(references_pub_obj_list, citations_pub_obj_list) - - -# adds edges between citation and reference group -def complete_inner_edges(test_var): - for node in nodes: - if (node.group == "depth"): - for citation in node.citations: - for cit in nodes: - if (citation.doi_url == cit.doi_url and [citation.doi_url, node.doi_url] not in edges): - edges.append([citation.doi_url, node.doi_url]) - if (node.group == "height"): - for reference in node.references: - for ref in nodes: - if (reference.doi_url == ref.doi_url and [node.doi_url, reference.doi_url] not in edges): - edges.append([node.doi_url,reference.doi_url]) - - - -# adds a node for every publication unknown -# adds edges for references between publications -def create_graph_structure_references(pub, search_depth, search_depth_max, test_var): - references_pub_obj_list = [] - for reference in pub.references: - not_in_nodes = True - for node in nodes: - # checks every reference for duplication - if (reference.doi_url == node.doi_url): - not_in_nodes = False - break - if (not_in_nodes): - if (search_depth < search_depth_max): - - #checks if its a test and chooses input function accordingly - if (test_var): - reference_pub_obj = input_test_func(reference.doi_url) - else: - reference_pub_obj = input(reference.doi_url) - - reference_pub_obj.group = "depth" - nodes.append(reference_pub_obj) - edges.append([pub.doi_url,reference_pub_obj.doi_url]) - 
references_pub_obj_list.append(reference_pub_obj) - - # adds edge only if citation already exists - elif [pub.doi_url,reference.doi_url] not in edges: - edges.append([pub.doi_url,reference.doi_url]) - return references_pub_obj_list - - -# recursive function to implement height-first-search on references -# references_pub_obj_list: input list of references as publication objects -# search_depth: current search_depth of height-first-search -# search_depth_max: maximal search_depth for dfs -def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var): - # adds next level to nodes/edges - for pub in references_pub_obj_list: - new_reference_pub_obj_list = create_graph_structure_references(pub, search_depth, search_depth_max, test_var) - - # If the maximum height has not yet been reached, calls function recursivly with increased height - if (search_depth < search_depth_max): - process_references_rec(new_reference_pub_obj_list, search_depth+1, search_depth_max, test_var) - - - - -# adds a node for every publication unknown -# adds edges for citations between publications -def create_graph_structure_citations(pub, search_height, search_height_max, test_var): - citations_pub_obj_list = [] - for citation in pub.citations: - not_in_nodes = True - for node in nodes: - # checks every citation for duplication - if (citation.doi_url == node.doi_url): - not_in_nodes = False - break - if (not_in_nodes): - if (search_height < search_height_max): - - #checks if its a test and chooses input function accordingly - if (test_var): - citation_pub_obj = input_test_func(citation.doi_url) - else: - citation_pub_obj = input(citation.doi_url) - - citation_pub_obj.group = "height" - nodes.append(citation_pub_obj) - edges.append([citation_pub_obj.doi_url,pub.doi_url]) - citations_pub_obj_list.append(citation_pub_obj) - - # adds only edge if citation already exists - elif [citation.doi_url,pub.doi_url] not in edges: - edges.append([citation.doi_url,pub.doi_url]) - 
return citations_pub_obj_list - - - -# recursive function to implement height-first-search on citations -# citations_pub_obj_list: input list of citations as publication objects -# search_height: current search_height of height-first-search -# search_height_max: maximal search_height for dfs -def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var): - # adds next level to nodes/edges - for pub in citations_pub_obj_list: - new_citation_pub_obj_list = create_graph_structure_citations(pub, search_height, search_height_max, test_var) - - # If the maximum height has not yet been reached, calls function recursivly with increased height - if (search_height < search_height_max): - process_citations_rec(new_citation_pub_obj_list, search_height+1, search_height_max, test_var) - - - - -# main function to call. Needs as input: -# doi_input_list: input list of dois -# search_height: max search height to process to -# search_depth: max search depth to process to -# test_var: only needed for unit test as True, default is False -def process_main(doi_input_list, search_height, search_depth, test_var = False): - # ERROR-Handling doi_array = NULL - if (len(doi_input_list) == 0): - print("Error, no input data") - - # ERROR- if a negative number is entered for height - if (search_height < 0): - print("Error, search_height of search must be positive") - - # ERROR- if a negative number is entered for depth - if (search_depth < 0): - print("Error, search_depth of search must be positive") - - # create empty array for the nodes - # create empty array for the edges - global nodes, edges - nodes = [] - edges = [] - - # initializes nodes/edges from input and gets a list with publication objects for citations and references returned - references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var) - - # function calls to begin recursive processing up to max depth/height - 
process_citations_rec(citations_obj_list, 1, search_height, test_var) - process_references_rec(references_obj_list, 1, search_depth, test_var) - - # adds edges between reference group and citation group of known publications - complete_inner_edges(test_var) - - # calls a skript to save nodes and edges of graph in .json file - output_to_json(nodes,edges) - - # only for unit tests - if (test_var == True): - doi_nodes_list = [] - for node in nodes: - doi_nodes_list.append(node.doi_url) - return(doi_nodes_list, edges) - - - - -# a function to print nodes and edges from a graph -def print_graph(nodes, edges): - print("Knoten:\n") - for node in nodes: - print(node.title, "\n") - print("\nKanten:\n") - for edge in edges: - print(edge,"\n") - - -# program test, because there is no connection to UI yet. -def try_known_publications(): - doi_list = [] - doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') - #arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') - doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332') - #arr.append('https://doi.org/10.1021/acs.jcim.0c00741') - - #arr.append('https://doi.org/10.1021/ci700007b') - #arr.append('https://doi.org/10.1021/acs.jcim.5b00292') - #url = sys.argv[1] - #arr.append[url] - - - nodes,edges = process_main(doi_list,2,2) - - print_graph(nodes, edges) \ No newline at end of file diff --git a/verarbeitung/Processing_unittest.py b/verarbeitung/Processing_unittest.py deleted file mode 100644 index 772d57204ce3374211d1d1fd3d08d279f085aac3..0000000000000000000000000000000000000000 --- a/verarbeitung/Processing_unittest.py +++ /dev/null @@ -1,66 +0,0 @@ -import unittest -from Processing import process_main - -class ProcessingTest(unittest.TestCase): - def testCycle(self): - nodes, edges = process_main(['doiz1'],1,1,True) - self.assertCountEqual(nodes, ['doiz1', 'doiz2']) - self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) - - nodes, edges = process_main(['doiz1'],2,2,True) - 
self.assertCountEqual(nodes, ['doiz1', 'doiz2']) - self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']]) - - #def testBigCycle(self): - - #def testEmptyHeight(self): - - #def testEmptyDepth(self): - - def testEmptyDepthHeight(self): - nodes, edges = process_main(['doi1'],0,0,True) - self.assertCountEqual(nodes,['doi1']) - self.assertCountEqual(edges, []) - - nodes, edges = process_main(['doi1', 'doi2'],0,0,True) - self.assertCountEqual(nodes, ['doi1','doi2']) - self.assertCountEqual(edges, [['doi1', 'doi2']]) - - nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True) - self.assertCountEqual(nodes, ['doi1','doi2', 'doi3']) - self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) - - - def testInnerEdges(self): - nodes, edges = process_main(['doi_ie1'],1,1,True) - self.assertCountEqual(nodes,['doi_ie1','doi_ie2','doi_ie3']) - self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']]) - - def testRightHeight(self): - nodes, edges = process_main(['doi_h01'],1,0,True) - self.assertCountEqual(nodes,['doi_h01']) - self.assertCountEqual(edges, []) - - nodes, edges = process_main(['doi_h02'],1,0,True) - self.assertCountEqual(nodes,['doi_h02','doi_h1']) - self.assertCountEqual(edges, [['doi_h1','doi_h02']]) - - nodes, edges = process_main(['doi_h02'],2,0,True) - self.assertCountEqual(nodes,['doi_h02','doi_h1','doi_h2']) - self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']]) - - def testRightDepth(self): - nodes, edges = process_main(['doi_d01'],0,1,True) - self.assertCountEqual(nodes,['doi_d01']) - self.assertCountEqual(edges, []) - - nodes, edges = process_main(['doi_d02'],0,1,True) - self.assertCountEqual(nodes,['doi_d02','doi_d1']) - self.assertCountEqual(edges, [['doi_d02','doi_d1']]) - - nodes, edges = process_main(['doi_d02'],0,2,True) - self.assertCountEqual(nodes,['doi_d02','doi_d1','doi_d2']) - self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']]) - 
-if __name__ == "__main__": - unittest.main() \ No newline at end of file diff --git a/verarbeitung/README.md b/verarbeitung/README.md new file mode 100644 index 0000000000000000000000000000000000000000..10640cbd5243b9db6c9c84652690db567082c215 --- /dev/null +++ b/verarbeitung/README.md @@ -0,0 +1,61 @@ +# Projekt CiS-Projekt 2021/22 + +Processing-Package to generate theoretical graph for citations and references of given input publications. + +## Usage/Examples + +```python +from verarbeitung.process_main import Processing + + +def main(url_list): + Processing(url_list) +``` + +Grundlegender Prozess: +Es wird von der UI eine Liste an DOIs an die Verarbeitung übergeben und +diese wird dann umgewandelt in eine Knoten-und Kantenmenge, welche die Zitierungen darstellen. +Die Informationen über die Paper und die Zitierungen kommen von der Input Gruppe über den Aufruf +von der Funktion Publication. Die Knoten- und Kantenmengen werden in Form einer +Json Datei an den Output übergeben. + +## Files and functions in directory + + +get_pub_from_input.py: + +```python +def get_pub(pub_doi, test_var) +``` +- Gibt für eine DOI ein Klassenobjekt zurück, in dem alle nötigen Informationen gespeichert sind. + + +process_main.py: + +```python +def Processing(url_list) +``` +- Überprüft, ob bereits eine Json Datei existiert und ruft dann entweder die Funktion auf, um + einen neuen Graphen zu erstellen oder die Funktion um einen Vorhandenen zu updaten. + + +start.script.py: + + - Wird benötigt, um die Dateien ordnerübergreifend aufzurufen. Nur fürs interne Testen der + Funktionalität + + +<name>.json: + +- sind momentan Beispiele, die an den Output übergeben werden könnten. 
+ +## Testing + +python -m unittest discover verarbeitung/test -v + +## Authors +- Donna Löding +- Alina Molkentin +- Xinyi Tang +- Judith Große +- Malte Schokolowski \ No newline at end of file diff --git a/verarbeitung/__init__.py b/verarbeitung/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/verarbeitung/__pycache__/Processing.cpython-36.pyc b/verarbeitung/__pycache__/Processing.cpython-36.pyc deleted file mode 100644 index eb6d8a0418a1340b746f2f664997515622356d8a..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/Processing.cpython-36.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/Processing.cpython-38.pyc b/verarbeitung/__pycache__/Processing.cpython-38.pyc deleted file mode 100644 index 63ac529316c848e829cd83ef44ec749e5903bf9e..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/Processing.cpython-38.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/Processing.cpython-39.pyc b/verarbeitung/__pycache__/Processing.cpython-39.pyc deleted file mode 100644 index 54c63251bbf3affbdd176d3d55f4956c2fc08406..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/Processing.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/Processing_pub_objs_only.cpython-39.pyc b/verarbeitung/__pycache__/Processing_pub_objs_only.cpython-39.pyc deleted file mode 100644 index 9ce1023e6ea54e1b04b37ad5a1fd08115d5f52a4..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/Processing_pub_objs_only.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_fj.cpython-36.pyc b/verarbeitung/__pycache__/input_fj.cpython-36.pyc deleted file mode 100644 index 04312c91f0a7675651e99a2a6c10a2c9da146758..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_fj.cpython-36.pyc and /dev/null differ 
diff --git a/verarbeitung/__pycache__/input_fj.cpython-38.pyc b/verarbeitung/__pycache__/input_fj.cpython-38.pyc deleted file mode 100644 index 515ab99c01a5ce78bb5bb6de554a4dae3ffe4b4b..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_fj.cpython-38.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_fj.cpython-39.pyc b/verarbeitung/__pycache__/input_fj.cpython-39.pyc deleted file mode 100644 index 175f9ebbfdf5f3313196b4f10aa01dc2e8e20509..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_fj.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_test.cpython-36.pyc b/verarbeitung/__pycache__/input_test.cpython-36.pyc deleted file mode 100644 index 85878d6d127d9d2bd5efe9130672d982bb70c5fa..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_test.cpython-36.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_test.cpython-38.pyc b/verarbeitung/__pycache__/input_test.cpython-38.pyc deleted file mode 100644 index df395212453392e135532b12396cd4c30a92ea05..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_test.cpython-38.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_test.cpython-39.pyc b/verarbeitung/__pycache__/input_test.cpython-39.pyc deleted file mode 100644 index 68e42fd6a47a02787524c68816a42574834931d2..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_test.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-36.pyc b/verarbeitung/__pycache__/json_demo.cpython-36.pyc deleted file mode 100644 index 04acef5f40630ee2c7b6e887e33dc740b5e16a74..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/json_demo.cpython-36.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-38.pyc 
b/verarbeitung/__pycache__/json_demo.cpython-38.pyc deleted file mode 100644 index 4a1e7ba987775a20fddaa4a8f846bb238670d6a1..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/json_demo.cpython-38.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-39.pyc b/verarbeitung/__pycache__/json_demo.cpython-39.pyc deleted file mode 100644 index 4e31ce337645d5282ddab11668bc6d745735f9f8..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/json_demo.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/unittest.cpython-36.pyc b/verarbeitung/__pycache__/unittest.cpython-36.pyc deleted file mode 100644 index 245eb7f9be9221daa930d9fa83c77368ba463af7..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/unittest.cpython-36.pyc and /dev/null differ diff --git a/verarbeitung/construct_new_graph/README.md b/verarbeitung/construct_new_graph/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d73f4ebe71ea67c80f38c8d0e923d6bd925b43f5 --- /dev/null +++ b/verarbeitung/construct_new_graph/README.md @@ -0,0 +1,29 @@ +# Projekt CiS-Projekt 2021/22 + +Directory for functions to create the fundamental graph structure at first time call of programm. + +## Files in directory + +initialize_graph.py + +- Führt den grundlegendem Graphbauprozess aus. Die Input-DOIs werden + als Klassenobjekt zur Knotenmenge hinzugefügt und über einen rekursiven Aufruf + wird die angegene Zitierungstiefe in beide Richtungen zu den Kanten hinzugefügt. + + +add_citations_rec.py + +- Die DOIs, die in den Zitierungen des Inputs zu finden sind, werden ebenfalls zu Knoten + und je nach angegebener Höhe oder Tiefe wird dies für weitere Tiefen erneut ausgeführt. + + +export_to_json.py + +- Wandelt die berechnete Knoten- und Kantenmenge in eine Json Datei um. 
+ +## Authors +- Donna Löding +- Alina Molkentin +- Xinyi Tang +- Judith Große +- Malte Schokolowski \ No newline at end of file diff --git a/verarbeitung/construct_new_graph/__init__.py b/verarbeitung/construct_new_graph/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/verarbeitung/construct_new_graph/add_citations_rec.py b/verarbeitung/construct_new_graph/add_citations_rec.py new file mode 100644 index 0000000000000000000000000000000000000000..a00d1f6fc200dd30493075561833079fca9b65df --- /dev/null +++ b/verarbeitung/construct_new_graph/add_citations_rec.py @@ -0,0 +1,187 @@ +# -*- coding: utf-8 -*- +""" +Functions to add citations recursivly for multiple ACS/Nature journals + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + +import sys +from pathlib import Path +from os import error +sys.path.append("../") + +from input.publication import Publication +from verarbeitung.get_pub_from_input import get_pub + +def get_cit_type_list(pub, cit_type): + ''' + :param pub: Publication which citations will be added + :type pub: Publication + + :param cit_type: variable to differenciate citation and reference call + :type cit_type: String + + function to create nodes and edges and call create_graph_structure_citations + ''' + if (cit_type == "Citation"): + return(pub.citations) + else: + return(pub.references) + +def create_global_lists_cit(input_nodes, input_edges, pub, search_depth, search_depth_max, cit_type, test_var): + ''' + :param input_nodes: list of nodes from Processing + :type input_nodes: List[Publication] + + :param input_edges: list of edges from Processing + :type input_edges: List[String, String] + + :param pub: Publication which 
citations will be added + :type pub: Publication + + :param search_depth: current depth to search for citations + :type search_depth_max: int + + :param search_depth_max: maximum depth to search for citations + :type search_depth_max: int + + :param cit_type: variable to differenciate citation and reference call + :type cit_type: String + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + function to create nodes and edges and call create_graph_structure_citations + ''' + + global nodes, edges + nodes = input_nodes + edges = input_edges + + return create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var) + + +def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var): + ''' + :param pub: publication which citations will be added + :type pub: Publication + + :param search_depth: current depth to search for citations + :type search_depth_max: int + + :param search_depth_max: maximum depth to search for citations + :type search_depth_max: int + + :param cit_type: variable to differenciate citation and reference call + :type cit_type: String + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + adds a node for every citing publication unknown + adds edges to added citations + ''' + + citations_pub_obj_list = [] + for citation in get_cit_type_list(pub, cit_type): + not_in_nodes = True + for node in nodes: # checks every citation for duplication + if (citation.doi_url == node.doi_url): + not_in_nodes = False + break + if (not_in_nodes): + if (search_depth < search_depth_max): #checks if its a test and chooses input function accordingly + citation_pub_obj = get_pub(citation.doi_url, test_var) + if (type(citation_pub_obj) != Publication): + print(pub) + continue + + if (cit_type == "Citation"): + citation_pub_obj.group = search_depth + 1 + edges.append([citation_pub_obj.doi_url,pub.doi_url]) + else: + 
citation_pub_obj.group = -(search_depth + 1) + edges.append([pub.doi_url,citation_pub_obj.doi_url]) + nodes.append(citation_pub_obj) + citations_pub_obj_list.append(citation_pub_obj) + + # adds just the edge if citation already exists + else: + if (cit_type == "Citation"): + if ([citation.doi_url,pub.doi_url] not in edges): + edges.append([citation.doi_url,pub.doi_url]) + else: + if ([pub.doi_url,citation.doi_url] not in edges): + edges.append([pub.doi_url,citation.doi_url]) + return citations_pub_obj_list + + +def process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var): + ''' + :param citations_pub_obj_list: list of publications which citations will be added + :type citations_pub_obj_list: List[Publication] + + :param search_depth: current depth to search for citations + :type search_depth_max: int + + :param search_depth_max: maximum depth to search for citations + :type search_depth_max: int + + :param cit_type: variable to differenciate citation and reference call + :type cit_type: String + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + recursive function to implement depth-first-search on citations + ''' + + # adds next level to nodes/edges + for pub in citations_pub_obj_list: + new_citation_pub_obj_list = create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var) + + # If the maximum depth has not yet been reached, calls function recursivly with increased depth + if (search_depth < search_depth_max): + process_citations_rec(new_citation_pub_obj_list, search_depth+1, search_depth_max, cit_type, test_var) + + +def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var): + ''' + :param input_nodes: list of nodes from Processing + :type input_nodes: List[Publication] + + :param input_edges: list of edges from Processing + :type input_edges: List[String, String] + + :param 
citations_pub_obj_list: list of publications which citations will be added + :type citations_pub_obj_list: List[Publication] + + :param search_depth: current depth to search for citations + :type search_depth_max: int + + :param search_depth_max: maximum depth to search for citations + :type search_depth_max: int + + :param cit_type: variable to differenciate citation and reference call + :type cit_type: String + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + function to call recursive depth-first-search of citations + ''' + global nodes, edges + nodes = input_nodes + edges = input_edges + + process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var) + #return(nodes, edges) \ No newline at end of file diff --git a/verarbeitung/construct_new_graph/export_to_json.py b/verarbeitung/construct_new_graph/export_to_json.py new file mode 100644 index 0000000000000000000000000000000000000000..c69a61befcc402f9aee5b2184db56e0ad245618f --- /dev/null +++ b/verarbeitung/construct_new_graph/export_to_json.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +""" +Functions that format the computed graph to match the interface to the output-part and saves as a json file + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + +import json + + +def format_nodes(nodes): + ''' + :param nodes: list of publications to export to json + :type nodes: List[Publication] + + creates a list that contains a dictionary for each node + ''' + list_of_node_dicts = list() + for node in nodes: + new_dict = dict() + new_dict["doi"] = node.doi_url + new_dict["name"] = node.title + new_dict["author"] = node.contributors + new_dict["year"] = node.publication_date + new_dict["journal"] = 
node.journal + if (node.group == 0): + new_dict["group"] = "Input" + elif (node.group > 0): + new_dict["group"] = "Citedby" + else: + new_dict["group"] = "Reference" + new_dict["depth"] = node.group + new_dict["citations"] = len(node.citations) + list_of_node_dicts.append(new_dict) + return list_of_node_dicts + +# creates a list that contains a disctionary for each edge +# the dictionaries contain the source as keys and the target as values +def format_edges(edges): + ''' + :param edges: list of links to export to json + :type edges: List[String,String] + + function to format links, append to list and return list to output_to_json + ''' + list_of_edge_dicts = list() + for edge in edges: + new_dict_2 = dict() + new_dict_2["source"] = edge[0] + new_dict_2["target"] = edge[1] + list_of_edge_dicts.append(new_dict_2) + return list_of_edge_dicts + + +def output_to_json(nodes, edges, json_file = 'json_text.json', test_var = False): + ''' + :param nodes: list of publications to export to json + :type nodes: List[Publication] + + :param edges: list of links to export to json + :type edges: List[String,String] + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + function to export nodes and links as a dictionary to json file + ''' + dict_of_all = dict() + list_of_node_dicts = format_nodes(nodes) + list_of_edge_dicts = format_edges(edges) + dict_of_all["nodes"] = list_of_node_dicts + dict_of_all["links"] = list_of_edge_dicts + if (test_var and json_file == 'json_text.json'): + with open('test_output.json','w') as outfile: + json.dump(dict_of_all, outfile) + else: + with open(json_file,'w') as outfile: + json.dump(dict_of_all, outfile) diff --git a/verarbeitung/construct_new_graph/initialize_graph.py b/verarbeitung/construct_new_graph/initialize_graph.py new file mode 100644 index 0000000000000000000000000000000000000000..ba86e8bc979de42c388c5e8fe0e4ced9282500f0 --- /dev/null +++ 
b/verarbeitung/construct_new_graph/initialize_graph.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +""" +Functions to generate a graph representing citations between multiple ACS/Nature journals + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + +import sys +from pathlib import Path +from os import error +sys.path.append("../") + +from input.publication import Publication +from verarbeitung.get_pub_from_input import get_pub +from .export_to_json import output_to_json +from .add_citations_rec import add_citations, create_global_lists_cit + + +def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var): + ''' + :param doi_input_list: input list of doi from UI + :type doi_input_list: List[String] + + :param search_depth_max: maximum depth to search for references + :type search_depth_max: int + + :param search_height_max: maximum height to search for citations + :type search_height_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + adds input dois to nodes and retrieves citations and references for input publications + ''' + + # saves found citations and references in lists + references_pub_obj_list = [] + citations_pub_obj_list = [] + + for pub_doi in doi_input_list: #iterates over every incoming doi + pub = get_pub(pub_doi, test_var) + if (type(pub) != Publication): + print(pub) + continue + + # checks if publication already exists in nodes + not_in_nodes = True #boolean value to check if a node already exists in the set of nodes + for node in nodes: #iterates over every node in the set of nodes + if (pub.doi_url == node.doi_url): #determines that a node with this doi already is in the set + not_in_nodes = False #false --> node will not 
be created + break + if (not_in_nodes): #there is no node with this doi in the set + nodes.append(pub) #appends Publication Object + pub.group = 0 + else: + doi_input_list.remove(pub_doi) #deletes the doi-dublicate from input list + + # inserts references as publication objects into list and + # inserts first depth references into nodes/edges if maximum search depth > 0 + for reference in create_global_lists_cit(nodes, edges, pub, 0, search_depth_max, "Reference", test_var): + references_pub_obj_list.append(reference) + + # inserts citations as publication objects into list and + # inserts first height citations into nodes if maximum search height > 0 + for citation in create_global_lists_cit(nodes, edges, pub, 0, search_height_max, "Citation", test_var): + citations_pub_obj_list.append(citation) + + return(references_pub_obj_list, citations_pub_obj_list) + + + +def complete_inner_edges(): + ''' + completes inner edges between nodes of group height and depth + ''' + + for node in nodes: + if (node.group < 0): + for citation in node.citations: + for pub in nodes: + if ((pub.doi_url == citation.doi_url) and ([citation.doi_url, node.doi_url] not in edges)): + edges.append([citation.doi_url, node.doi_url]) + if (node.group > 0): + for reference in node.references: + for pub in nodes: + if ((pub.doi_url == reference.doi_url) and ([node.doi_url, reference.doi_url] not in edges)): + edges.append([node.doi_url,reference.doi_url]) + + +def init_graph_construction(doi_input_list, search_depth, search_height, test_var = False): + ''' + :param doi_input_list: input list of doi from UI + :type doi_input_list: List[String] + + :param search_height: maximum height to search for citations + :type search_height: int + + :param search_depth: maximum depth to search for references + :type search_depth: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + main function to start graph generation + ''' + + # ERROR-Handling doi_array 
= NULL + if (len(doi_input_list) == 0): + print("Error, no input data") + + # ERROR- if a negative number is entered for height + if (search_height < 0): + print("Error, search_height of search must be positive") + + # ERROR- if a negative number is entered for depth + if (search_depth < 0): + print("Error, search_depth of search must be positive") + + + # creates empty lists to save nodes and edges + global nodes, edges + nodes = [] + edges = [] + + # initializes nodes/edges from input and gets a list with publication objects for citations and references returned + references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var) + + # function calls to begin recursive processing up to max depth/height + add_citations(nodes, edges, citations_obj_list, 1, search_height, "Citation", test_var) + add_citations(nodes, edges, references_obj_list, 1, search_depth, "Reference", test_var) + + # adds edges between reference group and citation group of known publications + complete_inner_edges() + + # calls a skript to save nodes and edges of graph in .json file + #output_to_json(nodes, edges, test_var) + + return(nodes,edges) diff --git a/verarbeitung/dev_files/README.md b/verarbeitung/dev_files/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53ccd029db552570862ab3eef82313bcb43192f1 --- /dev/null +++ b/verarbeitung/dev_files/README.md @@ -0,0 +1 @@ +Dieser Ordner ist nur für uns intern, um Testläufe mir echten DOIs zu starten. 
\ No newline at end of file diff --git a/verarbeitung/dev_files/__init__.py b/verarbeitung/dev_files/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/verarbeitung/dev_files/print_graph_test.py b/verarbeitung/dev_files/print_graph_test.py new file mode 100644 index 0000000000000000000000000000000000000000..b45e90956fd3535c08fafa5196c3b2f351985d13 --- /dev/null +++ b/verarbeitung/dev_files/print_graph_test.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +""" +Functions to test and print the nodes and edges sets + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + +import sys + +#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') +sys.path.append("../../") +from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction +from verarbeitung.update_graph.import_from_json import input_from_json +from verarbeitung.update_graph.update_graph import update_graph + +# a function to print nodes and edges from a graph +def print_graph(nodes, edges): + print("Knoten:\n") + for node in nodes: + print(node.title, "\n") + print("\nKanten:\n") + for edge in edges: + print(edge,"\n") + print(len(nodes)) + print(len(edges)) + print(" ") + +def print_extended_graph(nodes, edges): + print("Knoten:\n") + for node in nodes: + print(node.title, "\n") + print(node.doi_url) + for reference in node.references: + print(reference.doi_url) + for citation in node.citations: + print(citation.doi_url) + print("\nKanten:\n") + for edge in edges: + print(edge,"\n") + print(len(nodes)) + print(len(edges)) + print(" ") + +def print_simple(nodes, edges): + # for node in nodes: + # print(node) + # for edge in edges: + # 
print(edge) + print(len(nodes)) + print(len(edges)) + print(" ") + +# program test with some random dois +def try_known_publications(): + doi_list = [] + doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') + #doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249') + doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') + #arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') + doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332') + #arr.append('https://doi.org/10.1021/acs.jcim.0c00741') + + #arr.append('https://doi.org/10.1021/ci700007b') + #doi_list.append('https://doi.org/10.1021/acs.jcim.5b00292') + + #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.0c00675') + #url = sys.argv[1] + #arr.append[url] + + + nodes, edges = init_graph_construction(doi_list,2,2) + + print_graph(nodes, edges) + + return(nodes, edges) + +def try_delete_nodes(): + doi_list = [] + doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') + #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') + nodes, edges = init_graph_construction(doi_list,1,1) + #print_simple(nodes, edges) + + # list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json') + # doi_list = [] + # doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') + # valid_nodes, valid_edges = update_graph(doi_list, list_of_nodes_py, list_of_edges_py) + # print_simple(valid_nodes, valid_edges) + +def try_import(): + nodes, edges = input_from_json('json_text.json') + print_extended_graph(nodes,edges) + +#nodes, edges = try_known_publications() +#nodes_new, edges_new = input_from_json("json_text.json") +#print_graph(nodes_new, edges_new) +try_delete_nodes() + +#try_import() \ No newline at end of file diff --git a/verarbeitung/get_pub_from_input.py b/verarbeitung/get_pub_from_input.py new file mode 100644 index 0000000000000000000000000000000000000000..2766ba836e2ae807c0950ac60389a4f620c39ba5 --- /dev/null +++ 
b/verarbeitung/get_pub_from_input.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- +""" +A function to return an object of Type Publication for a given doi + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + +import sys +from pathlib import Path +sys.path.append("../") + +from input.interface import InputInterface as Input +from verarbeitung.test.input_test import input_test_func + + +def get_pub(pub_doi, test_var): + ''' + :param pub_doi: input doi to get Publication object for + :type pub_doi: String + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + function to return an object of type Publication for given input doi depending on whether its a test or url doi + ''' + #checks if it's a test and chooses appropiate function + if(test_var): + pub = input_test_func(pub_doi) + + #checks that it isnt a test and chooses standart-input function + else: + inter = Input() + try: + pub = inter.get_publication(pub_doi) #creates an object of class Publication + except AttributeError: + pub = inter.get_publication(pub_doi) + except ValueError: + return(ValueError) + except IndexError: + return(IndexError) + return(pub) \ No newline at end of file diff --git a/verarbeitung/input_test.py b/verarbeitung/input_test.py deleted file mode 100644 index 44361c4b095f1c4fb0fce1868498d0e9da32f551..0000000000000000000000000000000000000000 --- a/verarbeitung/input_test.py +++ /dev/null @@ -1,82 +0,0 @@ -class Publication: - def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - if references is None: - 
self.references = [] - else: - self.references = ref(references) - if citations is None: - self.citations = [] - else: - self.citations = cit(citations) - self.group = group - - -class Citation: - def __init__(self,doi_url, title, contributors, journal, publication_date): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - -class Reference: - def __init__(self,doi_url, title, contributors, journal, publication_date): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - -def input_test_func(pub_doi): - for array in list_of_arrays: - if pub_doi == array[0]: - pub = Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7]) - return pub - - -def cit(list_doi): - cits = [] - for doi_url in list_doi: - for array in list_of_arrays: - if doi_url == array[0]: - cits.append(Citation(array[0], array[1], array[2], array[3], array[4])) - return cits - -def ref(list_doi): - refs = [] - for doi_url in list_doi: - for array in list_of_arrays: - if doi_url == array[0]: - refs.append(Citation(array[0], array[1], array[2], array[3], array[4])) - return refs - - -beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['doi2'], ['doi3'], ''] -beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', [], ['doi1'], ''] -beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['doi1'], [], ''] - -zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['doiz2'], ['doiz2'], ''] -zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['doiz1'], ['doiz1'], ''] - -inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['doi_ie2'], ['doi_ie3'], ''] -inner_edge2 = ['doi_ie2', 'title_ie2', 
['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', [], ['doi_ie1','doi_ie3'], ''] -inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['doi_ie1','doi_ie2'], [], ''] - -right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', [], [], ''] -right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', [], ['doi_h1'], ''] -right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', [], ['doi_h2'], ''] -right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', [], ['doi_h3'], ''] -right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', [], [], ''] - -right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', [], [], ''] -right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['doi_d1'], [], ''] -right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['doi_d2'], [], ''] -right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['doi_d3'], [], ''] -right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', [], [], ''] - -list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3] diff --git a/verarbeitung/json_demo.py b/verarbeitung/json_demo.py deleted file mode 100644 index b9f618d1a2dcac13ca51a530f365d40aa226bc11..0000000000000000000000000000000000000000 --- a/verarbeitung/json_demo.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 -import json -from input_fj import input - -""" -Functions that format the computed graph to match the interface to the output-part - -""" - -# creates a list that contains a dictionary 
for each node -# the dictionaries store the values for the attributes -def format_nodes(V): - list_of_node_dicts = list() - for node in V: - new_dict = dict() - new_dict["name"] = node.title - new_dict["author"] = node.contributors - new_dict["year"] = node.publication_date - new_dict["journal"] = node.journal - new_dict["doi"] = node.doi_url - new_dict["group"] = node.group - list_of_node_dicts.append(new_dict) - return list_of_node_dicts - -# creates a list that contains a disctionary for each edge -# the dictionaries contain the source as keys and the target as values -def format_edges(E): - list_of_edge_dicts = list() - for edge in E: - new_dict_2 = dict() - new_dict_2["source"] = edge[0] - new_dict_2["target"] = edge[1] - list_of_edge_dicts.append(new_dict_2) - return list_of_edge_dicts - -# combine the lists of nodes and edges to a dictionary and saves it to a json file -def output_to_json(V,E): - dict_of_all = dict() - list_of_node_dicts = format_nodes(V) - list_of_edge_dicts = format_edges(E) - dict_of_all["nodes"] = list_of_node_dicts - dict_of_all["links"] = list_of_edge_dicts - with open('json_text.json','w') as outfile: - json.dump(dict_of_all, outfile) - -#knoten = ["doi1", "doi2", "doi3"] -#kanten = [[1,2],[3,4],[5,6]] -#output_to_json(knoten,kanten) - diff --git a/verarbeitung/json_text.json b/verarbeitung/json_text.json new file mode 100644 index 0000000000000000000000000000000000000000..aeb1ae04b7931c7e81e6ea73efcc52fd41dc20b0 --- /dev/null +++ b/verarbeitung/json_text.json @@ -0,0 +1 @@ +{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. 
Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 5}, {"doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. Rev.", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00741", "name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 1, "citations": 8}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00250", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. 
Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 12}, {"doi": "https://doi.org/10.1021/acs.jcim.1c00203", "name": "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", "author": ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. Tillack", "Stefano Forli"], "year": "July 19, 2021", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 1}, {"doi": "https://doi.org/10.1021/acs.jctc.0c01006", "name": "Accelerating AutoDock4 with GPUs and Gradient-Based Local Search", "author": ["Diogo Santos-Martins", "Leonardo Solis-Vasquez", "Andreas F Tillack", "Michel F Sanner", "Andreas Koch", "Stefano Forli"], "year": "January 6, 2021", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 14}, {"doi": "https://doi.org/10.1021/acs.jctc.9b00557", "name": "Docking Flexible Cyclic Peptides with AutoDock CrankPep", "author": ["Yuqi Zhang", "Michel F. Sanner"], "year": "September 11, 2019", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 9}, {"doi": "https://doi.org/10.1021/ci300604z", "name": "Lessons Learned in Empirical Scoring with smina from the CSAR 2011 Benchmarking Exercise", "author": ["David Ryan Koes", "Matthew P. Baumgartner", "Carlos J. Camacho"], "year": "February 4, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jctc.5b00834", "name": "Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking", "author": ["Anita K. Nivedha", "David F. Thieker", "Spandana Makeneni", "Huimin Hu", "Robert J. 
Woods"], "year": "January 8, 2016", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 48}, {"doi": "https://doi.org/10.1021/ci700036j", "name": "Lennard-Jones Potential and Dummy Atom Settings to Overcome the AUTODOCK Limitation in Treating Flexible Ring Systems", "author": ["Stefano Forli", "Maurizio Botta"], "year": "June 22, 2007", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 32}, {"doi": "https://doi.org/10.1021/ci500209e", "name": "AutoDock4Zn: An Improved AutoDock Force Field for Small-Molecule Docking to Zinc Metalloproteins", "author": ["Diogo Santos-Martins", "Stefano Forli", "Maria Jo\u00e3o Ramos", "Arthur J. Olson"], "year": "June 15, 2014", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jm2005145", "name": "A Force Field with Discrete Displaceable Waters and Desolvation Entropy for Hydrated Ligand Docking", "author": ["Stefano Forli", "Arthur J. Olson"], "year": "December 9, 2011", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/ci300399w", "name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", "author": ["Douglas R. Houston", "Malcolm D. Walkinshaw"], "year": "January 27, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jp9723574", "name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jp972358w", "name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 2. 
Applications to Simple Fluids", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00675", "name": "ZINC20\u2014A Free Ultralarge-Scale Chemical Database for Ligand Discovery", "author": ["John J. Irwin", "Khanh G. Tang", "Jennifer Young", "Chinzorig Dandarchuluun", "Benjamin R. Wong", "Munkhzul Khurelbaatar", "Yurii S. Moroz", "John Mayfield", "Roger A. Sayle"], "year": "October 29, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 26}, {"doi": "https://doi.org/10.1021/acs.jmedchem.7b01243", "name": "Structural Biology-Inspired Discovery of Novel KRAS\u2013PDE\u03b4 Inhibitors", "author": ["Yan Jiang", "Chunlin Zhuang", "Long Chen", "Junjie Lu", "Guoqiang Dong", "Zhenyuan Miao", "Wannian Zhang", "Jian Li", "Chunquan Sheng"], "year": "September 20, 2017", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 12}, {"doi": "https://doi.org/10.1021/jm300687e", "name": "Directory of Useful Decoys, Enhanced (DUD-E): Better Ligands and Decoys for Better Benchmarking", "author": ["Michael M. Mysinger", "Michael Carchia", "John. J. Irwin", "Brian K. Shoichet"], "year": "June 20, 2012", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.8b00312", "name": "Evaluation of AutoDock and AutoDock Vina on the CASF-2013 Benchmark", "author": ["Thomas Gaillard"], "year": "July 10, 2018", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 74}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00778", "name": "Autodock Vina Adopts More Accurate Binding Poses but Autodock4 Forms Better Binding Affinity", "author": ["Nguyen Thanh Nguyen", "Trung Hai Nguyen", "T. 
Ngoc Han Pham", "Nguyen Truong Huy", "Mai Van Bay", "Minh Quan Pham", "Pham Cam Nam", "Van V. Vu", "Son Tung Ngo"], "year": "December 30, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 66}, {"doi": "https://doi.org/10.1021/jm0306430", "name": "Glide:\u2009 A New Approach for Rapid, Accurate Docking and Scoring. 1. Method and Assessment of Docking Accuracy", "author": ["Richard A. Friesner", "Jay L. Banks", "Robert B. Murphy", "Thomas A. Halgren", "Jasna J. Klicic", "Daniel T. Mainz", "Matthew P. Repasky", "Eric H. Knoll", "Mee Shelley", "Jason K. Perry", "David E. Shaw", "Perry Francis", "Peter S. Shenkin"], "year": "February 27, 2004", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 97}, {"doi": "https://doi.org/10.1021/jm020406h", "name": "Surflex:\u2009 Fully Automatic Flexible Molecular Docking Using a Molecular Similarity-Based Search Engine", "author": ["Ajay N. Jain"], "year": "January 21, 2003", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/ci300493w", "name": "ID-Score: A New Empirical Scoring Function Based on a Comprehensive Set of Descriptors Related to Protein\u2013Ligand Interactions", "author": ["Guo-Bo Li", "Ling-Ling Yang", "Wen-Jing Wang", "Lin-Li Li", "Sheng-Yong Yang"], "year": "February 9, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 99}, {"doi": "https://doi.org/10.1021/jm049314d", "name": "A Knowledge-Based Energy Function for Protein\u2212Ligand, Protein\u2212Protein, and Protein\u2212DNA Complexes", "author": ["Chi Zhang", "Song Liu", "Qianqian Zhu", "Yaoqi Zhou"], "year": "February 16, 2005", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acsomega.1c04320", "name": "Novel Anti-Hepatitis B Virus 
Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives", "author": ["Mohammad K. Parvez", "Sarfaraz Ahmed", "Mohammed S. Al-Dosari", "Mazin A. S. Abdelwahid", "Ahmed H. Arbab", "Adnan J. Al-Rehaily", "Mai M. Al-Oqail"], "year": "October 21, 2021", "journal": "ACS Omega", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jpcb.1c08383", "name": "Molecular Simulations of Aqueous Electrolytes: Role of Explicit Inclusion of Charge Transfer into Force Fields", "author": ["Max L. Berkowitz"], "year": "November 22, 2021", "journal": "Journal of Physical Chemistry B", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jpca.1c06264", "name": "Topological Characterization and Graph Entropies of Tessellations of Kekulene Structures: Existence of Isentropic Structures and Applications to Thermochemistry, Nuclear Magnetic Resonance, and Electron Spin Resonance", "author": ["S. Ruth Julie Kavitha", "Jessie Abraham", "Micheal Arockiaraj", "Joseph Jency", "Krishnan Balasubramanian"], "year": "September 1, 2021", "journal": "J. Phys. Chem. A", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acsmedchemlett.1c00251", "name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID", "author": ["Ilaria Proietti Silvestri", "Paul J. J. Colbon"], "year": "July 16, 2021", "journal": "ACS Med. Chem. Lett.", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.1c00416", "name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies", "author": ["Paul D. Leeson", "A. Patricia Bento", "Anna Gaulton", "Anne Hersey", "Emma J. Manners", "Chris J. Radoux", "Andrew R. 
Leach"], "year": "May 13, 2021", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jcim.1c00226", "name": "Automatic Identification of Lansoprazole Degradants under Stress Conditions by LC-HRMS with MassChemSite and WebChembase", "author": ["Stefano Bonciarelli", "Jenny Desantis", "Laura Goracci", "Lydia Siragusa", "Ismael Zamora", "Elisabeth Ortega-Carrasco"], "year": "June 1, 2021", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.chemrestox.0c00006", "name": "Computational Approaches to Identify Structural Alerts and Their Applications in Environmental Toxicology and Drug Discovery", "author": ["Hongbin Yang", "Chaofeng Lou", "Weihua Li", "Guixia Liu", "Yun Tang"], "year": "February 24, 2020", "journal": "Chem. Res. Toxicol.", "group": "Citedby", "depth": 2, "citations": 11}, {"doi": "https://doi.org/10.1021/acs.est.9b06379", "name": "Toward a Global Understanding of Chemical Pollution: A First Comprehensive Analysis of National and Regional Chemical Inventories", "author": ["Zhanyun Wang", "Glen W. Walker", "Derek C. G. Muir", "Kakuko Nagatani-Yoshida"], "year": "January 22, 2020", "journal": "Environ. Sci. Technol.", "group": "Citedby", "depth": 2, "citations": 100}, {"doi": "https://doi.org/10.1021/ci049714+", "name": "ZINC \u2212 A Free Database of Commercially Available Compounds for Virtual Screening", "author": ["John J. Irwin", "Brian K. Shoichet"], "year": "December 14, 2004", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 98}, {"doi": "https://doi.org/10.1021/ci3001277", "name": "ZINC: A Free Tool to Discover Chemistry for Biology", "author": ["John J. Irwin", "Teague Sterling", "Michael M. Mysinger", "Erin S. Bolstad", "Ryan G. 
Coleman"], "year": "May 15, 2012", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.5b00559", "name": "ZINC 15 \u2013 Ligand Discovery for Everyone", "author": ["Teague Sterling", "John J. Irwin"], "year": "October 19, 2015", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 98}, {"doi": "https://doi.org/10.1021/ci7004498", "name": "Application of Belief Theory to Similarity Data Fusion for Use in Analog Searching and Lead Hopping", "author": ["Steven W. Muchmore", "Derek A. Debe", "James T. Metz", "Scott P. Brown", "Yvonne C. Martin", "Philip J. Hajduk"], "year": "April 17, 2008", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/jm020155c", "name": "Do Structurally Similar Molecules Have Similar Biological Activity?", "author": ["Yvonne C. Martin", "James L. Kofron", "Linda M. Traphagen"], "year": "August 13, 2002", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/jm9602928", "name": "The Properties of Known Drugs. 1. Molecular Frameworks", "author": ["Guy W. Bemis", "Mark A. Murcko"], "year": "July 19, 1996", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/ci025599w", "name": "Molecular Shape Diversity of Combinatorial Libraries:\u2009 A Prerequisite for Broad Bioactivity\u2020", "author": ["Wolfgang H. B. Sauer", "Matthias K. Schwarz"], "year": "March 14, 2003", "journal": "J. Chem. Inf. Comput. 
Sci.", "group": "Reference", "depth": -2, "citations": 99}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.0c01006"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.9b00557"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.5b00834"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300399w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp9723574"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp972358w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.0c00675"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jmedchem.7b01243"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": 
"https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm0306430"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm020406h"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300493w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm049314d"}, {"source": "https://doi.org/10.1021/acsomega.1c04320", "target": "https://doi.org/10.1021/acs.jcim.1c00203"}, {"source": "https://doi.org/10.1021/acs.jpcb.1c08383", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acs.jpca.1c06264", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jmedchem.1c00416", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00226", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.chemrestox.0c00006", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.est.9b06379", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": 
"https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/acs.jcim.5b00559"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci7004498"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm9602928"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.9b00557", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00778", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, 
{"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/ci7004498", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/ci025599w"}]} \ No newline at end of file diff --git a/verarbeitung/new_height.json b/verarbeitung/new_height.json new file mode 100644 index 0000000000000000000000000000000000000000..f96362a05cea7ad954fa28bfc22074e15e9fa1cd --- /dev/null +++ b/verarbeitung/new_height.json @@ -0,0 +1 @@ +{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": 
"title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}]} \ No newline at end of file diff --git a/verarbeitung/process_main.py b/verarbeitung/process_main.py new file mode 100644 index 0000000000000000000000000000000000000000..4e0678386b6da3ee18d4a0f7b0c271f3167c93fd --- /dev/null +++ b/verarbeitung/process_main.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +""" 
+main function to call to generate a graph representing citations between multiple ACS/Nature journals + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + +import sys +from pathlib import Path +from os import error + +sys.path.append("../") + +from verarbeitung.construct_new_graph.export_to_json import output_to_json +from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction +from verarbeitung.update_graph.update_graph import update_graph + +def Processing(url_list, search_depth, search_height, json_file = 'json_text.json'): + ''' + :param url_list: list of urls to construct publication graph for + :type url_list: List[String] + + :param search_depth: maximum depth to search for references + :type search_depth: int + + :param search_height: maximum height to search for citations + :type search_height: int + + :param json_file: file to export graph to + :type json_file: String + + main function to construct new or updated publication graphs + ''' + + # updates graph if json file is known in directory otherwise starts new graph construction + try: + with open(json_file) as f: + nodes, edges = update_graph(url_list, json_file, search_depth, search_height) + + except IOError: + nodes, edges = init_graph_construction(url_list, search_depth, search_height) + + # exports graph to given json file name + output_to_json(nodes, edges, json_file) + \ No newline at end of file diff --git a/verarbeitung/start_script.py b/verarbeitung/start_script.py new file mode 100644 index 0000000000000000000000000000000000000000..71c0e8dadfc0a736f2e465fd78c56741631dacd2 --- /dev/null +++ b/verarbeitung/start_script.py @@ -0,0 +1,11 @@ +import sys +from pathlib import Path +from verarbeitung.process_main import Processing 
+from verarbeitung.dev_files.print_graph_test import try_known_publications, try_delete_nodes + + +doi_list = [] +doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') +#doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249') +doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') +Processing(doi_list, 2, 2, 'test.json') \ No newline at end of file diff --git a/verarbeitung/test.json b/verarbeitung/test.json new file mode 100644 index 0000000000000000000000000000000000000000..aeb1ae04b7931c7e81e6ea73efcc52fd41dc20b0 --- /dev/null +++ b/verarbeitung/test.json @@ -0,0 +1 @@ +{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 5}, {"doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. 
Rev.", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00741", "name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 1, "citations": 8}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00250", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 12}, {"doi": "https://doi.org/10.1021/acs.jcim.1c00203", "name": "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", "author": ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. 
Tillack", "Stefano Forli"], "year": "July 19, 2021", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 1}, {"doi": "https://doi.org/10.1021/acs.jctc.0c01006", "name": "Accelerating AutoDock4 with GPUs and Gradient-Based Local Search", "author": ["Diogo Santos-Martins", "Leonardo Solis-Vasquez", "Andreas F Tillack", "Michel F Sanner", "Andreas Koch", "Stefano Forli"], "year": "January 6, 2021", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 14}, {"doi": "https://doi.org/10.1021/acs.jctc.9b00557", "name": "Docking Flexible Cyclic Peptides with AutoDock CrankPep", "author": ["Yuqi Zhang", "Michel F. Sanner"], "year": "September 11, 2019", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 9}, {"doi": "https://doi.org/10.1021/ci300604z", "name": "Lessons Learned in Empirical Scoring with smina from the CSAR 2011 Benchmarking Exercise", "author": ["David Ryan Koes", "Matthew P. Baumgartner", "Carlos J. Camacho"], "year": "February 4, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jctc.5b00834", "name": "Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking", "author": ["Anita K. Nivedha", "David F. Thieker", "Spandana Makeneni", "Huimin Hu", "Robert J. 
Woods"], "year": "January 8, 2016", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 48}, {"doi": "https://doi.org/10.1021/ci700036j", "name": "Lennard-Jones Potential and Dummy Atom Settings to Overcome the AUTODOCK Limitation in Treating Flexible Ring Systems", "author": ["Stefano Forli", "Maurizio Botta"], "year": "June 22, 2007", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 32}, {"doi": "https://doi.org/10.1021/ci500209e", "name": "AutoDock4Zn: An Improved AutoDock Force Field for Small-Molecule Docking to Zinc Metalloproteins", "author": ["Diogo Santos-Martins", "Stefano Forli", "Maria Jo\u00e3o Ramos", "Arthur J. Olson"], "year": "June 15, 2014", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jm2005145", "name": "A Force Field with Discrete Displaceable Waters and Desolvation Entropy for Hydrated Ligand Docking", "author": ["Stefano Forli", "Arthur J. Olson"], "year": "December 9, 2011", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/ci300399w", "name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", "author": ["Douglas R. Houston", "Malcolm D. Walkinshaw"], "year": "January 27, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jp9723574", "name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jp972358w", "name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 2. 
Applications to Simple Fluids", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00675", "name": "ZINC20\u2014A Free Ultralarge-Scale Chemical Database for Ligand Discovery", "author": ["John J. Irwin", "Khanh G. Tang", "Jennifer Young", "Chinzorig Dandarchuluun", "Benjamin R. Wong", "Munkhzul Khurelbaatar", "Yurii S. Moroz", "John Mayfield", "Roger A. Sayle"], "year": "October 29, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 26}, {"doi": "https://doi.org/10.1021/acs.jmedchem.7b01243", "name": "Structural Biology-Inspired Discovery of Novel KRAS\u2013PDE\u03b4 Inhibitors", "author": ["Yan Jiang", "Chunlin Zhuang", "Long Chen", "Junjie Lu", "Guoqiang Dong", "Zhenyuan Miao", "Wannian Zhang", "Jian Li", "Chunquan Sheng"], "year": "September 20, 2017", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 12}, {"doi": "https://doi.org/10.1021/jm300687e", "name": "Directory of Useful Decoys, Enhanced (DUD-E): Better Ligands and Decoys for Better Benchmarking", "author": ["Michael M. Mysinger", "Michael Carchia", "John. J. Irwin", "Brian K. Shoichet"], "year": "June 20, 2012", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.8b00312", "name": "Evaluation of AutoDock and AutoDock Vina on the CASF-2013 Benchmark", "author": ["Thomas Gaillard"], "year": "July 10, 2018", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 74}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00778", "name": "Autodock Vina Adopts More Accurate Binding Poses but Autodock4 Forms Better Binding Affinity", "author": ["Nguyen Thanh Nguyen", "Trung Hai Nguyen", "T. 
Ngoc Han Pham", "Nguyen Truong Huy", "Mai Van Bay", "Minh Quan Pham", "Pham Cam Nam", "Van V. Vu", "Son Tung Ngo"], "year": "December 30, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 66}, {"doi": "https://doi.org/10.1021/jm0306430", "name": "Glide:\u2009 A New Approach for Rapid, Accurate Docking and Scoring. 1. Method and Assessment of Docking Accuracy", "author": ["Richard A. Friesner", "Jay L. Banks", "Robert B. Murphy", "Thomas A. Halgren", "Jasna J. Klicic", "Daniel T. Mainz", "Matthew P. Repasky", "Eric H. Knoll", "Mee Shelley", "Jason K. Perry", "David E. Shaw", "Perry Francis", "Peter S. Shenkin"], "year": "February 27, 2004", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 97}, {"doi": "https://doi.org/10.1021/jm020406h", "name": "Surflex:\u2009 Fully Automatic Flexible Molecular Docking Using a Molecular Similarity-Based Search Engine", "author": ["Ajay N. Jain"], "year": "January 21, 2003", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/ci300493w", "name": "ID-Score: A New Empirical Scoring Function Based on a Comprehensive Set of Descriptors Related to Protein\u2013Ligand Interactions", "author": ["Guo-Bo Li", "Ling-Ling Yang", "Wen-Jing Wang", "Lin-Li Li", "Sheng-Yong Yang"], "year": "February 9, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 99}, {"doi": "https://doi.org/10.1021/jm049314d", "name": "A Knowledge-Based Energy Function for Protein\u2212Ligand, Protein\u2212Protein, and Protein\u2212DNA Complexes", "author": ["Chi Zhang", "Song Liu", "Qianqian Zhu", "Yaoqi Zhou"], "year": "February 16, 2005", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acsomega.1c04320", "name": "Novel Anti-Hepatitis B Virus 
Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives", "author": ["Mohammad K. Parvez", "Sarfaraz Ahmed", "Mohammed S. Al-Dosari", "Mazin A. S. Abdelwahid", "Ahmed H. Arbab", "Adnan J. Al-Rehaily", "Mai M. Al-Oqail"], "year": "October 21, 2021", "journal": "ACS Omega", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jpcb.1c08383", "name": "Molecular Simulations of Aqueous Electrolytes: Role of Explicit Inclusion of Charge Transfer into Force Fields", "author": ["Max L. Berkowitz"], "year": "November 22, 2021", "journal": "Journal of Physical Chemistry B", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jpca.1c06264", "name": "Topological Characterization and Graph Entropies of Tessellations of Kekulene Structures: Existence of Isentropic Structures and Applications to Thermochemistry, Nuclear Magnetic Resonance, and Electron Spin Resonance", "author": ["S. Ruth Julie Kavitha", "Jessie Abraham", "Micheal Arockiaraj", "Joseph Jency", "Krishnan Balasubramanian"], "year": "September 1, 2021", "journal": "J. Phys. Chem. A", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acsmedchemlett.1c00251", "name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID", "author": ["Ilaria Proietti Silvestri", "Paul J. J. Colbon"], "year": "July 16, 2021", "journal": "ACS Med. Chem. Lett.", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.1c00416", "name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies", "author": ["Paul D. Leeson", "A. Patricia Bento", "Anna Gaulton", "Anne Hersey", "Emma J. Manners", "Chris J. Radoux", "Andrew R. 
Leach"], "year": "May 13, 2021", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jcim.1c00226", "name": "Automatic Identification of Lansoprazole Degradants under Stress Conditions by LC-HRMS with MassChemSite and WebChembase", "author": ["Stefano Bonciarelli", "Jenny Desantis", "Laura Goracci", "Lydia Siragusa", "Ismael Zamora", "Elisabeth Ortega-Carrasco"], "year": "June 1, 2021", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.chemrestox.0c00006", "name": "Computational Approaches to Identify Structural Alerts and Their Applications in Environmental Toxicology and Drug Discovery", "author": ["Hongbin Yang", "Chaofeng Lou", "Weihua Li", "Guixia Liu", "Yun Tang"], "year": "February 24, 2020", "journal": "Chem. Res. Toxicol.", "group": "Citedby", "depth": 2, "citations": 11}, {"doi": "https://doi.org/10.1021/acs.est.9b06379", "name": "Toward a Global Understanding of Chemical Pollution: A First Comprehensive Analysis of National and Regional Chemical Inventories", "author": ["Zhanyun Wang", "Glen W. Walker", "Derek C. G. Muir", "Kakuko Nagatani-Yoshida"], "year": "January 22, 2020", "journal": "Environ. Sci. Technol.", "group": "Citedby", "depth": 2, "citations": 100}, {"doi": "https://doi.org/10.1021/ci049714+", "name": "ZINC \u2212 A Free Database of Commercially Available Compounds for Virtual Screening", "author": ["John J. Irwin", "Brian K. Shoichet"], "year": "December 14, 2004", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 98}, {"doi": "https://doi.org/10.1021/ci3001277", "name": "ZINC: A Free Tool to Discover Chemistry for Biology", "author": ["John J. Irwin", "Teague Sterling", "Michael M. Mysinger", "Erin S. Bolstad", "Ryan G. 
Coleman"], "year": "May 15, 2012", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.5b00559", "name": "ZINC 15 \u2013 Ligand Discovery for Everyone", "author": ["Teague Sterling", "John J. Irwin"], "year": "October 19, 2015", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 98}, {"doi": "https://doi.org/10.1021/ci7004498", "name": "Application of Belief Theory to Similarity Data Fusion for Use in Analog Searching and Lead Hopping", "author": ["Steven W. Muchmore", "Derek A. Debe", "James T. Metz", "Scott P. Brown", "Yvonne C. Martin", "Philip J. Hajduk"], "year": "April 17, 2008", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/jm020155c", "name": "Do Structurally Similar Molecules Have Similar Biological Activity?", "author": ["Yvonne C. Martin", "James L. Kofron", "Linda M. Traphagen"], "year": "August 13, 2002", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/jm9602928", "name": "The Properties of Known Drugs. 1. Molecular Frameworks", "author": ["Guy W. Bemis", "Mark A. Murcko"], "year": "July 19, 1996", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/ci025599w", "name": "Molecular Shape Diversity of Combinatorial Libraries:\u2009 A Prerequisite for Broad Bioactivity\u2020", "author": ["Wolfgang H. B. Sauer", "Matthias K. Schwarz"], "year": "March 14, 2003", "journal": "J. Chem. Inf. Comput. 
Sci.", "group": "Reference", "depth": -2, "citations": 99}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.0c01006"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.9b00557"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.5b00834"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300399w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp9723574"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp972358w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.0c00675"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jmedchem.7b01243"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": 
"https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm0306430"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm020406h"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300493w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm049314d"}, {"source": "https://doi.org/10.1021/acsomega.1c04320", "target": "https://doi.org/10.1021/acs.jcim.1c00203"}, {"source": "https://doi.org/10.1021/acs.jpcb.1c08383", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acs.jpca.1c06264", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jmedchem.1c00416", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00226", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.chemrestox.0c00006", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.est.9b06379", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": 
"https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/acs.jcim.5b00559"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci7004498"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm9602928"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.9b00557", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00778", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, 
{"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/ci7004498", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/ci025599w"}]} \ No newline at end of file diff --git a/verarbeitung/test/README.md b/verarbeitung/test/README.md new file mode 100644 index 0000000000000000000000000000000000000000..79afed64d1efe590ebf60814882959c67c60de0e --- /dev/null +++ b/verarbeitung/test/README.md @@ -0,0 +1,27 @@ +# Projekt CiS-Projekt 2021/22 + +Directory to contain unittests for construction and update of publication graph + +## Files in directory + +input_test.py + +- Immitiert die Arbeit der Input Gruppe auf eine sehr einfache Weise. + Beispielhafte Informationen werden aus Strings herausgelesen und als Klassenobjekt gespeichert. + +construct_graph_unittest.py + +- Führt diverse Tests zur Konstruktion des Graphen ohne Vorkenntnisse mit eigenen Beispielen und + unserer Input_test Funktion aus. + +update_graph_unittest.py + +- Führt diverse Tests zum Updaten eines alten Graphs mit aktualisierter Input Liste mit eigenen + Beispielen und unserer Input_test Funktion aus. 
+ +## Authors +- Donna Löding +- Alina Molkentin +- Xinyi Tang +- Judith Große +- Malte Schokolowski \ No newline at end of file diff --git a/verarbeitung/test/__init__.py b/verarbeitung/test/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/verarbeitung/test/construct_graph_unittest.py b/verarbeitung/test/construct_graph_unittest.py new file mode 100644 index 0000000000000000000000000000000000000000..13f201c03a32d13d3364b5bd9af22c3e0efbebdc --- /dev/null +++ b/verarbeitung/test/construct_graph_unittest.py @@ -0,0 +1,102 @@ +import unittest + +import sys +from pathlib import Path +sys.path.append("../") + +from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction + +class ConstructionTest(unittest.TestCase): + maxDiff = None + + + def testCycle(self): + nodes, edges = init_graph_construction(['doiz1'],1,1,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) + self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) + + nodes, edges = init_graph_construction(['doiz1'],2,2,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) + self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']]) + + #def testBigCycle(self): + + #def testEmptyHeight(self): + + #def testEmptyDepth(self): + + def testEmptyDepthHeight(self): + nodes, edges = init_graph_construction(['doi1'],0,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi1']) + self.assertCountEqual(edges, []) + + nodes, edges = init_graph_construction(['doi1', 'doi2'],0,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi1','doi2']) + self.assertCountEqual(edges, [['doi1', 'doi2']]) + + nodes, edges = init_graph_construction(['doi1', 'doi2', 'doi3'],0,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, 
['doi1','doi2', 'doi3']) + self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) + + + def testInnerEdges(self): + nodes, edges = init_graph_construction(['doi_ie1'],1,1,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3']) + self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']]) + + def testRightHeight(self): + nodes, edges = init_graph_construction(['doi_h01'],1,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h01']) + self.assertCountEqual(edges, []) + + nodes, edges = init_graph_construction(['doi_h02'],1,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h02','doi_h1']) + self.assertCountEqual(edges, [['doi_h1','doi_h02']]) + + nodes, edges = init_graph_construction(['doi_h02'],2,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2']) + self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']]) + + def testRightDepth(self): + nodes, edges = init_graph_construction(['doi_d01'],0,1,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d01']) + self.assertCountEqual(edges, []) + + nodes, edges = init_graph_construction(['doi_d02'],0,1,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d02','doi_d1']) + self.assertCountEqual(edges, [['doi_d02','doi_d1']]) + + nodes, edges = init_graph_construction(['doi_d02'],0,2,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2']) + self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']]) + + + + +def keep_only_dois(nodes): + ''' + :param nodes: input list of nodes of type Publication + :type nodes: List[Publication] + + gets nodes of type pub and return only their doi + ''' + doi_list = [] + for node in nodes: + doi_list.append(node.doi_url) + return doi_list 
+ + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/verarbeitung/test/input_test.py b/verarbeitung/test/input_test.py new file mode 100644 index 0000000000000000000000000000000000000000..a701f7e294ccbda42f29973078921a3b330b948b --- /dev/null +++ b/verarbeitung/test/input_test.py @@ -0,0 +1,104 @@ +import sys +sys.path.append("../") + +from input.publication import Publication, Citation + + +def input_test_func(pub_doi): + ''' + :param pub_doi: pub doi to find publication in list_of_arrays + :type pub_doi: String + + returns the publication class for given doi + ''' + + for array in list_of_arrays: + if pub_doi == array[0]: + pub = Publication(array[0], array[1], array[2], array[3], array[4], array[5], [], []) + pub.citations = cit(array[7], "Citation") + pub.references = cit(array[6], "Reference") + return pub + + +def cit(list_doi, cit_type): + ''' + :param list_doi list of citation dois to get their Citation Class + :type list_doi: List[String] + + returns a list of citations objects for given doi list + ''' + + cits = [] + for doi_url in list_doi: + for array in list_of_arrays: + if doi_url == array[0]: + cits.append(Citation(array[0], array[1], array[2], array[3], cit_type)) + return cits + + + +beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['subject1'], ['doi2'], ['doi3']] +beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', ['subject2'], [], ['doi1']] +beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['subject3'], ['doi1'], []] + +zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['subjectz1'], ['doiz2'], ['doiz2']] +zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['subjectz1'], ['doiz1'], ['doiz1']] + +inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['subject_ie1'], ['doi_ie2'], ['doi_ie3']] +inner_edge2 = 
['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', ['subject_ie2'], [], ['doi_ie1','doi_ie3']] +inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['subject_ie3'], ['doi_ie1','doi_ie2'], []] + +right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', ['subject_h01'], [], []] +right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', ['subject_h02'], [], ['doi_h1']] +right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', ['subject_h1'], [], ['doi_h2']] +right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', ['subject_h2'], [], ['doi_h3']] +right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', ['subject_h3'], [], []] + +right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', ['subject_d01'], [], []] +right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['subject_d01'], ['doi_d1'], []] +right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['subject_d1'], ['doi_d2'], []] +right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['subject_d2'], ['doi_d3'], []] +right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', ['subject_d3'], [], []] + +large_graph_1_h21 = ['doi_lg_1_h21', 'title_lg_1_h21', ['contributor_lg_1_h21'], 'journal_lg_1_h21', 'date_lg_1_h21', ['subject_lg_1_h21'], ['doi_lg_1_h11'], []] +large_graph_1_h22 = ['doi_lg_1_h22', 'title_lg_1_h22', ['contributor_lg_1_h22'], 'journal_lg_1_h22', 'date_lg_1_h22', ['subject_lg_1_h22'], ['doi_lg_1_h11','doi_lg_1_h12'], []] +large_graph_1_h23 = ['doi_lg_1_h23', 'title_lg_1_h23', ['contributor_lg_1_h23'], 'journal_lg_1_h23', 'date_lg_1_h23', ['subject_lg_1_h23'], ['doi_lg_1_h12','doi_cg_i'], []] +large_graph_1_h11 = 
['doi_lg_1_h11', 'title_lg_1_h11', ['contributor_lg_1_h11'], 'journal_lg_1_h11', 'date_lg_1_h11', ['subject_lg_1_h11'], ['doi_lg_1_i'], ['doi_lg_1_h21','doi_lg_1_h22']] +large_graph_1_h12 = ['doi_lg_1_h12', 'title_lg_1_h12', ['contributor_lg_1_h12'], 'journal_lg_1_h12', 'date_lg_1_h12', ['subject_lg_1_h12'], ['doi_lg_1_i','doi_lg_1_d12'], ['doi_lg_1_h22','doi_lg_1_h23']] +large_graph_1_i = ['doi_lg_1_i' , 'title_lg_1_i' , ['contributor_lg_1_i'] , 'journal_lg_1_i' , 'date_lg_1_i' , ['subject_lg_1_i'] , ['doi_lg_1_d11','doi_lg_1_d12'], ['doi_lg_1_h11','doi_lg_1_h12']] +large_graph_1_d11 = ['doi_lg_1_d11', 'title_lg_1_d11', ['contributor_lg_1_d11'], 'journal_lg_1_d11', 'date_lg_1_d11', ['subject_lg_1_d11'], ['doi_lg_1_d21','doi_lg_1_d22'], ['doi_lg_1_i']] +large_graph_1_d12 = ['doi_lg_1_d12', 'title_lg_1_d12', ['contributor_lg_1_d12'], 'journal_lg_1_d12', 'date_lg_1_d12', ['subject_lg_1_d12'], ['doi_lg_1_d23'], ['doi_lg_1_h12','doi_lg_1_i']] +large_graph_1_d21 = ['doi_lg_1_d21', 'title_lg_1_d21', ['contributor_lg_1_d21'], 'journal_lg_1_d21', 'date_lg_1_d21', ['subject_lg_1_d21'], ['doi_lg_1_d22'], ['doi_lg_1_d11','doi_lg_1_d22']] +large_graph_1_d22 = ['doi_lg_1_d22', 'title_lg_1_d22', ['contributor_lg_1_d22'], 'journal_lg_1_d22', 'date_lg_1_d22', ['subject_lg_1_d22'], ['doi_lg_1_d21'], ['doi_lg_1_d11','doi_lg_1_d21']] +large_graph_1_d23 = ['doi_lg_1_d23', 'title_lg_1_d23', ['contributor_lg_1_d23'], 'journal_lg_1_d23', 'date_lg_1_d23', ['subject_lg_1_d23'], [], ['doi_lg_1_d12','doi_cg_d11']] + +large_graph_2_h21 = ['doi_lg_2_h21', 'title_lg_2_h21', ['contributor_lg_2_h21'], 'journal_lg_2_h21', 'date_lg_2_h21', ['subject_lg_2_h21'], ['doi_lg_2_h11'], []] +large_graph_2_h22 = ['doi_lg_2_h22', 'title_lg_2_h22', ['contributor_lg_2_h22'], 'journal_lg_2_h22', 'date_lg_2_h22', ['subject_lg_2_h22'], ['doi_lg_2_h11'], []] +large_graph_2_h23 = ['doi_lg_2_h23', 'title_lg_2_h23', ['contributor_lg_2_h23'], 'journal_lg_2_h23', 'date_lg_2_h23', ['subject_lg_2_h23'], 
['doi_lg_2_h12','doi_lg_2_h24'], ['doi_lg_2_h24']] +large_graph_2_h24 = ['doi_lg_2_h24', 'title_lg_2_h24', ['contributor_lg_2_h24'], 'journal_lg_2_h24', 'date_lg_2_h24', ['subject_lg_2_h24'], ['doi_lg_2_h12','doi_lg_2_h23','doi_lg_2_d12'], ['doi_lg_2_h23']] +large_graph_2_h11 = ['doi_lg_2_h11', 'title_lg_2_h11', ['contributor_lg_2_h11'], 'journal_lg_2_h11', 'date_lg_2_h11', ['subject_lg_2_h11'], ['doi_lg_2_i','doi_cg_i'], ['doi_lg_2_h21','doi_lg_2_h22']] +large_graph_2_h12 = ['doi_lg_2_h12', 'title_lg_2_h12', ['contributor_lg_2_h12'], 'journal_lg_2_h12', 'date_lg_2_h12', ['subject_lg_2_h12'], ['doi_lg_2_i'], ['doi_lg_2_h23','doi_lg_2_h24']] +large_graph_2_i = ['doi_lg_2_i' , 'title_lg_2_i' , ['contributor_lg_2_i'] , 'journal_lg_2_i' , 'date_lg_2_i' , ['subject_lg_2_i'] , ['doi_lg_2_d11','doi_lg_2_d12'], ['doi_lg_2_h11','doi_lg_2_h12','doi_cg_i','doi_lg_2_h11']] +large_graph_2_d11 = ['doi_lg_2_d11', 'title_lg_2_d11', ['contributor_lg_2_d11'], 'journal_lg_2_d11', 'date_lg_2_d11', ['subject_lg_2_d11'], ['doi_lg_2_i','doi_lg_2_d21'], ['doi_lg_2_i']] +large_graph_2_d12 = ['doi_lg_2_d12', 'title_lg_2_d12', ['contributor_lg_2_d12'], 'journal_lg_2_d12', 'date_lg_2_d12', ['subject_lg_2_d12'], ['doi_lg_2_d22','doi_lg_2_d23','doi_lg_2_d24'], ['doi_lg_2_h24','doi_lg_2_i']] +large_graph_2_d21 = ['doi_lg_2_d21', 'title_lg_2_d21', ['contributor_lg_2_d21'], 'journal_lg_2_d21', 'date_lg_2_d21', ['subject_lg_2_d21'], [], ['doi_lg_2_d11']] +large_graph_2_d22 = ['doi_lg_2_d22', 'title_lg_2_d22', ['contributor_lg_2_d22'], 'journal_lg_2_d22', 'date_lg_2_d22', ['subject_lg_2_d22'], [], ['doi_lg_2_d12']] +large_graph_2_d23 = ['doi_lg_2_d23', 'title_lg_2_d23', ['contributor_lg_2_d23'], 'journal_lg_2_d23', 'date_lg_2_d23', ['subject_lg_2_d23'], [], ['doi_lg_2_d12']] +large_graph_2_d24 = ['doi_lg_2_d24', 'title_lg_2_d24', ['contributor_lg_2_d24'], 'journal_lg_2_d24', 'date_lg_2_d24', ['subject_lg_2_d24'], [], ['doi_lg_2_d12']] + +crossed_graph_h21 = ['doi_cg_h21', 'title_cg_h21', 
['contributor_cg_h21'], 'journal_cg_h21', 'date_cg_h21', ['subject_cg_h21'], ['doi_cg_h11'], []] +crossed_graph_h22 = ['doi_cg_h22', 'title_cg_h22', ['contributor_cg_h22'], 'journal_cg_h22', 'date_cg_h22', ['subject_cg_h22'], ['doi_cg_h11'], []] +crossed_graph_h11 = ['doi_cg_h11', 'title_cg_h11', ['contributor_cg_h11'], 'journal_cg_h11', 'date_cg_h11', ['subject_cg_h11'], ['doi_cg_i'], ['doi_cg_h21','doi_cg_h22']] +crossed_graph_i = ['doi_cg_i', 'title_cg_i', ['contributor_cg_i'], 'journal_cg_i', 'date_cg_i', ['subject_cg_i'], ['doi_lg_2_i','doi_cg_d11','doi_cg_d12'], ['doi_lg_1_h23','doi_cg_h11','doi_lg_2_h11']] +crossed_graph_d11 = ['doi_cg_d11', 'title_cg_d11', ['contributor_cg_d11'], 'journal_cg_d11', 'date_cg_d11', ['subject_cg_d11'], ['doi_lg_1_d23','doi_cg_d21'], ['doi_cg_i']] +crossed_graph_d12 = ['doi_cg_d12', 'title_cg_d12', ['contributor_cg_d12'], 'journal_cg_d12', 'date_cg_d12', ['subject_cg_d12'], ['doi_cg_d22'], ['doi_cg_i']] +crossed_graph_d21 = ['doi_cg_d21', 'title_cg_d21', ['contributor_cg_d21'], 'journal_cg_d21', 'date_cg_d21', ['subject_cg_d21'], [], ['doi_cg_d11']] +crossed_graph_d22 = ['doi_cg_d22', 'title_cg_d22', ['contributor_cg_d22'], 'journal_cg_d22', 'date_cg_d22', ['subject_cg_d22'], [], ['doi_cg_d12']] + + +list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, + right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3, + large_graph_1_h21, large_graph_1_h22, large_graph_1_h23, large_graph_1_h11, large_graph_1_h12, large_graph_1_i, large_graph_1_d11, large_graph_1_d12, + large_graph_1_d21, large_graph_1_d22, large_graph_1_d23, large_graph_2_h21, large_graph_2_h22, large_graph_2_h23, large_graph_2_h24, large_graph_2_h11, large_graph_2_h12, + large_graph_2_i, large_graph_2_d11, large_graph_2_d12, large_graph_2_d21, large_graph_2_d22, large_graph_2_d23, crossed_graph_h21, crossed_graph_h22, 
crossed_graph_h11, + crossed_graph_i, crossed_graph_d11, crossed_graph_d12, crossed_graph_d21, crossed_graph_d22] diff --git a/verarbeitung/test/test_graphs_plan.pdf b/verarbeitung/test/test_graphs_plan.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c45b187bf3665e98fc84a4267bad2cbb9e3e64fa Binary files /dev/null and b/verarbeitung/test/test_graphs_plan.pdf differ diff --git a/verarbeitung/test/update_graph_unittest.py b/verarbeitung/test/update_graph_unittest.py new file mode 100644 index 0000000000000000000000000000000000000000..cf8261af6307585aa641c8a9c388e00ad6c7cadd --- /dev/null +++ b/verarbeitung/test/update_graph_unittest.py @@ -0,0 +1,73 @@ +import unittest + +import sys +from pathlib import Path + +sys.path.append("../") + +from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction +from verarbeitung.construct_new_graph.export_to_json import output_to_json +from verarbeitung.update_graph.import_from_json import input_from_json +from verarbeitung.update_graph.update_graph import update_graph + +class UpdatingTest(unittest.TestCase): + maxDiff = None + + # def test_import_from_json(self): + # nodes_old, edges_old = init_graph_construction(['doi_lg_1_i'],2,2,True) + # output_to_json(nodes_old, edges_old, test_var = True) + # nodes_new, edges_new = input_from_json('test_output.json') + # self.assertCountEqual(nodes_old,nodes_new) + # self.assertCountEqual(edges_old, edges_new) + + # def test_deleted_input_dois(self): + # nodes_old_single, edges_old_single = init_graph_construction(['doi_lg_1_i'],2,2,True) + # nodes_old_both, edges_old_both = init_graph_construction(['doi_lg_1_i','doi_lg_2_i'],2,2,True) + # output_to_json(nodes_old_both, edges_old_both, test_var=True) + # nodes_new_single, edges_new_single = update_graph(['doi_lg_1_i'], 'test_output.json', 2, 2, True) + # self.assertCountEqual(nodes_old_single,nodes_new_single) + # self.assertCountEqual(edges_old_single, edges_new_single) + + # 
nodes_old_single, edges_old_single = init_graph_construction(['doi_cg_i'],3,3,True) + # nodes_old_two, edges_old_two = init_graph_construction(['doi_lg_1_i','doi_cg_i'],3,3,True) + # nodes_old_three, edges_old_three = init_graph_construction(['doi_lg_1_i','doi_lg_2_i','doi_cg_i'],3,3,True) + + def test_new_height(self): + nodes_height_0, edges_height_0 = init_graph_construction(['doi_lg_1_i'],2,0,True) + nodes_height_1, edges_height_1 = init_graph_construction(['doi_lg_1_i'],2,1,True) + nodes_height_2, edges_height_2 = init_graph_construction(['doi_lg_1_i'],2,2,True) + + output_to_json(nodes_height_2, edges_height_2, 'new_height.json', True) + nodes_new_height_1, edges_new_height_1 = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 1, True) + self.assertCountEqual(nodes_height_1, nodes_new_height_1) + self.assertCountEqual(edges_height_1, edges_new_height_1) + + nodes_height_2, edges_height_2 = init_graph_construction(['doi_lg_1_i'],2,2,True) + output_to_json(nodes_height_2, edges_height_2, 'new_height.json', True) + nodes_new_height_0, edges_new_height_0 = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 0, True) + self.assertCountEqual(nodes_height_0, nodes_new_height_0) + self.assertCountEqual(edges_height_0, edges_new_height_0) + + + + + + + + + +def keep_only_dois(nodes): + ''' + :param nodes: input list of nodes of type Publication + :type nodes: List[Publication] + + gets nodes of type pub and return only their doi + ''' + doi_list = [] + for node in nodes: + doi_list.append(node.doi_url) + return doi_list + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/verarbeitung/test_output.json b/verarbeitung/test_output.json new file mode 100644 index 0000000000000000000000000000000000000000..840e19dbb925319d75057380b5dbf2c1176e139a --- /dev/null +++ b/verarbeitung/test_output.json @@ -0,0 +1 @@ +{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": 
"journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_2_i", "name": "title_lg_2_i", "author": ["contributor_lg_2_i"], "year": "date_lg_2_i", "journal": "journal_lg_2_i", "group": "Input", "depth": 0, "citations": 4}, {"doi": "doi_lg_2_d11", "name": "title_lg_2_d11", "author": ["contributor_lg_2_d11"], "year": "date_lg_2_d11", "journal": "journal_lg_2_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_2_d12", "name": "title_lg_2_d12", "author": ["contributor_lg_2_d12"], "year": "date_lg_2_d12", "journal": "journal_lg_2_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_2_h11", "name": "title_lg_2_h11", "author": ["contributor_lg_2_h11"], "year": "date_lg_2_h11", "journal": "journal_lg_2_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_2_h12", "name": "title_lg_2_h12", "author": ["contributor_lg_2_h12"], "year": "date_lg_2_h12", "journal": "journal_lg_2_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_cg_i", "name": "title_cg_i", "author": ["contributor_cg_i"], "year": "date_cg_i", "journal": "journal_cg_i", "group": "Citedby", "depth": 1, "citations": 3}, {"doi": "doi_lg_1_h21", "name": 
"title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h21", "name": "title_lg_2_h21", "author": ["contributor_lg_2_h21"], "year": "date_lg_2_h21", "journal": "journal_lg_2_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h22", "name": "title_lg_2_h22", "author": ["contributor_lg_2_h22"], "year": "date_lg_2_h22", "journal": "journal_lg_2_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h23", "name": "title_lg_2_h23", "author": ["contributor_lg_2_h23"], "year": "date_lg_2_h23", "journal": "journal_lg_2_h23", "group": "Citedby", "depth": 2, "citations": 1}, {"doi": "doi_lg_2_h24", "name": "title_lg_2_h24", "author": ["contributor_lg_2_h24"], "year": "date_lg_2_h24", "journal": "journal_lg_2_h24", "group": "Citedby", "depth": 2, "citations": 1}, {"doi": "doi_cg_h11", "name": "title_cg_h11", "author": ["contributor_cg_h11"], "year": "date_cg_h11", "journal": "journal_cg_h11", "group": "Citedby", "depth": 2, "citations": 2}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": 
"journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_2_d21", "name": "title_lg_2_d21", "author": ["contributor_lg_2_d21"], "year": "date_lg_2_d21", "journal": "journal_lg_2_d21", "group": "Reference", "depth": -2, "citations": 1}, {"doi": "doi_lg_2_d22", "name": "title_lg_2_d22", "author": ["contributor_lg_2_d22"], "year": "date_lg_2_d22", "journal": "journal_lg_2_d22", "group": "Reference", "depth": -2, "citations": 1}, {"doi": "doi_lg_2_d23", "name": "title_lg_2_d23", "author": ["contributor_lg_2_d23"], "year": "date_lg_2_d23", "journal": "journal_lg_2_d23", "group": "Reference", "depth": -2, "citations": 1}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_2_i", "target": "doi_lg_2_d11"}, {"source": "doi_lg_2_i", "target": "doi_lg_2_d12"}, {"source": "doi_lg_2_h11", "target": "doi_lg_2_i"}, {"source": "doi_lg_2_h12", "target": "doi_lg_2_i"}, {"source": "doi_cg_i", "target": "doi_lg_2_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_2_h21", "target": "doi_lg_2_h11"}, {"source": "doi_lg_2_h22", "target": "doi_lg_2_h11"}, {"source": "doi_lg_2_h23", "target": "doi_lg_2_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_h23"}, {"source": "doi_lg_2_h23", "target": "doi_lg_2_h24"}, {"source": "doi_lg_1_h23", "target": "doi_cg_i"}, {"source": "doi_cg_h11", "target": "doi_cg_i"}, {"source": "doi_lg_2_h11", "target": "doi_cg_i"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": 
"doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_2_d11", "target": "doi_lg_2_i"}, {"source": "doi_lg_2_d11", "target": "doi_lg_2_d21"}, {"source": "doi_lg_2_d12", "target": "doi_lg_2_d22"}, {"source": "doi_lg_2_d12", "target": "doi_lg_2_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_d12"}]} \ No newline at end of file diff --git a/verarbeitung/update_graph/Kanten_Vergleich.py b/verarbeitung/update_graph/Kanten_Vergleich.py new file mode 100644 index 0000000000000000000000000000000000000000..fbfa63e604680fab11bd7c50f0efed0c1ba4ae50 --- /dev/null +++ b/verarbeitung/update_graph/Kanten_Vergleich.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 + +def back_to_valid_edges(links_from_json, processed_input_list): + ''' + :param links_from_json: list of edges from the old graph + :type links_from_json: list + :param processed_input_list: list pubs still in graph + :type processed_input_list: list + + function that deletes edges, if one ore two including nodes are deleted nodes + ''' + list_of_valid_edges = links_from_json.copy() + + + #iterates over all edges from old graph + for edge in list_of_valid_edges: + + # counter for adjacent nodes + found_adj_nodes = 0 + for pub in processed_input_list: + # checks for both adjacent nodes of edge if pub is source/target node + for adj_node in edge: + # increases counter if adjacent node was found + if (adj_node == pub.doi_url): + found_adj_nodes += 1 + if (found_adj_nodes == 2): + break + + #removes the edge if less than 2 adjacent nodes found + if (found_adj_nodes < 2): + links_from_json.remove(edge) + +#Kanten_Menge_Ganz = [["doi_1","doi_2"],["doi_3","doi_4"],["doi_5","doi_6"]] +#Geloeschte = ["doi_2","doi_1","doi_4"] +#print(back_to_valid_edges(Kanten_Menge_Ganz,Geloeschte)) + +#Im Anschluss muss mit den Hinzugefügten Knoten Processing aufgerufen werden 
diff --git a/verarbeitung/update_graph/Knoten_Vergleich.py b/verarbeitung/update_graph/Knoten_Vergleich.py new file mode 100644 index 0000000000000000000000000000000000000000..55c10f2872ea3dcfb40aa30aac4b09aa01fe48a6 --- /dev/null +++ b/verarbeitung/update_graph/Knoten_Vergleich.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +from collections import Counter + +def doi_listen_vergleichen(alte,neue): + ''' + :param alte: list of dois from old graph + :type alte: list + :param neue: list of dois from new graph + :type neue: list + + function to calculate, which nodes from the old graph are deleted and which are added + ''' + dois_from_old_graph = alte #WICHTIG: Keine doppelten DOIs + dois_from_new_graph = neue + deleted_nodes = [] + common_nodes = [] + inserted_nodes = [] + all_dois = dois_from_old_graph + dois_from_new_graph + + for doi in all_dois: # iterates over the merged list of new and old dois + if ((all_dois.count(doi) == 2) & (doi not in common_nodes)): # If the doi occurs twice the node is in the old and the new graph + common_nodes.append(doi) #appends the doi to common ones, if its not alredy in it + elif ((doi in dois_from_old_graph) & (doi not in dois_from_new_graph)): #If the doi occurs once and it is from old graph it is a deleted node + deleted_nodes.append(doi) #appends the doi to deleted ones + elif ((doi in dois_from_new_graph) & (doi not in dois_from_old_graph)): #if the doi occurs ince and it is from new graph it is a inserted node + inserted_nodes.append(doi) #appends the doi to the inserted ones + return(common_nodes, inserted_nodes, deleted_nodes) + + +#Test Prints + #liste_1 = ["doi_1","doi_2","doi_3","doi_4","doi_5"] + #liste_2 = ["doi_1","doi_2","doi_3","doi_6","doi_7"] + #print("gemeinsame Elemente: ",doi_listen_vergleichen(liste_1,liste_2)[0]) + #print("hinzugefügte Elemente: ",doi_listen_vergleichen(liste_1,liste_2)[1]) + #print("gelöschte Elemente: ",doi_listen_vergleichen(liste_1,liste_2)[2]) + diff --git 
a/verarbeitung/update_graph/README.md b/verarbeitung/update_graph/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f680e9942cfb4e1b843fffae2c81c63af869e559 --- /dev/null +++ b/verarbeitung/update_graph/README.md @@ -0,0 +1,37 @@ +# Projekt CiS-Projekt 2021/22 + +Directory for functions to adjust a publication graph to updated input lists and changed citation/reference depths. For minimal use of the time consuming Input function, a reinterpretation of the exported json file is implemented. + +## Files in directory + +import_from_json.py + +- Stellt die alte Knoten-und Kantenmenge aus der Json Datei wieder her. + +Knoten_Vergleich.py + +- Überprüft welche Knoten neu hinzugekommen sind und welche enfternt wurden. + +Kanten_Vergleich.py + +- Stellt nach der Löschung eines Knotens wieder eine valide Kantenmenge her. + +update_graph_del.py + +- Führt die Löschung eines Knotens durch + +connect_new_input.py + +- Verbindet den alten Graphen aus der Json Datei mit den neuen DOIs zu dem neuen Graphen. + +update_graph.py + +- Überprüft welche Änderungen der Benutzer vorgenommen hat (Löschen oder hinzufügen von DOIs) + und führt diese aus. 
+ +## Authors +- Donna Löding +- Alina Molkentin +- Xinyi Tang +- Judith Große +- Malte Schokolowski \ No newline at end of file diff --git a/verarbeitung/update_graph/__init__.py b/verarbeitung/update_graph/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/verarbeitung/update_graph/connect_new_input.py b/verarbeitung/update_graph/connect_new_input.py new file mode 100644 index 0000000000000000000000000000000000000000..3f11e591db737bd11f2a261213973a2bf297ed1c --- /dev/null +++ b/verarbeitung/update_graph/connect_new_input.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +""" +Functions to update a graph representing citations between multiple ACS/Nature journals + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + +import sys +from pathlib import Path +from os import error +sys.path.append("../") + +from .import_from_json import input_from_json +from verarbeitung.construct_new_graph.initialize_graph import initialize_nodes_list, complete_inner_edges +from verarbeitung.construct_new_graph.add_citations_rec import add_citations +from verarbeitung.construct_new_graph.export_to_json import output_to_json + +def connect_old_and_new_input(json_file, new_doi_list, search_depth, search_height, test_var = False): + ''' + :param json_file: json file with old graph + :type json_file: json file + + :param new_doi_list: additional dois which has to be connected to the old graph + :type new_doi_list: list of strings + + :param search_depth: depth to search for references + :type search_depth: int + + :param search_height: height to search for citations + :type search_height: int + + :param test_var: variable to differenciate between test and url call + :type 
test_var: boolean + + connetcs the old graph and the new input dois to a complete new graph + ''' + global nodes, edges + nodes = [] + edges = [] + + nodes, edges = input_from_json(json_file) + + complete_changed_group_nodes(new_doi_list, search_depth, search_height, test_var) + + # initializes nodes/edges from input and gets a list with publication objects for citations and references returned + references_obj_list, citations_obj_list = initialize_nodes_list(new_doi_list,search_depth, search_height, test_var) + + # function calls to begin recursive processing up to max depth/height + add_citations(nodes, edges, citations_obj_list, 1, search_height, "Citation", test_var) + add_citations(nodes, edges, references_obj_list, 1, search_depth, "Reference", test_var) + + # adds edges between reference group and citation group of known publications + complete_inner_edges(test_var) + + # calls a skript to save nodes and edges of graph in .json file + output_to_json(nodes,edges, test_var) + + return(nodes, edges) + + +def complete_changed_group_nodes(new_doi_list, search_depth_max, search_height_max, test_var): + ''' + work in progress + ''' + changed_group_node_citations = [] + changed_group_node_references = [] + + for node in nodes: + if (node.group < 0) and (node.doi in new_doi_list): + node.group = "input" + + + elif (node.group > 0) and (node.doi in new_doi_list): + node.group = "input" + + diff --git a/verarbeitung/update_graph/delete_nodes_edges.py b/verarbeitung/update_graph/delete_nodes_edges.py new file mode 100644 index 0000000000000000000000000000000000000000..0e4571a15be9628d15a892629688d70ba5f9abf3 --- /dev/null +++ b/verarbeitung/update_graph/delete_nodes_edges.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +""" +Functions to remove publications/links from nodes/edges list, if they can no longer be reached + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" 
+__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + +import sys +from pathlib import Path +sys.path.append("../../") + +from .Kanten_Vergleich import back_to_valid_edges + + +def search_ref_graph_rec(pub): + ''' + :param pub: publication whose references are followed; matching publications from input_obj_list get appended to usable_nodes + :type pub: Publication + + function that appends nodes of group "reference" to list usable_nodes, if they are reachable from input nodes; relies on the module globals input_obj_list and usable_nodes which delete_nodes_and_edges sets before calling + ''' + for reference in pub.references: + for ref_pub in input_obj_list: + if ((reference.doi_url == ref_pub.doi_url) and (ref_pub not in usable_nodes)): + usable_nodes.append(ref_pub) + + # detects a two-node cycle (the reference is also listed among the citations of pub) and skips the recursion in that case to avoid a recursion error + not_in_citations = True + for citation in pub.citations: + if (reference.doi_url == citation.doi_url): + not_in_citations = False + break + if (not_in_citations): + search_ref_graph_rec(ref_pub) + + +def search_cit_graph_rec(pub): + ''' + :param pub: publication whose citations are followed; matching publications from input_obj_list get appended to usable_nodes + :type pub: Publication + + function that appends nodes of group "citation" to list usable_nodes, if they are reachable from input nodes; relies on the module globals input_obj_list and usable_nodes which delete_nodes_and_edges sets before calling + ''' + for citation in pub.citations: + for cit_pub in input_obj_list: + if ((citation.doi_url == cit_pub.doi_url) and (cit_pub not in usable_nodes)): + usable_nodes.append(cit_pub) + + # detects a two-node cycle (the citation is also listed among the references of pub) and skips the recursion in that case to avoid a recursion error + not_in_references = True + for reference in pub.references: + if (citation.doi_url == reference.doi_url): + not_in_references = False + break + if (not_in_references): + search_cit_graph_rec(cit_pub) + + + +def delete_nodes_and_edges(input_list, common_nodes, old_edges_list): + ''' + :param input_list: list of publications to get reduced + :type input_list: List[Publication] + + :param common_nodes: list of input dois which are in old and new input call + :type common_nodes: List[String] + + :param old_edges_list: list of links between publications from old call + :type old_edges_list: 
List[List[String,String]] + + function to start recursive node removal for references and citations and to change edge list to valid state; returns the tuple (usable_nodes, valid_edges) + ''' + global usable_nodes, input_obj_list + usable_nodes = [] + input_obj_list = input_list + + # starts a search from every common input node (matched by doi_url) and adds the publications found to usable_nodes + for common in common_nodes: + for pub in input_obj_list: + if (common == pub.doi_url): + usable_nodes.append(pub) + search_ref_graph_rec(pub) + search_cit_graph_rec(pub) + + valid_edges = back_to_valid_edges(old_edges_list, usable_nodes) + + return(usable_nodes, valid_edges) \ No newline at end of file diff --git a/verarbeitung/update_graph/import_from_json.py b/verarbeitung/update_graph/import_from_json.py new file mode 100644 index 0000000000000000000000000000000000000000..92d9b02e2c225eaf2a5cd2c3607f080ee9c231a9 --- /dev/null +++ b/verarbeitung/update_graph/import_from_json.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +""" +Functions to read old json files to recreate old graph structure + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + +import json +import sys +sys.path.append("../") + +from input.publication import Publication, Citation + + + +def create_pubs_from_json(input_dict): + ''' + :param input_dict: dictionary read from old graph json file + :type input_dict: dictionary + + creates one Publication per entry of input_dict["nodes"] and appends it to the global list list_of_nodes_py (set up by input_from_json) + ''' + + #iterates over the list of nodes + for node in input_dict["nodes"]: + #creates an object of class Publication for the node + + pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], []) + pub.group = node["depth"] + #appends the object to the global list of nodes + list_of_nodes_py.append(pub) + +def 
add_ref_and_cit_to_pubs(input_dict): + ''' + :param input_dict: dictionary read from old graph json file + :type input_dict: dictionary + + adds references and citations to the publications in the global list list_of_nodes_py and fills the global list list_of_edges_py + ''' + + # iterates over the list of edges + for edge in input_dict["links"]: + for source in list_of_nodes_py: + for target in list_of_nodes_py: + + # when the publications with the source and target doi of the edge are found, adds them as reference/citation to each other + if ((source.doi_url == edge["source"]) and (target.doi_url == edge["target"])): + new_reference = Citation(target.doi_url, target.title, target.journal, target.contributors, "Reference") + source.references.append(new_reference) + + new_citation = Citation(source.doi_url, source.title, source.journal, source.contributors, "Citation") + target.citations.append(new_citation) + + # adds edge as [source doi, target doi] to the global list of edges + list_of_edges_py.append([edge["source"],edge["target"]]) + + +def input_from_json(json_file): + ''' + :param json_file: path of the json file of the old graph + :type json_file: String + + retrieves information from old json file to be reused for new graph construction; returns the tuple (list_of_nodes_py, list_of_edges_py) + ''' + + # creates global lists for nodes and edges + global list_of_nodes_py, list_of_edges_py + list_of_nodes_py = [] + list_of_edges_py = [] + + #opens the json file and saves content in dictionary + with open(json_file,'r') as file: + input_dict = json.load(file) + + # creates nodes of class Publication from input json file + create_pubs_from_json(input_dict) + + # adds references and citations to publications and creates edges + add_ref_and_cit_to_pubs(input_dict) + + + return(list_of_nodes_py, list_of_edges_py) \ No newline at end of file diff --git a/verarbeitung/update_graph/update_depth.py b/verarbeitung/update_graph/update_depth.py new file mode 100644 index 0000000000000000000000000000000000000000..179a9988c138f4eb6122749eb1bedd3507370d00 --- /dev/null +++ b/verarbeitung/update_graph/update_depth.py @@ -0,0 +1,138 @@ +# -*- coding: utf-8 -*- +""" +Functions to update the citation 
depth of recursive graph construction + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + +import sys +sys.path.append("../../") + +from verarbeitung.construct_new_graph.add_citations_rec import add_citations +from verarbeitung.construct_new_graph.initialize_graph import complete_inner_edges +from .Kanten_Vergleich import back_to_valid_edges + + +def reduce_max_height(max_height): + ''' + :param max_height: new maximum height to reduce publications in publication list to + :type max_height: int + + function to remove all publications which are not in new maximum height threshold + ''' + input_list_del = processed_input_list.copy() + for pub in input_list_del: + if (pub.group > 0): + if (pub.group > max_height): + processed_input_list.remove(pub) + +def reduce_max_depth(max_depth): + ''' + :param max_depth: new maximum depth to reduce publications in publication list to + :type max_depth: int + + function to remove all publications which are not in new maximum depth threshold + ''' + input_list_del = processed_input_list.copy() + for pub in input_list_del: + if (pub.group < 0): + if (abs(pub.group) > max_depth): + processed_input_list.remove(pub) + + + +def get_old_height_depth(): + ''' + function to get old max height and max depth from previous construction call + ''' + max_height = 0 + max_depth = 0 + for pub in processed_input_list: + if (pub.group < 0): + max_depth = max(max_depth, abs(pub.group)) + if (pub.group > 0): + max_height = max(max_height, pub.group) + return(max_height, max_depth) + +def get_old_max_references(old_depth): + ''' + :param old_depth: old maximum depth to search for citations + :type old_depth: int + + function to get references for new recursive levels + ''' + old_max_references = [] + for pub in 
processed_input_list: + if (abs(pub.group) == old_depth): + old_max_references.append(pub.references) + return(old_max_references) + +def get_old_max_citations(old_height): + ''' + :param old_height: old maximum height to search for citations + :type old_height: int + + function to get citations for new recursive levels + ''' + old_max_citations = [] + for pub in processed_input_list: + if (abs(pub.group) == old_height): + old_max_citations.append(pub.citations) + return(old_max_citations) + +def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var): + ''' + :param obj_input_list: input list of publications of type Publication from update_graph + :type obj_input_list: List[Publication] + + :param input_edges: list of publications from update_graph + :type input_edges: List[Publication] + + :param new_depth: new maximum depth to search for references + :type new_depth: int + + :param new_height: new maximum height to search for citations + :type new_height: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + function to adjust old publication search depth to update call + ''' + + global processed_input_list, valid_edges + processed_input_list = obj_input_list + valid_edges = input_edges + + old_height, old_depth = get_old_height_depth() + + # removes publications and links from recursion levels which aren't needed anymore + if (old_depth > new_depth): + reduce_max_depth(new_depth) + elif (old_height > new_height): + reduce_max_height(new_height) + + + # adds publications and links for new recursion levels + elif (old_depth < new_depth): + old_max_references = get_old_max_references() + add_citations(processed_input_list, valid_edges, old_max_references, old_depth+1, new_depth, "Reference", test_var) + elif (old_height < new_height): + old_max_citations = get_old_max_citations() + add_citations(processed_input_list, valid_edges, old_max_citations, old_height+1, new_height, "Citation", 
test_var) + back_to_valid_edges(valid_edges, processed_input_list) + + # adds edges between reference group and citation group of known publications + complete_inner_edges() + + + + diff --git a/verarbeitung/update_graph/update_graph.py b/verarbeitung/update_graph/update_graph.py new file mode 100644 index 0000000000000000000000000000000000000000..11a9eda9fb2638fdb4fe0a9560439a297e360612 --- /dev/null +++ b/verarbeitung/update_graph/update_graph.py @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +""" +Functions to update a graph representing citations between multiple ACS/Nature journals + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + +import sys + +sys.path.append("../../") + +from input.publication import Publication +from verarbeitung.get_pub_from_input import get_pub +from .Knoten_Vergleich import doi_listen_vergleichen +from .delete_nodes_edges import delete_nodes_and_edges +from .connect_new_input import connect_old_and_new_input +from .update_depth import update_depth +from .import_from_json import input_from_json + + +def get_old_input_dois(old_obj_input_list): + ''' + :param old_obj_input_list: list of publications retrieved from old json file + :type old_obj_input_list: List[Publication] + + function to return pub dois for old publications of group input retrieved from json file + ''' + + # new list to save doi_url for each old publication of group input + old_input_dois = [] + for pub in old_obj_input_list: + if (pub.group == 0): + old_input_dois.append(pub.doi_url) + return old_input_dois + +def get_new_input_dois(new_input, test_var): + ''' + :param new_input: input list of doi from UI + :type new_input: list of strings + + :param test_var: variable to differenciate between test and url call + :type test_var: 
boolean + + function to return pub dois for input urls + ''' + + # new list to save doi_url for each new input url + new_input_dois = [] + for new_node in new_input: + # retrieves information and adds to new list if successful + pub = get_pub(new_node, test_var) + if (type(pub) != Publication): + print(pub) + continue + + new_input_dois.append(pub.doi_url) + return(new_input_dois) + + +def update_graph(new_doi_input_list, json_file, search_depth, search_height, test_var = False): + ''' + :param new_doi_input_list: input list of doi from UI + :type new_doi_input_list: List[String] + + :param old_obj_input_list: list of publications retrieved from old json file + :type old_obj_input_list: List[Publication] + + :param old_edges_list: list of links between publications retrieved from old json file + :type old_edges_list: List[List[String,String]] + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + function to compare old and new input, start node/edge removal and to return updated sets of nodes and edges + ''' + + # gets information from previous cunstruction call + old_obj_input_list , old_edges_list = input_from_json(json_file) + print(type(old_edges_list[1])) + + # one global list to save the process of removing unneeded publications and one to save valid edges + global processed_list, valid_edges + processed_list = old_obj_input_list + valid_edges = old_edges_list + + + # get dois from lists to compare for differences + old_doi_input_list = get_old_input_dois(old_obj_input_list) + new_doi_input_list = get_new_input_dois(new_doi_input_list, test_var) + + # retrieve which publications are already known, removed, inserted + common_nodes, inserted_nodes, deleted_nodes = doi_listen_vergleichen(old_doi_input_list, new_doi_input_list) + + # deletes publications and edges from node_list if publications can no longer be reached + if (len(deleted_nodes) > 0): + processed_list, valid_edges = 
delete_nodes_and_edges(processed_list, common_nodes, old_edges_list) + + update_depth(processed_list, valid_edges, search_depth, search_height, test_var) + + if (len(inserted_nodes) > 0): + connect_old_and_new_input(json_file, inserted_nodes, search_depth, search_height, test_var) + + return(processed_list, valid_edges)