diff --git a/.citation_parser_ui.py.swp b/.citation_parser_ui.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..38c0d61d4724d853b03e358ed4916b537560e72f Binary files /dev/null and b/.citation_parser_ui.py.swp differ diff --git a/assets/cn.js b/assets/cn.js new file mode 100644 index 0000000000000000000000000000000000000000..309678b42520cec11d77cf6a3b4be2925cb8bede --- /dev/null +++ b/assets/cn.js @@ -0,0 +1,548 @@ +/** +* creates a new zoom behavior +*/ +var zoom = d3.zoom().on("zoom", handle_zoom); + +/** +* creates svg object and associated attributes +* applies the zoom behavior to svg +*/ +var svg = d3.select("svg.graph") + .call(zoom), +width = svg.attr("width"), +height = svg.attr("height"), +perc; + +/** +* scale functions that return y coordinate/color of node depending on group +*/ +var color = d3.scaleOrdinal() + .domain(["height", "input", "depth"]) + .range([' #01d7c0', ' #8b90fe ', ' #a15eb2 ']), +y_scale = d3.scaleOrdinal() + .domain(["height", "input", "depth"]) + .range([0, 200, 400]), +to_remove; + +/** +* creates node object and (default) radius +*/ +var node, +r = 10; + +/** +* creates link object +*/ +var link; + +/** +* creates a background with a click functionality +*/ +var rect = svg.append("rect") + .attr("x", 0) + .attr("y", 0) + .attr("height", height) + .attr("width", width) + .style("fill", 'white') + .on('click', click_rect); + +/** +* creates svg object (legend) and associated attributes +*/ +var svg_legend = d3.select("svg.legendsvg"), +legend_position = [65,95,125], +arrow_legend_position = [0,25], +arrow_group_names = ["citation","self-citation"], +group_names = ["cited by","input","reference"], +line_type = d3.scaleOrdinal() + .domain(["line","dotted"]) + .range([("8,0"),("8,8")]), +text_info = '', +text_abstract = ''; + +var legend = svg_legend.selectAll(".legend") + .data(legend_position) + .enter() + .append("g") + .attr("class","legend") + .attr("transform", function(d,i) {return "translate(0," + d + ")"; }); + +legend.append("text") + .attr("x", 80) + .attr("y", 0) + .attr("dy", ".35em") + .style("text-anchor", "start") + .text(function(d,i) {return group_names[i]}); + +legend.append("circle") + .attr("r", r) + .attr("cx",30-r) + .style("fill", color); + +var legend_arrow = svg_legend.selectAll(".legendarr") + .data(arrow_legend_position) + .enter() + .append("g") + .attr("class","legendarr") + .attr("transform", function(d) { return "translate(0," + d + ")"; }); + +legend_arrow.append("line") + .attr("x1", 10) + .attr("x2", 50) + .attr("y1", 10) + .attr("y2", 10) + .style("stroke-dasharray",line_type) + .style("stroke", '#999') + .style("stroke-width", "1px") + .style('pointer-events', 'none') + .attr('marker-end',update_marker('#999',this)); + +legend_arrow.append("text") + .attr("x", 80) + .attr("y", 10) + .attr("dy", ".35em") + .style("text-anchor", "start") + .text(function(d,i){return arrow_group_names[i]}); + +/** +* creates a new simulation +* updates the positions of the links and nodes when the + state of the layout has changed (simulation has advanced by a tick) +*/ +var simulation = d3.forceSimulation() + .force("link", d3.forceLink().id(function(d) {return d.doi;}).distance(50).strength(function(d) { + if (d.group == "input") {return 0;} + else {return 5;} + })) + .force("collide", d3.forceCollide(function(d) { + if (d.group == "input") {return 100;} + else {return 65;} + }).strength(0.5)) + .force("charge", d3.forceManyBody().strength(0.001)) + .force("center", d3.forceCenter(width/2, height/2+20)) + .force("yscale", d3.forceY().strength(function(d) { + if (d.group == "input") {return 1000;} + else {return 50;} + }).y(function(d) {return y_scale(d.group)})) + .alpha(0.005) + .on("end", zoom_to); + +/** +* creates group element +*/ +var g = svg.append("g") + .attr("class", "everything") + +/** +* loads JSON data and calls the update function +*/ +d3.json("json_text.json").then(function(graph) { + update(graph.links, graph.nodes); +}) + +/** +* calls update functions for links and nodes +* adds the nodes, links and tick functionality to the simulation +* @param {object} nodes - nodes +* @param {object} links - links +*/ +function update(links, nodes) { + update_links(links); + update_nodes(nodes); + + simulation + .nodes(nodes) + .on("tick", handle_tick); + simulation.force("link") + .links(links); + + link.attr('marker-end', function(d) {return update_marker("#999", d.target);}) + .style("stroke-dasharray",function(d){return self_citation(d.source,d.target)? ("8,8"): ("1,0")}); +} + +/** +* initializes and shows links +* @param {object} links - links +*/ +function update_links(links) { + link = g.append("g") + .selectAll(".link") + .data(links) + .enter() + .append("line") + .style("stroke-width", "1px") + .style("stroke", "#999") + .attr("class", "link"); +} + +/** +* initializes and shows nodes with circles, texts and a click functionality +* creates a new drag behavior and applies it to the circles +* @param {object} nodes - nodes +*/ +function update_nodes(nodes) { + node = g.selectAll(".node") + .data(nodes) + .enter() + .append("g") + .attr("class", "node") + .call(d3.drag() + .on("start", start_drag) + .on("drag", dragged) + ); + + node.append("circle") + .attr("class", "circle") + .attr("r", function(d) {return 1.5*r+d.citations*0.05}) + .style("fill", function(d){ return color(d.group)}) + .on('click', click_node); + + node.append("text") + .attr("class", "text") + .style("font-size", "15px") + .style('pointer-events', 'auto') + .text(function (d) {const first_author=d.author[0].split(" ") + return first_author[first_author.length-1];}) + .on('click', click_node); +} + +/** +* creates arrowhead and returns its url +* @param {string} color - color of arrowhead +* @param {string} target - target node +*/ +function update_marker(color, target) { + var radius = 1.5*r+target.citations*0.05; + svg.append('defs').append('marker') + .attr('id',color.replace("#", "")+radius) + .attr('viewBox','-0 -5 10 10') + .attr('refX',radius+9.5) + .attr('refY',0) + .attr('orient','auto') + .attr('markerWidth',10) + .attr('markerHeight',15) + .attr('xoverflow','visible') + .append('svg:path') + .attr('d', 'M 0,-5 L 10 ,0 L 0,5') + .attr('fill', color) + .style('stroke','none'); + return "url(" + color + radius + ")"; +}; + +/** +* sets color of circle and its links to black and removes the previous highlights +* displays overview info of node in textbox +* @param {object} node - node +*/ +function click_node(node) { + d3.select(this.parentNode).raise(); + fix_nodes(node); + if(to_remove){ + d3.select(to_remove).selectAll(".circle").style("stroke","none") + } + to_remove = this.parentNode; + d3.select(this.parentNode).selectAll(".circle").style("stroke","black") + mark_link(node) + textbox_content(node) + reset_button_highlight() + highlight_button("overview") +} + +/** +* removes the highlights of the circles and their links +*/ +function click_rect() { + fix_nodes(node); + d3.selectAll(".circle").style("stroke", "none") + d3.selectAll(".link") + .style("stroke", "#999") + .attr('marker-end', function(d) {return update_marker('#999', d.target);}) + text_abstract=''; + text_info=''; + reset_button_highlight() + document.getElementById('textbox').innerHTML = "Click node"; +} + +/** +* returns true if journals have a common author (self-citation) +* @param {object} source - node +* @param {object} target - node +*/ +function self_citation(source,target) { + return source.author.some(item=>target.author.includes(item)) +} + +/** +* sets color of link (line and arrowhead) to black if it is directly connected to node +* and to grey otherwise +* @param {object} node - node +*/ +function mark_link(node) { + d3.selectAll(".link") + .style("stroke", function(o) { + return is_link_for_node(node, o) ? "black" : "#999";}) + .attr('marker-end', function(o) { + return is_link_for_node(node, o) ? update_marker('#000000', o.target) : update_marker('#999', o.target);}) +} + +/** +* returns true if link is directly connected to node and false if it is not +* @param {object} node - node +* @param {object} link - link +*/ +function is_link_for_node(node, link) { + return link.source.index == node.index || link.target.index == node.index; +} + +/** +* saves text for overview and abstract of node +* outputs node info to textbox +* @param {object} node - node +*/ +function textbox_content(node) { + text_info = "Title:" + '</br>' + node.name + + '</br>' +'</br>'+"Author:"+ '</br>' +node.author+'</br>'+'</br>'+"Date:"+'</br>' + +node.year+'</br>'+'</br>'+"Journal:"+'</br>'+node.journal+'</br>'+'</br>'+"doi:" + +'</br>'+'<a href="'+node.doi+ '">'+node.doi+'</a>'+'</br>'+'</br>'+"Citations:" + +'</br>'+node.citations; + text_abstract = node.abstract; + document.getElementById('textbox').innerHTML = text_info; +} + +/** +* sets color of btn to dark gray +* @param {object} btn - button +*/ +function highlight_button(btn) { + reset_button_highlight(); + document.getElementById(btn).style.background = "#CACACA"; +} + +/** +* sets color of all buttons to default light gray +*/ +function reset_button_highlight() { + document.getElementById("overview").style.background = ''; + document.getElementById("abstract").style.background = ''; +} + +/** +* displays abstract in textbox if a is true, overview text otherwise +* @param {bool} a- bool +*/ +function display_abstract(a) { + if (text_abstract == '' && text_info == '') { + document.getElementById('textbox').innerHTML="Click node"; + } + else { + if (a == true) { + document.getElementById('textbox').innerHTML = text_abstract; + } + else { + document.getElementById('textbox').innerHTML = text_info; + } + } +} + +/** +* updates the positions of the links and nodes +*/ +function handle_tick() { + link.attr("x1", function (d) {return d.source.x;}) + .attr("y1", function (d) {return d.source.y;}) + .attr("x2", function (d) {return d.target.x;}) + .attr("y2", function (d) {return d.target.y;}); + node.attr("transform", function (d) {return "translate(" + d.x + ", " + d.y + ")";}); +} + +/** +* initializes the dragging of the node +* @param {object} node - node +*/ +function start_drag(node) { + d3.select(this).raise(); + if (!d3.event.active) + simulation.alphaTarget(0.3).restart() + node.fx = node.x; + node.fy = node.y; + fix_nodes(node); +} + +/** +* applies the dragging to the node +* @param {object} node - node +*/ +function dragged(node) { + node.fx = d3.event.x; + node.fy = d3.event.y; + fix_nodes(node); +} + +/** +* fix positions of all nodes except for the current node +* @param {object} this_node - node +*/ +function fix_nodes(this_node) { + node.each(function(d) { + if (this_node != d) { + d.fx = d.x; + d.fy = d.y; + } + }); +} + +/** +* applies the transformation (zooming or dragging) to the g element +*/ +function handle_zoom() { + d3.select('g').attr("transform", d3.event.transform); +} + +/** +* transforms svg so that the zoom is adapted to the size of the graph +*/ +function zoom_to() { + node_bounds = d3.selectAll("svg.graph").node().getBBox(); + svg_bounds = d3.select("rect").node().getBBox(); + + perc_x = width/(node_bounds.width+100); + perc_y = height/(node_bounds.height+100); + perc = d3.min([perc_x, perc_y]) + + d3.select('svg') + .call(zoom.scaleBy, perc); +} + +/** +* transforms svg so that the zoom and drag is reset +*/ +function reset_view() { + d3.select('svg') + .call(zoom.scaleTo, 1) + d3.select('svg') + .call(zoom.translateTo, 0.5 * width, 0.5 * height); + d3.select('svg') + .call(zoom.scaleBy, perc); +} + +/** +* save svg as png +*/ +function save_svg(){ + var svgString = get_svg_string(svg.node()); + svg_string_to_image(svgString, 2*width, 2*height, 'png', save); // passes Blob and filesize String to the callback + + function save( dataBlob, filesize ){ + saveAs(dataBlob, 'D3 vis exported to PNG.png'); // FileSaver.js function + } +}; + +/** +* generate svgString +* @param {object} svgNode - node +*/ +function get_svg_string(svgNode) { + svgNode.setAttribute('xlink', 'http://www.w3.org/1999/xlink'); + var cssStyleText = get_css_styles(svgNode); + append_css(cssStyleText, svgNode); + + var serializer = new XMLSerializer(); + var svgString = serializer.serializeToString(svgNode); + svgString = svgString.replace(/(\w+)?:?xlink=/g, 'xmlns:xlink='); // Fix root xlink without namespace + svgString = svgString.replace(/NS\d+:href/g, 'xlink:href'); // Safari NS namespace fix + + return svgString; + + function get_css_styles(parentElement) { + var selectorTextArr = []; + + // Add Parent element Id and Classes to the list + selectorTextArr.push('#' + parentElement.id); + for (var c = 0; c < parentElement.classList.length; c++) + if (!contains('.'+parentElement.classList[c], selectorTextArr)) + selectorTextArr.push('.'+parentElement.classList[c]); + + // Add Children element Ids and Classes to the list + var nodes = parentElement.getElementsByTagName("*"); + for (var i = 0; i < nodes.length; i++) { + var id = nodes[i].id; + if (!contains('#'+id, selectorTextArr)) + selectorTextArr.push('#' + id); + + var classes = nodes[i].classList; + for (var c = 0; c < classes.length; c++) + if (!contains('.'+classes[c], selectorTextArr)) + selectorTextArr.push('.'+classes[c]); + } + + // Extract CSS Rules + var extractedCSSText = ""; + for (var i = 0; i < document.styleSheets.length; i++) { + var s = document.styleSheets[i]; + + try { + if(!s.cssRules) continue; + } catch(e) { + if(e.name !== 'SecurityError') throw e; // for Firefox + continue; + } + + var cssRules = s.cssRules; + for (var r = 0; r < cssRules.length; r++) { + if (contains(cssRules[r].selectorText, selectorTextArr)) + extractedCSSText += cssRules[r].cssText; + } + } + + + return extractedCSSText; + + function contains(str,arr) { + return arr.indexOf(str) === -1 ? false : true; + } + + } + + function append_css(cssText, element) { + var styleElement = document.createElement("style"); + styleElement.setAttribute("type","text/css"); + styleElement.innerHTML = cssText; + var refNode = element.hasChildNodes() ? element.children[0] : null; + element.insertBefore(styleElement, refNode); + } +} + +/** +* convert svgString to image and export it +* @param {object} svgString - svgString +* @param {object} width - width of image +* @param {object} height - height of image +* @param {object} format - format to save image in +* @param {object} callback - callback function +*/ +function svg_string_to_image( svgString, width, height, format, callback ) { + var format = format ? format : 'png'; + + var imgsrc = 'data:image/svg+xml;base64,'+ btoa(unescape(encodeURIComponent(svgString))); // Convert SVG string to data URL + + var canvas = document.createElement("canvas"); + var context = canvas.getContext("2d"); + + canvas.width = width; + canvas.height = height; + + var image = new Image(); + image.onload = function() { + context.clearRect(0, 0, width, height); + context.drawImage(image, 0, 0, width, height); + + canvas.toBlob(function(blob) { + var filesize = Math.round(blob.length/1024) + ' KB'; + if (callback) callback(blob, filesize); + }); + + }; + + image.src = imgsrc; +} + diff --git a/assets/index.html b/assets/index.html new file mode 100644 index 0000000000000000000000000000000000000000..78560da48c01f39debc7a810b58a60672c0995dc --- /dev/null +++ b/assets/index.html @@ -0,0 +1,107 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="utf-8"> + + <!-- style specifications for button and div elements --> + <style type="text/css"> + button { + width: 100px; + height:20px; + display: flex; + justify-content: center; + position: absolute; + left: 455px; + top: 575px; + transition-duration: 0.4s; + border-radius:3px; + border:1px solid #909090; + } + + .button:hover { + background-color: #CACACA; + } + + button.resetZoom { + margin-left: 110px; + } + + button.save { + margin-left: 220px; + } + + button.abstract { + width:146px; + position:absolute; + top: 181px; + left: 1114px; + border-radius:0; + border:1px solid #909090; + } + + button.overview { + width:147px; + position:absolute; + display:inline-block; + top: 181px; + left: 968px; + border-radius:0; + border:1px solid #909090; + } + + div.legendbox { + width:270px; + height:170px; + padding: 10px; + /*border: 1px solid #999;*/ + position: absolute; + top: 10px; + left: 968px; + display: inline-block; + margin: 0; + } + + div.textbox { + width:270px; + min-height:200px; + max-height:370px; + padding: 10px; + border: 1px solid #999; + position: absolute; + top: 200px; + left: 968px; + display: inline-block; + overflow-y: scroll; + margin: 0; + } + </style> +</head> + +<body> + <!-- graph --> + <svg class="graph" width="960" height="560"></svg> + + <!-- legend --> + <div class="legendbox"> <svg class="legendsvg"></svg></div> + + <!-- textbox --> + <div class="textbox" id = "textbox">Click node</div> + <button id="overview" class="overview" onclick='display_abstract(false), highlight_button("overview")'>Overview</button> + <button id="abstract" class="abstract" onclick='display_abstract(true), highlight_button("abstract")'>Abstract</button> + + <!-- buttons --> + <button onclick="location.reload()">Reload Graph</button> + <button class="resetZoom" onclick="reset_view()">Reset View</button> + <button class="save" onclick="save_svg()">Save</button> + + <!-- D3 (version 5) --> + <script src="https://d3js.org/d3.v5.min.js"></script> + + <!-- scripts to save svg element as png --> + <script src="https://cdn.rawgit.com/eligrey/canvas-toBlob.js/f1a01896135ab378aa5c0118eadd81da55e698d8/canvas-toBlob.js"></script> + <script src="https://cdn.rawgit.com/eligrey/FileSaver.js/e9d941381475b5df8b7d7691013401e171014e89/FileSaver.min.js"></script> + + <!-- javascript for force-directed graph --> + <script type="text/javascript" id="cn" src="cn.js"></script> +</body> +</html> \ No newline at end of file diff --git a/assets/json_text.json b/assets/json_text.json new file mode 100644 index 0000000000000000000000000000000000000000..b2e4c5f1d417d21f23729da7e833989a82a9eeaf --- /dev/null +++ b/assets/json_text.json @@ -0,0 +1 @@ +{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.6b00709", "name": "Matched Molecular Series: Measuring SAR Similarity", "author": ["Emanuel S. R. Ehmki", "Christian Kramer"], "year": "May 1, 2017", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 5}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00269", "name": "Matched Molecular Series Analysis for ADME Property Prediction", "author": ["Mahendra Awale", "Sereina Riniker", "Christian Kramer"], "year": "May 5, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 6}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00290", "name": "Identification of Bioisosteric Substituents by a Deep Neural Network", "author": ["Peter Ertl"], "year": "June 15, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 2, "citations": 2}], "links": [{"source": "https://doi.org/10.1021/acs.jcim.0c00269", "target": "https://doi.org/10.1021/acs.jcim.6b00709"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00290", "target": "https://doi.org/10.1021/acs.jcim.0c00269"}]} \ No newline at end of file diff --git a/citation_parser_ui.py b/citation_parser_ui.py index 453ead98cdb0543e35c58a3e0adfd734e34067bc..7431a912ca0f5a6d4bd31a269914aa034e083348 100644 --- a/citation_parser_ui.py +++ b/citation_parser_ui.py @@ -1,3 +1,4 @@ +import os import base64 import re import dash @@ -8,6 +9,7 @@ from dash.dependencies import Input, Output, State from dash.exceptions import PreventUpdate from input.interface import InputInterface import input.publication +from verarbeitung.process_main import Processing app = dash.Dash(__name__) @@ -76,7 +78,11 @@ app.layout = html.Div([ value=[]) ]), # Layer 4: For the Graph - html.Div([ + html.Div( + [html.Iframe( + src="assets/index.html", + style={"height": "600px", "width": "100%"}, + ), html.Div(id='test-output') ]) ]) @@ -125,6 +131,7 @@ def update_input_checklist(input_value,btn1,btn2,filecontents,all_inputs, # if clear-all-button was pressed: if 'clear-all-button' in changed_id: + os.remove('assets/json_text.json') return list(),list(),'','' # if clear-selected-button was pressed: @@ -147,7 +154,7 @@ def update_input_checklist(input_value,btn1,btn2,filecontents,all_inputs, i = InputInterface() pub = i.get_pub_light(input_value) except Exception as err: - return options,selected_inputs,'','{}'.format(err) + return all_inputs,selected_inputs,'','{}'.format(err) # Creates a more readable string to display in the checklist rep_str = pub.contributors[0] + ',' + pub.journal + \ ',' + pub.publication_date @@ -240,16 +247,8 @@ def generate_output(n_clicks,all_inputs,selected_inputs, raise PreventUpdate elif 'Update Automatically' in additional_options \ or 'start-button' in changed_id: - s = '' - for i in range(len(all_inputs)): - x = all_inputs[i]['value'] - if x in selected_inputs: - s += x*(abs(int(forward_depth)-int(backward_depth))) - else: - s += x*(int(forward_depth)+int(backward_depth)) - return s - else: - raise PreventUpdate + input_links = [x['value'] for x in all_inputs] + Processing(input_links,int(forward_depth),int(backward_depth),'assets/json_text.json') if __name__ == '__main__': - app.run_server(debug=True) + app.run_server(debug=False) diff --git a/input/get/__pycache__/__init__.cpython-38.pyc b/input/get/__pycache__/__init__.cpython-38.pyc index a1e24ad908499dfeb45afebf60601d0704dbbbcb..f12fb9c33e874aa1c55d94725f867ac6503d407e 100644 Binary files a/input/get/__pycache__/__init__.cpython-38.pyc and b/input/get/__pycache__/__init__.cpython-38.pyc differ diff --git a/input/get/__pycache__/acs.cpython-38.pyc b/input/get/__pycache__/acs.cpython-38.pyc index f3585b798ebbeab22676f00cc409ef48cd6b6019..121d1856184f02775d5bb84d081dcd91a648ea67 100644 Binary files a/input/get/__pycache__/acs.cpython-38.pyc and b/input/get/__pycache__/acs.cpython-38.pyc differ diff --git a/input/get/__pycache__/journal_fetcher.cpython-38.pyc b/input/get/__pycache__/journal_fetcher.cpython-38.pyc index 3be8ddceae79b7f027f194a2c4cec6c1fe9575a5..72a774c45762b3d4cb37a7089f4d8275dab0e533 100644 Binary files a/input/get/__pycache__/journal_fetcher.cpython-38.pyc and b/input/get/__pycache__/journal_fetcher.cpython-38.pyc differ diff --git a/input/get/__pycache__/nature.cpython-38.pyc b/input/get/__pycache__/nature.cpython-38.pyc index 6008e587da3dab81c3c79bb42345b63f0226ebbd..3afb8454a05c8afe9b1f2aba3f534d66cbd8ae3a 100644 Binary files a/input/get/__pycache__/nature.cpython-38.pyc and b/input/get/__pycache__/nature.cpython-38.pyc differ diff --git a/start_script.py b/start_script.py new file mode 100644 index 0000000000000000000000000000000000000000..9e674cfb87b30c403c641119bc4cc344e2bc6acc --- /dev/null +++ b/start_script.py @@ -0,0 +1,19 @@ +import sys +import gc +from pathlib import Path +from verarbeitung.process_main import Processing +#from verarbeitung.dev_files.print_graph_test import try_known_publications, try_delete_nodes + + +doi_list = [] +#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') +#doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249') +#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') +doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332') +doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.6b00709') +error_list = Processing(doi_list, 2, 2, 'test.json') +print(error_list) + +del doi_list +del error_list +gc.collect() diff --git a/test.json b/test.json new file mode 100644 index 0000000000000000000000000000000000000000..4ae068da7d62b6aca3b24f94807ac9404404f08f --- /dev/null +++ b/test.json @@ -0,0 +1 @@ +{"nodes": [{"doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "group": "Input", "depth": 0, "citations": 8}, {"doi": "https://doi.org/10.1021/acsmedchemlett.1c00251", "name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID", "author": ["Ilaria Proietti Silvestri", "Paul J. J. Colbon"], "year": "July 16, 2021", "journal": "ACS Med. Chem. Lett.", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.1c00416", "name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies", "author": ["Paul D. Leeson", "A. Patricia Bento", "Anna Gaulton", "Anne Hersey", "Emma J. Manners", "Chris J. Radoux", "Andrew R. Leach"], "year": "May 13, 2021", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jcim.6b00709", "name": "Matched Molecular Series: Measuring SAR Similarity", "author": ["Emanuel S. R. Ehmki", "Christian Kramer"], "year": "May 1, 2017", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 5}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00269", "name": "Matched Molecular Series Analysis for ADME Property Prediction", "author": ["Mahendra Awale", "Sereina Riniker", "Christian Kramer"], "year": "May 5, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 6}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00290", "name": "Identification of Bioisosteric Substituents by a Deep Neural Network", "author": ["Peter Ertl"], "year": "June 15, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 2, "citations": 2}], "links": [{"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jmedchem.1c00416", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00269", "target": "https://doi.org/10.1021/acs.jcim.6b00709"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00290", "target": "https://doi.org/10.1021/acs.jcim.0c00269"}]} \ No newline at end of file diff --git a/verarbeitung/construct_new_graph/add_citations_rec.py b/verarbeitung/construct_new_graph/add_citations_rec.py index 87badf462a3303681f6df16fb7eb207fa1dbc966..95afa3553e9b0927196bcb94d792b5b5be82e83d 100644 --- a/verarbeitung/construct_new_graph/add_citations_rec.py +++ b/verarbeitung/construct_new_graph/add_citations_rec.py @@ -120,7 +120,7 @@ def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_ty if (search_depth < search_depth_max): #checks if its a test and chooses input function accordingly citation_pub_obj = get_pub(citation.doi_url, test_var) if (type(citation_pub_obj) != Publication): - print(pub) + #print(pub) continue if (cit_type == "Citation"): diff --git a/verarbeitung/construct_new_graph/initialize_graph.py b/verarbeitung/construct_new_graph/initialize_graph.py index ef17df69a10048cc4c6d2d726c90737d79c174c6..3807842cb6d96275194dfa4f715763abc2d7d738 100644 --- a/verarbeitung/construct_new_graph/initialize_graph.py +++ b/verarbeitung/construct_new_graph/initialize_graph.py @@ -75,7 +75,8 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t for pub_doi in doi_input_list: #iterates over every incoming doi pub = get_pub(pub_doi, test_var) if (type(pub) != Publication): - print(pub) + #print(pub) + error_doi_list.append(pub_doi) continue # checks if publication already exists in nodes @@ -163,13 +164,14 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va # creates empty lists to save nodes and edges - global nodes, edges + global nodes, edges, error_doi_list if update_var: nodes = input_nodes edges = input_edges else: nodes = [] edges = [] + error_doi_list = [] # initializes nodes/edges from input and gets a list with publication objects for citations and references returned references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var) @@ -188,4 +190,7 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va del edges gc.collect() - return(new_nodes,new_edges) + if test_var: + return(new_nodes, new_edges) + else: + return(new_nodes, new_edges, error_doi_list) diff --git a/verarbeitung/get_pub_from_input.py b/verarbeitung/get_pub_from_input.py index 2766ba836e2ae807c0950ac60389a4f620c39ba5..6bdce3b190a1f15156d0ce11cd8bf044486e5bb9 100644 --- a/verarbeitung/get_pub_from_input.py +++ b/verarbeitung/get_pub_from_input.py @@ -40,9 +40,7 @@ def get_pub(pub_doi, test_var): else: inter = Input() try: - pub = inter.get_publication(pub_doi) #creates an object of class Publication - except AttributeError: - pub = inter.get_publication(pub_doi) + pub = inter.get_publication(pub_doi) #creates an object of class Publication) except ValueError: return(ValueError) except IndexError: diff --git a/verarbeitung/process_main.py b/verarbeitung/process_main.py index db17e1f5694fb87e805a073c8839054cedc95c47..78df2dbf3f0a3c4f84a27620a523048d9e57de27 100644 --- a/verarbeitung/process_main.py +++ b/verarbeitung/process_main.py @@ -45,10 +45,12 @@ def Processing(url_list, search_depth, search_height, json_file = 'json_text.jso json_file = Path(json_file) if json_file.is_file(): - nodes, edges = update_graph(url_list, json_file, search_depth, search_height) + nodes, edges, error_doi_list = update_graph(url_list, json_file, search_depth, search_height) else: - nodes, edges = init_graph_construction(url_list, search_depth, search_height) + nodes, edges, error_doi_list = init_graph_construction(url_list, search_depth, search_height) # exports graph to given json file name output_to_json(nodes, edges, json_file) + + return error_doi_list \ No newline at end of file diff --git a/verarbeitung/start_script.py b/verarbeitung/start_script.py deleted file mode 100644 index 6e5854f4ffff81c2056595cdd0365c7909acf3cd..0000000000000000000000000000000000000000 --- a/verarbeitung/start_script.py +++ /dev/null @@ -1,12 +0,0 @@ -import sys -from pathlib import Path -from verarbeitung.process_main import Processing -from verarbeitung.dev_files.print_graph_test import try_known_publications, try_delete_nodes - - -doi_list = [] -doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') -#doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249') -doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') -doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332') -Processing(doi_list, 2, 2, 'test.json') diff --git a/verarbeitung/update_graph/connect_new_input.py b/verarbeitung/update_graph/connect_new_input.py index 35603414f4d22d0612e5dd02b73498261d458a88..5acb05f81f34277231d8b3447d0b89cc34b2f11e 100644 --- a/verarbeitung/update_graph/connect_new_input.py +++ b/verarbeitung/update_graph/connect_new_input.py @@ -149,5 +149,4 @@ def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes #add_citations(nodes, edges, citations_obj_list, 1, new_search_height, "Citation", test_var) #add_citations(nodes, edges, references_obj_list, 1, new_search_depth, "Reference", test_var) - init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, update_var = True, input_nodes = nodes, input_edges = edges) - + nodes, edges, error_doi_list = init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, update_var = True, input_nodes = nodes, input_edges = edges) diff --git a/verarbeitung/update_graph/update_depth.py b/verarbeitung/update_graph/update_depth.py index 40fc687c5855dfd5f76d1a12582e3539f109fdbc..2f0a1d8208ef02a759ac75371c624a0db6b80feb 100644 --- a/verarbeitung/update_graph/update_depth.py +++ b/verarbeitung/update_graph/update_depth.py @@ -18,7 +18,9 @@ sys.path.append("../../") from verarbeitung.construct_new_graph.add_citations_rec import add_citations from verarbeitung.construct_new_graph.initialize_graph import complete_inner_edges +from verarbeitung.get_pub_from_input import get_pub from .update_edges import back_to_valid_edges +from input.publication import Publication def reduce_max_height(max_height): @@ -62,7 +64,7 @@ def get_old_height_depth(): max_height = max(max_height, pub.group) return(max_height, max_depth) -def get_old_max_references(old_depth): +def get_old_max_references(old_depth, test_var): ''' :param old_depth: old maximum depth to search for citations :type old_depth: int @@ -72,13 +74,14 @@ def get_old_max_references(old_depth): old_max_references = [] for pub in processed_input_list: if (abs(pub.group) == old_depth): - for reference in pub.references: - for ref_pub in processed_input_list: - if reference.doi_url == ref_pub.doi_url: - old_max_references.append(ref_pub) + pub = get_pub(pub.doi_url, test_var) + if (type(pub) != Publication): + #print(pub) + continue + old_max_references.append(pub) return(old_max_references) -def get_old_max_citations(old_height): +def get_old_max_citations(old_height, test_var): ''' :param old_height: old maximum height to search for citations :type old_height: int @@ -88,10 +91,11 @@ def get_old_max_citations(old_height): old_max_citations = [] for pub in processed_input_list: if (abs(pub.group) == old_height): - for citation in pub.citations: - for cit_pub in processed_input_list: - if citation.doi_url == cit_pub.doi_url: - old_max_citations.append(cit_pub) + pub = get_pub(pub.doi_url, test_var) + if (type(pub) != Publication): + #print(pub) + continue + old_max_citations.append(pub) return(old_max_citations) def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var): @@ -120,20 +124,22 @@ def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var): old_height, old_depth = get_old_height_depth() - # removes publications and links from recursion levels which aren't needed anymore + # removes publications and links from recursion levels which aren't needed anymore or adds new ones if (old_depth > new_depth): reduce_max_depth(new_depth) - elif (old_height > new_height): + elif (old_depth < new_depth): + old_max_references = get_old_max_references(old_depth, test_var) + add_citations(processed_input_list, valid_edges, old_max_references, old_depth, new_depth, "Reference", test_var) + + if (old_height > new_height): reduce_max_height(new_height) + elif (old_height < new_height): + old_max_citations = get_old_max_citations(old_height, test_var) + add_citations(processed_input_list, valid_edges, old_max_citations, old_height, new_height, "Citation", test_var) + + - # adds publications and links for new recursion levels - elif (old_depth < new_depth): - old_max_references = get_old_max_references(old_depth) - add_citations(processed_input_list, valid_edges, old_max_references, old_depth+1, new_depth, "Reference", test_var) - elif (old_height < new_height): - old_max_citations = get_old_max_citations(old_height) - add_citations(processed_input_list, valid_edges, old_max_citations, old_height+1, new_height, "Citation", test_var) back_to_valid_edges(valid_edges, processed_input_list) # adds edges between reference group and citation group of known publications diff --git a/verarbeitung/update_graph/update_graph.py b/verarbeitung/update_graph/update_graph.py index 7bbb9072377afc5a52ca83341c7ad322566544c5..2aaf2140dcb6daf69a30f47acfdbd079a6e41374 100644 --- a/verarbeitung/update_graph/update_graph.py +++ b/verarbeitung/update_graph/update_graph.py @@ -59,7 +59,8 @@ def get_new_input_dois(new_input, test_var): # retrieves information and adds to new list if successful pub = get_pub(new_node, test_var) if (type(pub) != Publication): - print(pub) + #print(pub) + error_doi_list.append(new_node) continue new_input_dois.append(pub.doi_url) @@ -87,9 +88,10 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes old_obj_input_list , old_edges_list = input_from_json(json_file) # one global list to save the process of removing unneeded publications and one to save valid edges - global processed_list, valid_edges + global processed_list, valid_edges, error_doi_list processed_list = old_obj_input_list valid_edges = old_edges_list + error_doi_list = [] # get dois from lists to compare for differences @@ -108,4 +110,7 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes if (len(inserted_nodes) > 0): connect_old_and_new_input(processed_list, valid_edges, inserted_nodes, old_search_depth, old_search_height, search_depth, search_height, test_var) - return(processed_list, valid_edges) + if test_var: + return(processed_list, valid_edges) + else: + return(processed_list, valid_edges, error_doi_list)