Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • baw8330/projekt-cis-biochemie-2021-22
  • bax5890/projekt-cis-biochemie-2021-22
2 results
Show changes
Commits on Source (72)
Showing
with 696 additions and 313 deletions
import base64
import re
import dash
from dash import dcc
from dash import html
from dash import callback_context
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate
from input.interface import InputInterface
import input.publication
# The Dash application object on which the layout and callbacks are registered.
app = dash.Dash(__name__)

# List of options when inputting data and generating the graph
additional_options = ['Update Automatically', 'Smart Input']

# Reads the contents of info_box.txt.
# They can later be displayed by pressing the corresponding button.
# The with-statement guarantees that the file is closed even if read() fails.
with open('info_box.txt', 'r') as f:
    boxcontent = f.read()
app.layout = html.Div([
    # Layer 0: header and help function(s)
    html.Div([
        html.Button(id='show-info', children='Show Info', n_clicks=0),
        html.Div(id='info-box'),
    ]),

    # Layer 1: all mandatory inputs
    html.Div([
        "Input: ",
        # Text box for a single input string. The value is transmitted
        # upon pressing return or clicking out of the box.
        dcc.Input(id='input-string', value='', type='text', debounce=True),
        # Forward recursion depth; values between 1 and 10 can be entered.
        dcc.Input(id='forward-depth', value='1', type='number',
                  min='1', max='10'),
        # Backward recursion depth; values between 1 and 10 can be entered.
        dcc.Input(id='backward-depth', value='1', type='number',
                  min='1', max='10'),
        # Upload box, usable via drag-and-drop or by clicking on it
        # to open a file viewer.
        dcc.Upload(
            id="upload-data",
            children=html.Div(
                ["Drag and drop or click to select a file to upload."]
            ),
            style={
                "width": "30%",
                "height": "60px",
                "lineHeight": "60px",
                "borderWidth": "1px",
                "borderStyle": "dashed",
                "borderRadius": "5px",
                "textAlign": "center",
                "margin": "10px",
            },
        ),
    ]),

    # Layer 2: checklist, Remove-/Start-Buttons and input-error-message
    html.Div([
        # Collects all input DOIs. Options and value start out as empty
        # lists to avoid error messages.
        dcc.Checklist(
            id='input-checklist',
            options=[],
            value=[],
            labelStyle={'display': 'block'},
        ),
        # Displays an error message if 'Smart Input' is active.
        html.Div(id='input-err', style={'color': 'red'}),
        # Clears the entire list.
        html.Button(id='clear-all-button', children='Clear All'),
        # Clears all selected elements.
        html.Button(id='clear-selected-button', children='Clear Selected'),
        # Starts the process that generates a graph.
        html.Button(id='start-button', children='Generate Graph'),
    ]),

    # Layer 3: additional options (e.g. Topological Sort)
    html.Div([
        html.H4('Additional Options'),
        # One checkbox per entry of the additional_options list.
        dcc.Checklist(
            id='additional-options',
            options=[{'label': name, 'value': name}
                     for name in additional_options],
            value=[],
        ),
    ]),

    # Layer 4: the graph
    html.Div([
        html.Div(id='test-output'),
    ]),
])
def _checklist_entry(input_value, smart_input):
    '''
    Builds the option dictionary for a single checklist entry.

    :param input_value: the raw input (expected to be a DOI)
    :type input_value: string
    :param smart_input: if True, the publication is looked up through
        InputInterface.get_pub_light and a more readable label is created
    :type smart_input: bool
    :return: dictionary with the keys 'label' and 'value'
    :raises Exception: whatever the lookup raises, or an IndexError if the
        publication has no contributors
    '''
    if not smart_input:
        # Without 'Smart Input' the value is added as is, without validation.
        return {'label': input_value, 'value': input_value}
    pub = InputInterface().get_pub_light(input_value)
    # Creates a more readable string to display in the checklist
    rep_str = pub.contributors[0] + ',' + pub.journal + \
        ',' + pub.publication_date
    return {'label': rep_str, 'value': input_value}


def _add_inputs(new_values, all_inputs, smart_input):
    '''
    Appends every value that is not yet part of the checklist to all_inputs.
    all_inputs is modified in place. Processing stops at the first value
    that cannot be added; entries added before that are kept.

    :param new_values: values that should be added
    :type new_values: list of strings
    :param all_inputs: current options of the checklist
    :type all_inputs: list of dictionaries with 2 entries each
    :param smart_input: whether 'Smart Input' is active
    :type smart_input: bool
    :return: error message, '' if everything could be added
    '''
    # The set is kept up to date so that a value occurring twice in
    # new_values is only added once.
    known_values = {x['value'] for x in all_inputs}
    for value in new_values:
        if value in known_values:
            continue
        try:
            entry = _checklist_entry(value, smart_input)
        except Exception as err:
            return '{}'.format(err)
        all_inputs.append(entry)
        known_values.add(value)
    return ''


@app.callback(
    Output('input-checklist', 'options'),
    Output('input-checklist', 'value'),
    Output('input-string', 'value'),
    Output('input-err', 'children'),
    Input('input-string', 'value'),
    Input('clear-all-button', 'n_clicks'),
    Input('clear-selected-button', 'n_clicks'),
    Input('upload-data', 'contents'),
    State('input-checklist', 'options'),
    State('input-checklist', 'value'),
    State('additional-options', 'value')
)
def update_input_checklist(input_value, btn1, btn2, filecontents, all_inputs,
                           selected_inputs, additional_options):
    '''
    Most important callback function. Updates the checklist that holds all inputs.
    State of the checklist as input is needed so that previous entries are re-added.
    input-string is required as Output to clear the input box after each input.
    Different actions are performed depending on which input triggered the callback.
    The value-attribute of input-checklist must be updated so that the values
    of deleted elements no longer appear in the list of selected elements.

    :param input_value: given by dcc.Input
    :type input_value: string
    :param btn1: signals pressing of clear-all-button
    :type btn1: int
    :param btn2: signals pressing of clear-selected-button
    :type btn2: int
    :param filecontents: the contents of an uploaded file
        ('<header>,<base64 encoded data>')
    :type filecontents: string
    :param all_inputs: all labels and values from the checklist,
        regardless if they have been checked or not
    :type all_inputs: list of dictionaries with 2 entries each
    :param selected_inputs: values of all checked elements
    :type selected_inputs: list of strings
    :param additional_options: all checked additional options
    :type additional_options: list of strings
    :return: new checklist options, new checklist value, new content of the
        input box (always '') and the error message ('' if there is none)
    :raises PreventUpdate: if the trigger does not require any change
    '''
    # changed_id is used to determine which Input has triggered the callback
    changed_id = [p['prop_id'] for p in callback_context.triggered][0]

    # if clear-all-button was pressed:
    if 'clear-all-button' in changed_id:
        return list(), list(), '', ''

    # if clear-selected-button was pressed:
    if 'clear-selected-button' in changed_id:
        all_inputs = [i for i in all_inputs if i['value'] not in selected_inputs]
        return all_inputs, list(), '', ''

    smart_input = 'Smart Input' in additional_options

    # when a new element is added via dcc.Input
    if 'input-string' in changed_id:
        if not input_value:
            # An empty box must not create an empty checklist entry.
            return all_inputs, selected_inputs, '', ''
        err_msg = _add_inputs([input_value], all_inputs, smart_input)
        return all_inputs, selected_inputs, '', err_msg

    # when a txt-file is uploaded
    if 'upload-data.contents' in changed_id:
        if not filecontents:
            raise PreventUpdate
        # Skips the info portion that is added in front of the data
        # when a file is uploaded
        payload = filecontents.partition(',')[2]
        try:
            # Turns the base64 encoded data into proper text
            text = base64.b64decode(payload).decode('utf-8')
        except ValueError as err:
            # covers invalid base64 data as well as non UTF-8 content
            return all_inputs, selected_inputs, '', '{}'.format(err)
        # One input per line; surrounding whitespace (e.g. '\r' of Windows
        # line endings) and blank lines are dropped.
        list_of_inputs = [line.strip() for line in text.splitlines()
                          if line.strip()]
        # For every line the same actions as for a single input are performed
        err_msg = _add_inputs(list_of_inputs, all_inputs, smart_input)
        return all_inputs, selected_inputs, '', err_msg

    # when the programm is first started:
    # if this is not done, the input_checklist will be generated
    # with one element that contains an empty string
    if input_value == '':
        return list(), list(), '', ''

    # Nothing to do for this trigger; a multi-output callback must not
    # implicitly return None.
    raise PreventUpdate
@app.callback(
    Output('info-box', 'children'),
    Input('show-info', 'n_clicks'),
)
def show_hide_info_box(n_clicks):
    '''
    Toggles the (first) info-box depending on how often the button has been
    pressed: an even click count hides the box, an odd one shows it.
    The displayed text is the module-level boxcontent.

    :param n_clicks: number of times show-info has been clicked.
    :type n_clicks: int
    :return: '' while hidden, otherwise a Div holding the info text
    '''
    box_hidden = n_clicks % 2 == 0
    if box_hidden:
        return ''
    return html.Div(boxcontent, style={'whiteSpace': 'pre-line'})
@app.callback(
    Output('test-output', 'children'),
    Input('start-button', 'n_clicks'),
    Input('input-checklist', 'options'),
    Input('input-checklist', 'value'),
    Input('forward-depth', 'value'),
    Input('backward-depth', 'value'),
    State('additional-options', 'value')
)
def generate_output(n_clicks, all_inputs, selected_inputs,
                    forward_depth, backward_depth, additional_options):
    '''
    Basic structure for a callback that generates an output. This is only a
    proof of concept and has nothing to do with the intended output yet.

    :param n_clicks: how often has Generate Graph been clicked
    :type n_clicks: int
    :param all_inputs: all labels and values from the checklist,
        regardless if they have been checked or not
    :type all_inputs: list of dictionaries with 2 entries each
    :param selected_inputs: values of all checked elements
    :type selected_inputs: list of strings
    :param forward_depth: forward recursion depth
    :type forward_depth: unsigned int
    :param backward_depth: backward recursion depth
    :type backward_depth: unsigned int
    :param additional_options: value of all selected additional options
    :type additional_options: list of strings
    :return: concatenation of the input values; a checked value is repeated
        |forward - backward| times, an unchecked one forward + backward times
    :raises PreventUpdate: if the button has never been clicked, if the
        trigger does not ask for an update, or if a depth is not a number
    '''
    changed_id = [p['prop_id'] for p in callback_context.triggered][0]
    if n_clicks is None:
        raise PreventUpdate
    if 'Update Automatically' not in additional_options \
            and 'start-button' not in changed_id:
        raise PreventUpdate

    # Without inputs the output is empty, whatever the depths are.
    if not all_inputs:
        return ''

    # The depth fields may hold no usable number (e.g. while the user is
    # editing them); keep the previous output instead of crashing.
    try:
        forward = int(forward_depth)
        backward = int(backward_depth)
    except (TypeError, ValueError):
        raise PreventUpdate from None

    repeat_selected = abs(forward - backward)
    repeat_unselected = forward + backward
    parts = []
    for option in all_inputs:
        x = option['value']
        if x in selected_inputs:
            parts.append(x * repeat_selected)
        else:
            parts.append(x * repeat_unselected)
    return ''.join(parts)
# Only start the server (in debug mode) when this file is run as a script.
if __name__ == '__main__':
    app.run_server(debug=True)
English
Show Info: Can be activated and deactivated by clicking on the button.
Input: input by entering a DOI ("Digital Object Identifier")
Drag and drop or click to select a file to upload: multiple DOIs can be entered via a txt file, provided that every DOI is on its own line.
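Recursion: the two number fields next to Input set the forward and the backward recursion depth (values between 1 and 10).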
Clear All: clearing all inputs
Clear Selected: clearing all selected inputs
Generate Graph: generates the graph
Update Automatically: automatically updates the graph for every new input
Smart Input: checks the entered DOI for validity and displays it in a more readable form: author, journal, publication date.
German
Show Info: Durch wiederholtes Klicken kann das Fenster ein- und ausgeblendet werden.
Input: Die Eingabe erfolgt in Form eines DOI ("Digital Object Identifier")
Drag and drop or click to select a file to upload: Mehrere DOI in einem txt-Dokument müssen untereinander angeordnet sein.
Recursion:
Clear All: alle Eingaben werden gelöscht
Clear Selected: alle markierten Eingaben werden gelöscht
Generate Graph: generiert den zugehörigen Graphen
Update Automatically: automatische Aktualisierung des Graphen nach neuer Eingabe
Smart Input: direkte Überprüfung der Eingabe auf Richtigkeit; zudem wird nicht mehr der DOI angezeigt, sondern: der Autor, das Journal, das Veröffentlichungsdatum.
File added
File added
File added
File added
# Projekt CiS-Biochemie 2021-22 UI
# Benötigt:
- Dash
- Pandas
- beautifulsoup4
- requests
# Starten des Programms:
Ausführen von citation_parser_ui.py und Einfügen des entstandenen Links in einen Browser.
Danach müsste sich die Benutzeroberfläche im Browser öffnen.
# Übersicht der Benutzeroberfläche:
- Show Info: Durch wiederholtes Klicken kann das Fenster ein- und ausgeblendet werden.
- Input: Die Eingabe erfolgt in Form eines DOI ("Digital Object Identifier")
- Drag and drop or click to select a file to upload: Mehrere DOI in einem txt-Dokument (genau ein DOI pro Zeile).
- Recursion: die beiden noch unbeschrifteten Felder rechts neben Input sind für die Rekursionstiefen in beide Richtungen
- Clear All: alle Eingaben werden gelöscht
- Clear Selected: alle markierten Eingaben werden gelöscht
- Generate Graph: generiert den zugehörigen Graphen (generiert momentan nur einen string)
- Update Automatically: automatische Aktualisierung des Graphen bei jeder neuen Eingabe
- Smart Input: direkte Überprüfung der Eingabe auf Richtigkeit; zudem wird nicht mehr der DOI angezeigt, sondern:
der Autor, das Journal, das Veröffentlichungsdatum (muss vor dem Hinzufügen aktiviert worden sein).
## Autoren
- Isabelle Siebels
- Sebastian David
import dash
from dash import dcc
from dash import html
from dash import callback_context
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate
from input.interface import InputInterface
import input.publication
# The Dash application object on which the layout and callbacks are registered.
app = dash.Dash(__name__)

# Labels (and values) of the checkboxes shown under 'Additional Options'.
additional_options = ['Update Automatically']

app.layout = html.Div([
    # Layer 0: header and help function(s)
    html.Div([
        html.Button(id='show-info', children='Show Info', n_clicks=0),
        html.Div(id='info-box'),
    ]),

    # Layer 1: all mandatory inputs
    html.Div([
        "Input: ",
        dcc.Input(id='input-string', value='', type='text', debounce=True),
        dcc.Input(id='forward-depth', value='1', type='number',
                  min='1', max='10'),
        dcc.Input(id='backward-depth', value='1', type='number',
                  min='1', max='10'),
    ]),

    # Layer 2: checklist, Remove-/Start-Buttons and input-error-message
    html.Div([
        dcc.Checklist(
            id='input-checklist',
            options=[],
            labelStyle={'display': 'block'},
            value=[],
        ),
        html.Div(id='input-err', style={'color': 'red'}),
        html.Button(id='clear-all-button', children='Clear All'),
        html.Button(id='clear-selected-button', children='Clear Selected'),
        html.Button(id='start-button', children='Generate Graph'),
    ]),

    # Layer 3: additional options (e.g. Topological Sort)
    html.Div([
        html.H4('Additional Options'),
        dcc.Checklist(
            id='additional-options',
            options=[{'label': name, 'value': name}
                     for name in additional_options],
            value=[],
        ),
    ]),

    # Layer 4: the graph
    html.Div([
        html.Div(id='test-output'),
    ]),
])
@app.callback(
    Output('input-checklist', 'options'),
    Output('input-checklist', 'value'),
    Output('input-string', 'value'),
    Output('input-err', 'children'),
    Input('input-string', 'value'),
    Input('clear-all-button', 'n_clicks'),
    Input('clear-selected-button', 'n_clicks'),
    State('input-checklist', 'options'),
    State('input-checklist', 'value')
)
def update_input_checklist(input_value, btn1, btn2, all_inputs, selected_inputs):
    '''
    Most important callback function. Updates the checklist that holds all inputs.
    input-string is required as Output to clear the input box after each input.

    :param input_value: given by dcc.Input
    :type input_value: string
    :param btn1: signals pressing of clear-all-button
    :param btn2: signals pressing of clear-selected-button
    :param all_inputs: all labels and values from the checklist,
        regardless if they have been checked or not
    :type all_inputs: list of dictionaries with 2 entries each
    :param selected_inputs: values of all checked elements
    :type selected_inputs: list of strings
    :return: new checklist options, new checklist value, new content of the
        input box (always '') and the error message ('' if there is none)
    :raises PreventUpdate: if the trigger does not require any change
    '''
    changed_id = [p['prop_id'] for p in callback_context.triggered][0]

    # if clear-all-button was pressed:
    if 'clear-all-button' in changed_id:
        return list(), list(), '', ''

    # if clear-selected-button was pressed:
    if 'clear-selected-button' in changed_id:
        all_inputs = [i for i in all_inputs if i['value'] not in selected_inputs]
        return all_inputs, list(), '', ''

    # when the programm is first started or the box was submitted empty:
    # there is nothing to add, so the current state is handed back. The
    # checklist starts out with empty lists, hence the first call still
    # yields empty lists. The global app.layout is deliberately not touched
    # from inside the callback.
    if input_value == '':
        return all_inputs or list(), selected_inputs or list(), '', ''

    # when a new element is added via dcc.Input
    if 'input-string' in changed_id:
        options = all_inputs
        currValues = [x['value'] for x in options]
        if input_value not in currValues:
            try:
                # If the lookup fails, the input is not added and the
                # error message is shown instead.
                pub = InputInterface().get_pub_light(input_value)
            except Exception as err:
                return options, selected_inputs, '', '{}'.format(err)
            # more readable string to display in the checklist
            rep_str = pub.contributors[0] + ',' + pub.journal + \
                ',' + pub.publication_date
            options.append({'label': rep_str, 'value': input_value})
        return options, selected_inputs, '', ''

    # Nothing to do for this trigger; a multi-output callback must not
    # implicitly return None.
    raise PreventUpdate
@app.callback(
    Output('info-box', 'children'),
    Input('show-info', 'n_clicks'),
)
def show_hide_info_box(n_clicks):
    '''
    This callback shows and hides the (first) help-box: an even number of
    clicks hides it, an odd number shows a placeholder text.

    :param n_clicks: number of times show-info has been clicked.
    :type n_clicks: int
    '''
    box_hidden = n_clicks % 2 == 0
    if box_hidden:
        return ''
    return 'Hier koennte Ihre Werbung stehen'
@app.callback(
    Output('test-output', 'children'),
    Input('start-button', 'n_clicks'),
    Input('input-checklist', 'options'),
    Input('input-checklist', 'value'),
    Input('forward-depth', 'value'),
    Input('backward-depth', 'value'),
    State('additional-options', 'value')
)
def generate_output(n_clicks, all_inputs, selected_inputs,
                    forward_depth, backward_depth, additional_options):
    '''
    Basic structure for a callback that generates an output.

    :param n_clicks: how often has Generate Graph been clicked
    :type n_clicks: int
    :param all_inputs: all labels and values from the checklist,
        regardless if they have been checked or not
    :type all_inputs: list of dictionaries with 2 entries each
    :param selected_inputs: values of all checked elements
    :type selected_inputs: list of strings
    :param forward_depth: forward recursion depth
    :type forward_depth: unsigned int
    :param backward_depth: backward recursion depth
    :type backward_depth: unsigned int
    :param additional_options: value of all selected additional options
    :type additional_options: list of strings
    :return: concatenation of the input values; a checked value is repeated
        |forward - backward| times, an unchecked one forward + backward times
    :raises PreventUpdate: if the button has never been clicked, if the
        trigger does not ask for an update, or if a depth is not a number
    '''
    changed_id = [p['prop_id'] for p in callback_context.triggered][0]
    if n_clicks is None:
        raise PreventUpdate
    if 'Update Automatically' not in additional_options \
            and 'start-button' not in changed_id:
        raise PreventUpdate

    # Without inputs the output is empty, whatever the depths are.
    if not all_inputs:
        return ''

    # The depth fields may hold no usable number (e.g. while the user is
    # editing them); keep the previous output instead of crashing.
    try:
        forward = int(forward_depth)
        backward = int(backward_depth)
    except (TypeError, ValueError):
        raise PreventUpdate from None

    repeat_selected = abs(forward - backward)
    repeat_unselected = forward + backward
    parts = []
    for option in all_inputs:
        x = option['value']
        if x in selected_inputs:
            parts.append(x * repeat_selected)
        else:
            parts.append(x * repeat_unselected)
    return ''.join(parts)
# Only start the server (in debug mode) when this file is run as a script.
if __name__ == '__main__':
    app.run_server(debug=True)
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output, State
import base64
import re
# The Dash application object on which the layout and callbacks are registered.
app = dash.Dash(__name__)

# Module-level placeholder; starts out as an empty dictionary.
list_of_inputs = dict()

app.layout = html.Div([
    html.H4("Add all lines in a file to a list"),
    html.Div([
        # Upload box, described by its own text as usable via
        # drag-and-drop or by clicking on it.
        dcc.Upload(
            id="upload-data",
            children=html.Div(
                ["Drag and drop or click to select a file to upload."]
            ),
            style={
                "width": "30%",
                "height": "60px",
                "lineHeight": "60px",
                "borderWidth": "1px",
                "borderStyle": "dashed",
                "borderRadius": "5px",
                "textAlign": "center",
                "margin": "10px",
            },
        ),
    ]),
    # Checklist that receives one entry per uploaded value.
    dcc.Checklist(
        id='input-checklist',
        options=list(),
        labelStyle={'display': 'block'},
        value=[],
    ),
])
@app.callback(
    Output('input-checklist', 'options'),
    Input('upload-data', 'filename'),
    Input('upload-data', 'contents'),
    State('input-checklist', 'options')
)
def update_input_list(uploaded_filenames, uploaded_file_contents, all_inputs):
    '''
    Adds every whitespace separated value of an uploaded file to the checklist.
    Values that are already listed (or occur more than once in the file)
    are only added once.

    :param uploaded_filenames: name of the uploaded file (not used)
    :param uploaded_file_contents: contents of the uploaded file
        ('<header>,<base64 encoded data>'), None if nothing was uploaded yet
    :type uploaded_file_contents: string
    :param all_inputs: current options of the checklist
    :type all_inputs: list of dictionaries with 2 entries each
    :return: the new options of the checklist
    '''
    options = all_inputs if all_inputs else list()

    # Nothing uploaded (e.g. first call after the page has loaded):
    # leave the checklist as it is.
    if uploaded_file_contents is None:
        return options

    # cutting the first part of the string (the header) away to decode
    payload = uploaded_file_contents.partition(',')[2]
    text = base64.b64decode(payload).decode('utf-8')

    # The set is kept up to date so that repeated values are added only once.
    known_values = {x['value'] for x in options}
    for value in text.split():
        if value not in known_values:
            options.append({'label': value, 'value': value})
            known_values.add(value)
    return options
# Only start the server (in debug mode) when this file is run as a script.
if __name__ == '__main__':
    app.run_server(debug=True)
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/
#CodeCounter
.VSCodeCounter/
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Functions to generate a graph representing citations between multiple ACS/Nature journals
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
from bs4 import BeautifulSoup as bs
import requests as req
import sys
from pathlib import Path
from input_fj import input
from input_test import input_test_func
from json_demo import output_to_json
def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
    """
    Adds every publication from the input list to the graph structure.

    Each new publication is appended to the module-level nodes list with the
    group "input". A DOI whose publication is already a node is removed from
    doi_input_list (the caller's list is modified in place).

    :param doi_input_list: list of publication dois from user
    :param search_depth_max: maximum search depth for references
    :param search_height_max: maximum search height for citations
    :param test_var: if truthy, input_test_func is used instead of input
    :return: tuple (references, citations) of the publication objects that
        were newly added to nodes on the first level
    """
    references_pub_obj_list = []
    citations_pub_obj_list = []

    # Iterate over a snapshot: duplicates are removed from doi_input_list
    # inside the loop, and removing from the list that is being iterated
    # would skip the element that follows the removed one.
    for pub_doi in list(doi_input_list):
        # checks if its a test and chooses input function accordingly
        if test_var:
            pub = input_test_func(pub_doi)
        else:
            pub = input(pub_doi)

        # checks if publication already exists in nodes
        if any(pub.doi_url == node.doi_url for node in nodes):
            doi_input_list.remove(pub_doi)
        else:
            nodes.append(pub)
            pub.group = "input"

        # inserts references as publication objects into list and
        # inserts first depth references into nodes/edges if maximum search depth > 0
        references_pub_obj_list.extend(
            create_graph_structure_references(pub, 0, search_depth_max, test_var))

        # inserts citations as publication objects into list and
        # inserts first height citations into nodes if maximum search height > 0
        citations_pub_obj_list.extend(
            create_graph_structure_citations(pub, 0, search_height_max, test_var))

    return (references_pub_obj_list, citations_pub_obj_list)
def complete_inner_edges(test_var):
    """
    Adds the missing edges between the citation and the reference group.

    For every "depth" node an edge [citation, node] is added for each of its
    citations that is itself a node; for every "height" node an edge
    [node, reference] is added for each of its references that is a node.
    Edges that already exist are not added again.

    :param test_var: not used by this function
    """
    for current in nodes:
        if current.group == "depth":
            for citation in current.citations:
                is_node = any(citation.doi_url == other.doi_url for other in nodes)
                if is_node and [citation.doi_url, current.doi_url] not in edges:
                    edges.append([citation.doi_url, current.doi_url])
        if current.group == "height":
            for reference in current.references:
                is_node = any(reference.doi_url == other.doi_url for other in nodes)
                if is_node and [current.doi_url, reference.doi_url] not in edges:
                    edges.append([current.doi_url, reference.doi_url])
def create_graph_structure_references(pub, search_depth, search_depth_max, test_var):
    """
    Adds a node for every unknown reference of pub and the matching edges.

    :param pub: publication whose references are processed
    :param search_depth: current search depth
    :param search_depth_max: maximum search depth; unknown references are only
        added as nodes while search_depth is below it
    :param test_var: if truthy, input_test_func is used instead of input
    :return: list of the publication objects that were newly added to nodes
    """
    new_references = []
    for reference in pub.references:
        # checks every reference for duplication
        already_known = any(reference.doi_url == node.doi_url for node in nodes)

        if already_known:
            # the referenced publication is a node already: only add the edge
            if [pub.doi_url, reference.doi_url] not in edges:
                edges.append([pub.doi_url, reference.doi_url])
        elif search_depth < search_depth_max:
            # checks if its a test and chooses input function accordingly
            fetch = input_test_func if test_var else input
            reference_pub_obj = fetch(reference.doi_url)

            reference_pub_obj.group = "depth"
            nodes.append(reference_pub_obj)
            edges.append([pub.doi_url, reference_pub_obj.doi_url])
            new_references.append(reference_pub_obj)
    return new_references
def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var):
    """
    Recursively adds the references of the given publications to nodes/edges.

    :param references_pub_obj_list: input list of references as publication objects
    :param search_depth: current search depth
    :param search_depth_max: maximal search depth
    :param test_var: passed on to create_graph_structure_references
    """
    for current_pub in references_pub_obj_list:
        # adds next level to nodes/edges
        next_level = create_graph_structure_references(
            current_pub, search_depth, search_depth_max, test_var)

        # stop descending once the maximum depth has been reached
        if not (search_depth < search_depth_max):
            continue
        process_references_rec(next_level, search_depth + 1, search_depth_max, test_var)
def create_graph_structure_citations(pub, search_height, search_height_max, test_var):
    """
    Adds a node for every unknown citation of pub and the matching edges.

    :param pub: publication whose citations are processed
    :param search_height: current search height
    :param search_height_max: maximum search height; unknown citations are only
        added as nodes while search_height is below it
    :param test_var: if truthy, input_test_func is used instead of input
    :return: list of the publication objects that were newly added to nodes
    """
    new_citations = []
    for citation in pub.citations:
        # checks every citation for duplication
        already_known = any(citation.doi_url == node.doi_url for node in nodes)

        if already_known:
            # the citing publication is a node already: only add the edge
            if [citation.doi_url, pub.doi_url] not in edges:
                edges.append([citation.doi_url, pub.doi_url])
        elif search_height < search_height_max:
            # checks if its a test and chooses input function accordingly
            fetch = input_test_func if test_var else input
            citation_pub_obj = fetch(citation.doi_url)

            citation_pub_obj.group = "height"
            nodes.append(citation_pub_obj)
            edges.append([citation_pub_obj.doi_url, pub.doi_url])
            new_citations.append(citation_pub_obj)
    return new_citations
def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var):
    """
    Recursively adds the citations of the given publications to nodes/edges.

    :param citations_pub_obj_list: input list of citations as publication objects
    :param search_height: current search height
    :param search_height_max: maximal search height
    :param test_var: passed on to create_graph_structure_citations
    """
    for current_pub in citations_pub_obj_list:
        # adds next level to nodes/edges
        next_level = create_graph_structure_citations(
            current_pub, search_height, search_height_max, test_var)

        # stop ascending once the maximum height has been reached
        if not (search_height < search_height_max):
            continue
        process_citations_rec(next_level, search_height + 1, search_height_max, test_var)
def process_main(doi_input_list, search_height, search_depth, test_var = False):
    """
    Main function to call. Builds the graph for the given input publications
    and hands nodes and edges to output_to_json.

    :param doi_input_list: input list of dois
    :param search_height: max search height to process to
    :param search_depth: max search depth to process to
    :param test_var: only needed for unit test as True, default is False
    :return: (list of the nodes' doi_url values, edges) if test_var is True,
        otherwise nothing
    """
    global nodes, edges

    # NOTE(review): the checks below only print a message; processing
    # continues afterwards.
    # ERROR-Handling doi_array = NULL
    if len(doi_input_list) == 0:
        print("Error, no input data")
    # ERROR- if a negative number is entered for height
    if search_height < 0:
        print("Error, search_height of search must be positive")
    # ERROR- if a negative number is entered for depth
    if search_depth < 0:
        print("Error, search_depth of search must be positive")

    # start with an empty array for the nodes and for the edges
    nodes = []
    edges = []

    # initializes nodes/edges from input and gets a list with publication
    # objects for citations and references returned
    references_obj_list, citations_obj_list = initialize_nodes_list(
        doi_input_list, search_depth, search_height, test_var)

    # function calls to begin recursive processing up to max depth/height
    process_citations_rec(citations_obj_list, 1, search_height, test_var)
    process_references_rec(references_obj_list, 1, search_depth, test_var)

    # adds edges between reference group and citation group of known publications
    complete_inner_edges(test_var)

    # calls a script to save nodes and edges of graph in .json file
    output_to_json(nodes, edges)

    # only for unit tests
    if test_var == True:
        return ([node.doi_url for node in nodes], edges)
def print_graph(nodes, edges):
    """
    Prints the title of every node and every edge of a graph.

    :param nodes: objects with a title attribute
    :param edges: the edges to print
    """
    print("Knoten:\n")
    for current_node in nodes:
        print(current_node.title, "\n")

    print("\nKanten:\n")
    for current_edge in edges:
        print(current_edge, "\n")
def try_known_publications():
    """
    Program test, because there is no connection to UI yet.
    Builds the graph for two known publications and prints it.
    """
    doi_list = [
        'https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249',
        'https://doi.org/10.1021/acs.jmedchem.0c01332',
    ]
    # further publications that can be tried:
    #   https://doi.org/10.1021/acs.jcim.0c00741
    #   https://doi.org/10.1021/ci700007b
    #   https://doi.org/10.1021/acs.jcim.5b00292

    # process_main only returns a value when called with test_var=True, so
    # its result must not be unpacked here. The graph it builds is stored in
    # the module-level lists nodes and edges, which are printed afterwards.
    process_main(doi_list, 2, 2)
    print_graph(nodes, edges)
\ No newline at end of file
import unittest
from Processing import process_main
class ProcessingTest(unittest.TestCase):
    """Unit tests for process_main, run in test mode (test_var=True)."""

    def testCycle(self):
        """The expected graph is the same for limits 1/1 and 2/2."""
        node_dois, edge_list = process_main(['doiz1'], 1, 1, True)
        self.assertCountEqual(node_dois, ['doiz1', 'doiz2'])
        self.assertCountEqual(edge_list, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']])

        node_dois, edge_list = process_main(['doiz1'], 2, 2, True)
        self.assertCountEqual(node_dois, ['doiz1', 'doiz2'])
        self.assertCountEqual(edge_list, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']])

    # TODO: testBigCycle, testEmptyHeight and testEmptyDepth are not written yet.

    def testEmptyDepthHeight(self):
        """With height 0 and depth 0 only the input publications are nodes."""
        node_dois, edge_list = process_main(['doi1'], 0, 0, True)
        self.assertCountEqual(node_dois, ['doi1'])
        self.assertCountEqual(edge_list, [])

        node_dois, edge_list = process_main(['doi1', 'doi2'], 0, 0, True)
        self.assertCountEqual(node_dois, ['doi1', 'doi2'])
        self.assertCountEqual(edge_list, [['doi1', 'doi2']])

        node_dois, edge_list = process_main(['doi1', 'doi2', 'doi3'], 0, 0, True)
        self.assertCountEqual(node_dois, ['doi1', 'doi2', 'doi3'])
        self.assertCountEqual(edge_list, [['doi3', 'doi1'], ['doi1', 'doi2']])

    def testInnerEdges(self):
        """An edge between the citing and the referenced node is expected."""
        node_dois, edge_list = process_main(['doi_ie1'], 1, 1, True)
        self.assertCountEqual(node_dois, ['doi_ie1', 'doi_ie2', 'doi_ie3'])
        self.assertCountEqual(
            edge_list,
            [['doi_ie1', 'doi_ie2'], ['doi_ie3', 'doi_ie1'], ['doi_ie3', 'doi_ie2']])

    def testRightHeight(self):
        """Checks the resulting graph for search heights 1 and 2 (depth 0)."""
        node_dois, edge_list = process_main(['doi_h01'], 1, 0, True)
        self.assertCountEqual(node_dois, ['doi_h01'])
        self.assertCountEqual(edge_list, [])

        node_dois, edge_list = process_main(['doi_h02'], 1, 0, True)
        self.assertCountEqual(node_dois, ['doi_h02', 'doi_h1'])
        self.assertCountEqual(edge_list, [['doi_h1', 'doi_h02']])

        node_dois, edge_list = process_main(['doi_h02'], 2, 0, True)
        self.assertCountEqual(node_dois, ['doi_h02', 'doi_h1', 'doi_h2'])
        self.assertCountEqual(edge_list, [['doi_h1', 'doi_h02'], ['doi_h2', 'doi_h1']])

    def testRightDepth(self):
        """Checks the resulting graph for search depths 1 and 2 (height 0)."""
        node_dois, edge_list = process_main(['doi_d01'], 0, 1, True)
        self.assertCountEqual(node_dois, ['doi_d01'])
        self.assertCountEqual(edge_list, [])

        node_dois, edge_list = process_main(['doi_d02'], 0, 1, True)
        self.assertCountEqual(node_dois, ['doi_d02', 'doi_d1'])
        self.assertCountEqual(edge_list, [['doi_d02', 'doi_d1']])

        node_dois, edge_list = process_main(['doi_d02'], 0, 2, True)
        self.assertCountEqual(node_dois, ['doi_d02', 'doi_d1', 'doi_d2'])
        self.assertCountEqual(edge_list, [['doi_d02', 'doi_d1'], ['doi_d1', 'doi_d2']])
# Run all tests of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
\ No newline at end of file
# Projekt CiS-Projekt 2021/22
Processing package that generates the theoretical graph of citations and references for the given input publications.
## Usage/Examples
```python
from verarbeitung.process_main import Processing
def main(url_list):
Processing(url_list)
```
Grundlegender Prozess:
Es wird von der UI eine Liste an DOIs an die Verarbeitung übergeben und
diese wird dann umgewandelt in eine Knoten-und Kantenmenge, welche die Zitierungen darstellen.
Die Informationen über die Paper und die Zitierungen kommen von der Input Gruppe über den Aufruf
von der Funktion Publication. Die Knoten- und Kantenmengen werden in Form einer
Json Datei an den Output übergeben.
## Files and functions in directory
get_pub_from_input.py:
```python
def get_pub(pub_doi, test_var)
```
- Gibt für eine DOI ein Klassenobjekt zurück, in dem alle nötigen Informationen gespeichert sind.
process_main.py:
```python
def Processing(url_list)
```
- Überprüft, ob bereits eine Json Datei existiert und ruft dann entweder die Funktion auf, um
einen neuen Graphen zu erstellen oder die Funktion um einen Vorhandenen zu updaten.
start.script.py:
- Wird benötigt, um die Dateien ordnerübergreifend aufzurufen. Nur fürs interne Testen der
Funktionalität
<name>.json:
- sind momentan Beispiele, die an den Output übergeben werden könnten.
## Testing
python -m unittest discover verarbeitung/test -v
## Authors
- Donna Löding
- Alina Molkentin
- Xinyi Tang
- Judith Große
- Malte Schokolowski
\ No newline at end of file
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted