Skip to content
Snippets Groups Projects
Commit 5b246d2c authored by Malte Schokolowski's avatar Malte Schokolowski
Browse files

Aufraeumen

parent 396593b9
No related branches found
No related tags found
2 merge requests!7Main,!5Main
...@@ -10,12 +10,13 @@ import requests as req ...@@ -10,12 +10,13 @@ import requests as req
import sys import sys
from pathlib import Path from pathlib import Path
from input_fj import input from input_fj import input
from json_demo import output_to_json
def process_main(array, depth): def process_main(doi_input_array, depth):
# ERROR-Handling doi_array = NULL # ERROR-Handling doi_array = NULL
if (len(array) == 0): if (len(doi_input_array) == 0):
print("Error, no input data") print("Error, no input data")
# ERROR- wenn für die Tiefe eine negative Zahl eingegeben wird # ERROR- wenn für die Tiefe eine negative Zahl eingegeben wird
...@@ -30,14 +31,21 @@ def process_main(array, depth): ...@@ -30,14 +31,21 @@ def process_main(array, depth):
edges = [] edges = []
# Jede Publikation aus dem Input-Array wird in den Knoten-Array(nodes) eingefügt. # Jede Publikation aus dem Input-Array wird in den Knoten-Array(nodes) eingefügt.
for pub in array: for pub_doi in doi_input_array:
if (pub not in nodes): pub = input(pub_doi)
not_in_nodes = True
for node in nodes:
if (pub.doi_url == node.doi_url):
not_in_nodes = False
break
if (not_in_nodes):
nodes.append(pub) nodes.append(pub)
else: else:
array.remove(pub) doi_input_array.remove(pub_doi)
process_rec_depth(array, 0, depth) process_rec_depth(doi_input_array, 0, depth)
output_to_json(nodes,edges)
return(nodes,edges) return(nodes,edges)
...@@ -56,9 +64,14 @@ def process_rec_depth(array, depth, depth_max): ...@@ -56,9 +64,14 @@ def process_rec_depth(array, depth, depth_max):
# Wenn die citation noch nicht im Knoten-Array(nodes) existiert UND die maximale Tiefe # Wenn die citation noch nicht im Knoten-Array(nodes) existiert UND die maximale Tiefe
# noch nicht erreicht wurde, wird diese als Knoten im Knoten-Array gespeichert. Zusätzlich # noch nicht erreicht wurde, wird diese als Knoten im Knoten-Array gespeichert. Zusätzlich
# wird die Verbindung zur Publikation als Tupel im Kanten-Array(edges) gespeichert. # wird die Verbindung zur Publikation als Tupel im Kanten-Array(edges) gespeichert.
if (citation.doi_url not in nodes): not_in_nodes = True
for node in nodes:
if (citation.doi_url == node.doi_url):
not_in_nodes = False
break
if (not_in_nodes):
if (depth <= depth_max): if (depth <= depth_max):
nodes.append(citation.doi_url) nodes.append(citation)
edges.append([pub.doi_url,citation.doi_url]) edges.append([pub.doi_url,citation.doi_url])
# Wenn die citaion bereits im Knoten-Array existiert, wird nur die Verbindung zur Publikation # Wenn die citaion bereits im Knoten-Array existiert, wird nur die Verbindung zur Publikation
...@@ -85,8 +98,8 @@ def process_rec_depth(array, depth, depth_max): ...@@ -85,8 +98,8 @@ def process_rec_depth(array, depth, depth_max):
# Programmtest, weil noch keine Verbindung zum Input besteht. # Programmtest, weil noch keine Verbindung zum Input besteht.
arr = [] arr = []
arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
#arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
#arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332') arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
#arr.append('https://doi.org/10.1021/acs.jcim.0c00741') #arr.append('https://doi.org/10.1021/acs.jcim.0c00741')
#arr.append('https://doi.org/10.1021/ci700007b') #arr.append('https://doi.org/10.1021/ci700007b')
...@@ -97,8 +110,8 @@ arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') ...@@ -97,8 +110,8 @@ arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
nodes,edges = process_main(arr,1) nodes,edges = process_main(arr,1)
print("Knoten:\n") print("Knoten:\n")
for vortex in nodes: for node in nodes:
print(vortex, "\n") print(node.title, "\n")
print("\nKanten:\n") print("\nKanten:\n")
for edge in edges: for edge in edges:
print(edge,"\n") print(edge,"\n")
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 3 16:54:43 2021
@author: Malte Schokolowski
"""
from bs4 import BeautifulSoup as bs
import requests as req
import sys
from pathlib import Path
import input_test as inp
def process_main(array, depth):
    """Build the citation graph for the given DOI list.

    Resets and fills the module-level node list V (publication objects
    returned by ``inp.input``) and edge list E, then walks the citation
    lists up to ``depth`` levels via process_rec.

    Returns the (V, E) tuple.
    """
    # A negative depth makes no sense; warn but keep the original
    # best-effort behaviour of continuing anyway.
    if depth < 0:
        print("Error, depth of search must be positive")

    # Reset the shared node/edge containers for this run.
    global V, E
    V = []
    E = []

    # Seed the node list with one fetched publication per input DOI.
    for doi in array:
        V.append(inp.input(doi))

    process_rec(array, 0, depth)
    return V, E
def process_rec(array, depth, depth_max):
    """Recursively walk the citations of each DOI in *array*.

    Fetches each publication via ``inp.input``, appends its citing
    publications to the global node list V and [publication, citation]
    pairs to the global edge list E, and recurses until depth_max.

    NOTE(review): membership is tested with ``citation not in V``
    although V holds publication objects and the loop yields citation
    objects — unless their equality is defined across types, duplicates
    are probably never detected; confirm against the class definitions.
    """
    depth += 1
    for pub_doi in array:
        # Fetch and store the publication record for this DOI.
        pub = inp.input(pub_doi)
        # The class instance consists of, among other things:
        # name, authors, DOI, year,
        # what_we_cited, where_we_were_cited
        for citation in pub._citations:
            #print(pub.doi_url, ".\t", citation.doi_url, "\n")
            # Create node j if it is still unknown.
            if (citation not in V):
                if (depth <= depth_max):
                    V.append(citation)
                    #print(citation.doi_url, "\n")
                    E.append([pub,citation])
                    #print(pub.doi_url, ".\t", citation.doi_url, "\n")
            else:
                E.append([pub,citation]) # Create the edge when the citation is already known, i.e. both are in the input or on a cycle
                #print(pub.doi_url, ".\t", citation.doi_url, "\n")
        #for k in wo_wir_zitiert_wurden:
            #if (i != k):
                # Create node k if still unknown.
                # Create an edge from k to i.
        if (depth < depth_max):
            cit_arr = []
            for citation in pub._citations:
                # Presumably only ACS-hosted DOIs can be parsed by the
                # input module — TODO confirm.
                if ("acs" in citation.doi_url):
                    cit_arr.append(citation.doi_url)
            process_rec(cit_arr, depth, depth_max)
# Return the node and edge sets, e.g.:
# {1,2,3,4,5} or
# {{1="paper1",0}, {2 = "paper2"},1} or
# {1="paper1", 2 = "paper2"}
# {(1,2),(2,3),(2,4)}

# Ad-hoc test driver: build a small DOI list and print the resulting graph.
arr = []
arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
#arr.append('https://doi.org/10.1021/acs.jcim.0c00741')
#arr.append('https://doi.org/10.1021/acs.accounts.1c00440')
#arr.append('https://doi.org/10.1021/ci700007b')
#arr.append('https://doi.org/10.1021/acs.jcim.5b00292')
#url = sys.argv[1]
#arr.append[url]

# Build the graph to depth 1, then print every node's DOI and every edge
# as a source/target DOI pair.
V,E = process_main(arr,1)
for vortex in V:
    #print(vortex, "\n")
    print(vortex.doi_url, "\n")
print("\n")
for i in range(len(E)):
    #print(edge,"\n")
    print(E[i][0].doi_url, ", ",E[i][1].doi_url, "\n")
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 3 16:54:43 2021
@author: Malte Schokolowski
"""
from bs4 import BeautifulSoup as bs
import requests as req
import sys
from pathlib import Path
import input_test as inp
def process_main(array, depth):
    """Build a DOI-string citation graph for the given input list.

    V (nodes, DOI strings) and E (edges, [source_doi, target_doi] pairs)
    are module-level globals reset on every call; the recursive walk of
    the citation lists is delegated to process_rec.

    Returns the (V, E) tuple.
    """
    # Warn on a nonsensical depth but keep going, matching the original
    # best-effort behaviour.
    if depth < 0:
        print("Error, depth of search must be positive")

    global V, E
    # Start the node list with a copy of all input DOIs; no edges yet.
    V = list(array)
    E = []

    process_rec(array, 0, depth)
    return V, E
def process_rec(array, depth, depth_max):
    """Recursively walk the citations of each DOI string in *array*.

    Fetches each publication via ``inp.input``, appends unknown citing
    DOIs to the global node list V and [source_doi, target_doi] pairs to
    the global edge list E, descending until depth_max is reached.
    """
    depth += 1
    for pub_doi in array:
        # Fetch and store the publication record for this DOI.
        #print(pub_doi)
        pub = inp.input(pub_doi)
        #for cit in pub._citations:
            #print(pub.doi_url, cit.doi_url)
        # The class instance consists of, among other things:
        # name, authors, DOI, year,
        # what_we_cited, where_we_were_cited
        for citation in pub._citations:
            #print(pub.doi_url, ".\t", citation.doi_url, "\n")
            # Create node j if it is still unknown.
            if (citation.doi_url not in V):
                if (depth <= depth_max):
                    V.append(citation.doi_url)
                    #print(citation.doi_url, "\n")
                    E.append([pub.doi_url,citation.doi_url])
                    #print(pub.doi_url, ".\t", citation.doi_url, "\n")
            else:
                E.append([pub.doi_url,citation.doi_url]) # Create the edge when the citation is already known, i.e. both are at the same depth or on a cycle
                #print(pub.doi_url, ".\t", citation.doi_url, "\n")
        #for k in wo_wir_zitiert_wurden:
            #if (i != k):
                # Create node k if still unknown.
                # Create an edge from k to i.
        if (depth < depth_max):
            cit_arr = []
            for citation in pub._citations:
                # Presumably only ACS-hosted DOIs can be parsed by the
                # input module — TODO confirm.
                if ("acs" in citation.doi_url):
                    cit_arr.append(citation.doi_url)
            process_rec(cit_arr, depth, depth_max)
    #else:
    #print("--- %s seconds ---" % (time.time() - start_time))
    #process_rec(wo_wir_zitiert_wurden, depth -1)'''
# Return the node and edge sets, e.g.:
# {1,2,3,4,5} or
# {{1="paper1",0}, {2 = "paper2"},1} or
# {1="paper1", 2 = "paper2"}
# {(1,2),(2,3),(2,4)}

# Ad-hoc test driver: build a small DOI list and print the resulting graph.
arr = []
arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
#arr.append('https://doi.org/10.1021/acs.jcim.0c00741')
#arr.append('https://doi.org/10.1021/ci700007b')
#arr.append('https://doi.org/10.1021/acs.jcim.5b00292')
#url = sys.argv[1]
#arr.append[url]

# Build the graph to depth 2, then print every node (DOI string) and
# every edge ([source_doi, target_doi] pair).
V,E = process_main(arr,2)
for vortex in V:
    print(vortex, "\n")
print("\n")
for edge in E:
    print(edge,"\n")
\ No newline at end of file
#!/usr/bin/env python3
"""
Functions for information retrieval of articles from the ACS journal JCIM
"""
__author__ = "Florian Jochens"
__email__ = "fj@andaco.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
from bs4 import BeautifulSoup as bs
import requests as req
import sys
from pathlib import Path
class Publication:
    """Metadata record for a scraped ACS article.

    ``_citations`` holds Citation objects for the articles that cite
    this publication; it is populated by get_citation_info.
    """
    #_registry = []

    def __init__(self, title, publication_date, contributors, doi_url,
                 subjects, num_citations):
        """Store the scraped article metadata.

        num_citations may be an int (0) or the scraped count as a string;
        callers convert with int() before use.
        """
        #self._registry.append(self)
        self.title = title
        self.publication_date = publication_date
        self.contributors = contributors
        self.doi_url = doi_url
        self.subjects = subjects
        self.num_citations = num_citations
        # Fix: _citations used to be a mutable CLASS attribute, so every
        # instance shared one list; make it a fresh per-instance list.
        self._citations = []
class Citation:
    """Lightweight record for one article that cites a publication."""

    def __init__(self, title, journal, contributors, doi_url):
        """Store the citation metadata verbatim as scraped."""
        self.title = title
        self.journal = journal
        self.contributors = contributors
        self.doi_url = doi_url

    def __repr__(self):
        # Added for debuggability; does not affect existing callers.
        return f'Citation(title={self.title!r}, doi_url={self.doi_url!r})'
def get_article_info(soup):
    """Extract article metadata from a parsed ACS article page.

    Parameters:
        soup: BeautifulSoup tree of the article HTML.

    Returns:
        A Publication with title, date, DOI, subjects, contributors and
        the 'cited by' count (0 or the scraped count string).

    NOTE(review): assumes the ACS page layout with the CSS classes used
    below; a missing header div raises AttributeError, and ``doi_url``
    stays unbound when the doi div has no children — confirm against
    live pages.
    """
    header = soup.find('div', class_ = 'article_header-left pull-left')
    article_title = header.find('span', class_ = 'hlFld-Title').text
    publication_date = header.find('span', class_ = 'pub-date-value').text
    # The last link inside the DOI div wins.
    for link in header.find('div', class_ = 'article_header-doiurl'):
        doi_url = link.get('href')
    subs = header.find('div', class_ = 'article_header-taxonomy')
    subjects = []
    for sub in subs.find_all('a'):
        subjects.append(sub.get('title'))
    cons = header.find('ul', class_ = 'loa')
    contributors = []
    for con in cons.find_all('span', class_ = 'hlFld-ContribAuthor'):
        contributors.append(con.text)
    numc = header.find('div', class_ = 'articleMetrics_count')
    # No anchor in the metrics div means the article has no citations.
    if not numc.a:
        num_citations = 0
    else:
        num_citations = numc.a.text
    pub = Publication(article_title, publication_date, contributors, doi_url,
                      subjects, num_citations)
    return pub
def get_download_url(soup):
    """Return the ACS citation-export download URL found on the page.

    Fix: the original took no parameters and referenced an undefined
    global ``soup``, so every call raised NameError; the parsed page is
    now taken as a parameter.

    Parameters:
        soup: BeautifulSoup tree of the article HTML.

    Returns:
        The absolute download URL (base URL + the 'Citation and
        references' link href, when present).
    """
    export = soup.find('div', class_ = 'cit-download-dropdown_content')
    url = 'https://pubs.acs.org'
    for link in export.find_all('a'):
        if link.get('title') == 'Citation and references':
            url += link.get('href')
    print(url)
    return url
def download(url): # Download citation and references file
    """Report whether the citation/references file named in *url*
    already exists under ./files/.

    Fix: the original tested ``if url.find('='):`` — ``find`` returns an
    index, so -1 (separator absent) is truthy and index 0 is falsy; with
    no '=' present ``rsplit('=', 1)[1]`` then raised IndexError.  Test
    for the separator explicitly instead.

    Parameters:
        url: download URL whose trailing '=...' part names the file.
    """
    if '=' in url:
        # The filename is everything after the last '='.
        filename = url.rsplit('=', 1)[1]
        path = Path('./files/' + filename)
        if path.is_file():
            print("File already exists")
        else:
            print("File does not exist")
def get_citation_info(pub, num_citations, soup):
    """Scrape the 'cited by' list from *soup* and attach Citation
    objects to ``pub._citations``.

    Parameters:
        pub: Publication to receive the citations (mutated in place).
        num_citations: reported citation count (int or numeric string).
        soup: BeautifulSoup tree of the article HTML.

    Fix: the loop is now bounded by the number of items actually
    scraped — the reported count can exceed the scraped list lengths
    (e.g. extra/missing markup), which made the original raise
    IndexError.
    """
    pub._citations = []
    details = soup.find('ol', class_ = 'cited-content_cbyCitation')
    titles = []
    for title in details.find_all('span',
            class_ = 'cited-content_cbyCitation_article-title'):
        titles.append(title.text.replace('.', ''))
    journal_names = []
    for name in details.find_all('span',
            class_ = 'cited-content_cbyCitation_journal-name'):
        journal_names.append(name.text)
    doi_urls = []
    # NOTE(review): this collects every <a> href in the list, which
    # presumably matches one link per citation — confirm on live pages.
    for url in details.find_all('a'):
        doi_urls.append(url.get('href'))
    contributors = []
    for contrib in details.find_all('span',
            class_ = 'cited-content_cbyCitation_article-contributors'):
        contributors.append(contrib.text)
    # Never index past the shortest scraped list.
    count = min(int(num_citations), len(titles), len(journal_names),
                len(doi_urls), len(contributors))
    for i in range(count):
        pub._citations.append(Citation(titles[i], journal_names[i],
                                       contributors[i], doi_urls[i]))
def print_pub_info(pub):
    """Print a human-readable summary of *pub* and the publications
    that cite it."""
    print(f'''Article title: {pub.title}
Publication date: {pub.publication_date}
DOI-URL: {pub.doi_url}
Subjects:''')
    print(*(pub.subjects), sep = ", ")
    print('\nContributors:')
    print(*(pub.contributors), sep = ", ")
    cited_count = int(pub.num_citations)
    # Uncited article: short notice and done.
    if cited_count <= 0:
        print('\nThis publication is not cited by any other publication.')
        return
    # Singular/plural header, then one block per citing article.
    if cited_count == 1:
        print(f'\nThis publication is cited by the following publication:\n')
    else:
        print(f'\nThis publication is cited by the following {pub.num_citations} publications:\n')
    for citation in pub._citations:
        print(f'''
Title: {citation.title}
Journal: {citation.journal}
Contributors: {citation.contributors}
DOI-URL: {citation.doi_url}
''')
def input(url):
    """Fetch the article page at *url* and return a populated Publication.

    Performs a network request on every call, parses the HTML with
    BeautifulSoup, and scrapes the 'cited by' list when the page reports
    citations.

    NOTE(review): the name shadows the ``input`` builtin — callers in
    this project import it explicitly, so it is kept for compatibility.
    """
    html_text = req.get(url).text
    soup = bs(html_text, 'html.parser')
    pub = get_article_info(soup)
    # Only scrape the 'cited by' section when citations are reported.
    if int(pub.num_citations) > 0:
        get_citation_info(pub, int(pub.num_citations), soup)
    return pub
#if len(sys.argv) != 2:
# sys.stderr.write('Usage: {} <url>\n'.format(sys.argv[0]))
# exit(1)
#url = sys.argv[1]
#pub = input(url)
#print_pub_info(pub)
{"nodes": [{"name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "doi": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems ", "doi": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"name": "Disconnected Maximum Common Substructures under Constraints ", "doi": "https://doi.org/10.1021/acs.jcim.0c00741"}, {"name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design ", "doi": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms ", "doi": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"name": "Machine learning accelerates quantum mechanics predictions of molecular crystals ", "doi": "https://doi.org/10.1016/j.physrep.2021.08.002"}, {"name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID ", "doi": "https://doi.org/10.1021/acsmedchemlett.1c00251"}, {"name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies ", "doi": "https://doi.org/10.1021/acs.jmedchem.1c00416"}, {"name": "BonMOLi\u00e8re: Small-Sized Libraries of Readily Purchasable Compounds, Optimized to Produce Genuine Hits in Biological Screens across the Protein Space ", "doi": "https://doi.org/10.3390/ijms22157773"}, {"name": "Accelerating high-throughput virtual screening through molecular pool-based active learning ", "doi": "https://doi.org/10.1039/D0SC06805E"}, {"name": "Compound Screening ", "doi": "https://doi.org/10.1016/B978-0-12-820472-6.00078-5"}], "links": [{"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.0c00741"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", 
"target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1016/j.physrep.2021.08.002"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acsmedchemlett.1c00251"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jmedchem.1c00416"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.3390/ijms22157773"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1039/D0SC06805E"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1016/B978-0-12-820472-6.00078-5"}]}
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
@author: Malte Schokolowski
"""
def process_main(doi_array, depth):
    """Design sketch: entry point that will build the citation graph.

    TODO: error handling for doi_array == NULL and for depth < 0 or 1 ???
    """
    # Create an empty array for the nodes.
    # Create an empty array for the edges.
    # Insert all start nodes into the node array.
    process_rec(doi_array, depth)
def process_rec(doi_array, depth):
    """Design sketch of the recursive graph walk (no-op for now).

    Fix: the original was not valid Python — every loop body contained
    only comments, which raises IndentationError at import time.  The
    intended steps are preserved as comments and the function is a
    syntactically valid stub until the citation lists exist.
    """
    for i in range(len(doi_array)):
        # Call input for each DOI and store the publication record:
        # a class instance consisting of, among other things,
        # name, authors, DOI, year,
        # what_we_cited (outgoing) and where_we_were_cited (incoming).
        pass
        # for j in range(len(what_we_cited)):
        #     create node j if still unknown
        #     create edge from i to j
        # for k in range(len(where_we_were_cited)):
        #     if (i != k):
        #         create node k if still unknown
        #         create edge from k to i
        # process_rec(what_we_cited, depth-1)
        # process_rec(where_we_were_cited, depth-1)
# Knoten- und Kantenmenge zurückgeben
# {1,2,3,4,5} oder
# {{1="paper1",0}, {2 = "paper2"},1} oder
# {1="paper1", 2 = "paper2"}
# {(1,2),(2,3),(2,4)}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment