Commit 66157006 authored by Große, Judith, committed by Malte Schokolowski

added import from json

parent f6798fdc
Part of merge request !11: merge verarbeitung to main repo
#!/usr/bin/env python3

def back_to_valid_edges(Kanten_aus_Json, Geloechte_Knoten):
    '''
    :param Kanten_aus_Json: list of edges from the old graph
    :type Kanten_aus_Json: list

    :param Geloechte_Knoten: list of deleted nodes from the old graph
    :type Geloechte_Knoten: list

    function that deletes an edge if one or both of its nodes are deleted nodes
    '''
    list_of_edges_from_json = Kanten_aus_Json
    list_of_valid_edges = list(list_of_edges_from_json)     # work on a copy so the list we iterate over is not modified
    list_of_deleted_nodes = Geloechte_Knoten
    for deleted_node in list_of_deleted_nodes:               # iterates over all deleted nodes
        for edge in list_of_edges_from_json:                 # iterates over all edges from the old graph
            for node in edge:                                # checks both nodes of the edge
                if node == deleted_node:                     # if one of them is a deleted node
                    if edge in list_of_valid_edges:          # the edge may already have been removed for its other node
                        list_of_valid_edges.remove(edge)     # removes the edge
                    break                                    # this edge is handled, continue with the next edge
    return(list_of_valid_edges)


#Kanten_Menge_Ganz = [["doi_1","doi_2"],["doi_3","doi_4"],["doi_5","doi_6"]]
#Geloeschte = ["doi_2","doi_1","doi_4"]
#print(back_to_valid_edges(Kanten_Menge_Ganz,Geloeschte))

# Afterwards, Processing has to be called with the added nodes
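# Usage sketch: expected behaviour with the commented test data above (the DOI
# values are only placeholders). Every edge that contains at least one deleted
# node is removed, so only ["doi_5","doi_6"] survives:
#
#   Kanten_Menge_Ganz = [["doi_1","doi_2"],["doi_3","doi_4"],["doi_5","doi_6"]]
#   Geloeschte = ["doi_2","doi_1","doi_4"]
#   back_to_valid_edges(Kanten_Menge_Ganz, Geloeschte)   # -> [["doi_5","doi_6"]]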
#!/usr/bin/env python3

from collections import Counter

def doi_listen_vergleichen(alte, neue):
    '''
    :param alte: list of dois from the old graph
    :type alte: list

    :param neue: list of dois from the new graph
    :type neue: list

    function to calculate which nodes from the old graph were deleted and which nodes were added
    '''
    dois_from_old_graph = alte  # IMPORTANT: no duplicate DOIs
    dois_from_new_graph = neue
    deleted_nodes = []
    common_nodes = []
    inserted_nodes = []
    all_dois = dois_from_old_graph + dois_from_new_graph
    for doi in all_dois:  # iterates over the merged list of new and old dois
        if (Counter(all_dois)[doi] == 2) and (doi not in common_nodes):  # if the doi occurs twice, the node is in both the old and the new graph
            common_nodes.append(doi)  # appends the doi to the common ones if it is not already in the list
        elif (doi in dois_from_old_graph) and (doi not in dois_from_new_graph):  # if the doi occurs once and comes from the old graph, it is a deleted node
            deleted_nodes.append(doi)  # appends the doi to the deleted ones
        elif (doi in dois_from_new_graph) and (doi not in dois_from_old_graph):  # if the doi occurs once and comes from the new graph, it is an inserted node
            inserted_nodes.append(doi)  # appends the doi to the inserted ones
    return(common_nodes, inserted_nodes, deleted_nodes)


#Test Prints
#liste_1 = ["doi_1","doi_2","doi_3","doi_4","doi_5"]
#liste_2 = ["doi_1","doi_2","doi_3","doi_6","doi_7"]
#print("common elements: ", doi_listen_vergleichen(liste_1,liste_2)[0])
#print("added elements: ", doi_listen_vergleichen(liste_1,liste_2)[1])
#print("deleted elements: ", doi_listen_vergleichen(liste_1,liste_2)[2])
#!/usr/bin/env python3

import json

class Publication:
    def __init__(self, doi_url, title, contributors, journal, publication_date, group):
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
        self.group = group


def input_from_json(json_file):
    '''
    :param json_file: JSON file for the old graph
    :type json_file: JSON file
    '''
    with open(json_file, 'r') as file:  # opens the json file with reading permission
        python_dict2 = json.load(file)  # saves the information in a dictionary

    list_of_nodes_with_all_info = python_dict2["nodes"]
    list_of_edges_in_json_format = python_dict2["links"]

    list_of_node_objects = []
    list_of_edges = []
    for node in list_of_nodes_with_all_info:  # iterates over the list of nodes
        pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], node["group"])  # creates a Publication object for each node
        list_of_node_objects.append(pub)  # appends the objects to a list

    # Each object is still missing its list of references and citations
    # Iterates over the list of edge dictionaries and stores them as a list of pairs
    for edge in list_of_edges_in_json_format:  # iterates over the list of edges
        new_list = [edge["source"], edge["target"]]  # converts the edge to the [source, target] representation
        list_of_edges.append(new_list)  # appends the edge to the list
    return(list_of_node_objects, list_of_edges)
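# Sketch of the JSON layout that input_from_json() expects, inferred from the keys
# accessed above ("nodes"/"links" and the per-node fields); the file name and the
# values below are only illustrative:
#
#   {
#     "nodes": [{"doi": "doi_1", "name": "Some title", "author": ["A. Author"],
#                "journal": "Some journal", "year": "2021", "group": 0}],
#     "links": [{"source": "doi_1", "target": "doi_2"}]
#   }
#
#   nodes, edges = input_from_json("old_graph.json")
#   # nodes -> list of Publication objects, edges -> [["doi_1", "doi_2"], ...]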
#!/usr/bin/env python3

"""
Functions for information retrieval of articles from the ACS journal JCIM
"""

__author__ = "Florian Jochens"
__email__ = "fj@andaco.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""

from bs4 import BeautifulSoup as bs
import requests as req
import sys
from pathlib import Path

class Publication:
    #_registry = []
    _citations = []
    _references = []
    def __init__(self, title, publication_date, contributors, doi_url,
                 subjects = None, num_citations = None):
        #self._registry.append(self)
        self.title = title
        self.publication_date = publication_date
        self.contributors = contributors
        self.doi_url = doi_url
        self.subjects = subjects
        self.num_citations = num_citations
        #self._citations = []
        #self._references = []
class Citation:
    def __init__(self, title, journal, contributors, doi_url):
        self.title = title
        self.journal = journal
        self.contributors = contributors
        self.doi_url = doi_url

class References:
    def __init__(self, title, journal, contributors, doi_url):
        self.title = title
        self.journal = journal
        self.contributors = contributors
        self.doi_url = doi_url
def get_article_info(soup):
    header = soup.find('div', class_ = 'article_header-left pull-left')
    article_title = header.find('span', class_ = 'hlFld-Title').text
    publication_date = header.find('span', class_ = 'pub-date-value').text
    for link in header.find('div', class_ = 'article_header-doiurl'):
        doi_url = link.get('href')
    subs = header.find('div', class_ = 'article_header-taxonomy')
    subjects = []
    for sub in subs.find_all('a'):
        subjects.append(sub.get('title'))
    cons = header.find('ul', class_ = 'loa')
    contributors = []
    for con in cons.find_all('span', class_ = 'hlFld-ContribAuthor'):
        contributors.append(con.text)
    numc = header.find('div', class_ = 'articleMetrics_count')
    if not numc.a:
        num_citations = 0
    else:
        num_citations = numc.a.text
    pub = Publication(article_title, publication_date, contributors, doi_url,
                      subjects, num_citations)
    return pub
def get_download_url(soup): # builds the citation/references export URL from the article page soup
    export = soup.find('div', class_ = 'cit-download-dropdown_content')
    url = 'https://pubs.acs.org'
    for link in export.find_all('a'):
        if link.get('title') == 'Citation and references':
            url += link.get('href')
    print(url)
    return url

def download(url): # Download citation and references file
    if url.find('=') != -1: # str.find returns -1 if there is no '=' in the url
        filename = url.rsplit('=', 1)[1]
        path = Path('./files/' + filename)
        if path.is_file():
            print("File already exists")
        else:
            print("File does not exist")
def get_citation_info(pub, num_citations, soup):
    pub._citations = []
    details = soup.find('ol', class_ = 'cited-content_cbyCitation')
    titles = []
    for title in details.find_all('span',
            class_ = 'cited-content_cbyCitation_article-title'):
        titles.append(title.text.replace('.', ''))
    journal_names = []
    for name in details.find_all('span',
            class_ = 'cited-content_cbyCitation_journal-name'):
        journal_names.append(name.text)
    doi_urls = []
    for url in details.find_all('a'):
        doi_urls.append(url.get('href'))
    contributors = []
    for contrib in details.find_all('span',
            class_ = 'cited-content_cbyCitation_article-contributors'):
        contributors.append(contrib.text)
    for i in range(0, int(num_citations)):
        pub._citations.append(Citation(titles[i], journal_names[i],
            contributors[i], doi_urls[i]))
def print_pub_info(pub):
    print(f'''Article title: {pub.title}
Publication date: {pub.publication_date}
DOI-URL: {pub.doi_url}
Subjects:''')
    print(*(pub.subjects), sep = ", ")
    print('\nContributors:')
    print(*(pub.contributors), sep = ", ")

    if int(pub.num_citations) > 0:
        if int(pub.num_citations) == 1:
            print('\nThis publication is cited by the following publication:\n')
        else:
            print(f'\nThis publication is cited by the following {pub.num_citations} publications:\n')
        for citation in pub._citations:
            print(f'''
Title: {citation.title}
Journal: {citation.journal}
Contributors: {citation.contributors}
DOI-URL: {citation.doi_url}
''')
    else:
        print('\nThis publication is not cited by any other publication.')
def input(url):
    html_text = req.get(url).text
    soup = bs(html_text, 'html.parser')
    pub = get_article_info(soup)
    if int(pub.num_citations) > 0:
        get_citation_info(pub, int(pub.num_citations), soup)
    return pub


#if len(sys.argv) != 2:
#    sys.stderr.write('Usage: {} <url>\n'.format(sys.argv[0]))
#    exit(1)
#
#url = sys.argv[1]
#pub = input(url)
#print_pub_info(pub)
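# Usage sketch: fetch one ACS article page and print its citation information
# (the URL is only a placeholder for a real pubs.acs.org article page):
#
#   pub = input('https://pubs.acs.org/doi/...')
#   print_pub_info(pub)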