Skip to content
Snippets Groups Projects
Select Git revision
  • c0727d9083e8bb02c02b657b000c108f81de6e1a
  • main default protected
2 results

add_citations_rec.py

Blame
  • Forked from Ockenden, Samuel / CiS Projekt
    Source project has a limited visibility.
    Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    DatasetLoader.py 8.99 KiB
    from requests.exceptions import RetryError
    
    from drugstone import models
    from nedrex.static import get_metadata, get_license
    
    LICENSE_FILE = "./data/license.txt"
    
    
    def get_ppi_string():
        """Return the STRING PPI dataset record (version 11.0, unlicenced).

        The record is obtained through ``get_or_create`` on ``models.PPIDataset``.
        """
        attributes = {
            'name': 'STRING',
            'link': 'https://string-db.org/',
            'version': '11.0',
            'licenced': False,
        }
        record, _created = models.PPIDataset.objects.get_or_create(**attributes)
        return record
    
    
    def get_ppi_apid():
        """Return the APID PPI dataset record (version 'January 2019', unlicenced).

        The record is obtained through ``get_or_create`` on ``models.PPIDataset``.
        """
        attributes = {
            'name': 'APID',
            'link': 'http://cicblade.dep.usal.es:8080/APID/',
            'version': 'January 2019',
            'licenced': False,
        }
        record, _created = models.PPIDataset.objects.get_or_create(**attributes)
        return record
    
    
    def get_ppi_biogrid():
        """Return the BioGRID PPI dataset record (version 4.0, unlicenced).

        The record is obtained through ``get_or_create`` on ``models.PPIDataset``.
        """
        attributes = {
            'name': 'BioGRID',
            'link': 'https://thebiogrid.org/',
            'version': '4.0',
            'licenced': False,
        }
        record, _created = models.PPIDataset.objects.get_or_create(**attributes)
        return record
    
    
    def get_nedrex_version():
        """Return the NeDRex version string to tag datasets with.

        The ``'version'`` entry of ``get_metadata()`` is used unless it equals
        the placeholder ``"0.0.0"`` or the metadata call raises ``RetryError``;
        in those cases the value of ``get_today_version()`` is returned.
        """
        fallback = get_today_version()
        try:
            reported = get_metadata()['version']
        except RetryError:
            return fallback
        if reported == "0.0.0":
            return fallback
        return reported
    
    
    def get_nedrex_source_version(source):
        """Return the ``'date'`` recorded for ``source`` in the NeDRex metadata.

        Args:
            source: Key into the ``'source_databases'`` section of
                ``get_metadata()``.

        Returns:
            The ``'date'`` value stored under that key.
        """
        databases = get_metadata()['source_databases']
        # TODO remove once fixed in nedrex db
        # Expose the 'drug_central' entry under the 'drugcentral' key as well.
        if 'drug_central' in databases:
            databases['drugcentral'] = databases['drug_central']

        entry = databases[source]
        return entry['date']
    
    
    def get_drug_target_nedrex(url, licenced):
        """Return the 'NeDRex' ``PDIDataset`` record for the current NeDRex version.

        Args:
            url: Value stored in the record's ``link`` field.
            licenced: Value stored in the record's ``licenced`` field.

        Returns:
            The record obtained through ``get_or_create``.
        """
        manager = models.PDIDataset.objects
        record, _created = manager.get_or_create(
            name='NeDRex', link=url, version=get_nedrex_version(), licenced=licenced,
        )
        return record
    
    
    def get_ppi_nedrex(url, licenced):
        """Return the 'NeDRex' ``PPIDataset`` record for the current NeDRex version.

        Args:
            url: Value stored in the record's ``link`` field.
            licenced: Value stored in the record's ``licenced`` field.

        Returns:
            The record obtained through ``get_or_create``.
        """
        manager = models.PPIDataset.objects
        record, _created = manager.get_or_create(
            name='NeDRex', link=url, version=get_nedrex_version(), licenced=licenced,
        )
        return record
    
    
    def get_protein_disorder_nedrex(url, licenced):
        """Return the 'NeDRex' ``PDisDataset`` record for the current NeDRex version.

        Args:
            url: Value stored in the record's ``link`` field.
            licenced: Value stored in the record's ``licenced`` field.

        Returns:
            The record obtained through ``get_or_create``.
        """
        manager = models.PDisDataset.objects
        record, _created = manager.get_or_create(
            name='NeDRex', link=url, version=get_nedrex_version(), licenced=licenced,
        )
        return record
    
    
    def get_drug_disorder_nedrex(url, licenced):
        """Return the 'NeDRex' ``DrDiDataset`` record for the current NeDRex version.

        Args:
            url: Value stored in the record's ``link`` field.
            licenced: Value stored in the record's ``licenced`` field.

        Returns:
            The record obtained through ``get_or_create``.
        """
        manager = models.DrDiDataset.objects
        record, _created = manager.get_or_create(
            name='NeDRex', link=url, version=get_nedrex_version(), licenced=licenced,
        )
        return record
    
    
    def write_license(text):
        """Overwrite the file at ``LICENSE_FILE`` with ``text``.

        Args:
            text: String written to the file, replacing any previous content.
        """
        with open(LICENSE_FILE, mode='w') as license_out:
            license_out.write(text)
    
    
    def update_license():
        """Fetch the license text, store it via ``write_license``, and return it.

        Returns:
            The text returned by ``get_license()``, or an empty string when that
            call raises ``RetryError`` (nothing is written in that case).
        """
        # Only the fetch is guarded; the local name avoids shadowing the
        # ``license`` builtin.
        try:
            license_text = get_license()
        except RetryError:
            print('License could not be retrieved.')
            return ""
        write_license(license_text)
        return license_text
    
    
    def import_license():
        """Return the license text stored at ``LICENSE_FILE``.

        Returns:
            The full file content, or an empty string (after printing a hint)
            when the file does not exist.
        """
        try:
            with open(LICENSE_FILE, 'r') as fh:
                # One read replaces the line-by-line string concatenation.
                return fh.read()
        except FileNotFoundError:
            print('No license doc there yet! Make sure to run an update first!')
            return ""
    
    
    def get_drug_target_chembl():
        """Return the ChEMBL PDI dataset record (version 27, unlicenced).

        The record is obtained through ``get_or_create`` on ``models.PDIDataset``.
        """
        attributes = {
            'name': 'ChEMBL',
            'link': 'https://www.ebi.ac.uk/chembl/',
            'version': '27',
            'licenced': False,
        }
        record, _created = models.PDIDataset.objects.get_or_create(**attributes)
        return record
    
    
    def get_drug_target_dgidb():
        """Return the DGIdb PDI dataset record (version 4.2.0, unlicenced).

        The record is obtained through ``get_or_create`` on ``models.PDIDataset``.
        """
        attributes = {
            'name': 'DGIdb',
            'link': 'https://www.dgidb.org/',
            'version': '4.2.0',
            'licenced': False,
        }
        record, _created = models.PDIDataset.objects.get_or_create(**attributes)
        return record
    
    
    def get_drug_target_drugbank():
        """Return the DrugBank PDI dataset record (version 5.1.7, licenced).

        The record is obtained through ``get_or_create`` on ``models.PDIDataset``.
        """
        attributes = {
            'name': 'DrugBank',
            'link': 'https://go.drugbank.com/',
            'version': '5.1.7',
            'licenced': True,
        }
        record, _created = models.PDIDataset.objects.get_or_create(**attributes)
        return record
    
    
    def get_disorder_protein_disgenet():
        """Return the DisGeNET PDis dataset record (version 6.0, unlicenced).

        The record is obtained through ``get_or_create`` on ``models.PDisDataset``.
        """
        attributes = {
            'name': 'DisGeNET',
            'link': 'https://www.disgenet.org/home/',
            'version': '6.0',
            'licenced': False,
        }
        record, _created = models.PDisDataset.objects.get_or_create(**attributes)
        return record
    
    
    def get_drug_disorder_drugbank():
        """Return the DrugBank DrDi dataset record (version 5.1.8, unlicenced).

        The record is obtained through ``get_or_create`` on ``models.DrDiDataset``.
        """
        attributes = {
            'name': 'DrugBank',
            'link': 'https://go.drugbank.com/',
            'version': '5.1.8',
            'licenced': False,
        }
        record, _created = models.DrDiDataset.objects.get_or_create(**attributes)
        return record
    
    
    def get_today_version():
        """Return a provisional version string built from today's date.

        The format is ``<year>-<month>-<day>_temp``; month and day are not
        zero-padded.
        """
        import datetime
        today = datetime.date.today()
        date_part = "-".join(str(field) for field in (today.year, today.month, today.day))
        return date_part + "_temp"
    
    
    def get_ppi_nedrex_dataset(url, licenced, source):
        """Return the ``PPIDataset`` record for a NeDRex source database.

        The version comes from ``get_nedrex_source_version(source)``; if that call
        raises ``RetryError`` the value of ``get_today_version()`` is used.

        Args:
            url: Value stored in the record's ``link`` field.
            licenced: Value stored in the record's ``licenced`` field.
            source: Source name; used as the record's ``name`` and for the
                version lookup.

        Returns:
            The record obtained through ``get_or_create``.
        """
        fallback = get_today_version()
        try:
            resolved = get_nedrex_source_version(source)
        except RetryError:
            resolved = fallback

        attributes = {'name': source, 'link': url, 'version': resolved, 'licenced': licenced}
        record, _created = models.PPIDataset.objects.get_or_create(**attributes)
        return record
    
    
    def get_pdi_nedrex_dataset(url, licenced, source):
        """Return the ``PDIDataset`` record for a NeDRex source database.

        The version comes from ``get_nedrex_source_version(source)``; if that call
        raises ``RetryError`` the value of ``get_today_version()`` is used.

        Args:
            url: Value stored in the record's ``link`` field.
            licenced: Value stored in the record's ``licenced`` field.
            source: Source name; used as the record's ``name`` and for the
                version lookup.

        Returns:
            The record obtained through ``get_or_create``.
        """
        fallback = get_today_version()
        try:
            resolved = get_nedrex_source_version(source)
        except RetryError:
            resolved = fallback

        attributes = {'name': source, 'link': url, 'version': resolved, 'licenced': licenced}
        record, _created = models.PDIDataset.objects.get_or_create(**attributes)
        return record
    
    
    def get_pdis_nedrex_dataset(url, licenced, source):
        """Return the ``PDisDataset`` record for a NeDRex source database.

        The version comes from ``get_nedrex_source_version(source)``; if that call
        raises ``RetryError`` the value of ``get_today_version()`` is used.

        Args:
            url: Value stored in the record's ``link`` field.
            licenced: Value stored in the record's ``licenced`` field.
            source: Source name; used as the record's ``name`` and for the
                version lookup.

        Returns:
            The record obtained through ``get_or_create``.
        """
        fallback = get_today_version()
        try:
            resolved = get_nedrex_source_version(source)
        except RetryError:
            resolved = fallback

        attributes = {'name': source, 'link': url, 'version': resolved, 'licenced': licenced}
        record, _created = models.PDisDataset.objects.get_or_create(**attributes)
        return record
    
    
    def get_drdi_nedrex_dataset(url, licenced, source):
        """Return the ``DrDiDataset`` record for a NeDRex source database.

        The version comes from ``get_nedrex_source_version(source)``; if that call
        raises ``RetryError`` the value of ``get_today_version()`` is used.

        Args:
            url: Value stored in the record's ``link`` field.
            licenced: Value stored in the record's ``licenced`` field.
            source: Source name; used as the record's ``name`` and for the
                version lookup.

        Returns:
            The record obtained through ``get_or_create``.
        """
        fallback = get_today_version()
        try:
            resolved = get_nedrex_source_version(source)
        except RetryError:
            resolved = fallback

        attributes = {'name': source, 'link': url, 'version': resolved, 'licenced': licenced}
        record, _created = models.DrDiDataset.objects.get_or_create(**attributes)
        return record
    
    
    def is_licenced_ppi_source(source):
        """Report whether no unlicenced ``PPIDataset`` record can be fetched for ``source``.

        The version looked up is the one returned by
        ``get_nedrex_source_version(source)``; if that call raises ``RetryError``
        the value of ``get_today_version()`` is used instead.

        Args:
            source: Dataset name to look up.

        Returns:
            ``False`` when ``PPIDataset.objects.get`` succeeds for this name and
            version with ``licenced=False``; ``True`` when that lookup raises any
            ``Exception``.
        """
        version = get_today_version()
        try:
            version = get_nedrex_source_version(source)
        except RetryError:
            pass  # version lookup failed: keep the date-based fallback

        try:
            models.PPIDataset.objects.get(name=source, version=version, licenced=False)
        except Exception:
            # Kept deliberately broad: any failed lookup counts as "licenced".
            # Unlike a bare ``except:``, KeyboardInterrupt and SystemExit propagate.
            return True
        return False
    
    
    def is_licenced_pdi_source(source):
        """Report whether no unlicenced ``PDIDataset`` record can be fetched for ``source``.

        The version looked up is the one returned by
        ``get_nedrex_source_version(source)``; if that call raises ``RetryError``
        the value of ``get_today_version()`` is used instead.

        Args:
            source: Dataset name to look up.

        Returns:
            ``False`` when ``PDIDataset.objects.get`` succeeds for this name and
            version with ``licenced=False``; ``True`` when that lookup raises any
            ``Exception``.
        """
        version = get_today_version()
        try:
            version = get_nedrex_source_version(source)
        except RetryError:
            pass  # version lookup failed: keep the date-based fallback

        try:
            models.PDIDataset.objects.get(name=source, version=version, licenced=False)
        except Exception:
            # Kept deliberately broad: any failed lookup counts as "licenced".
            # Unlike a bare ``except:``, KeyboardInterrupt and SystemExit propagate.
            return True
        return False
    
    
    def is_licenced_pdis_source(source):
        """Report whether no unlicenced ``PDisDataset`` record can be fetched for ``source``.

        The version looked up is the one returned by
        ``get_nedrex_source_version(source)``; if that call raises ``RetryError``
        the value of ``get_today_version()`` is used instead.

        Args:
            source: Dataset name to look up.

        Returns:
            ``False`` when ``PDisDataset.objects.get`` succeeds for this name and
            version with ``licenced=False``; ``True`` when that lookup raises any
            ``Exception``.
        """
        version = get_today_version()
        try:
            version = get_nedrex_source_version(source)
        except RetryError:
            pass  # version lookup failed: keep the date-based fallback

        try:
            models.PDisDataset.objects.get(name=source, version=version, licenced=False)
        except Exception:
            # Kept deliberately broad: any failed lookup counts as "licenced".
            # Unlike a bare ``except:``, KeyboardInterrupt and SystemExit propagate.
            return True
        return False
    
    
    def is_licenced_drdi_source(source):
        """Report whether no unlicenced ``DrDiDataset`` record can be fetched for ``source``.

        The version looked up is the one returned by
        ``get_nedrex_source_version(source)``; if that call raises ``RetryError``
        the value of ``get_today_version()`` is used instead.

        Args:
            source: Dataset name to look up.

        Returns:
            ``False`` when ``DrDiDataset.objects.get`` succeeds for this name and
            version with ``licenced=False``; ``True`` when that lookup raises any
            ``Exception``.
        """
        version = get_today_version()
        try:
            version = get_nedrex_source_version(source)
        except RetryError:
            pass  # version lookup failed: keep the date-based fallback

        try:
            models.DrDiDataset.objects.get(name=source, version=version, licenced=False)
        except Exception:
            # Kept deliberately broad: any failed lookup counts as "licenced".
            # Unlike a bare ``except:``, KeyboardInterrupt and SystemExit propagate.
            return True
        return False
    
    
    def remove_old_pdi_data(new_datasets, licenced):
        """Delete ``PDIDataset`` records that share a name with a new dataset.

        For every entry in ``new_datasets``, all ``PDIDataset`` rows with the same
        ``name`` and the given ``licenced`` flag are deleted, except rows that
        compare equal to the entry itself. Progress and errors are printed; an
        error while handling one entry does not stop the remaining entries.

        Args:
            new_datasets: Iterable of datasets to keep, or ``None`` to do nothing.
            licenced: ``licenced`` value used to filter deletion candidates.
        """
        if new_datasets is None:
            return
        for keep in new_datasets:
            print(f"Deleting all except {keep!s}")
            try:
                candidates = models.PDIDataset.objects.filter(name=keep.name, licenced=licenced)
                for candidate in candidates:
                    print(f"Testing: {candidate!s}")
                    if candidate != keep:
                        print(f"Deleting: {candidate!s}")
                        candidate.delete()
            except Exception as error:
                print("Error when trying to delete old datasets")
                print(error)
    
    
    def remove_old_ppi_data(new_datasets, licenced):
        """Delete ``PPIDataset`` records that share a name with a new dataset.

        For every entry in ``new_datasets``, all ``PPIDataset`` rows with the same
        ``name`` and the given ``licenced`` flag are deleted, except rows that
        compare equal to the entry itself. Progress and errors are printed; an
        error while handling one entry does not stop the remaining entries.

        Args:
            new_datasets: Iterable of datasets to keep, or ``None`` to do nothing.
            licenced: ``licenced`` value used to filter deletion candidates.
        """
        if new_datasets is None:
            return
        for keep in new_datasets:
            print(f"Deleting all except {keep!s}")
            try:
                candidates = models.PPIDataset.objects.filter(name=keep.name, licenced=licenced)
                for candidate in candidates:
                    print(f"Testing: {candidate!s}")
                    if candidate != keep:
                        print(f"Deleting: {candidate!s}")
                        candidate.delete()
            except Exception as error:
                print("Error when trying to delete old datasets")
                print(error)
    
    
    def remove_old_pdis_data(new_datasets, licenced):
        """Delete ``PDisDataset`` records that share a name with a new dataset.

        For every entry in ``new_datasets``, all ``PDisDataset`` rows with the same
        ``name`` and the given ``licenced`` flag are deleted, except rows that
        compare equal to the entry itself. Progress and errors are printed; an
        error while handling one entry does not stop the remaining entries.

        Args:
            new_datasets: Iterable of datasets to keep, or ``None`` to do nothing.
            licenced: ``licenced`` value used to filter deletion candidates.
        """
        if new_datasets is None:
            return
        for keep in new_datasets:
            print(f"Deleting all except {keep!s}")
            try:
                candidates = models.PDisDataset.objects.filter(name=keep.name, licenced=licenced)
                for candidate in candidates:
                    print(f"Testing: {candidate!s}")
                    if candidate != keep:
                        print(f"Deleting: {candidate!s}")
                        candidate.delete()
            except Exception as error:
                print("Error when trying to delete old datasets")
                print(error)
    
    
    def remove_old_drdi_data(new_datasets, licenced):
        """Delete ``DrDiDataset`` records that share a name with a new dataset.

        For every entry in ``new_datasets``, all ``DrDiDataset`` rows with the same
        ``name`` and the given ``licenced`` flag are deleted, except rows that
        compare equal to the entry itself. Progress and errors are printed; an
        error while handling one entry does not stop the remaining entries.

        Args:
            new_datasets: Iterable of datasets to keep, or ``None`` to do nothing.
            licenced: ``licenced`` value used to filter deletion candidates.
        """
        if new_datasets is None:
            return
        for keep in new_datasets:
            print(f"Deleting all except {keep!s}")
            try:
                candidates = models.DrDiDataset.objects.filter(name=keep.name, licenced=licenced)
                for candidate in candidates:
                    print(f"Testing: {candidate!s}")
                    if candidate != keep:
                        print(f"Deleting: {candidate!s}")
                        candidate.delete()
            except Exception as error:
                print("Error when trying to delete old datasets")
                print(error)