Select Git revision
Gallenkamp, Fabian authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
app.py 50.83 KiB
import os,sys
import os.path as op
import zipfile
import io
import pathlib
from flask import Flask, flash, send_file, url_for
from flask_sqlalchemy import SQLAlchemy
from jinja2 import Template
from markupsafe import Markup
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.sql import expression, functions
from werkzeug.utils import redirect
from wtforms import validators
import flask_admin as admin
from flask_admin.base import MenuLink
from flask_admin.contrib import sqla
from flask_admin.contrib.sqla import filters
from flask_admin.contrib.sqla.form import InlineModelConverter
from flask_admin.contrib.sqla.fields import InlineModelFormList
from flask_admin.contrib.sqla.filters import BaseSQLAFilter, FilterEqual
from flask_admin.actions import action
# Create application
app = Flask(__name__)

# set optional bootswatch theme
# see http://bootswatch.com/3/ for available swatches
app.config['FLASK_ADMIN_SWATCH'] = 'cerulean'

# Secret key so we can use sessions. It is taken from the SECRET_KEY
# environment variable when that is set; the dummy value below is only the
# fallback and should not be relied on outside of local development.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', '123456790')

# File-based SQLite database (the URI below points at DATABASE_FILE)
app.config['DATABASE_FILE'] = 'sample_db.sqlite'
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///' + app.config['DATABASE_FILE']
app.config['SQLALCHEMY_ECHO'] = True
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

db = SQLAlchemy(app)
# Association table: software <-> feature (used as `secondary` by Software.features)
software_features_table = db.Table(
    'software_features',
    db.Model.metadata,
    db.Column('software_id', db.Integer, db.ForeignKey('software.id')),
    db.Column('feature_id', db.Integer, db.ForeignKey('feature.id')),
)

# Association table: software <-> language (used as `secondary` by Software.languages)
software_languages_table = db.Table(
    'software_languages',
    db.Model.metadata,
    db.Column('software_id', db.Integer, db.ForeignKey('software.id')),
    db.Column('language_id', db.Integer, db.ForeignKey('language.id')),
)

# Association table: software <-> programming language
# (used as `secondary` by Software.programminglanguages)
software_programminglanguages_table = db.Table(
    'software_programminglanguages',
    db.Model.metadata,
    db.Column('software_id', db.Integer, db.ForeignKey('software.id')),
    db.Column('programming_id', db.Integer, db.ForeignKey('programminglanguage.id')),
)
class License(db.Model):
    """A license, described by a name and an optional version string."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(100))
    version = db.Column(db.String(100))

    def __str__(self):
        """Return ``name`` alone, or ``name-version`` when a version is set."""
        if self.version:
            return "{}-{}".format(self.name, self.version)
        return "{}".format(self.name)

    def __repr__(self):
        """Return the primary key followed by the display string."""
        label = self.__str__()
        return "{}: {}".format(self.id, label)
class Feature(db.Model):
    """A named feature; linked to software through ``software_features``."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.Unicode(64))

    def __str__(self):
        """Return the feature name as display text."""
        return str(self.name)
class Language(db.Model):
    """A named language; linked to software through ``software_languages``."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.Unicode(64))

    def __str__(self):
        """Return the language name as display text."""
        return str(self.name)
class Programminglanguage(db.Model):
    """A programming language with a name and a version string."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.Unicode(64))
    version = db.Column(db.String(100))

    def __str__(self):
        """Return the name only; the version is not part of the display text."""
        return str(self.name)
class SoftwareCategory(db.Model):
    """A category that software entries can be assigned to."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(120))
    short_description = db.Column(db.Text)

    def __str__(self):
        """Return the category name as display text."""
        return str(self.name)
class Software(db.Model):
    """A software entry of the collection.

    Holds descriptive columns, a foreign key to ``License`` and to
    ``SoftwareCategory``, and collections of features, languages and
    programming languages that are resolved through the three
    association tables.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(120))
    short_description = db.Column(db.String(120))
    developer = db.Column(db.String(120))
    maintainer = db.Column(db.String(120))
    lastchanged = db.Column(db.Date)
    price = db.Column(db.Integer())
    # One of 'yearly' / 'once'.
    # The original bound this same Column object to a second attribute
    # (`modelprice = type = ...`). SQLAlchemy maps a Column under a single
    # attribute and warns when it is named twice, so only `modelprice` is kept.
    modelprice = db.Column(db.Enum('yearly', 'once', name='modelprice'))

    # Many-to-one: License gets a `softwares` backref.
    license_id = db.Column(db.Integer(), db.ForeignKey(License.id))
    license = db.relationship(License, backref='softwares')

    # Many-to-one: SoftwareCategory gets a `software` backref.
    softwarecategory_id = db.Column(db.Integer(), db.ForeignKey(SoftwareCategory.id))
    softwarecategory = db.relationship(SoftwareCategory, backref='software')

    # Collections resolved through the association tables.
    features = db.relationship('Feature', secondary=software_features_table)
    languages = db.relationship('Language', secondary=software_languages_table)
    programminglanguages = db.relationship('Programminglanguage', secondary=software_programminglanguages_table)

    architecture = db.Column(db.Enum('standalone', 'package', 'framework', 'app', 'SaaS', 'other', name='software_types'))

    def __str__(self):
        """Return the software name as display text."""
        return "{}".format(self.name)
class Link(db.Model):
    """A typed URL (repository, website, ...) that belongs to one ``Software``."""

    id = db.Column(db.Integer, primary_key=True)
    type = db.Column(
        db.Enum('repository', 'website', 'developer', 'tutorial', 'wiki', 'faq', 'other', name='link_types')
    )
    url = db.Column(db.String(120))
    comment = db.Column(db.String(120))

    # Owning software; Software gets a `links` backref.
    software_id = db.Column(db.Integer(), db.ForeignKey(Software.id))
    software = db.relationship(Software, backref='links')

    def __str__(self):
        """Return a placeholder anchor showing the link type, followed by the URL."""
        pattern = "<a href='#'>{}</a>:{}"
        return pattern.format(self.type, self.url)
# Flask views
@app.route('/')
def index():
    """Redirect requests for the site root to the admin index view."""
    admin_home = url_for("admin.index")
    return redirect(admin_home)
# Custom filter class
class FilterLastNameBrown(BaseSQLAFilter):
    """Filter that matches (or excludes) rows whose column equals "Brown"."""

    def apply(self, query, value, alias=None):
        """Keep rows equal to "Brown" when ``value`` is '1', otherwise rows that differ."""
        if value == '1':
            criterion = self.column == "Brown"
        else:
            criterion = self.column != "Brown"
        return query.filter(criterion)

    def operation(self):
        """Return the label of this filter operation."""
        return 'is Brown'
'''
# Customized User model admin
inline_form_options = {
'form_label': "Info item",
'form_columns': ['id', 'key', 'value'],
'form_args': None,
'form_extra_fields': None,
}'''
class AdvancedSoftwareView(sqla.ModelView):
    """Admin view for ``Software`` with link rendering and an export action."""

    column_sortable_list = (
        'name',
        ('license', ("license.name", "license.version")),
        ('softwarecategory', 'softwarecategory.name'),
        'lastchanged',
    )
    column_list = ('name', 'license', 'softwarecategory', 'links', 'architecture', 'programminglanguages', )
    inline_models = (Link,)
    column_hide_backrefs = False
    page_size = 100

    def _links_formatter(view, context, model, name):
        """Render ``model.links`` as a comma-separated list of anchors.

        The anchor text is ``type`` or ``type-comment`` when a comment is set.
        ``Markup.format`` escapes its arguments, so URL, type and comment
        values coming from the database are not inserted as raw HTML.
        """
        form_links = []
        for link in model.links:
            if link.comment:
                form_link = Markup("<a href='{}' target='_blank'>{}-{}</a>").format(link.url, link.type, link.comment)
            else:
                form_link = Markup("<a href='{}' target='_blank'>{}</a>").format(link.url, link.type)
            form_links.append(form_link)
        # Joining on a Markup separator keeps the already-escaped anchors intact.
        return Markup(',').join(form_links)

    column_formatters = {
        'links': _links_formatter
    }

    @action('advancedexport', 'AdvancedExport')
    def action_advancedexport(self, ids):
        """Export wiki pages and return them as a ZIP download.

        Renders one page per selected software and one overview page of all
        categories into ``../digitale-Methoden-wiki/``, then zips every entry
        of that directory into an in-memory archive.

        :param ids: primary keys of the selected ``Software`` rows.
        :return: the ``send_file`` response on success; ``None`` when an
            exception was handled by ``handle_view_exception``.
        """
        try:
            # Generate sub pages
            with open('templates/export/software.jinja2', "r", encoding="utf-8") as file_:
                template = Template(file_.read())
            softwares = Software.query.filter(Software.id.in_(ids))
            for software_tool in softwares:
                template.stream(name=software_tool.name).dump('../digitale-Methoden-wiki/Tool_' + software_tool.name.replace(' ','') + '.asciidoc', encoding='utf-8')

            # Pair every category with the query for its software entries
            softwareincategory = [
                (software_category,
                 Software.query.filter(Software.softwarecategory_id == software_category.id))
                for software_category in SoftwareCategory.query.all()
            ]

            # Generate overview page
            with open('templates/export/softwares.jinja2', "r", encoding="utf-8") as file_:
                template = Template(file_.read())
            template.stream(softwareincategory=softwareincategory).dump('../digitale-Methoden-wiki/Softwareliste.asciidoc', encoding='utf-8')

            base_path = pathlib.Path('../digitale-Methoden-wiki/')
            data = io.BytesIO()
            with zipfile.ZipFile(data, mode='w') as z:
                for f_name in base_path.iterdir():
                    # Store entries under their bare name; without `arcname`
                    # the member names would carry the '../digitale-Methoden-wiki/'
                    # prefix into the archive.
                    z.write(f_name, arcname=f_name.name)
            data.seek(0)

            # Flash before returning: the statement was unreachable when it
            # followed the `return`.
            flash("Done")
            # NOTE(review): newer Flask releases appear to have renamed
            # `attachment_filename` to `download_name` - confirm against the
            # Flask version this project pins before upgrading.
            return send_file(
                data,
                mimetype='application/zip',
                as_attachment=True,
                attachment_filename='data.zip'
            )
        except Exception as ex:
            # Re-raise unless the view's exception handler took care of it.
            if not self.handle_view_exception(ex):
                raise
            flash("Not done")
'''
class UserAdmin(sqla.ModelView):
action_disallowed_list = ['delete', ]
column_display_pk = True
column_list = [
'id',
'last_name',
'first_name',
'email',
'pets',
]
column_default_sort = [('last_name', False), ('first_name', False)] # sort on multiple columns
# custom filter: each filter in the list is a filter operation (equals, not equals, etc)
# filters with the same name will appear as operations under the same filter
column_filters = [
FilterEqual(column=User.last_name, name='Last Name'),
FilterLastNameBrown(column=User.last_name, name='Last Name',
options=(('1', 'Yes'), ('0', 'No')))
]
inline_models = [(UserInfo, inline_form_options), ]
# setup create & edit forms so that only 'available' pets can be selected
def create_form(self):
return self._use_filtered_parent(
super(UserAdmin, self).create_form()
)
def edit_form(self, obj):
return self._use_filtered_parent(
super(UserAdmin, self).edit_form(obj)
)
def _use_filtered_parent(self, form):
form.pets.query_factory = self._get_parent_list
return form
def _get_parent_list(self):
# only show available pets in the form
return Pet.query.filter_by(available=True).all()
'''
'''
# Customized Post model admin
class PostAdmin(sqla.ModelView):
column_list = ['id', 'user', 'title', 'date', 'tags']
column_default_sort = ('date', True)
column_sortable_list = [
'id',
'title',
'date',
('user', ('user.last_name', 'user.first_name')), # sort on multiple columns
]
column_labels = dict(title='Post Title') # Rename 'title' column in list view
column_searchable_list = [
'title',
'tags.name',
'user.first_name',
'user.last_name',
]
column_labels = {
'title': 'Title',
'tags.name': 'tags',
'user.first_name': 'user\'s first name',
'user.last_name': 'last name',
}
column_filters = [
'user',
'title',
'date',
'tags',
filters.FilterLike(Post.title, 'Fixed Title', options=(('test1', 'Test 1'), ('test2', 'Test 2'))),
]
can_export = True
export_max_rows = 1000
export_types = ['csv', 'xls']
# Pass arguments to WTForms. In this case, change label for text field to
# be 'Big Text' and add required() validator.
form_args = dict(
text=dict(label='Big Text', validators=[validators.data_required()])
)
form_ajax_refs = {
'user': {
'fields': (User.first_name, User.last_name)
},
'tags': {
'fields': (Tag.name,),
'minimum_input_length': 0, # show suggestions, even before any user input
'placeholder': 'Please select',
'page_size': 5,
},
}
def __init__(self, session):
# Just call parent class with predefined model.
super(PostAdmin, self).__init__(Post, session)
'''
class TreeView(sqla.ModelView):
    """Model view whose forms leave out the ``children`` column."""

    form_excluded_columns = [
        'children',
    ]
class ScreenView(sqla.ModelView):
    """Model view listing, sorting and filtering on ``number_of_pixels``."""

    # note that 'number_of_pixels' is a hybrid property, not a field
    column_list = [
        'id',
        'width',
        'height',
        'number_of_pixels',
    ]
    # Same columns as the list view, kept as a separate list object.
    column_sortable_list = list(column_list)

    # Flask-admin can automatically detect the relevant filters for hybrid properties.
    column_filters = ('number_of_pixels',)
# Create admin (this rebinds the name `admin` from the flask_admin module
# to the Admin instance)
admin = admin.Admin(
    app,
    name='Softwaresammlung: Digitale Methoden',
    template_mode='bootstrap3',
)

# Add views: Software and Feature at the top level, the rest under "Other"
admin.add_view(AdvancedSoftwareView(Software, db.session))
admin.add_view(sqla.ModelView(Feature, db.session))
admin.add_view(
    sqla.ModelView(License, db.session, category="Other")
)
admin.add_view(
    sqla.ModelView(Link, db.session, category="Other")
)
admin.add_view(
    sqla.ModelView(SoftwareCategory, db.session, category="Other")
)

# External collections, grouped in a sub category of "Other"
admin.add_sub_category(name="Andere Sammlungen", parent_name="Other")
admin.add_link(
    MenuLink(
        name="CRAN-R",
        url='https://cran.r-project.org/web/views/',
        category='Andere Sammlungen',
        target="_blank",
    )
)
admin.add_link(
    MenuLink(
        name="ROpenSci",
        url='https://ropensci.org/packages/',
        category='Andere Sammlungen',
        target="_blank",
    )
)
def build_sample_db():
"""
Populate a small db with some example entries.
"""
import random
import datetime
db.drop_all()
db.create_all()
lic_unknown = License(name="Unbekannt")
lic_bsd = License(name="BSD")
lic_gpl2 = License(name="GPL", version="2.0")
lic_gpl3 = License(name="GPL", version="3.0")
lic_agpl3 = License(name="AGPL", version="3.0")
lic_lgpl = License(name="LGPL")
lic_apache2 = License(name="Apache", version="2.0")
lic_mit = License(name="MIT")
lic_byncnd3 = License(name="CC BY-NC-ND", version="3.0")
lic_ccdl = License(name="CCDL", version="1.0")
lic_prop = License(name="Proprietär")
db.session.add(lic_gpl3)
db.session.add(lic_gpl3)
db.session.add(lic_agpl3)
db.session.add(lic_apache2)
db.session.add(lic_mit)
db.session.add(lic_prop)
prol_r = Programminglanguage(name="R")
prol_py = Programminglanguage(name="Python")
prol_cy = Programminglanguage(name="Cython")
prol_java = Programminglanguage(name="Java")
prol_objc = Programminglanguage(name="Objective-C")
prol_jupyternb = Programminglanguage(name="Jupyter Notebook")
prol_js = Programminglanguage(name="Javascript")
prol_c = Programminglanguage(name="C")
db.session.add(prol_r)
db.session.add(prol_py)
db.session.add(prol_java)
db.session.add(prol_objc)
db.session.add(prol_jupyternb)
db.session.add(prol_js)
db.session.add(prol_c)
cat_tracking = SoftwareCategory(name="datenschutzkonformes Tracking", short_description="Sammlung von Sensordaten/Logdaten oder Nutzungsdaten mit expliziter Einverständnis mittels Software auf dem Gerät.")
cat_scraping = SoftwareCategory(name="Scraping", short_description="Tools im Zusammenhang mit Web-Scraping.")
cat_int = SoftwareCategory(name="Forschungsplattformen", short_description="Integrierte Forschungsumgebungen/Plattformen für sozialwissenschaftliche Forschung.")
cat_qda = SoftwareCategory(name="QDA-Software", short_description="Computer-gestützte Analyse qualitativer Daten.")
cat_tm = SoftwareCategory(name="Automatisierte Inhaltsanalyse/Text Mining", short_description="")
cat_senti = SoftwareCategory(name="Sentiment Analysis", short_description="")
cat_topic = SoftwareCategory(name="Topic-Modellierung", short_description="")
cat_visu = SoftwareCategory(name="Visualisierung", short_description="")
cat_kollab_anno = SoftwareCategory(name="Kollaboratives Annotieren", short_description="")
cat_kollab_write = SoftwareCategory(name="Kollaboratives Schreiben", short_description="")
cat_stat = SoftwareCategory(name="Statisik-Programme", short_description="Zur statistischen Modellierung einsetzbare Software.")
cat_repo = SoftwareCategory(name="Forschungsdatenspeicherung", short_description="")
cat_now = SoftwareCategory(name="Nowcasting", short_description="")
cat_net = SoftwareCategory(name="Netzwerkanalysen", short_description="social network analysis")
cat_esmema = SoftwareCategory(name="ESM/EMA-Studien", short_description="Datenerhebung in 'natürlicher' Umgebung.")
cat_transkript = SoftwareCategory(name="Audio-Transkription", short_description="Transkriptionssoftware")
cat_search = SoftwareCategory(name="Audio-Transkribtion", short_description="Transkriptionssoftware")
cat_misc = SoftwareCategory(name="Weiteres", short_description="Zu speziell zum Einordnen..")
db.session.add(cat_tracking)
db.session.add(cat_scraping)
db.session.add(cat_int)
aware = Software(name="AWARE",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_tracking,
architecture="framework",
license=lic_apache2)
db.session.add(aware)
db.session.add(Link(software=aware, type="website", url="http://www.awareframework.com/", comment=""))
db.session.add(Link(software=aware, type="repository", url="https://github.com/denzilferreira/aware-client", comment="android"))
db.session.add(Link(software=aware, type="repository", url="https://github.com/tetujin/aware-client-ios", comment="iOS"))
db.session.add(Link(software=aware, type="repository", url="https://github.com/tetujin/aware-client-osx", comment="OSX"))
db.session.add(Link(software=aware, type="repository", url="https://github.com/denzilferreira/aware-server", comment="server"))
meili = Software(name="MEILI",
short_description="",
developer="Adrian C. Prelipcean",
maintainer="Adrian C. Prelipcean",
softwarecategory=cat_tracking,
architecture="framework",
license=lic_gpl3)
db.session.add(meili)
db.session.add(Link(software=meili, type="repository", url="https://github.com/Badger-MEILI",
comment="group"))
passivedatakit = Software(name="Passive Data Kit",
short_description="",
developer="Chris Karr",
maintainer="Chris Karr",
softwarecategory=cat_tracking,
architecture="framework",
license=lic_apache2,
programminglanguages=[prol_py,prol_java])
db.session.add(passivedatakit)
db.session.add(Link(software=passivedatakit, type="website", url="https://passivedatakit.org/", comment=""))
db.session.add(Link(software=passivedatakit, type="repository", url="https://github.com/audaciouscode/PassiveDataKit-Django", comment="djangoserver"))
db.session.add(Link(software=passivedatakit, type="repository", url="https://github.com/audaciouscode/PassiveDataKit-Android", comment="android"))
db.session.add(Link(software=passivedatakit, type="repository", url="https://github.com/audaciouscode/PassiveDataKit-iOS", comment="iOS"))
rselenium = Software(name="RSelenium",
developer="John Harrison",
maintainer="Ju Yeong Kim",
softwarecategory=cat_scraping,
architecture="package",
license=lic_agpl3)
db.session.add(rselenium)
db.session.add(Link(software=rselenium, type="repository", url="https://github.com/ropensci/RSelenium", comment=""))
amcat = Software(name="AmCAT",
short_description="The Amsterdam Content Analysis Toolkit (AmCAT) is an open source infrastructure that makes it easy to do large-scale automatic and manual content analysis (text analysis) for the social sciences and humanities.",
developer="Chris Karr",
maintainer="Ju Yeong Kim",
softwarecategory=cat_int,
architecture="SaaS",
license=lic_agpl3)
db.session.add(amcat)
db.session.add(Link(software=amcat, type="website", url="http://vanatteveldt.com/amcat/", comment="entwickler"))
db.session.add(Link(software=amcat, type="repository", url="https://github.com/amcat/amcat", comment=""))
db.session.add(Link(software=amcat, type="wiki", url="http://wiki.amcat.nl/3.4:AmCAT_Navigator_3", comment=""))
cosmos = Software(name="COSMOS",
short_description="COSMOS Open Data Analytics software",
developer="",
maintainer="",
softwarecategory=cat_int,
architecture="standalone",
license=lic_prop)
db.session.add(cosmos)
db.session.add(Link(software=cosmos, type="website", url="http://socialdatalab.net/COSMOS", comment=""))
lcm = Software(name="LCM",
short_description="Leipzig Corpus Miner a decentralized SaaS application for the analysis of very large amounts of news texts ",
developer="Gregor Wiedeman, Andreas Niekler",
maintainer="",
softwarecategory=cat_int,
architecture="framework",
license=lic_lgpl)
db.session.add(lcm)
db.session.add(Link(software=lcm, type="website", url="http://lcm.informatik.uni-leipzig.de/generic.html", comment=""))
ilcm = Software(name="iLCM",
short_description="The iLCM(LCM=Leipzig Corpus Miner) project pursues the development of an integrated research environment for the analysis of structured and unstructured data in a ‘Software as a Service’ architecture (SaaS). The research environment addresses requirements for the quantitative evaluation of large amounts of qualitative data using text mining methods and requirements for the reproducibility of data-driven research designs in the social sciences.",
developer="Gregor Wiedeman, Andreas Niekler",
maintainer="",
architecture="SaaS",
softwarecategory=cat_int,
license=lic_lgpl)
db.session.add(ilcm)
db.session.add(Link(software=ilcm, type="website", url="https://ilcm.informatik.uni-leipzig.de/", comment=""))
atlasti = Software(name="ATLAS.ti",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_qda,
architecture="standalone",
license=lic_prop)
db.session.add(atlasti)
db.session.add(Link(software=atlasti, type="website", url="https://atlasti.com/de/produkt/what-is-atlas-ti/", comment=""))
leximancer = Software(name="Leximancer",
short_description="Leximancer automatically analyses your text documents to identify the high level concepts in your text documents, delivering the key ideas and actionable insights you need with powerful interactive visualisations and data exports.",
developer="",
maintainer="",
softwarecategory=cat_qda,
architecture="standalone",
license=lic_prop)
db.session.add(leximancer)
db.session.add(Link(software=leximancer, type="website", url="https://info.leximancer.com/", comment=""))
tool = Software(name="MAXQDA",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_qda,
architecture="standalone",
license=lic_prop)
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://www.rrz.uni-hamburg.de/services/software/alphabetisch/maxqda.html", comment=""))
tool = Software(name="NVivo",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_qda,
license=lic_prop,
programminglanguages=[])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://www.qsrinternational.com/nvivo/who-uses-nvivo/academics", comment=""))
tool = Software(name="QDAMiner",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_qda,
license=lic_prop,
programminglanguages=[])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://provalisresearch.com/products/qualitative-data-analysis-software/", comment=""))
tool = Software(name="ORA Pro",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_qda,
license=lic_prop,
programminglanguages=[])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://netanomics.com/", comment=""))
tool = Software(name="Quirkos",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_qda,
license=lic_prop,
programminglanguages=[])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://www.quirkos.com/", comment=""))
db.session.add(Link(software=tool, type="repository", url="", comment=""))
tool = Software(name="RQDA",
short_description="It includes a number of standard Computer-Aided Qualitative Data Analysis features. In addition it seamlessly integrates with R, which means that a) statistical analysis on the coding is possible, and b) functions for data manipulation and analysis can be easily extended by writing R functions. To some extent, RQDA and R make an integrated platform for both quantitative and qualitative data analysis.",
developer="Ronggui Huang",
maintainer="Ronggui Huang",
softwarecategory=cat_qda,
license=lic_bsd,
programminglanguages=[prol_r])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://rqda.r-forge.r-project.org/", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/Ronggui/RQDA", comment=""))
tool = Software(name="TAMS",
short_description="Text Analysis Markup System (TAMS) is both a system of marking documents for qualitative analysis and a series of tools for mining information based on that syntax.",
developer="",
maintainer="",
softwarecategory=cat_qda,
license=lic_gpl2,
programminglanguages=[])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://sourceforge.net/projects/tamsys", comment=""))
tool = Software(name="Apache OpenNLP",
short_description="OpenNLP supports the most common NLP tasks, such as tokenization, sentence segmentation, part-of-speech tagging, named entity extraction, chunking, parsing, language detection and coreference resolution.",
developer="",
maintainer="",
softwarecategory=cat_tm,
license=lic_apache2,
architecture="package",
programminglanguages=[prol_java])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://opennlp.apache.org/", comment=""))
db.session.add(Link(software=tool, type="repository", url="", comment=""))
tool = Software(name="GATE",
short_description="GATE - General Architecture for Text Engineering",
developer="",
maintainer="",
softwarecategory=cat_tm,
architecture="package",
license=lic_lgpl,
programminglanguages=[prol_java])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://gate.ac.uk/overview.html", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/GateNLP/gate-core", comment=""))
tool = Software(name="NLTK",
short_description="NLTK is a leading platform for building Python programs to work with human language data. It provides easy-to-use interfaces to over 50 corpora and lexical resources such as WordNet, along with a suite of text processing libraries for classification, tokenization, stemming, tagging, parsing, and semantic reasoning, wrappers for industrial-strength NLP libraries, and an active discussion forum.",
developer="",
maintainer="",
softwarecategory=cat_tm,
architecture="package",
license=lic_apache2,
programminglanguages=[prol_py])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://www.nltk.org/index.html", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/nltk/nltk", comment=""))
tool = Software(name="Gensim",
short_description="Gensim is a Python library for topic modelling, document indexing and similarity retrieval with large corpora. Target audience is the natural language processing (NLP) and information retrieval (IR) community.",
developer="",
maintainer="",
softwarecategory=cat_tm,
architecture="package",
license=lic_lgpl,
programminglanguages=[prol_py])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://pypi.org/project/gensim/", comment=""))
db.session.add(Link(software=tool, type="repository", url="", comment=""))
tool = Software(name="Pandas",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_tm,
architecture="package",
license=lic_bsd,
programminglanguages=[prol_py])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://pandas.pydata.org/", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/pandas-dev/pandas", comment=""))
tool = Software(name="spaCy",
short_description=" spaCy excels at large-scale information extraction tasks. It's written from the ground up in carefully memory-managed Cython. Independent research has confirmed that spaCy is the fastest in the world. If your application needs to process entire web dumps, spaCy is the library you want to be using.",
developer="",
maintainer="",
softwarecategory=cat_tm,
architecture="package",
license=lic_mit,
programminglanguages=[prol_cy])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://spacy.io/", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/explosion/spaCy", comment=""))
tool = Software(name="RapidMiner",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_tm,
architecture="framework",
license=lic_agpl3,
programminglanguages=[prol_java])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://rapidminer.com/", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/rapidminer/rapidminer-studio", comment=""))
tool = Software(name="tm",
short_description="",
developer="Ingo Feinerer, Kurt Hornik",
maintainer="Ingo Feinerer, Kurt Hornik",
softwarecategory=cat_tm,
architecture="package",
license=lic_gpl3,
programminglanguages=[prol_r])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://tm.r-forge.r-project.org/", comment=""))
db.session.add(Link(software=tool, type="website", url="https://cran.r-project.org/package=tm", comment="cran"))
db.session.add(Link(software=tool, type="repository", url="", comment=""))
tool = Software(name="Stanford CoreNLP",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_tm,
architecture="framework",
license=lic_gpl3,
programminglanguages=[prol_java])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://stanfordnlp.github.io/CoreNLP/", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/stanfordnlp/CoreNLP", comment=""))
tool = Software(name="xtas",
short_description="the eXtensible Text Analysis Suite(xtas) is a collection of natural language processing and text mining tools, brought together in a single software package with built-in distributed computing and support for the Elasticsearch document store.",
developer="",
maintainer="",
softwarecategory=cat_tm,
architecture="framework",
license=lic_apache2,
programminglanguages=[prol_py])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://nlesc.github.io/xtas/", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/NLeSC/xtas", comment=""))
tool = Software(name="MALLET",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_topic,
architecture="package",
license=lic_apache2,
programminglanguages=[prol_java])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://mallet.cs.umass.edu/", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/mimno/Mallet", comment=""))
tool = Software(name="TOME",
short_description="TOME is a tool to support the interactive exploration and visualization of text-based archives, supported by a Digital Humanities Startup Grant from the National Endowment for the Humanities (Lauren Klein and Jacob Eisenstein, co-PIs). Drawing upon the technique of topic modeling—a machine learning method for identifying the set of topics, or themes, in a document set—our tool allows humanities scholars to trace the evolution and circulation of these themes across networks and over time.",
developer="",
maintainer="",
softwarecategory=cat_topic,
architecture="package",
license=lic_unknown,
programminglanguages=[prol_py, prol_jupyternb])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://dhlab.lmc.gatech.edu/tome/", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/GeorgiaTechDHLab/TOME/", comment=""))
tool = Software(name="Stm",
short_description="The Structural Topic Model (STM) allows researchers to estimate topic models with document-level covariates. The package also includes tools for model selection, visualization, and estimation of topic-covariate regressions. Methods developed in Roberts et al (2014) <doi:10.1111/ajps.12103> and Roberts et al (2016) <doi:10.1080/01621459.2016.1141684>.",
developer="",
maintainer="",
softwarecategory=cat_topic,
architecture="package",
license=lic_mit,
programminglanguages=[prol_r])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://structuraltopicmodel.com", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/bstewart/stm", comment=""))
tool = Software(name="lexicoder",
short_description="Lexicoder performs simple deductive content analyses of any kind of text, in almost any language. All that is required is the text itself, and a dictionary. Our own work initially focused on the analysis of newspaper stories during election campaigns, and both television and newspaper stories about public policy issues. The software can deal with almost any text, however, and lots of it. Our own databases typically include up to 100,000 news stories. Lexicoder processes these data, even with a relatively complicated coding dictionary, in about fifteen minutes. The software has, we hope, a wide range of applications in the social sciences. It is not the only software that conducts content analysis, of course - there are many packages out there, some of which are much more sophisticated than this one. The advantage to Lexicoder, however, is that it can run on any computer with a recent version of Java (PC or Mac), it is very simple to use, it can deal with huge bodies of data, it can be called from R as well as from the Command Line, and its free.",
developer="",
maintainer="",
softwarecategory=cat_senti,
architecture="package",
license=lic_prop,
programminglanguages=[prol_java])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://www.lexicoder.com/index.html", comment=""))
tool = Software(name="OpinionFinder",
short_description="OpinionFinder is a system that processes documents and automatically identifies subjective sentences as well as various aspects of subjectivity within sentences, including agents who are sources of opinion, direct subjective expressions and speech events, and sentiment expressions. OpinionFinder was developed by researchers at the University of Pittsburgh, Cornell University, and the University of Utah. In addition to OpinionFinder, we are also releasing the automatic annotations produced by running OpinionFinder on a subset of the Penn Treebank.",
developer="",
maintainer="",
softwarecategory=cat_senti,
architecture="package",
license=lic_unknown,
programminglanguages=[prol_java])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://mpqa.cs.pitt.edu/opinionfinder/", comment=""))
db.session.add(Link(software=tool, type="repository", url="", comment=""))
tool = Software(name="Readme",
short_description="The ReadMe software package for R takes as input a set of text documents (such as speeches, blog posts, newspaper articles, judicial opinions, movie reviews, etc.), a categorization scheme chosen by the user (e.g., ordered positive to negative sentiment ratings, unordered policy topics, or any other mutually exclusive and exhaustive set of categories), and a small subset of text documents hand classified into the given categories. ",
developer="",
maintainer="",
softwarecategory=cat_senti,
architecture="package",
license=lic_byncnd3,
programminglanguages=[prol_r])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://gking.harvard.edu/readme", comment=""))
tool = Software(name="Gephi",
short_description="Gephi is an award-winning open-source platform for visualizing and manipulating large graphs.",
developer="",
maintainer="",
softwarecategory=cat_visu,
architecture="package",
license=lic_gpl3,
programminglanguages=[prol_java])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/gephi/gephi/", comment=""))
tool = Software(name="WebAnno",
short_description="WebAnno is a multi-user tool supporting different roles such as annotator, curator, and project manager. The progress and quality of annotation projects can be monitored and measuered in terms of inter-annotator agreement. Multiple annotation projects can be conducted in parallel.",
developer="",
maintainer="",
softwarecategory=cat_kollab_anno,
architecture="package",
license=lic_apache2,
programminglanguages=[prol_py])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://webanno.github.io/webanno/", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/webanno/webanno", comment=""))
tool = Software(name="FidusWriter",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_kollab_write,
architecture="package",
license=lic_agpl3,
programminglanguages=[prol_py,prol_js])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://fiduswriter.org", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/fiduswriter/fiduswriter", comment=""))
tool = Software(name="dataverse",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_repo,
architecture="framework",
license=lic_apache2,
programminglanguages=[prol_java])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://dataverse.org/", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://github.com/IQSS/dataverse", comment=""))
tool = Software(name="gretl",
short_description="Is a cross-platform software package for econometric analysis",
developer="",
maintainer="",
softwarecategory=cat_stat,
architecture="package",
license=lic_gpl3,
programminglanguages=[prol_c])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://gretl.sourceforge.net/", comment=""))
db.session.add(Link(software=tool, type="repository", url="https://sourceforge.net/p/gretl/git/ci/master/tree/", comment=""))
tool = Software(name="MLwiN",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_stat,
architecture="package",
license=lic_prop,
programminglanguages=[])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://www.bristol.ac.uk/cmm/software/mlwin/", comment=""))
db.session.add(Link(software=tool, type="repository", url="", comment=""))
tool = Software(name="SPSS",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_stat,
architecture="package",
license=lic_prop,
programminglanguages=[])
db.session.add(tool)
tool = Software(name="STATA",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_stat,
architecture="package",
license=lic_prop,
programminglanguages=[])
db.session.add(tool)
tool = Software(name="Nowcasting",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_now,
architecture="package",
license=lic_gpl3,
programminglanguages=[prol_r])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://cran.r-project.org/package=nowcasting", comment="cran"))
db.session.add(Link(software=tool, type="repository", url="https://github.com/nmecsys/nowcasting", comment=""))
tool = Software(name="AutoMap",
short_description="AutoMap enables the extraction of information from texts using Network Text Analysis methods. AutoMap supports the extraction of several types of data from unstructured documents. The type of information that can be extracted includes: content analytic data (words and frequencies), semantic network data (the network of concepts), meta-network data (the cross classification of concepts into their ontological category such as people, places and things and the connections among these classified concepts), and sentiment data (attitudes, beliefs). Extraction of each type of data assumes the previously listed type of data has been extracted.",
developer="",
maintainer="",
softwarecategory=cat_net,
architecture="package",
license=lic_prop,
programminglanguages=[prol_java])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://www.casos.cs.cmu.edu/projects/automap/software.php", comment=""))
tool = Software(name="NodeXL",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_net,
architecture="package",
license=lic_prop,
programminglanguages=[])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://www.smrfoundation.org/nodexl/", comment=""))
tool = Software(name="ORA Pro",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_net,
architecture="package",
license=lic_prop,
programminglanguages=[])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="", comment=""))
db.session.add(Link(software=tool, type="repository", url="", comment=""))
tool = Software(name="Pajek",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_net,
architecture="package",
license=lic_prop,
programminglanguages=[])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://mrvar.fdv.uni-lj.si/pajek/", comment=""))
tool = Software(name="NetworkX",
short_description="Data structures for graphs, digraphs, and multigraphs Many standard graph algorithms Network structure and analysis measures Generators for classic graphs, random graphs, and synthetic networks Nodes can be 'anything' (e.g., text, images, XML records) Edges can hold arbitrary data (e.g., weights, time-series) Open source 3-clause BSD license Well tested with over 90% code coverage Additional benefits from Python include fast prototyping, easy to teach, and multi-platform",
developer="",
maintainer="",
softwarecategory=cat_net,
architecture="package",
license=lic_bsd,
programminglanguages=[prol_py])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="", comment=""))
db.session.add(Link(software=tool, type="repository", url="", comment=""))
tool = Software(name="UCINET",
short_description="UCINET 6 for Windows is a software package for the analysis of social network data. It was developed by Lin Freeman, Martin Everett and Steve Borgatti. It comes with the NetDraw network visualization tool.",
developer="",
maintainer="",
softwarecategory=cat_net,
architecture="package",
license=lic_prop,
programminglanguages=[])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="https://sites.google.com/site/ucinetsoftware/home", comment=""))
tool = Software(name="LuceneSolr",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_search,
architecture="package",
license=lic_apache2,
programminglanguages=[])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="http://lucene.apache.org/solr/", comment=""))
db.session.add(Link(software=tool, type="repository", url="", comment=""))
paco = Software(name="paco",
short_description="",
developer="Bob Evans",
maintainer="Bob Evans",
softwarecategory=cat_esmema,
architecture="framework",
license=lic_apache2,
programminglanguages=[prol_objc, prol_java])
db.session.add(paco)
db.session.add(Link(software=paco, type="website", url="https://www.pacoapp.com/", comment=""))
db.session.add(Link(software=paco, type="repository", url="https://github.com/google/paco", comment=""))
f4analyse = Software(name="f4analyse",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_transkript,
architecture="standalone",
license=lic_prop)
db.session.add(f4analyse)
db.session.add(Link(software=f4analyse, type="website", url="https://www.audiotranskription.de/f4-analyse", comment=""))
'''
tool = Software(name="",
short_description="",
developer="",
maintainer="",
softwarecategory=cat_,
architecture="package",
license=lic_,
programminglanguages=[prol_])
db.session.add(tool)
db.session.add(Link(software=tool, type="website", url="", comment=""))
db.session.add(Link(software=tool, type="repository", url="", comment=""))
'''
db.session.commit()
return
if __name__ == '__main__':
    # Locate the SQLite file relative to the directory holding this script
    # (symlinks in the directory path are resolved first).
    app_dir = op.realpath(op.dirname(__file__))
    database_path = op.join(app_dir, app.config['DATABASE_FILE'])

    # Seed the sample data only when no database file is present at that
    # location; an existing file is left untouched.
    if not op.exists(database_path):
        build_sample_db()

    # Hand control to Flask's built-in server, with debug mode switched on.
    app.run(debug=True)