diff --git a/include/distance.h b/include/distance.h
new file mode 100644
index 0000000000000000000000000000000000000000..e7f63c37262fc1c87119de0934fac771ce91f04c
--- /dev/null
+++ b/include/distance.h
@@ -0,0 +1,15 @@
+#ifndef CDIM_H
+#define CDIM_H
+
+#include <math.h>
+#include <stdlib.h>
+
+/** Hellinger distance for stochastic vectors.
+ */
+int
+hellinger (const double *pva,
+ const double *pvb,
+ const size_t len,
+ double *dist);
+
+#endif /* CDIM_H */
diff --git a/setup.py b/setup.py
index 122ae772409a93195ce186106492e5ca5cae2fb7..fcc58b63f8d819fcf323745efb4d856a85d299c0 100644
--- a/setup.py
+++ b/setup.py
@@ -11,6 +11,10 @@ ext_features = Extension('_features',
'src/apollon/signal/_features_module.c'],
include_dirs = ['include', np.get_include()])
+ext_som_dist = Extension('_distance',
+ sources = ['src/apollon/som/distance.c',
+ 'src/apollon/som/_distance_module.c'],
+ include_dirs = ['include', np.get_include()])
-setup(ext_modules = [ext_features])
+setup(ext_modules = [ext_features, ext_som_dist])
diff --git a/src/apollon/som/_distance_module.c b/src/apollon/som/_distance_module.c
new file mode 100644
index 0000000000000000000000000000000000000000..fa4fce02d9bb3d65f62d7b1059342ad214b5568a
--- /dev/null
+++ b/src/apollon/som/_distance_module.c
@@ -0,0 +1,164 @@
+#define NPY_NO_DEPRECATED_API NPY_1_8_API_VERSION
+#define PY_ARRAY_UNIQUE_SYMBOL comsar_NP_ARRAY_API
+#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__)
+#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
+#pragma GCC optimize("tree-vectorize")
+#pragma GCC optimize("unsafe-math-optimizations")
+#pragma GCC optimize("unroll-loops")
+#pragma GCC diagnostic warning "-Wall"
+#endif
+#endif
+#include <Python.h>
+#include <numpy/arrayobject.h>
+#include "distance.h"
+
+
+/* Compute the Hellinger distance for two one-dimensional arrays.
+ *
+ * Params:
+ * inp_a One-dimensional array.
+ * inp_b One-dimensional array.
+ * Returns:
+ * float
+ */
+static PyObject *
+apollon_som_distance_hellinger (PyObject* self, PyObject* args)
+{
+ int status = 0;
+ npy_intp n_elem = 0;
+ double dist = 0.0;
+ PyObject *op_prob_a = NULL;
+ PyObject *op_prob_b = NULL;
+
+ PyArrayObject *prob_a = NULL;
+ PyArrayObject *prob_b = NULL;
+
+ if (!PyArg_ParseTuple (args, "OO", &op_prob_a, &op_prob_b))
+ {
+ return NULL;
+ }
+
+ prob_a = (PyArrayObject *) PyArray_ContiguousFromAny (op_prob_a, NPY_DOUBLE, 1, 1);
+ if (prob_a == NULL)
+ {
+ PyErr_SetString (PyExc_RuntimeError, "Could not convert first input.\n");
+ Py_RETURN_NONE;
+ }
+
+ prob_b = (PyArrayObject *) PyArray_ContiguousFromAny (op_prob_b, NPY_DOUBLE, 1, 1);
+ if (prob_b == NULL)
+ {
+ PyErr_SetString (PyExc_RuntimeError, "Could not convert second input.\n");
+ Py_RETURN_NONE;
+ }
+
+ n_elem = PyArray_SIZE (prob_a);
+ status = hellinger (
+ (double *) PyArray_DATA (prob_a),
+ (double *) PyArray_DATA (prob_b),
+ (size_t) n_elem,
+ &dist);
+
+ if (status < 0)
+ {
+ PyErr_SetString (PyExc_ValueError, "Correlogram failed.");
+ Py_RETURN_NONE;
+ }
+
+ return Py_BuildValue("d", dist);
+}
+
+
+/* Compute the Hellinger distance for stochastic matrices
+ *
+ * Params:
+ * stma One-dimensional array.
+ * stmb One-dimensional array.
+ * Returns:
+ * Numpy array of floats.
+ */
+static PyObject *
+apollon_som_distance_hellinger_stm (PyObject* self, PyObject* args)
+{
+ int status = 0;
+ npy_intp len = 0;
+ npy_intp stride = 0;
+ PyObject *op_stma = NULL;
+ PyObject *op_stmb = NULL;
+ PyArrayObject *stma = NULL;
+ PyArrayObject *stmb = NULL;
+ PyArrayObject *dists = NULL;
+
+ if (!PyArg_ParseTuple (args, "OO", &op_stma, &op_stmb))
+ {
+ return NULL;
+ }
+
+ stma = (PyArrayObject *) PyArray_ContiguousFromAny (op_stma, NPY_DOUBLE, 1, 1);
+ if (stma == NULL)
+ {
+ PyErr_SetString (PyExc_RuntimeError, "Could not convert first input.\n");
+ Py_RETURN_NONE;
+ }
+
+ stmb = (PyArrayObject *) PyArray_ContiguousFromAny (op_stmb, NPY_DOUBLE, 1, 1);
+ if (stmb == NULL)
+ {
+ PyErr_SetString (PyExc_RuntimeError, "Could not convert second input.\n");
+ Py_RETURN_NONE;
+ }
+
+ len = PyArray_SIZE (stma);
+ stride = (npy_intp) sqrt ((double) len);
+ dists = (PyArrayObject *) PyArray_ZEROS(1, &stride, NPY_DOUBLE, 0);
+ if (dists == NULL)
+ {
+ PyErr_SetString (PyExc_MemoryError, "Could not allocate output array.\n");
+ Py_RETURN_NONE;
+ }
+
+ double *dists_ptr = (double *) PyArray_DATA (dists);
+ double *stma_ptr = (double *) PyArray_DATA (stma);
+ double *stmb_ptr = (double *) PyArray_DATA (stmb);
+
+ for (npy_intp i = 0; i < stride; i++)
+ {
+ status = hellinger (stma_ptr, stmb_ptr, (size_t) stride, dists_ptr);
+ stma_ptr+=stride;
+ stmb_ptr+=stride;
+ dists_ptr++;
+ }
+
+ if (status < 0)
+ {
+ PyErr_SetString (PyExc_ValueError, "hellinger failed.");
+ Py_RETURN_NONE;
+ }
+
+ return (PyObject *) dists;
+}
+
+
+static PyMethodDef
+Distance_Methods[] = {
+ {"hellinger", apollon_som_distance_hellinger, METH_VARARGS,
+ "hellinger (prob_a, prob_b)"},
+ {"hellinger_stm", apollon_som_distance_hellinger_stm, METH_VARARGS,
+ "hellinger (stma, stmb)"},
+ {NULL, NULL, 0, NULL}
+};
+
+static struct PyModuleDef
+_distance_module = {
+ PyModuleDef_HEAD_INIT,
+ "_distance",
+ NULL,
+ -1,
+ Distance_Methods
+};
+
+PyMODINIT_FUNC
+PyInit__distance(void) {
+ import_array();
+ return PyModule_Create (&_distance_module);
+}
diff --git a/src/apollon/som/distance.c b/src/apollon/som/distance.c
new file mode 100644
index 0000000000000000000000000000000000000000..2bc77950f9332accb1e39baacd0f69db2568d7e0
--- /dev/null
+++ b/src/apollon/som/distance.c
@@ -0,0 +1,26 @@
+#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__)
+#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
+#pragma GCC optimize("tree-vectorize")
+#pragma GCC optimize("unsafe-math-optimizations")
+#pragma GCC optimize("unroll-loops")
+#pragma GCC diagnostic warning "-Wall"
+#endif
+#endif
+#include <stdio.h>
+#include "distance.h"
+
+
+int
+hellinger (const double *pva,
+ const double *pvb,
+ const size_t n_elements,
+ double *dist)
+{
+ for (size_t i = 0; i < n_elements; i++)
+ {
+ double diff = sqrt (pva[i]) - sqrt (pvb[i]);
+ *dist += diff * diff;
+ }
+ *dist = sqrt (*dist / 2);
+ return 1;
+}
diff --git a/tests/som/test_distance.py b/tests/som/test_distance.py
new file mode 100644
index 0000000000000000000000000000000000000000..e80cb9bb0e7445fb8a362a3197106e23839fe6bc
--- /dev/null
+++ b/tests/som/test_distance.py
@@ -0,0 +1,32 @@
+import unittest
+
+from hypothesis import strategies as hst
+import numpy as np
+from scipy.spatial import distance
+
+import _distance as asd
+
+
+class TestHellinger(unittest.TestCase):
+ def setUp(self):
+ pass
+
+ def test_unit_distance(self):
+ comp = np.array([[1.0, 0.0, 0.0]])
+ sample = np.array([[0.0, 1.0, 0.0],
+ [0.0, 0.0, 1.0],
+ [0.0, 0.5, 0.5]])
+ res = distance.cdist(comp, sample, metric=asd.hellinger)
+ self.assertTrue(np.all(res == 1.))
+
+
+class TestHellinger_stm(unittest.TestCase):
+ def setUp(self):
+ pass
+
+ def test_zero_dist_on_eq_dist(self):
+ n_rows = 5
+ sample = np.eye(n_rows).ravel()
+ res = asd.hellinger_stm(sample, sample)
+ self.assertTrue(np.all(res == 0.0))
+