Skip to content
Snippets Groups Projects
Commit 76e1bcdb authored by Blaß, Michael's avatar Blaß, Michael :speech_balloon:
Browse files

Implemented hellinger distance.

parent f8c67bdc
No related branches found
No related tags found
No related merge requests found
#ifndef CDIM_H
#define CDIM_H
#include <math.h>
#include <stdlib.h>
/** Hellinger distance for stochastic vectors.
*/
int
hellinger (const double *pva,
const double *pvb,
const size_t len,
double *dist);
#endif /* CDIM_H */
......@@ -11,6 +11,10 @@ ext_features = Extension('_features',
'src/apollon/signal/_features_module.c'],
include_dirs = ['include', np.get_include()])
ext_som_dist = Extension('_distance',
sources = ['src/apollon/som/distance.c',
'src/apollon/som/_distance_module.c'],
include_dirs = ['include', np.get_include()])
setup(ext_modules = [ext_features])
setup(ext_modules = [ext_features, ext_som_dist])
#define NPY_NO_DEPRECATED_API NPY_1_8_API_VERSION
#define PY_ARRAY_UNIQUE_SYMBOL comsar_NP_ARRAY_API
#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__)
#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
#pragma GCC optimize("tree-vectorize")
#pragma GCC optimize("unsafe-math-optimizations")
#pragma GCC optimize("unroll-loops")
#pragma GCC diagnostic warning "-Wall"
#endif
#endif
#include <Python.h>
#include <numpy/arrayobject.h>
#include "distance.h"
/* Compute the Hellinger distance for two one-dimensional arrays.
*
* Params:
* inp_a One-dimensional array.
* inp_b One-dimensional array.
* Returns:
* float
*/
static PyObject *
apollon_som_distance_hellinger (PyObject* self, PyObject* args)
{
int status = 0;
npy_intp n_elem = 0;
double dist = 0.0;
PyObject *op_prob_a = NULL;
PyObject *op_prob_b = NULL;
PyArrayObject *prob_a = NULL;
PyArrayObject *prob_b = NULL;
if (!PyArg_ParseTuple (args, "OO", &op_prob_a, &op_prob_b))
{
return NULL;
}
prob_a = (PyArrayObject *) PyArray_ContiguousFromAny (op_prob_a, NPY_DOUBLE, 1, 1);
if (prob_a == NULL)
{
PyErr_SetString (PyExc_RuntimeError, "Could not convert first input.\n");
Py_RETURN_NONE;
}
prob_b = (PyArrayObject *) PyArray_ContiguousFromAny (op_prob_b, NPY_DOUBLE, 1, 1);
if (prob_b == NULL)
{
PyErr_SetString (PyExc_RuntimeError, "Could not convert second input.\n");
Py_RETURN_NONE;
}
n_elem = PyArray_SIZE (prob_a);
status = hellinger (
(double *) PyArray_DATA (prob_a),
(double *) PyArray_DATA (prob_b),
(size_t) n_elem,
&dist);
if (status < 0)
{
PyErr_SetString (PyExc_ValueError, "Correlogram failed.");
Py_RETURN_NONE;
}
return Py_BuildValue("d", dist);
}
/* Compute the Hellinger distance for stochastic matrices
*
* Params:
* stma One-dimensional array.
* stmb One-dimensional array.
* Returns:
* Numpy array of floats.
*/
static PyObject *
apollon_som_distance_hellinger_stm (PyObject* self, PyObject* args)
{
int status = 0;
npy_intp len = 0;
npy_intp stride = 0;
PyObject *op_stma = NULL;
PyObject *op_stmb = NULL;
PyArrayObject *stma = NULL;
PyArrayObject *stmb = NULL;
PyArrayObject *dists = NULL;
if (!PyArg_ParseTuple (args, "OO", &op_stma, &op_stmb))
{
return NULL;
}
stma = (PyArrayObject *) PyArray_ContiguousFromAny (op_stma, NPY_DOUBLE, 1, 1);
if (stma == NULL)
{
PyErr_SetString (PyExc_RuntimeError, "Could not convert first input.\n");
Py_RETURN_NONE;
}
stmb = (PyArrayObject *) PyArray_ContiguousFromAny (op_stmb, NPY_DOUBLE, 1, 1);
if (stmb == NULL)
{
PyErr_SetString (PyExc_RuntimeError, "Could not convert second input.\n");
Py_RETURN_NONE;
}
len = PyArray_SIZE (stma);
stride = (npy_intp) sqrt ((double) len);
dists = (PyArrayObject *) PyArray_ZEROS(1, &stride, NPY_DOUBLE, 0);
if (dists == NULL)
{
PyErr_SetString (PyExc_MemoryError, "Could not allocate output array.\n");
Py_RETURN_NONE;
}
double *dists_ptr = (double *) PyArray_DATA (dists);
double *stma_ptr = (double *) PyArray_DATA (stma);
double *stmb_ptr = (double *) PyArray_DATA (stmb);
for (npy_intp i = 0; i < stride; i++)
{
status = hellinger (stma_ptr, stmb_ptr, (size_t) stride, dists_ptr);
stma_ptr+=stride;
stmb_ptr+=stride;
dists_ptr++;
}
if (status < 0)
{
PyErr_SetString (PyExc_ValueError, "hellinger failed.");
Py_RETURN_NONE;
}
return (PyObject *) dists;
}
static PyMethodDef
Distance_Methods[] = {
{"hellinger", apollon_som_distance_hellinger, METH_VARARGS,
"hellinger (prob_a, prob_b)"},
{"hellinger_stm", apollon_som_distance_hellinger_stm, METH_VARARGS,
"hellinger (stma, stmb)"},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef
_distance_module = {
PyModuleDef_HEAD_INIT,
"_distance",
NULL,
-1,
Distance_Methods
};
PyMODINIT_FUNC
PyInit__distance(void) {
import_array();
return PyModule_Create (&_distance_module);
}
#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__)
#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
#pragma GCC optimize("tree-vectorize")
#pragma GCC optimize("unsafe-math-optimizations")
#pragma GCC optimize("unroll-loops")
#pragma GCC diagnostic warning "-Wall"
#endif
#endif
#include <stdio.h>
#include "distance.h"
int
hellinger (const double *pva,
const double *pvb,
const size_t n_elements,
double *dist)
{
for (size_t i = 0; i < n_elements; i++)
{
double diff = sqrt (pva[i]) - sqrt (pvb[i]);
*dist += diff * diff;
}
*dist = sqrt (*dist / 2);
return 1;
}
import unittest
from hypothesis import strategies as hst
import numpy as np
from scipy.spatial import distance
import _distance as asd
class TestHellinger(unittest.TestCase):
def setUp(self):
pass
def test_unit_distance(self):
comp = np.array([[1.0, 0.0, 0.0]])
sample = np.array([[0.0, 1.0, 0.0],
[0.0, 0.0, 1.0],
[0.0, 0.5, 0.5]])
res = distance.cdist(comp, sample, metric=asd.hellinger)
self.assertTrue(np.all(res == 1.))
class TestHellinger_stm(unittest.TestCase):
def setUp(self):
pass
def test_zero_dist_on_eq_dist(self):
n_rows = 5
sample = np.eye(n_rows).ravel()
res = asd.hellinger_stm(sample, sample)
self.assertTrue(np.all(res == 0.0))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment