diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..9787c3bdf008a57ae3cb2e27a8261eb3f134ff73
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
diff --git a/setup.cfg b/setup.cfg
index e40d558e5d94fa5322183991a9cefc953df01448..c9df0ea3baebf1708523e1a04f6fd8b8603f7dbc 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = comsar
-version = 0.0.1
+version = 0.0.2
 description = Computational Music and Sound Archiving
 long_description = file: README.md
 licence = BSD-3-Clause
@@ -10,9 +10,8 @@ keywords = hmm, som, comsar, music, analysis
 
 classifiers =
     Programming Language :: Python :: 3
-    Programming Language :: Python :: 3.6
-    Programming Language :: Python :: 3.7
     Programming Language :: Python :: 3 :: Only
+    Programming Language :: Python :: 3.7
     Programming Language :: Python :: Implementation :: CPython
     Topic :: Scientific/Engineering
     Topic :: Database
@@ -22,16 +21,15 @@ classifiers =
     License :: OSI Approved :: BSD License
 
 [options]
-zip_safe = True
-include_package_data = True
-packages = find:
-scripts = scripts/comsar
-setup_requires = numpy
+zip_safe = False
+package_dir =
+    =src
+include_package_data = False
+packages = find_namespace:
+python_requires = >=3.7
 install_requires =
     numpy
-    scipy >= "0.19.0"
-    matplotlib >= "2"
-    pandas >= "0.20"
-    setuptools >= "40.0.0"
-    sklearn >= "0.20"
-
+    pandas
+
+[options.packages.find]
+where = src
diff --git a/comsar/__init__.py b/src/comsar/__init__.py
similarity index 50%
rename from comsar/__init__.py
rename to src/comsar/__init__.py
index 2ba8bbf8da4bfdf8938557f7661e8b3ee747f6c9..c6750dd4dbfa24dfdbdc50cbabcb977f7faf757f 100644
--- a/comsar/__init__.py
+++ b/src/comsar/__init__.py
@@ -1,6 +1,3 @@
 import pkg_resources as _pkg
 
-from .tracks.timbre import TimbreTrack
-from .tracks.utilities import TrackResult
-
 __version__ = _pkg.get_distribution('comsar').version
diff --git a/comsar/tracks/__int__.py b/src/comsar/_tracks/__int__.py
similarity index 100%
rename from comsar/tracks/__int__.py
rename to src/comsar/_tracks/__int__.py
diff --git a/comsar/tracks/form.py b/src/comsar/_tracks/form.py
similarity index 100%
rename from comsar/tracks/form.py
rename to src/comsar/_tracks/form.py
diff --git a/comsar/tracks/pitch.py b/src/comsar/_tracks/pitch.py
similarity index 100%
rename from comsar/tracks/pitch.py
rename to src/comsar/_tracks/pitch.py
diff --git a/comsar/tracks/rhythm.py b/src/comsar/_tracks/rhythm.py
similarity index 100%
rename from comsar/tracks/rhythm.py
rename to src/comsar/_tracks/rhythm.py
diff --git a/comsar/tracks/timbre.py b/src/comsar/_tracks/timbre.py
similarity index 51%
rename from comsar/tracks/timbre.py
rename to src/comsar/_tracks/timbre.py
index 14e173ba89b0f3255f57efa1d300ef23977a9515..9098d134ee51aa1776dc490fa1015b571456e9f2 100644
--- a/comsar/tracks/timbre.py
+++ b/src/comsar/_tracks/timbre.py
@@ -2,7 +2,7 @@
 License: BSD-3-Clasuse
 Copyright (C) 2020, Michael Blaß, michael.blass@uni-hamburg.de
 """
-from dataclasses import dataclass
+from datetime import datetime
 from timeit import default_timer as timer
 from typing import Optional
 
@@ -13,10 +13,10 @@ from apollon.audio import AudioFile
 from apollon.segment import Segmentation
 from apollon.signal import container, features
 from apollon.signal.spectral import StftSegments
-from apollon.tools import time_stamp
 
 import comsar
-from comsar.tracks.utilities import TrackMeta, TrackParams, TrackResult, TimbreTrackParams
+from comsar._tracks.utilities import (TrackMeta, TrackResult, SourceMeta,
+                                      TimbreTrackParams, TimbreTrackCorrGramParams)
 
 
 STFT_DEFAULT = container.StftParams(fps=44100, window='hamming', n_fft=None,
@@ -29,9 +29,110 @@ CORR_DIM_DEFAULT = container.CorrDimParams(delay=14, m_dim=80, n_bins=1000,
 CORR_GRAM_DEFAULT = container.CorrGramParams(wlen=2**10, n_delay=2**8,
                                              total=True)
 
+class TimbreTrack:
+    """Compute timbre track of an audio file.
+    """
+    def __init__(self,
+                 stft_params: Optional[container.StftParams] = None,
+                 corr_dim_params: Optional[container.CorrDimParams] = None,
+                 ) -> None:
+        """
+        Args:
+            stft_params:      Parameters for the STFT; defaults to
+                              ``STFT_DEFAULT``.
+            corr_dim_params:  Parameters for the correlation dimension;
+                              defaults to ``CORR_DIM_DEFAULT``.
+        """
+        self.params = TimbreTrackParams(stft_params or STFT_DEFAULT,
+                                        corr_dim_params or CORR_DIM_DEFAULT)
 
+        self.cutter = Segmentation(self.params.stft.n_perseg,
+                                   self.params.stft.n_overlap,
+                                   self.params.stft.extend,
+                                   self.params.stft.pad)
 
-class TimbreTrack:
+        self.stft = StftSegments(self.params.stft.fps, self.params.stft.window,
+                                 self.params.stft.n_fft)
+
+        self.feature_names = ('SpectralCentroid', 'SpectralSpread',
+                              'SpectralFlux', 'Roughness', 'Sharpness',
+                              'SPL', 'CorrelationDimension')
+
+        self.funcs = [features.spectral_centroid,
+                      features.spectral_spread,
+                      features.spectral_flux,
+                      features.roughness_helmholtz,
+                      features.sharpness,
+                      features.spl,
+                      features.cdim]
+
+        self.pace = np.zeros(self.n_features)
+        self.verbose = False
+
+    @property
+    def n_features(self) -> int:
+        """Number of features on track"""
+        return len(self.feature_names)
+
+    def extract(self, path) -> TrackResult:
+        """Perform extraction.
+
+        Args:
+            path:  Path to the audio file.
+
+        Returns:
+            Extracted features together with track meta data and parameters.
+
+        Raises:
+            ValueError:  If the sample rate of the file differs from the
+                         sample rate of the STFT parameters.
+        """
+        snd = AudioFile(path)
+        try:
+            if snd.fps != self.params.stft.fps:
+                raise ValueError(
+                    f'Sample rate of {path!s} differs from init.')
+
+            segs = self.cutter.transform(snd.data.squeeze())
+            sxx = self.stft.transform(segs)
+
+            args = [(sxx.frqs, sxx.power),
+                    (sxx.frqs, sxx.power),
+                    (sxx.abs,),
+                    (sxx.d_frq, sxx.abs, 15000),
+                    (sxx.frqs, sxx.abs),
+                    (segs.data,),
+                    (segs.data,)]
+
+            kwargs = [{}, {}, {}, {}, {}, {}, self.params.corr_dim.to_dict()]
+
+            out = np.zeros((segs.n_segs, self.n_features))
+            jobs = zip(self.funcs, args, kwargs)
+            for i, (fun, arg, kwarg) in enumerate(jobs):
+                out[:, i] = self._worker(i, fun, arg, kwarg)
+
+            # Split on the last dot only, so dotted file names stay intact.
+            file_meta = SourceMeta(*snd.file_name.rsplit('.', 1), snd.hash)
+            track_meta = TrackMeta(comsar.__version__, datetime.utcnow(),
+                                   file_meta)
+        finally:
+            # Close the audio file even if the extraction fails.
+            snd.close()
+        out = pd.DataFrame(data=out, columns=self.feature_names)
+        return TrackResult(track_meta, self.params, out)
+
+    def _worker(self, idx, func, args, kwargs) -> np.ndarray:
+        """Call ``func``; print and record its run time."""
+        print(self.feature_names[idx], end=' ... ')
+        pace = timer()
+        res = func(*args, **kwargs)
+        pace = timer() - pace
+        self.pace[idx] = pace
+        print(f'{pace:.4} s.')
+        return res
+
+
+class TimbreTrackCorrGram:
     """Compute timbre track of an audio file.
     """
     def __init__(self,
@@ -41,7 +142,7 @@ class TimbreTrack:
         """
         Args:
         """
-        self.params = TimbreTrackParams(stft_params or STFT_DEFAULT,
+        self.params = TimbreTrackCorrGramParams(stft_params or STFT_DEFAULT,
                                         corr_dim_params or CORR_DIM_DEFAULT,
                                         corr_gram_params or CORR_GRAM_DEFAULT)
 
@@ -52,9 +153,9 @@ class TimbreTrack:
         self.stft = StftSegments(self.params.stft.fps, self.params.stft.window,
                                  self.params.stft.n_fft)
 
-        self.feature_names = ('Spectral Centroid', 'Spectral Spread',
-                              'Spectral Flux', 'Roughness', 'Sharpness',
-                              'SPL', 'Correlation Dimension', 'Correlogram')
+        self.feature_names = ('SpectralCentroid', 'SpectralSpread',
+                              'SpectralFlux', 'Roughness', 'Sharpness',
+                              'SPL', 'CorrelationDimension', 'Correlogram')
 
         self.funcs = [features.spectral_centroid,
                       features.spectral_spread,
@@ -101,7 +202,8 @@ class TimbreTrack:
             out[:, i] = self._worker(i, fun, arg, kwarg)
 
-        snd.close()
-        meta = TrackMeta(comsar.__version__, time_stamp(), snd.file_name)
+        file_meta = SourceMeta(*snd.file_name.rsplit('.', 1), snd.hash)
+        meta = TrackMeta(comsar.__version__, datetime.utcnow(), file_meta)
+        snd.close()
         out = pd.DataFrame(data=out, columns=self.feature_names)
         return TrackResult(meta, self.params, out)
 
diff --git a/comsar/tracks/utilities.py b/src/comsar/_tracks/utilities.py
similarity index 81%
rename from comsar/tracks/utilities.py
rename to src/comsar/_tracks/utilities.py
index 549acbfe4f438eb3564b23c6c7ba984be62ef864..c07c68ba2dcc056b1ca0d797bec2c1d0ff768ed5 100644
--- a/comsar/tracks/utilities.py
+++ b/src/comsar/_tracks/utilities.py
@@ -2,6 +2,7 @@
 License: BSD-3-Clasuse
 Copyright (C) 2020, Michael Blaß, michael.blass@uni-hamburg.de
 """
+import datetime
 import pathlib
 import pickle
 from typing import ClassVar, Type, TypeVar, Union
@@ -12,20 +13,30 @@ import pandas as pd
 
 from apollon import io
 from apollon import container
-from apollon import signal
+from apollon.signal import container as asc
 from apollon.tools import standardize
 from apollon import types
 
 
 T = TypeVar('T')
 
+
+@dataclass
+class SourceMeta(container.Params):
+    """Source file meta data."""
+    _schema: ClassVar[types.Schema] = None
+    name: str
+    extension: str
+    hash_: str
+
+
 @dataclass
 class TrackMeta(container.Params):
     """Track meta data."""
     _schema: ClassVar[types.Schema] = None
     version: str
-    time_stamp: str
-    source: str
+    extraction_date: datetime.datetime
+    source: SourceMeta
 
 
 @dataclass
@@ -37,9 +48,16 @@ class TrackParams(container.Params):
 @dataclass
 class TimbreTrackParams(TrackParams):
     """Parameter set for TimbreTrack"""
-    stft: signal.container.StftParams
-    corr_dim: signal.container.CorrDimParams
-    corr_gram: signal.container.CorrGramParams
+    stft: asc.StftParams
+    corr_dim: asc.CorrDimParams
+
+
+@dataclass
+class TimbreTrackCorrGramParams(TrackParams):
+    """Parameter set for TimbreTrackCorrGram"""
+    stft: asc.StftParams
+    corr_dim: asc.CorrDimParams
+    corr_gram: asc.CorrGramParams
 
 
 class TrackResult:
@@ -84,13 +102,17 @@ class TrackResult:
         """Serialize TrackResults to dictionary."""
         return {'meta': self._meta.to_dict(),
                 'params': self._params.to_dict(),
-                'data': self._data.to_dict()}
+                'data': self._data.to_dict(orient='list')}
 
     def to_json(self, path: Union[str, pathlib.Path]) -> None:
         """Serialize TrackResults to JSON."""
 
         io.json.dump(self.to_dict(), path)
 
+    def to_mongo(self, db_con) -> None:
+        """Write TrackResults to open MongoDB connection."""
+        raise NotImplementedError('MongoDB export is not implemented yet.')
+
     def to_pickle(self, path: Union[str, pathlib.Path]) -> None:
         """Serialize Track Results to pickle."""
         path = pathlib.Path(path)
diff --git a/comsar/cli/__init__.py b/src/comsar/cli/__init__.py
similarity index 100%
rename from comsar/cli/__init__.py
rename to src/comsar/cli/__init__.py
diff --git a/comsar/cli/apollon_hmm.py b/src/comsar/cli/apollon_hmm.py
similarity index 100%
rename from comsar/cli/apollon_hmm.py
rename to src/comsar/cli/apollon_hmm.py
diff --git a/comsar/cli/apollon_onsets.py b/src/comsar/cli/apollon_onsets.py
similarity index 100%
rename from comsar/cli/apollon_onsets.py
rename to src/comsar/cli/apollon_onsets.py
diff --git a/comsar/cli/apollon_position.py b/src/comsar/cli/apollon_position.py
similarity index 100%
rename from comsar/cli/apollon_position.py
rename to src/comsar/cli/apollon_position.py
diff --git a/comsar/cli/comsar_features.py b/src/comsar/cli/comsar_features.py
similarity index 100%
rename from comsar/cli/comsar_features.py
rename to src/comsar/cli/comsar_features.py
diff --git a/src/comsar/tracks.py b/src/comsar/tracks.py
new file mode 100644
index 0000000000000000000000000000000000000000..41aee0798c76b89ce1baf81e5cf4e077c691cece
--- /dev/null
+++ b/src/comsar/tracks.py
@@ -0,0 +1,2 @@
+from ._tracks.timbre import TimbreTrack
+from ._tracks.timbre import TimbreTrackCorrGram
diff --git a/tests/tracks/test_timbre.py b/tests/tracks/test_timbre.py
index 4287cb6858eca9fa840d82eba02c76264b2c5515..9dabb3279c757efac6a1a761f984e3a040bccfd6 100644
--- a/tests/tracks/test_timbre.py
+++ b/tests/tracks/test_timbre.py
@@ -3,10 +3,7 @@ import numpy as np
 import pandas as pd
 
 from apollon.tools import time_stamp
-from comsar.tracks.timbre import (TimbreTrack,
-        STFT_DEFAULT, CORR_DIM_DEFAULT, CORR_GRAM_DEFAULT)
-import comsar
-from comsar.tracks.utilities import TrackMeta, TrackParams, TrackResult, TimbreTrackParams
+from comsar.tracks import TimbreTrack
 
 
 class TestTimbreTrack(unittest.TestCase):
@@ -15,5 +12,3 @@ class TestTimbreTrack(unittest.TestCase):
 
     def test_nfeatures(self):
         self.assertIsInstance(self.track.n_features, int)
-
-
diff --git a/tests/tracks/test_utilities.py b/tests/tracks/test_utilities.py
index 5e85e29e3e0e6c80666313f07b4407afdf54741c..107be3436ad825b939a444f6f324b9c06ab25ddc 100644
--- a/tests/tracks/test_utilities.py
+++ b/tests/tracks/test_utilities.py
@@ -4,15 +4,14 @@ import unittest
 
 from hypothesis import given
 
-from comsar import TrackResult
-from comsar.tracks.timbre import TimbreTrackParams
-from .. utils import timbre_track_results
+from comsar._tracks.utilities import TimbreTrackParams, TrackResult
+from utils import timbre_track_results
 
 
 class TestTrackResult(unittest.TestCase):
     def setUp(self) -> None:
         self.tf_descr, self.tf_name = tempfile.mkstemp(suffix='.json',
-            text=True)
+                                                       text=True)
 
     @given(timbre_track_results())
     def test_init(self, ttr) -> None:
diff --git a/tests/utils.py b/tests/utils.py
index cd2c3b4c42359d6e4221013f493422c5ac5de9b7..2cac6809ec7835a5eeca0c2f126ecc0d5f30a5a9 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,3 +1,6 @@
+"""
+Utilities for testing.
+"""
 import string
 
 from hypothesis.strategies import (composite, integers, lists, sampled_from,
@@ -7,8 +10,9 @@ import pandas as pd
 from apollon.tools import time_stamp
 
 import comsar
-from comsar.tracks import timbre
-from comsar.tracks.utilities import TrackMeta, TimbreTrackParams, TrackResult
+from comsar._tracks import timbre
+from comsar._tracks.utilities import (SourceMeta, TrackMeta,
+                                      TimbreTrackParams, TrackResult)
 
 
 def ascii_strings() -> SearchStrategy:
@@ -16,10 +20,12 @@ def ascii_strings() -> SearchStrategy:
     return text(sampled_from(string.ascii_letters+string.digits),
                 min_size=2, max_size=10)
 
+
 def lists_of_strings() -> SearchStrategy:
     """Lists of unique ascii_strings."""
     return lists(ascii_strings(), min_size=2, max_size=10, unique=True)
 
+
 @composite
 def numerical_dataframes(draw) -> pd.DataFrame:
     """Generate pandas DataFrames.
@@ -32,11 +38,14 @@ def numerical_dataframes(draw) -> pd.DataFrame:
     data = draw(arrays('float64', (n_rows, len(names))))
     return pd.DataFrame(data=data, columns=names)
 
+
 @composite
 def timbre_track_results(draw) -> TrackResult:
-    meta = TrackMeta(comsar.__version__, time_stamp(), 'testfile.wav')
+    """Mock the result of a timbre track extraction pipeline.
+    """
+    meta = TrackMeta(comsar.__version__, time_stamp(),
+                     SourceMeta('testfile', 'wav', draw(ascii_strings())))
     params = TimbreTrackParams(timbre.STFT_DEFAULT,
-                               timbre.CORR_DIM_DEFAULT,
-                               timbre.CORR_GRAM_DEFAULT)
+                               timbre.CORR_DIM_DEFAULT)
     data = draw(numerical_dataframes())
     return TrackResult(meta, params, data)