extractor-service/extractor_service/extraction.py

#!/usr/bin/env python3
import os
import os.path
import logging

import json
import tempfile

import requests
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import MidTermFeatures
import numpy


class NumpyArrayEncoder(json.JSONEncoder):
    """JSON encoder that can serialize numpy values.

    ``numpy.ndarray`` instances are emitted as (nested) lists and numpy
    scalar types (``numpy.float32``, ``numpy.int64``, ``numpy.bool_``, ...)
    as the equivalent built-in Python value. Everything else is delegated
    to the base encoder, which raises ``TypeError`` for unsupported types.
    """

    def default(self, obj):
        if isinstance(obj, numpy.ndarray):
            return obj.tolist()
        if isinstance(obj, numpy.generic):
            # A bare numpy scalar is not an ndarray; .item() unwraps it
            # into the matching built-in Python type.
            return obj.item()
        return super().default(obj)


def do_extraction(model_details: dict, file_path: str):
    """Compute the averaged mid-term feature vector for one audio file.

    :param model_details: mapping that must provide 'mid_window',
        'mid_step', 'short_window', 'short_step' and 'compute_beat'.
        The window/step values are multiplied by the sampling rate before
        being handed to pyAudioAnalysis (presumably seconds -- confirm
        against the model service).
    :param file_path: path of the audio file to analyse.
    :return: the mid-term features averaged along axis 1, with the beat
        and beat-confidence values appended when 'compute_beat' is truthy.
    :raises Exception: if the sampling rate is zero, or if the signal is
        not longer than one mid-term window.
    """
    logging.info("Running extraction...")

    rate, raw_signal = audioBasicIO.read_audio_file(file_path)
    mono = audioBasicIO.stereo_to_mono(raw_signal)

    if sampling_rate_is_zero := (rate == 0):
        raise Exception("Could not read the file properly: Sampling rate zero")

    # The clip has to last longer than a single mid-term window.
    duration = mono.shape[0] / float(rate)
    if not duration > model_details['mid_window']:
        raise Exception("Could not read the file properly: Signal shape is not good")

    # Window and step sizes scaled by the sampling rate; only the
    # short-term ones are rounded here.
    mid_window = model_details['mid_window'] * rate
    mid_step = model_details['mid_step'] * rate
    short_window = round(rate * model_details['short_window'])
    short_step = round(rate * model_details['short_step'])

    extracted = MidTermFeatures.mid_feature_extraction(
        mono, rate, mid_window, mid_step, short_window, short_step)
    mid_term, short_term, _ = extracted

    # Long-term averaging of the mid-term statistics.
    feature_vector = mid_term.mean(axis=1)

    if model_details['compute_beat']:
        beat, beat_conf = MidTermFeatures.beat_extraction(
            short_term, model_details['short_step'])
        feature_vector = numpy.append(numpy.append(feature_vector, beat), beat_conf)

    # No normalization ((features - mean) / std) is applied here.
    return feature_vector


def run_everything(parameters: dict):
    """Download a sample, extract its features and push them for classification.

    Steps:
      1. Fetch the audio object named by ``parameters['tag']`` from the
         storage service into a temporary ``.wav`` file.
      2. Fetch the default model's details from the model service.
      3. Run :func:`do_extraction` on the downloaded file.
      4. POST the tag, the features and the model id to the classification
         service as JSON.

    The temporary file is always removed, whichever step fails.

    :param parameters: mapping containing at least the key 'tag'.
    :raises requests.HTTPError: if the storage or model service answers
        with an error status.
    :raises Exception: whatever :func:`do_extraction` raises.
    """
    tag = parameters['tag']
    logging.info(f"Downloading sample: {tag}")

    # mkstemp() creates the file and returns (fd, path). The previous
    # mktemp() call returns only a path string, so unpacking it into two
    # names raised ValueError on every call.
    fd, file_path = tempfile.mkstemp(prefix=f"{tag}_", suffix=".wav", dir="extractor-service")
    try:
        # Taking ownership of the descriptor first guarantees it is closed
        # even when the download itself fails.
        with os.fdopen(fd, 'wb') as f:
            r = requests.get(f"http://storage-service/object/{tag}")
            # Do not store an HTTP error body as if it were audio data.
            r.raise_for_status()
            f.write(r.content)

        logging.debug(f"Downloaded sample to {file_path}")

        logging.info("Getting default model details...")
        r = requests.get("http://model-service/model/$default/details")
        r.raise_for_status()

        model_details = r.json()

        logging.debug(f"Using model {model_details['id']}")

        # download done. Do extraction magic
        results = do_extraction(model_details, file_path)
    finally:
        # Covers download and model-service failures too, not only
        # extraction errors.
        os.remove(file_path)

    logging.info("Pushing results to Classifier service...")

    response = {
        "tag": tag,
        "features": results,
        "model": model_details['id']
    }

    logging.debug(f"Data being pushed: {str(response)}")

    r = requests.post(
        'http://classification-service/classify',
        data=json.dumps(response, cls=NumpyArrayEncoder),
        headers={'Content-Type': 'application/json'}
    )

    # Deliberately no r.raise_for_status() here: an error in the
    # classification service should not kill this service.
    logging.info(f"Classification service response: {r.status_code}")
Did stuff 2020-03-30 20:40:14 +02:00			`#!/usr/bin/env python3`
			`import os`
			`import os.path`
			`import logging`
Implemented model_service stuff 2020-04-14 23:17:20 +02:00
			`import json`
			`import tempfile`

Did stuff 2020-03-30 20:40:14 +02:00			`import requests`
			`from pyAudioAnalysis import audioBasicIO`
Implemented model_service stuff 2020-04-14 23:17:20 +02:00			`from pyAudioAnalysis import MidTermFeatures`
			`import numpy`
Did stuff 2020-03-30 20:40:14 +02:00

Finished main stuff 2020-04-19 21:17:32 +02:00			`class NumpyArrayEncoder(json.JSONEncoder):`
Did stuff 2020-03-30 20:40:14 +02:00			`def default(self, obj):`
			`if isinstance(obj, numpy.ndarray):`
			`return obj.tolist()`
Finished main stuff 2020-04-19 21:17:32 +02:00			`return json.JSONEncoder.default(self, obj)`
Did stuff 2020-03-30 20:40:14 +02:00

Finished main stuff 2020-04-19 21:17:32 +02:00			`def do_extraction(model_details: dict, file_path: str):`
Did stuff 2020-03-30 20:40:14 +02:00			`logging.info("Running extraction...")`

Implemented model_service stuff 2020-04-14 23:17:20 +02:00			`sampling_rate, signal = audioBasicIO.read_audio_file(file_path)`
			`signal = audioBasicIO.stereo_to_mono(signal)`

			`if sampling_rate == 0:`
			`raise Exception("Could not read the file properly: Sampling rate zero")`

			`if signal.shape[0] / float(sampling_rate) <= model_details['mid_window']:`
			`raise Exception("Could not read the file properly: Signal shape is not good")`
Did stuff 2020-03-30 20:40:14 +02:00
Implemented model_service stuff 2020-04-14 23:17:20 +02:00			`# feature extraction:`
			`mid_features, s, _ = \`
			`MidTermFeatures.mid_feature_extraction(signal, sampling_rate,`
Finished main stuff 2020-04-19 21:17:32 +02:00			`model_details['mid_window'] * sampling_rate,`
			`model_details['mid_step'] * sampling_rate,`
			`round(sampling_rate * model_details['short_window']),`
			`round(sampling_rate * model_details['short_step']))`
Implemented model_service stuff 2020-04-14 23:17:20 +02:00
			`# long term averaging of mid-term statistics`
			`mid_features = mid_features.mean(axis=1)`
			`if model_details['compute_beat']:`
			`beat, beat_conf = MidTermFeatures.beat_extraction(s, model_details['short_step'])`
			`mid_features = numpy.append(mid_features, beat)`
			`mid_features = numpy.append(mid_features, beat_conf)`

Finished main stuff 2020-04-19 21:17:32 +02:00			`# feature_vector = (mid_features - mean) / std # normalization`
Implemented model_service stuff 2020-04-14 23:17:20 +02:00
			`return mid_features`
Did stuff 2020-03-30 20:40:14 +02:00

			`def run_everything(parameters: dict):`
			`tag = parameters['tag']`
			`logging.info(f"Downloading sample: {tag}")`

Implemented model_service stuff 2020-04-14 23:17:20 +02:00			`_, file_path = tempfile.mktemp(prefix=f"{tag}_", suffix=".wav", dir="extractor-service")`
Did stuff 2020-03-30 20:40:14 +02:00			`r = requests.get(f"http://storage-service/object/{tag}")`
			`with open(file_path, 'wb') as f:`
			`f.write(r.content)`

Finished main stuff 2020-04-19 21:17:32 +02:00			`logging.debug(f"Downloaded sample to {file_path}")`

			`logging.info("Getting default model details...")`
			`r = requests.get("http://model-service/model/$default/details")`
			`r.raise_for_status()`

			`model_details = r.json()`

			`logging.debug(f"Using model {model_details['id']}")`

Did stuff 2020-03-30 20:40:14 +02:00			`# download done. Do extraction magic`
			`try:`
Finished main stuff 2020-04-19 21:17:32 +02:00			`results = do_extraction(model_details, file_path)`
Did stuff 2020-03-30 20:40:14 +02:00			`finally:`
			`os.remove(file_path)`

Implemented model_service stuff 2020-04-14 23:17:20 +02:00			`logging.info(f"Pushing results to Classifier service...")`
Did stuff 2020-03-30 20:40:14 +02:00
			`response = {`
			`"tag": tag,`
Changed results to features to avoid confusion 2020-04-19 23:03:23 +02:00			`"features": results,`
Finished main stuff 2020-04-19 21:17:32 +02:00			`"model": model_details['id']`
Did stuff 2020-03-30 20:40:14 +02:00			`}`

			`logging.debug(f"Data being pushed: {str(response)}")`

Finished main stuff 2020-04-19 21:17:32 +02:00			`r = requests.post(`
			`'http://classification-service/classify',`
			`data=json.dumps(response, cls=NumpyArrayEncoder),`
			`headers={'Content-Type': 'application/json'}`
			`)`

			`# r.raise_for_status() # An error in a service should not kill other services`
Implemented model_service stuff 2020-04-14 23:17:20 +02:00			`logging.info(f"Classification service response: {r.status_code}")`