4
0
Fork 0
This repository has been archived on 2020-07-25. You can view files and clone it, but cannot push or open issues or pull requests.
extractor-service/extractor_service/extraction.py

97 lines
3.1 KiB
Python

#!/usr/bin/env python3
import os
import os.path
import logging
import json
import tempfile
import requests
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import MidTermFeatures
import numpy
class NumpyArrayEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialize ``numpy.ndarray`` values.

    Pass it as ``cls=NumpyArrayEncoder`` to ``json.dumps``; arrays are
    emitted as (possibly nested) JSON lists.
    """

    def default(self, obj):
        """Return a JSON-serializable replacement for *obj*.

        ``numpy.ndarray`` instances are converted via ``tolist()``. Any
        other object is handed to ``json.JSONEncoder.default``, which
        raises ``TypeError`` for types it cannot serialize.
        """
        if not isinstance(obj, numpy.ndarray):
            # Not ours to handle: let the base encoder decide (and fail).
            return super().default(obj)
        return obj.tolist()
def do_extraction(model_details: dict, file_path: str):
    """Extract one long-term feature vector from the audio file at *file_path*.

    The file is read with pyAudioAnalysis, passed through
    ``stereo_to_mono``, run through mid-term feature extraction, and the
    resulting mid-term statistics are averaged along axis 1.

    Args:
        model_details: Extraction settings. The keys read here are
            ``'mid_window'``, ``'mid_step'``, ``'short_window'``,
            ``'short_step'`` and ``'compute_beat'``. The window/step
            values are multiplied by the sampling rate before use
            (presumably they are expressed in seconds -- confirm against
            the model service).
        file_path: Path of the audio file to analyse.

    Returns:
        The averaged mid-term features, with the beat value and beat
        confidence appended when ``model_details['compute_beat']`` is
        truthy.

    Raises:
        Exception: If the reported sampling rate is zero, or if the
            number of samples divided by the sampling rate does not
            exceed ``model_details['mid_window']``.
    """
    logging.info("Running extraction...")

    sampling_rate, raw_signal = audioBasicIO.read_audio_file(file_path)
    signal = audioBasicIO.stereo_to_mono(raw_signal)

    if sampling_rate == 0:
        raise Exception("Could not read the file properly: Sampling rate zero")

    # The signal has to be longer than a single mid-term window.
    duration = signal.shape[0] / float(sampling_rate)
    if duration <= model_details['mid_window']:
        raise Exception("Could not read the file properly: Signal shape is not good")

    # Feature extraction: window and step sizes are scaled by the sampling
    # rate; only the short-term ones are rounded.
    mid_window = model_details['mid_window'] * sampling_rate
    mid_step = model_details['mid_step'] * sampling_rate
    short_window = round(sampling_rate * model_details['short_window'])
    short_step = round(sampling_rate * model_details['short_step'])
    mid_term, short_term, _ = MidTermFeatures.mid_feature_extraction(
        signal, sampling_rate, mid_window, mid_step, short_window, short_step
    )

    # Long-term averaging of the mid-term statistics.
    features = mid_term.mean(axis=1)

    if model_details['compute_beat']:
        beat, beat_conf = MidTermFeatures.beat_extraction(
            short_term, model_details['short_step']
        )
        features = numpy.append(numpy.append(features, beat), beat_conf)

    # NOTE: no (features - mean) / std normalization is applied here; the
    # raw feature vector is returned.
    return features
def run_everything(parameters: dict):
    """Download a sample, extract its features and push them for classification.

    Steps:
      1. Download the sample identified by ``parameters['tag']`` from the
         storage service into a temporary ``.wav`` file.
      2. Fetch the default model's details from the model service.
      3. Run ``do_extraction`` on the downloaded file.
      4. POST the tag, features and model id to the classification service
         as JSON.

    The temporary file is removed on every exit path, including failures
    during download or while fetching the model details.

    Args:
        parameters: Job description; only the ``'tag'`` key is read here.

    Raises:
        requests.HTTPError: If the storage service or the model service
            answers with an error status.
        Exception: Whatever ``do_extraction`` raises for an unusable file.

    The classification service's response status is only logged; an error
    status there deliberately does not raise.
    """
    tag = parameters['tag']
    logging.info(f"Downloading sample: {tag}")

    # mkstemp hands back an already-open OS-level descriptor along with the
    # path. It must be closed explicitly, otherwise one descriptor leaks per
    # call; the file is reopened by path below.
    fd, file_path = tempfile.mkstemp(prefix=f"{tag}_", suffix=".wav")
    os.close(fd)

    try:
        r = requests.get(f"http://storage-service/object/{tag}")
        # Fail here instead of writing an HTTP error body to disk and
        # feeding it to the audio reader as if it were a WAV file.
        r.raise_for_status()
        with open(file_path, 'wb') as f:
            f.write(r.content)
        logging.debug(f"Downloaded sample to {file_path}")

        logging.info("Getting default model details...")
        r = requests.get("http://model-service/model/$default/details")
        r.raise_for_status()
        model_details = r.json()
        logging.debug(f"Using model {model_details['id']}")

        # Download done. Do extraction magic.
        results = do_extraction(model_details, file_path)
    finally:
        # Covers every failure above, not just a failed extraction.
        os.remove(file_path)

    logging.info("Pushing results to Classifier service...")
    response = {
        "tag": tag,
        "features": results,
        "model": model_details['id'],
    }
    logging.debug(f"Data being pushed: {str(response)}")

    r = requests.post(
        'http://classification-service/classify',
        data=json.dumps(response, cls=NumpyArrayEncoder),
        headers={'Content-Type': 'application/json'}
    )
    # No r.raise_for_status() here on purpose: an error in a service should
    # not kill other services.
    logging.info(f"Classification service response: {r.status_code}")