4
0
This repository has been archived on 2020-07-25. You can view files and clone it, but cannot push or open issues or pull requests.
extractor-service/extractor_service/extraction.py

97 lines
3.1 KiB
Python
Raw Normal View History

2020-03-30 20:40:14 +02:00
#!/usr/bin/env python3
import os
import os.path
import logging
2020-04-14 23:17:20 +02:00
import json
import tempfile
2020-03-30 20:40:14 +02:00
import requests
from pyAudioAnalysis import audioBasicIO
2020-04-14 23:17:20 +02:00
from pyAudioAnalysis import MidTermFeatures
import numpy
2020-03-30 20:40:14 +02:00
2020-04-19 21:17:32 +02:00
class NumpyArrayEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialize NumPy values.

    ``numpy.ndarray`` objects are converted to (nested) Python lists and NumPy
    scalar types (``numpy.int64``, ``numpy.float32``, ``numpy.bool_``, ...) are
    converted to the matching built-in Python scalar. Anything else is
    delegated to ``json.JSONEncoder.default``, which raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        :param obj: the object the stock encoder could not serialize
        :return: a list for arrays, a built-in scalar for NumPy scalars
        :raises TypeError: when ``obj`` is not a supported NumPy value
        """
        if isinstance(obj, numpy.ndarray):
            return obj.tolist()
        # NumPy scalars such as int64/float32 are not subclasses of int/float,
        # so the stock encoder rejects them; .item() yields the Python scalar.
        if isinstance(obj, numpy.generic):
            return obj.item()
        return super().default(obj)
2020-03-30 20:40:14 +02:00
2020-04-19 21:17:32 +02:00
def do_extraction(model_details: dict, file_path: str):
    """Extract a single feature vector from the audio file at ``file_path``.

    The file is read through pyAudioAnalysis, down-mixed to mono, and its
    mid-term features are averaged over the whole recording. When
    ``model_details['compute_beat']`` is truthy, the beat value and its
    confidence are appended to the end of the vector.

    :param model_details: model parameters; the keys read here are
        ``mid_window``, ``mid_step``, ``short_window``, ``short_step``
        (presumably expressed in seconds, as they are multiplied by the
        sampling rate -- confirm against the model service) and
        ``compute_beat``
    :param file_path: path of the audio file to analyse
    :return: the averaged mid-term features, optionally followed by beat
        and beat confidence
    :raises Exception: if the sampling rate is reported as zero, or the
        recording is not longer than one mid-term window
    """
    logging.info("Running extraction...")

    rate, samples = audioBasicIO.read_audio_file(file_path)
    samples = audioBasicIO.stereo_to_mono(samples)

    if sampling_rate_is_zero := (rate == 0):
        raise Exception("Could not read the file properly: Sampling rate zero")

    # Duration = number of samples / sampling rate; it has to exceed one
    # mid-term window for the extraction below to be meaningful.
    duration = samples.shape[0] / float(rate)
    if duration <= model_details['mid_window']:
        raise Exception("Could not read the file properly: Signal shape is not good")

    # Window and step sizes are handed over scaled by the sampling rate.
    window_args = (
        model_details['mid_window'] * rate,
        model_details['mid_step'] * rate,
        round(rate * model_details['short_window']),
        round(rate * model_details['short_step']),
    )
    mid_term, short_term, _ = MidTermFeatures.mid_feature_extraction(
        samples, rate, *window_args
    )

    # Long-term averaging of the mid-term statistics.
    feature_vector = mid_term.mean(axis=1)

    if model_details['compute_beat']:
        beat, beat_conf = MidTermFeatures.beat_extraction(
            short_term, model_details['short_step']
        )
        for extra in (beat, beat_conf):
            feature_vector = numpy.append(feature_vector, extra)

    # NOTE: no (features - mean) / std normalization is applied here.
    return feature_vector
2020-03-30 20:40:14 +02:00
def run_everything(parameters: dict):
    """Download a sample, extract its features and push them to the classifier.

    Steps performed:

    1. GET the sample identified by ``parameters['tag']`` from the storage
       service and store it in a temporary ``.wav`` file.
    2. GET the details of the default model from the model service.
    3. Run ``do_extraction`` on the downloaded file.
    4. POST the tag, the features and the model id to the classification
       service as JSON.

    The temporary file is removed again whether or not the steps succeed.

    :param parameters: request parameters; only the ``tag`` key is read
    :raises requests.HTTPError: if the storage or model service answers
        with an error status
    """
    tag = parameters['tag']
    logging.info(f"Downloading sample: {tag}")

    # mkstemp() returns an (fd, path) pair and creates the file atomically;
    # mktemp() only returns a name, so unpacking its result always failed.
    fd, file_path = tempfile.mkstemp(prefix=f"{tag}_", suffix=".wav", dir="extractor-service")
    # The file is reopened by path below; do not leak the descriptor.
    os.close(fd)

    try:
        r = requests.get(f"http://storage-service/object/{tag}")
        # Do not feed an HTTP error body to the extractor as if it were audio.
        r.raise_for_status()
        with open(file_path, 'wb') as f:
            f.write(r.content)

        logging.debug(f"Downloaded sample to {file_path}")

        logging.info("Getting default model details...")
        r = requests.get("http://model-service/model/$default/details")
        r.raise_for_status()
        model_details = r.json()

        logging.debug(f"Using model {model_details['id']}")

        # download done. Do extraction magic
        results = do_extraction(model_details, file_path)
    finally:
        # Covers download and model lookup failures too, not just extraction.
        os.remove(file_path)

    logging.info("Pushing results to Classifier service...")

    response = {
        "tag": tag,
        "features": results,
        "model": model_details['id'],
    }

    logging.debug(f"Data being pushed: {str(response)}")

    r = requests.post(
        'http://classification-service/classify',
        data=json.dumps(response, cls=NumpyArrayEncoder),
        headers={'Content-Type': 'application/json'},
    )

    # Deliberately no r.raise_for_status() here:
    # an error in a service should not kill other services.
    logging.info(f"Classification service response: {r.status_code}")