2020-03-30 20:40:14 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
import os
|
|
|
|
import os.path
|
|
|
|
import logging
|
2020-04-14 23:17:20 +02:00
|
|
|
|
|
|
|
import json
|
|
|
|
import tempfile
|
|
|
|
|
2020-03-30 20:40:14 +02:00
|
|
|
import requests
|
|
|
|
from pyAudioAnalysis import audioBasicIO
|
2020-04-14 23:17:20 +02:00
|
|
|
from pyAudioAnalysis import MidTermFeatures
|
|
|
|
import numpy
|
2020-03-30 20:40:14 +02:00
|
|
|
|
|
|
|
|
2020-04-19 21:17:32 +02:00
|
|
|
class NumpyArrayEncoder(json.JSONEncoder):
    """JSON encoder that can serialize numpy arrays and numpy scalars.

    Pass it as ``cls=NumpyArrayEncoder`` to ``json.dumps`` / ``json.dump``.
    """

    def default(self, obj):
        """Convert numpy values into plain Python objects.

        Arrays become (nested) lists and numpy scalar types become the
        matching builtin scalar. Anything else is delegated to the base
        encoder, which raises ``TypeError`` for unsupported types.
        """
        if isinstance(obj, numpy.ndarray):
            return obj.tolist()
        # numpy scalars such as int64 / float32 / bool_ are not subclasses of
        # the builtin types, so the stock encoder rejects them.
        if isinstance(obj, numpy.generic):
            return obj.item()
        return super().default(obj)
|
2020-03-30 20:40:14 +02:00
|
|
|
|
|
|
|
|
2020-04-19 21:17:32 +02:00
|
|
|
def do_extraction(model_details: dict, file_path: str):
    """Extract a single long-term feature vector from an audio file.

    Args:
        model_details: Extraction settings. The keys read here are
            ``mid_window``, ``mid_step``, ``short_window``, ``short_step``
            (each is multiplied by the sampling rate, so presumably expressed
            in seconds -- confirm against the model service) and
            ``compute_beat``.
        file_path: Path of the audio file to analyse.

    Returns:
        The mid-term features averaged over the whole signal. When
        ``compute_beat`` is truthy, the beat value and its confidence are
        appended as the last two elements.

    Raises:
        ValueError: If the file could not be read (sampling rate of 0) or the
            signal is not longer than one mid-term window.
    """
    logging.info("Running extraction...")

    sampling_rate, signal = audioBasicIO.read_audio_file(file_path)

    # A sampling rate of 0 is treated as a failed read; bail out before
    # doing any further processing on the signal.
    if sampling_rate == 0:
        raise ValueError("Could not read the file properly: Sampling rate zero")

    signal = audioBasicIO.stereo_to_mono(signal)

    # The signal must be strictly longer than one mid-term window.
    if signal.shape[0] / float(sampling_rate) <= model_details['mid_window']:
        raise ValueError("Could not read the file properly: Signal shape is not good")

    # feature extraction:
    mid_features, s, _ = MidTermFeatures.mid_feature_extraction(
        signal,
        sampling_rate,
        model_details['mid_window'] * sampling_rate,
        model_details['mid_step'] * sampling_rate,
        round(sampling_rate * model_details['short_window']),
        round(sampling_rate * model_details['short_step']),
    )

    # long term averaging of mid-term statistics
    mid_features = mid_features.mean(axis=1)

    if model_details['compute_beat']:
        beat, beat_conf = MidTermFeatures.beat_extraction(s, model_details['short_step'])
        mid_features = numpy.append(mid_features, beat)
        mid_features = numpy.append(mid_features, beat_conf)

    # NOTE: no normalization ((mid_features - mean) / std) is applied here;
    # the raw feature vector is returned.
    return mid_features
|
2020-03-30 20:40:14 +02:00
|
|
|
|
|
|
|
|
|
|
|
def run_everything(parameters: dict):
    """Download a sample, extract its features and push them for classification.

    Steps:
        1. Download the audio object identified by ``parameters['tag']`` from
           the storage service into a temporary ``.wav`` file.
        2. Fetch the default model details from the model service.
        3. Run ``do_extraction`` on the downloaded file.
        4. POST the tag, features and model id to the classification service.

    The temporary file is always removed, whether or not the steps after its
    creation succeed.

    Args:
        parameters: Job parameters; only the ``tag`` key is read here.

    Raises:
        requests.HTTPError: If the storage or model service answers with an
            error status.
        Exception: Whatever ``do_extraction`` raises is propagated.

    The classification service response is only logged, never raised, so an
    error in that service does not fail this one.
    """
    tag = parameters['tag']
    logging.info(f"Downloading sample: {tag}")

    r = requests.get(f"http://storage-service/object/{tag}")
    # Fail early instead of writing an HTTP error body to disk as audio.
    r.raise_for_status()

    # mkstemp (not mktemp) creates the file and returns (fd, path).
    # NOTE: the "extractor-service" directory is relative to the working
    # directory and must already exist.
    fd, file_path = tempfile.mkstemp(prefix=f"{tag}_", suffix=".wav", dir="extractor-service")
    try:
        # fdopen takes ownership of the descriptor, so it is closed on exit.
        with os.fdopen(fd, 'wb') as f:
            f.write(r.content)

        logging.debug(f"Downloaded sample to {file_path}")

        logging.info("Getting default model details...")
        r = requests.get("http://model-service/model/$default/details")
        r.raise_for_status()

        model_details = r.json()

        logging.debug(f"Using model {model_details['id']}")

        # download done. Do extraction magic
        results = do_extraction(model_details, file_path)
    finally:
        # Clean up even when the model lookup or the extraction fails.
        os.remove(file_path)

    logging.info("Pushing results to Classifier service...")

    response = {
        "tag": tag,
        "features": results,
        "model": model_details['id'],
    }

    logging.debug(f"Data being pushed: {str(response)}")

    r = requests.post(
        'http://classification-service/classify',
        data=json.dumps(response, cls=NumpyArrayEncoder),
        headers={'Content-Type': 'application/json'},
    )

    # r.raise_for_status() is deliberately not called:
    # an error in a service should not kill other services
    logging.info(f"Classification service response: {r.status_code}")
|