From 94b5066b16fb1fe69756b8264a069f0b7ad224a8 Mon Sep 17 00:00:00 2001
From: marcsello <punkosdmarcell@rocketmail.com>
Date: Tue, 14 Apr 2020 23:17:20 +0200
Subject: [PATCH] Implemented model_service stuff

---
 extractor_service/extraction.py | 56 ++++++++++++++++++++++++++-------
 1 file changed, 44 insertions(+), 12 deletions(-)

diff --git a/extractor_service/extraction.py b/extractor_service/extraction.py
index 3f7e363..c85421b 100644
--- a/extractor_service/extraction.py
+++ b/extractor_service/extraction.py
@@ -1,13 +1,16 @@
 #!/usr/bin/env python3
-import json
-from json import JSONEncoder
-import numpy
 import os
 import os.path
 import logging
+
+import json
+import tempfile
+from json import JSONEncoder
+
 import requests
 from pyAudioAnalysis import audioBasicIO
-from pyAudioAnalysis import ShortTermFeatures
+from pyAudioAnalysis import MidTermFeatures
+import numpy
 
 
 class NumpyArrayEncoder(JSONEncoder):
@@ -18,19 +21,70 @@ class NumpyArrayEncoder(JSONEncoder):
 
 
 def do_extraction(file_path: str):
+    """Compute the long-term averaged mid-term feature vector of an audio file.
+
+    The window and step sizes are taken from the default model's details,
+    which are requested from the model service on every call.
+
+    :param file_path: path of the audio file to read
+    :return: the mid-term features averaged along axis 1; when the model's
+        ``compute_beat`` entry is truthy, the beat and its confidence are
+        appended
+    :raises requests.HTTPError: if the model service answers with an error
+        status
+    :raises Exception: if the sampling rate is zero, or the signal is not
+        longer than one mid-term window
+    """
+    logging.info("Getting default model details...")
+    r = requests.get("http://model-service/model/$default/details")
+    r.raise_for_status()
+
+    model_details = r.json()
+
     logging.info("Running extraction...")
 
-    [Fs, x] = audioBasicIO.read_audio_file(file_path)
-    F, f_names = ShortTermFeatures.feature_extraction(x, Fs, 0.050 * Fs, 0.025 * Fs)
+    sampling_rate, signal = audioBasicIO.read_audio_file(file_path)
+    signal = audioBasicIO.stereo_to_mono(signal)
 
-    return {"F": F, "f_names": f_names}
+    if sampling_rate == 0:
+        raise Exception("Could not read the file properly: Sampling rate zero")
+
+    # the signal must be longer than one mid-term window
+    if signal.shape[0] / float(sampling_rate) <= model_details['mid_window']:
+        raise Exception("Could not read the file properly: Signal shape is not good")
+
+    # feature extraction: window and step sizes are scaled by the sampling rate
+    mid_features, s, _ = MidTermFeatures.mid_feature_extraction(
+        signal,
+        sampling_rate,
+        model_details['mid_window'] * sampling_rate,
+        model_details['mid_step'] * sampling_rate,
+        round(sampling_rate * model_details['short_window']),
+        round(sampling_rate * model_details['short_step']),
+    )
+
+    # long term averaging of mid-term statistics
+    mid_features = mid_features.mean(axis=1)
+    if model_details['compute_beat']:
+        beat, beat_conf = MidTermFeatures.beat_extraction(s, model_details['short_step'])
+        mid_features = numpy.append(mid_features, beat)
+        mid_features = numpy.append(mid_features, beat_conf)
+
+    # NOTE: no normalization ((mid_features - mean) / std) is applied here
+
+    return mid_features
 
 
 def run_everything(parameters: dict):
     tag = parameters['tag']
     logging.info(f"Downloading sample: {tag}")
 
-    file_path = os.path.join("/tmp/extractor-service/", f"{tag}.wav")
+    # mkstemp() creates the file and returns (fd, path); only the path is
+    # needed because the file is reopened by name below, so close the fd.
+    # NOTE(review): dir is relative to the current working directory and
+    # must already exist -- confirm this is the intended location.
+    fd, file_path = tempfile.mkstemp(prefix=f"{tag}_", suffix=".wav", dir="extractor-service")
+    os.close(fd)
     r = requests.get(f"http://storage-service/object/{tag}")
     with open(file_path, 'wb') as f:
         f.write(r.content)
@@ -41,7 +73,7 @@ def run_everything(parameters: dict):
     finally:
         os.remove(file_path)
 
-    logging.info(f"Pushing results to AI service...")
+    logging.info("Pushing results to Classifier service...")
 
     response = {
         "tag": tag,
@@ -50,6 +82,10 @@ def run_everything(parameters: dict):
 
     logging.debug(f"Data being pushed: {str(response)}")
 
-    # r = requests.post('http://ai-service/asd', data=json.dumps(results, cls=NumpyArrayEncoder), headers={'Content-Type': 'application/json'})
-
-    # r.raise_for_status()
+    # NOTE(review): the debug line above logs `response` as the data being
+    # pushed, but the request body is built from `results` -- confirm which
+    # of the two the classification service expects.
+    r = requests.post('http://classification-service/classify', data=json.dumps(results, cls=NumpyArrayEncoder), headers={'Content-Type': 'application/json'})
+    # raise_for_status() is deliberately not called: an error in a service
+    # should not kill other services
+    logging.info(f"Classification service response: {r.status_code}")