2021-11-23 01:41:24 +01:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
import sentry_sdk
|
|
|
|
import os
|
|
|
|
import requests
|
|
|
|
import json
|
|
|
|
import uwsgi
|
2021-11-23 19:58:56 +01:00
|
|
|
import pickle
|
2021-12-02 21:43:58 +01:00
|
|
|
from threading import Thread
|
|
|
|
from queue import Queue
|
2021-11-23 01:41:24 +01:00
|
|
|
|
2021-11-23 19:58:56 +01:00
|
|
|
from urllib.parse import urljoin
|
2021-11-23 01:41:24 +01:00
|
|
|
from config import Config
|
|
|
|
from pyAudioAnalysis.audioTrainTest import load_model, load_model_knn, classifier_wrapper
|
|
|
|
from pyAudioAnalysis import audioBasicIO
|
|
|
|
from pyAudioAnalysis import MidTermFeatures
|
|
|
|
import numpy
|
|
|
|
|
2021-12-03 00:31:30 +01:00
|
|
|
from apscheduler.schedulers.background import BackgroundScheduler
|
|
|
|
|
2021-11-23 01:41:24 +01:00
|
|
|
# Initialize Sentry error reporting, but only when a DSN is configured
# (lets local/dev runs skip Sentry entirely).
if Config.SENTRY_DSN:
    sentry_sdk.init(
        dsn=Config.SENTRY_DSN,
        send_default_pii=True,  # attach request/user context to events
        release=Config.RELEASE_ID,
        environment=Config.RELEASEMODE
    )
|
|
|
|
|
|
|
|
|
2021-11-23 21:33:06 +01:00
|
|
|
def json_datetime_dumper(o):
    """`json.dumps(default=...)` hook.

    Objects exposing an ``isoformat()`` method (datetime/date/time) are
    serialized as ISO 8601 strings; anything else falls back to ``str()``.
    """
    return o.isoformat() if hasattr(o, "isoformat") else str(o)
|
|
|
|
|
|
|
|
|
2021-11-23 01:41:24 +01:00
|
|
|
class ModelMemer:
    """Downloads the classifier model once and memoizes it in the process.

    After a successful download_model_if_needed() call the following
    attributes are populated: model_details, classifier, mean, std, classes,
    mid_window, mid_step, short_window, short_step, compute_beat, target_id.
    """

    def __init__(self):
        # Becomes True once the model has been downloaded and deserialized.
        self._loaded_model = False

    def download_model_if_needed(self):
        """Fetch the model description and files from MODEL_INFO_URL, then load them.

        No-op when the model file is on disk and already loaded in this process.

        Raises:
            requests.HTTPError: when any of the HTTP fetches fails.
        """
        models_dir = "/tmp/svm_model"
        os.makedirs(models_dir, exist_ok=True)

        model_file = os.path.join(models_dir, "model")
        # pyAudioAnalysis expects the normalization data in "<model>MEANS"
        # right next to the model file.
        means_file = os.path.join(models_dir, "modelMEANS")

        # Already downloaded and deserialized -> nothing to do.
        if os.path.isfile(model_file) and self._loaded_model:
            return

        r = requests.get(Config.MODEL_INFO_URL)
        r.raise_for_status()
        self.model_details = r.json()

        r = requests.get(urljoin(Config.MODEL_INFO_URL, self.model_details['files']['model']))
        r.raise_for_status()
        with open(model_file, 'wb') as f:
            f.write(r.content)

        r = requests.get(urljoin(Config.MODEL_INFO_URL, self.model_details['files']['means']))
        r.raise_for_status()
        with open(means_file, 'wb') as f:
            f.write(r.content)

        # kNN models use a dedicated loader; everything else goes through load_model.
        loader = load_model_knn if self.model_details['type'] == 'knn' else load_model
        (self.classifier, self.mean, self.std, self.classes,
         self.mid_window, self.mid_step, self.short_window,
         self.short_step, self.compute_beat) = loader(model_file)

        target_class_name = self.model_details['target_class_name']
        self.target_id = self.classes.index(target_class_name)

        # BUG FIX: the flag was never set before, so the cache check above
        # could never pass and the model was re-downloaded and re-loaded on
        # every classification call.
        self._loaded_model = True
|
|
|
|
|
|
|
|
|
|
|
|
def run_classification(audio_file_path: str, memer: ModelMemer):
    """Classify one audio file with the memoized model.

    Returns True iff the predicted class is the configured target class AND
    its probability exceeds 0.5.

    Raises:
        AssertionError: when the file cannot be read or is too short.
    """
    memer.download_model_if_needed()

    # Load the audio and collapse it to a single channel.
    fs, samples = audioBasicIO.read_audio_file(audio_file_path)
    samples = audioBasicIO.stereo_to_mono(samples)

    if fs == 0:
        raise AssertionError("Could not read the file properly: Sampling rate zero")

    # The clip must be longer than one mid-term analysis window.
    if samples.shape[0] / float(fs) <= memer.mid_window:
        raise AssertionError("Could not read the file properly: Signal shape is not good")

    # Mid-term feature extraction; window/step sizes are converted to samples.
    mt_feats, st_feats, _ = MidTermFeatures.mid_feature_extraction(
        samples, fs,
        memer.mid_window * fs,
        memer.mid_step * fs,
        round(fs * memer.short_window),
        round(fs * memer.short_step))

    # Long-term average of the mid-term statistics over the whole clip.
    features = mt_feats.mean(axis=1)
    if memer.compute_beat:
        bpm, bpm_conf = MidTermFeatures.beat_extraction(st_feats, memer.short_step)
        features = numpy.append(features, bpm)
        features = numpy.append(features, bpm_conf)

    # Normalize with the training-time statistics, then classify.
    feature_vector = (features - memer.mean) / memer.std
    class_id, probability = classifier_wrapper(
        memer.classifier, memer.model_details['type'].lower(), feature_vector
    )
    class_id = int(class_id)  # classifier_wrapper returns a float/numpy scalar id

    return bool(class_id == memer.target_id and probability[class_id] > 0.5)
|
|
|
|
|
|
|
|
|
2021-12-02 21:43:58 +01:00
|
|
|
def lapatolas(q: Queue):
    """Forever pump raw messages from the uwsgi mule mailbox into *q*.

    uwsgi.mule_get_msg() blocks until a message arrives, so this loop
    does not busy-wait.
    """
    while True:
        q.put(uwsgi.mule_get_msg())
|
|
|
|
|
|
|
|
|
2021-12-03 00:31:30 +01:00
|
|
|
def reporter(q: Queue):
    """POST the current queue depth to Config.REPORT_URL as a one-shot report."""
    payload = {
        "site": Config.REPORT_ALIAS,
        "measurements": {
            "queue": q.qsize()
        }
    }

    print("Reporting queue length of", payload)

    response = requests.post(Config.REPORT_URL, json=payload)
    response.raise_for_status()

    # raise_for_status() only rejects 4xx/5xx; log any other non-201 success code.
    if response.status_code != 201:
        print(Config.REPORT_URL, "Wrong response:", response.status_code)
|
|
|
|
|
|
|
|
|
2021-11-23 01:41:24 +01:00
|
|
|
def main():
    """Mule worker loop: consume pickled audio tasks, classify them, and
    upload positive hits to the input service.

    Runs forever; the audio file referenced by each task is always deleted,
    whether or not classification/upload succeeds.
    """
    memer = ModelMemer()
    requeue = Queue()

    # Background thread that feeds uwsgi mule messages into requeue.
    Thread(target=lapatolas, args=(requeue,), daemon=True).start()

    scheduler = None
    if Config.REPORT_URL:
        scheduler = BackgroundScheduler()
        scheduler.add_job(lambda: reporter(requeue), trigger='interval', seconds=Config.REPORT_INTERVAL)
        scheduler.start()
        # Fire one report immediately: the interval trigger only fires after
        # the first REPORT_INTERVAL elapses.
        # BUG FIX: this thread was previously started unconditionally, which
        # posted to an unset REPORT_URL when reporting was disabled.
        Thread(target=reporter, args=(requeue,), daemon=True).start()

    while True:
        message = requeue.get(block=True)
        # NOTE(review): pickle.loads is only safe because messages come from
        # our own uwsgi mule; never feed it externally-influenced data.
        task = pickle.loads(message)
        audio_file_path = task['audio_file_path']
        description = task['description']
        try:
            result = run_classification(audio_file_path, memer)
            if result:
                # upload to real input service
                # BUG FIX: read via a context manager; the file handle was
                # previously opened and never closed.
                with open(audio_file_path, 'rb') as audio_f:
                    audio_bytes = audio_f.read()
                files = {
                    "file": (
                        os.path.basename(audio_file_path),
                        audio_bytes,
                        'audio/wave',
                        {'Content-length': os.path.getsize(audio_file_path)}
                    ),
                    "description": (None, json.dumps(description, default=json_datetime_dumper), "application/json")
                }

                r = requests.post(Config.INPUT_SERVICE_URL, files=files)
                r.raise_for_status()
        finally:
            # Always remove the temp audio file, even on failure.
            os.remove(audio_file_path)
|
|
|
|
|
2021-11-23 01:41:24 +01:00
|
|
|
|
|
|
|
# Entry point: run the worker loop only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|