2020-07-28 18:10:19 +02:00
|
|
|
#!/usr/bin/env python3
|
2020-07-27 17:58:48 +02:00
|
|
|
from typing import Tuple
|
|
|
|
import tempfile
|
|
|
|
import os
|
|
|
|
import os.path
|
|
|
|
import shutil
|
|
|
|
|
|
|
|
import librosa
|
|
|
|
import librosa.display
|
|
|
|
import numpy
|
|
|
|
import matplotlib.pyplot
|
|
|
|
from keras.models import model_from_json
|
|
|
|
from keras import optimizers
|
|
|
|
from keras_preprocessing.image import ImageDataGenerator
|
|
|
|
|
|
|
|
|
|
|
|
class Classifier(object):
    """Classify WAV recordings with a pre-trained Keras image model.

    The model architecture is read from a JSON file and its weights from a
    separate weights file.  To classify a recording, a mel spectrogram of the
    audio is rendered to a JPEG in a temporary directory and that directory is
    fed to the model through an ``ImageDataGenerator``.
    """

    # Output index of the model -> label name reported to the caller.
    _LABELS = {
        0: 'anser',
        1: 'columba',
        2: 'hirundo',
        3: 'passer',
        4: 'sturnus',
        5: 'turdus',
        6: 'upupa',
    }

    # Name of the single class sub-directory the spectrogram is stored in.
    _CLASS_SUBDIR = "unknown"

    # Preferred parent for the temporary image directory; when it does not
    # exist the platform default temporary directory is used instead.
    _PREFERRED_TMP_PARENT = "/dev/shm"

    def __init__(self, model_filename: str, weights_filename: str):
        """Load, compile and summarize the model.

        :param model_filename: path of the JSON file describing the model.
        :param weights_filename: path of the weights file loaded into it.
        """
        with open(model_filename, 'r') as f:
            self.loaded_model = model_from_json(f.read())

        self.loaded_model.load_weights(weights_filename)
        # Pixel values are rescaled to [0, 1] before they reach the model.
        self.datagen = ImageDataGenerator(rescale=1. / 255., validation_split=0.25)

        # NOTE(review): `lr`/`decay` are the legacy RMSprop argument names;
        # kept as-is because the installed Keras version is not visible here.
        self.loaded_model.compile(
            optimizers.RMSprop(lr=0.0005, decay=1e-6),
            loss="categorical_crossentropy",
            metrics=["accuracy"],
        )
        self.loaded_model.summary()

    @staticmethod
    def create_spectrogram(wav_filename: str) -> Tuple[str, str]:
        """Render the mel spectrogram of a WAV file to a temporary JPEG.

        The image is written to ``<tmpdir>/unknown/<name>.jpg`` where
        ``<name>`` is the basename of ``wav_filename`` without its extension.

        :param wav_filename: path of the audio file to load.
        :return: ``(tmpdir, image_path)``.  The caller owns ``tmpdir`` and is
            responsible for deleting it.
        :raises: whatever the audio loading, plotting or filesystem calls
            raise; in that case the temporary directory is removed again.
        """
        matplotlib.pyplot.interactive(False)
        clip, sample_rate = librosa.load(wav_filename, sr=None)

        fig = matplotlib.pyplot.figure(figsize=[0.72, 0.72])
        try:
            # Bare plot: no axes, no frame, only the spectrogram itself.
            ax = fig.add_subplot(111)
            ax.axes.get_xaxis().set_visible(False)
            ax.axes.get_yaxis().set_visible(False)
            ax.set_frame_on(False)

            spectrogram = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
            librosa.display.specshow(librosa.power_to_db(spectrogram, ref=numpy.max))

            preferred = Classifier._PREFERRED_TMP_PARENT
            target_dir = tempfile.mkdtemp(
                dir=preferred if os.path.isdir(preferred) else None
            )
            try:
                class_dir = os.path.join(target_dir, Classifier._CLASS_SUBDIR)
                os.mkdir(class_dir)

                # Store the image with a .jpg extension; the original author
                # was certain Keras could not read it in otherwise.
                # splitext() handles extensions of any length (or none).
                stem = os.path.splitext(os.path.basename(wav_filename))[0]
                file_name = os.path.join(class_dir, f"{stem}.jpg")

                matplotlib.pyplot.savefig(file_name, dpi=400, bbox_inches='tight', pad_inches=0)
            except BaseException:
                # Do not leave a half-populated temporary directory behind.
                shutil.rmtree(target_dir, ignore_errors=True)
                raise
        finally:
            # Release the figure even when rendering or saving failed.
            matplotlib.pyplot.close()
            fig.clf()
            matplotlib.pyplot.close(fig)
            matplotlib.pyplot.close('all')

        # Return the directory WITHOUT the "unknown" component: the generator
        # is pointed at the parent and finds the image via the sub-directory.
        return target_dir, file_name

    def _run_predictor(self, directory: str) -> Tuple[str, dict]:
        """Run the model on the images found under ``directory``.

        :param directory: directory containing a class sub-directory with the
            image(s) to classify.
        :return: ``(best_label, scores)`` for the first image, where
            ``scores`` maps every label name to its score as a ``float``.
        :raises IndexError: if the model returned no prediction rows.
        """
        predict_generator = self.datagen.flow_from_directory(
            directory=directory,
            batch_size=128,
            seed=42,
            shuffle=False,
            class_mode="categorical",
            target_size=(64, 64))

        # NOTE(review): `predict_generator` is the legacy Keras API; kept
        # as-is because the installed Keras version is not visible here.
        prediction = self.loaded_model.predict_generator(predict_generator, steps=1)

        # Only the first row is reported to the caller.
        scores = prediction[0]

        labeled_predictions = {self._LABELS[i]: float(p) for i, p in enumerate(scores)}
        predicted_class_name = self._LABELS[int(numpy.argmax(scores))]

        return predicted_class_name, labeled_predictions

    def predict(self, wav_filename: str) -> Tuple[str, dict]:
        """Classify a WAV file.

        :param wav_filename: path of the audio file to classify.
        :return: ``(best_label, scores)`` as produced by ``_run_predictor``.
        """
        directory, _ = self.create_spectrogram(wav_filename)

        try:
            return self._run_predictor(directory)
        finally:
            # The image is no longer needed, whether or not prediction worked.
            shutil.rmtree(directory)
|