from typing import List, Tuple
import tempfile
import os
import os.path
import shutil

import librosa
import librosa.display
import numpy
import matplotlib.pyplot

from keras.models import model_from_json
from keras import optimizers
from keras_preprocessing.image import ImageDataGenerator


class Classifier(object):
    """Classify a wav recording by running a Keras model on its mel spectrogram.

    The recording is rendered to a small spectrogram image inside a temporary
    directory, the image is fed to the loaded model through an
    ``ImageDataGenerator``, and the predicted class index is mapped to a label.
    """

    # Label name -> class index, as used by ``_run_predictor``.
    _CLASS_INDICES = {
        'anser': 0,
        'columba': 1,
        'hirundo': 2,
        'passer': 3,
        'sturnus': 4,
        'turdus': 5,
        'upupa': 6,
    }
    # Inverse lookup (class index -> label name), built once instead of on
    # every prediction.
    _INDEX_TO_LABEL = {index: name for name, index in _CLASS_INDICES.items()}

    # Name of the single sub-directory the spectrogram image is written into.
    # NOTE(review): flow_from_directory presumably needs the image inside a
    # sub-directory of the directory it is given -- confirm against Keras docs.
    _IMAGE_SUBDIR = "unknown"

    def __init__(self, model_filename: str, weights_filename: str):
        """Load the model architecture and weights, then compile the model.

        Args:
            model_filename: Path to the JSON file describing the model.
            weights_filename: Path to the weights file loaded into the model.
        """
        with open(model_filename, 'r') as f:
            self.loaded_model = model_from_json(f.read())

        self.loaded_model.load_weights(weights_filename)
        self.datagen = ImageDataGenerator(rescale=1. / 255., validation_split=0.25)
        self.loaded_model.compile(
            optimizers.rmsprop(lr=0.0005, decay=1e-6),
            loss="categorical_crossentropy",
            metrics=["accuracy"],
        )
        self.loaded_model.summary()

    @staticmethod
    def create_spectrogram(wav_filename: str) -> Tuple[str, str]:
        """Render the mel spectrogram of a wav file into a new temp directory.

        The image is written to ``<temp dir>/unknown/<wav base name>.jpg``.
        The caller owns the returned directory and must delete it.

        Args:
            wav_filename: Path of the audio file to load.

        Returns:
            A ``(directory, image_path)`` tuple: the temporary directory that
            was created and the path of the image saved inside it.

        Raises:
            Exception: Whatever loading or rendering raises; the temporary
                directory is removed before the error propagates.
        """
        matplotlib.pyplot.interactive(False)
        # Load first so that an unreadable file does not leave a temp dir behind.
        clip, sample_rate = librosa.load(wav_filename, sr=None)

        target_dir = tempfile.mkdtemp()
        try:
            # The sub-directory has to exist before savefig can write into it.
            image_dir = os.path.join(target_dir, Classifier._IMAGE_SUBDIR)
            os.makedirs(image_dir)

            # Use only the base name: joining the full wav path would escape
            # the temp dir for absolute paths and point into missing
            # sub-directories for relative ones.
            stem = os.path.splitext(os.path.basename(wav_filename))[0]
            # Saved as .jpg on purpose: the original author was certain Keras
            # could not read the image otherwise.
            file_name = os.path.join(image_dir, f"{stem}.jpg")

            fig = matplotlib.pyplot.figure(figsize=[0.72, 0.72])
            try:
                # Hide axes and frame so only the spectrogram itself is saved.
                ax = fig.add_subplot(111)
                ax.axes.get_xaxis().set_visible(False)
                ax.axes.get_yaxis().set_visible(False)
                ax.set_frame_on(False)

                spectrogram = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
                librosa.display.specshow(
                    librosa.power_to_db(spectrogram, ref=numpy.max))
                fig.savefig(file_name, dpi=400, bbox_inches='tight', pad_inches=0)
            finally:
                # Release only the figure created here, even on failure, and
                # leave any other open figures in the process untouched.
                fig.clf()
                matplotlib.pyplot.close(fig)
        except Exception:
            # The caller never receives the directory on failure, so clean up.
            shutil.rmtree(target_dir, ignore_errors=True)
            raise

        return target_dir, file_name

    def _run_predictor(self, directory: str) -> List[str]:
        """Run the model on the images under ``directory``.

        Args:
            directory: Directory handed to ``flow_from_directory``.

        Returns:
            One label name per prediction row returned by the model.
        """
        predict_generator = self.datagen.flow_from_directory(
            directory=directory,
            batch_size=128,
            seed=42,
            shuffle=False,
            class_mode="categorical",
            target_size=(64, 64))

        prediction = self.loaded_model.predict_generator(predict_generator, steps=1)
        predicted_class_indices = numpy.argmax(prediction, axis=1)

        return [self._INDEX_TO_LABEL[index] for index in predicted_class_indices]

    def predict(self, wav_filename: str) -> List[str]:
        """Predict the label(s) for a wav file.

        Args:
            wav_filename: Path of the audio file to classify.

        Returns:
            The list of predicted label names from ``_run_predictor``.
        """
        directory, _ = self.create_spectrogram(wav_filename)
        try:
            return self._run_predictor(directory)
        finally:
            # The image is no longer needed; remove it even if prediction fails.
            shutil.rmtree(directory)