cnn-classification-service/cnn_classification_service/cnn_clasifier.py

88 lines
2.9 KiB
Python

from typing import Tuple
import tempfile
import os
import os.path
import shutil
import librosa
import librosa.display
import numpy
import matplotlib.pyplot
from keras.models import model_from_json
from keras import optimizers
from keras_preprocessing.image import ImageDataGenerator
class Classifier(object):
    """Classify a WAV recording by running a Keras CNN on its mel spectrogram.

    The model architecture is read from a JSON file and its weights from a
    separate weights file. A recording is classified by rendering its mel
    spectrogram to a JPEG inside a temporary directory and feeding that
    directory to the model through an ``ImageDataGenerator``.
    """

    # Class names ordered by the output index the model produces for them.
    _CLASS_NAMES = (
        'anser',
        'columba',
        'hirundo',
        'passer',
        'sturnus',
        'turdus',
        'upupa',
    )

    # Sub-directory the spectrogram is stored in: ``flow_from_directory``
    # only picks up images that live in a sub-directory of the given root.
    _IMAGE_SUBDIR = "unknown"

    def __init__(self, model_filename: str, weights_filename: str):
        """Load the model, its weights, and compile it.

        Args:
            model_filename: Path to the JSON file describing the model.
            weights_filename: Path to the file holding the model weights.
        """
        with open(model_filename, 'r') as f:
            self.loaded_model = model_from_json(f.read())

        self.loaded_model.load_weights(weights_filename)
        self.datagen = ImageDataGenerator(rescale=1. / 255., validation_split=0.25)
        self.loaded_model.compile(
            optimizers.rmsprop(lr=0.0005, decay=1e-6),
            loss="categorical_crossentropy",
            metrics=["accuracy"],
        )
        self.loaded_model.summary()

    @staticmethod
    def _spectrogram_path(target_dir: str, wav_filename: str) -> str:
        """Return the JPEG path inside *target_dir* for *wav_filename*.

        Only the base name of the recording is used, so an absolute or nested
        input path can never redirect the image outside of *target_dir*.
        """
        stem, _ = os.path.splitext(os.path.basename(wav_filename))
        # The extension is changed to jpg because the author was certain
        # Keras could not read the image in any other format.
        return os.path.join(target_dir, Classifier._IMAGE_SUBDIR, f"{stem}.jpg")

    @classmethod
    def _decode_predictions(cls, prediction) -> list:
        """Map each row of class scores to the name of its highest-scoring class."""
        return [cls._CLASS_NAMES[int(index)] for index in numpy.argmax(prediction, axis=1)]

    @staticmethod
    def create_spectrogram(wav_filename: str) -> Tuple[str, str]:
        """Render the mel spectrogram of a recording into a fresh temp directory.

        Args:
            wav_filename: Path of the audio file to load.

        Returns:
            A ``(directory, image_path)`` tuple. ``directory`` is a newly
            created temporary directory that the caller is responsible for
            removing; ``image_path`` is the JPEG written beneath it.

        Raises:
            Exception: Whatever loading, rendering or saving raises. The
                temporary directory is removed before the error propagates.
        """
        matplotlib.pyplot.interactive(False)
        clip, sample_rate = librosa.load(wav_filename, sr=None)

        fig = matplotlib.pyplot.figure(figsize=[0.72, 0.72])
        target_dir = None
        try:
            ax = fig.add_subplot(111)
            ax.axes.get_xaxis().set_visible(False)
            ax.axes.get_yaxis().set_visible(False)
            ax.set_frame_on(False)

            spectrogram = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
            librosa.display.specshow(librosa.power_to_db(spectrogram, ref=numpy.max))

            target_dir = tempfile.mkdtemp()
            file_name = Classifier._spectrogram_path(target_dir, wav_filename)
            # The class sub-directory does not exist in a fresh temp directory.
            os.makedirs(os.path.dirname(file_name), exist_ok=True)
            fig.savefig(file_name, dpi=400, bbox_inches='tight', pad_inches=0)
        except Exception:
            # Do not leave a half-populated temp directory behind on failure.
            if target_dir is not None:
                shutil.rmtree(target_dir, ignore_errors=True)
            raise
        finally:
            # Release only this figure; figures owned by other code are untouched.
            fig.clf()
            matplotlib.pyplot.close(fig)

        return target_dir, file_name

    def _run_predictor(self, directory: str) -> list:
        """Run the model on the images under *directory*.

        Args:
            directory: Root directory whose sub-directories contain the images.

        Returns:
            One predicted class name per image in the first batch.
        """
        predict_generator = self.datagen.flow_from_directory(
            directory=directory,
            batch_size=128,
            seed=42,
            shuffle=False,
            class_mode="categorical",
            target_size=(64, 64))

        prediction = self.loaded_model.predict_generator(predict_generator, steps=1)
        return self._decode_predictions(prediction)

    def predict(self, wav_filename: str) -> list:
        """Classify a recording and return the predicted class names.

        Args:
            wav_filename: Path of the audio file to classify.

        Returns:
            The list of predicted class names produced by the model.
        """
        directory, _ = self.create_spectrogram(wav_filename)
        try:
            return self._run_predictor(directory)
        finally:
            # The image is no longer needed, whether or not prediction succeeded.
            shutil.rmtree(directory, ignore_errors=True)