Classifying valence bond structures
Test of a simple image classifier, using TensorFlow and Keras
Based on the TensorFlow tutorial: https://www.tensorflow.org/tutorials/images/classification
import os
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
dataset = "Valenzstrich.tar"
dataset_path = os.path.abspath("./" + dataset)
data_dir = tf.keras.utils.get_file('Valenzstrich.tar', origin='file:\\' + dataset_path, extract=True)
data_dir = pathlib.Path(data_dir).with_suffix('')
print(data_dir)
C:\Users\Marcus\.keras\datasets\Valenzstrich
Note: outside of Jupyter notebooks, the origin scheme must be written as "file://".
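A more portable way to build this URL (a sketch, not the cell that was actually run above) is to let pathlib construct the URI, which yields a well-formed "file://" URL on Windows, Linux, and macOS alike:

import pathlib
import tensorflow as tf

# Hypothetical variant: Path.as_uri() requires an absolute path, hence resolve().
dataset_uri = pathlib.Path("Valenzstrich.tar").resolve().as_uri()
data_dir = tf.keras.utils.get_file('Valenzstrich.tar', origin=dataset_uri, extract=True)
data_dir = pathlib.Path(data_dir).with_suffix('')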
image_count = len(list(data_dir.glob('*/*.png')))
print(image_count)
2386
alkohols = list(data_dir.glob('Alkohole/*'))
PIL.Image.open(str(alkohols[0]))
PIL.Image.open(str(alkohols[1]))
amine = list(data_dir.glob('Amine/*'))
PIL.Image.open(str(amine[0]))
PIL.Image.open(str(amine[1]))
Next, load these images using the helpful tf.keras.utils.image_dataset_from_directory utility. This takes you from a directory of images on disk to a tf.data.Dataset in just a couple of lines of code. If you like, you can also write your own data-loading code from scratch by visiting the Load and preprocess images tutorial.
Create a dataset
Define some parameters for the loader:
batch_size = 64
img_height = 100
img_width = 100
Use 80% of the images for training and 20% for validation.
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)
Found 2386 files belonging to 9 classes. Using 1909 files for training.
val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)
Found 2386 files belonging to 9 classes. Using 477 files for validation.
class_names = train_ds.class_names
print(class_names)
['Aldehyde', 'Alkane', 'Alkohole', 'Amine', 'Bromide', 'Chloride', 'Ketone', 'Saeuren', 'Thiole']
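The class names are taken from the (German) directory names: Aldehyde (aldehydes), Alkane (alkanes), Alkohole (alcohols), Amine (amines), Bromide (bromides), Chloride (chlorides), Ketone (ketones), Saeuren (acids), Thiole (thiols).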
plt.figure(figsize=(5, 5))
for images, labels in train_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(class_names[labels[i]])
        plt.axis("off")
for image_batch, labels_batch in train_ds:
    print(image_batch.shape)
    print(labels_batch.shape)
    break
(64, 100, 100, 3)
(64,)
The image_batch is a tensor of shape (64, 100, 100, 3): a batch of 64 images of shape 100x100x3 (the last dimension refers to the RGB color channels). The labels_batch is a tensor of shape (64,); these are the labels corresponding to the 64 images.
AUTOTUNE = tf.data.AUTOTUNE
# Cache the decoded images after the first epoch, shuffle the training data,
# and overlap preprocessing with training via prefetching.
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
normalization_layer = layers.Rescaling(1./255)
normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
# Notice the pixel values are now in `[0,1]`.
print(np.min(first_image), np.max(first_image))
0.0 1.0
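Note that this normalized dataset is built here only as a demonstration: the model defined below begins with its own Rescaling layer, so the unnormalized train_ds and val_ds are passed to model.fit as-is.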
Create the model
num_classes = len(class_names)
model = Sequential([
    layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes)
])
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
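The loss is created with from_logits=True because the final Dense layer has no softmax activation and therefore outputs raw logits; the softmax is applied explicitly later (via tf.nn.softmax) when the predictions are interpreted.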
model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= rescaling_1 (Rescaling) (None, 100, 100, 3) 0 conv2d (Conv2D) (None, 100, 100, 16) 448 max_pooling2d (MaxPooling2D (None, 50, 50, 16) 0 ) conv2d_1 (Conv2D) (None, 50, 50, 32) 4640 max_pooling2d_1 (MaxPooling (None, 25, 25, 32) 0 2D) conv2d_2 (Conv2D) (None, 25, 25, 64) 18496 max_pooling2d_2 (MaxPooling (None, 12, 12, 64) 0 2D) flatten (Flatten) (None, 9216) 0 dense (Dense) (None, 128) 1179776 dense_1 (Dense) (None, 9) 1161 ================================================================= Total params: 1,204,521 Trainable params: 1,204,521 Non-trainable params: 0 _________________________________________________________________
epochs = 10
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs
)
Epoch 1/10
30/30 [==============================] - 5s 140ms/step - loss: 2.2010 - accuracy: 0.1676 - val_loss: 2.1495 - val_accuracy: 0.1929
Epoch 2/10
30/30 [==============================] - 4s 130ms/step - loss: 2.0907 - accuracy: 0.2090 - val_loss: 2.0820 - val_accuracy: 0.2222
Epoch 3/10
30/30 [==============================] - 4s 127ms/step - loss: 1.9031 - accuracy: 0.2996 - val_loss: 1.9083 - val_accuracy: 0.3040
Epoch 4/10
30/30 [==============================] - 4s 138ms/step - loss: 1.6679 - accuracy: 0.3855 - val_loss: 1.8077 - val_accuracy: 0.2956
Epoch 5/10
30/30 [==============================] - 4s 131ms/step - loss: 1.3560 - accuracy: 0.4976 - val_loss: 1.4822 - val_accuracy: 0.4172
Epoch 6/10
30/30 [==============================] - 5s 152ms/step - loss: 1.0244 - accuracy: 0.6260 - val_loss: 1.2118 - val_accuracy: 0.5283
Epoch 7/10
30/30 [==============================] - 4s 147ms/step - loss: 0.7442 - accuracy: 0.7213 - val_loss: 1.0752 - val_accuracy: 0.5262
Epoch 8/10
30/30 [==============================] - 4s 143ms/step - loss: 0.6019 - accuracy: 0.7664 - val_loss: 0.9610 - val_accuracy: 0.6038
Epoch 9/10
30/30 [==============================] - 4s 133ms/step - loss: 0.4826 - accuracy: 0.8130 - val_loss: 0.9414 - val_accuracy: 0.6415
Epoch 10/10
30/30 [==============================] - 4s 143ms/step - loss: 0.4068 - accuracy: 0.8355 - val_loss: 0.9346 - val_accuracy: 0.6164
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(epochs)
plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
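The curves mirror the log above: training accuracy climbs to about 0.84 while validation accuracy plateaus around 0.62, and the validation loss barely improves after epoch 8. This widening gap suggests the model is beginning to overfit the training set.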
print(os.getcwd())
molekul_path = os.path.join(os.getcwd(), "Molekul.png")
img = tf.keras.utils.load_img(
    molekul_path, target_size=(img_height, img_width)
)
img_array = tf.keras.utils.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)  # Create a batch
predictions = model.predict(img_array)
score = tf.nn.softmax(predictions[0])
print(
    "This image most likely belongs to {} with a {:.2f} percent confidence."
    .format(class_names[np.argmax(score)], 100 * np.max(score))
)
PIL.Image.open(molekul_path)
C:\Users\Marcus
1/1 [==============================] - 0s 114ms/step
This image most likely belongs to Amine with a 83.97 percent confidence.
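Since the same prediction cell is repeated below for Molekul_2.png and Molekul_3.png, the steps could be wrapped in a small helper (a sketch; classify_image is a hypothetical name, not part of the original notebook):

def classify_image(path, model, class_names, img_height=100, img_width=100):
    # Load one image, batch it, and report the model's most likely class.
    img = tf.keras.utils.load_img(path, target_size=(img_height, img_width))
    img_array = tf.expand_dims(tf.keras.utils.img_to_array(img), 0)  # batch of 1
    score = tf.nn.softmax(model.predict(img_array)[0])  # logits -> probabilities
    print(
        "This image most likely belongs to {} with a {:.2f} percent confidence."
        .format(class_names[np.argmax(score)], 100 * np.max(score))
    )

classify_image(os.path.join(os.getcwd(), "Molekul.png"), model, class_names)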
print(os.getcwd())
molekul_path = os.path.join(os.getcwd(), "Molekul_2.png")
img = tf.keras.utils.load_img(
    molekul_path, target_size=(img_height, img_width)
)
img_array = tf.keras.utils.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)  # Create a batch
predictions = model.predict(img_array)
score = tf.nn.softmax(predictions[0])
print(
    "This image most likely belongs to {} with a {:.2f} percent confidence."
    .format(class_names[np.argmax(score)], 100 * np.max(score))
)
PIL.Image.open(molekul_path)
C:\Users\Marcus
1/1 [==============================] - 0s 16ms/step
This image most likely belongs to Chloride with a 92.19 percent confidence.
print(os.getcwd())
molekul_path = os.path.join(os.getcwd(), "Molekul_3.png")
img = tf.keras.utils.load_img(
    molekul_path, target_size=(img_height, img_width)
)
img_array = tf.keras.utils.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)  # Create a batch
predictions = model.predict(img_array)
score = tf.nn.softmax(predictions[0])
print(
    "This image most likely belongs to {} with a {:.2f} percent confidence."
    .format(class_names[np.argmax(score)], 100 * np.max(score))
)
PIL.Image.open(molekul_path)
C:\Users\Marcus
1/1 [==============================] - 0s 16ms/step
This image most likely belongs to Saeuren with a 99.65 percent confidence.