Python源码示例:keras.utils.to_categorical()
示例1
def create_sequences(tokenizer, max_length, captions_list, image):
    """Build next-word training samples for one image from its captions.

    Every caption is encoded with ``tokenizer`` and expanded into one sample
    per prefix: the prefix, padded to ``max_length``, is the text input and
    the token that follows it, one-hot encoded, is the target.

    :param tokenizer: object exposing ``word_index`` and
        ``texts_to_sequences``.
    :param max_length: length every input prefix is padded to.
    :param captions_list: captions describing ``image``.
    :param image: image features, repeated once per generated sample.
    :return: three parallel lists ``(X1, X2, y)`` -- image inputs, padded
        text inputs and one-hot next-word targets.
    """
    image_inputs, text_inputs, targets = [], [], []
    # One class more than there are entries in the tokenizer's word index.
    vocab_size = len(tokenizer.word_index) + 1
    for caption in captions_list:
        encoded = tokenizer.texts_to_sequences([caption])[0]
        # Every split point yields one (prefix -> next token) pair.
        for split_at in range(1, len(encoded)):
            prefix, next_token = encoded[:split_at], encoded[split_at]
            padded_prefix = pad_sequences([prefix], maxlen=max_length)[0]
            one_hot_next = to_categorical([next_token], num_classes=vocab_size)[0]
            image_inputs.append(image)
            text_inputs.append(padded_prefix)
            targets.append(one_hot_next)
    return image_inputs, text_inputs, targets
# Data generator, intended to be used in a call to model.fit_generator()
示例2
def load_dataset():
    """Load CIFAR-10 through Keras and prepare it for training.

    Images are cast to float32, passed through ``preprocess_input`` and
    reshaped to ``(-1, 32, 32, 3)`` float32 arrays; labels are one-hot
    encoded with ``to_categorical``.

    :return: ``((x_train, y_train), (x_test, y_test))``.
    """
    # Imported inside the function, as in the original code.
    from keras.datasets import cifar10

    def _prepare_images(images):
        # Cast, apply the model-specific preprocessing, then fix the layout.
        processed = preprocess_input(images.astype('float32'))
        return processed.reshape(-1, 32, 32, 3).astype('float32')

    def _prepare_labels(labels):
        return to_categorical(labels.astype('float32'))

    train_split, test_split = cifar10.load_data()
    x_train, y_train = train_split
    x_test, y_test = test_split

    x_train = _prepare_images(x_train)
    x_test = _prepare_images(x_test)
    y_train = _prepare_labels(y_train)
    y_test = _prepare_labels(y_test)
    return (x_train, y_train), (x_test, y_test)
示例3
def __getitem__(self, index):
    """Return one batch of features and one-hot labels.

    :param index: batch number; selects the slice
        ``[index * batch_size, (index + 1) * batch_size)`` of
        ``self.indexes``.
    :return: ``(features, y_cat)`` where ``features`` is
        ``self.features[indexes, np.newaxis]`` and ``y_cat`` is the one-hot
        encoding of the matching labels over ``self.n_classes`` classes.
    """
    start = index * self.batch_size
    indexes = self.indexes[start:start + self.batch_size]

    # Size the label buffer from the slice actually obtained: the final
    # batch can be shorter than batch_size, and looping batch_size times
    # would then index past the end of `indexes`.
    y_int = np.empty((len(indexes), 1), dtype='int')
    for row, sample_idx in enumerate(indexes):
        y_int[row] = int(self.labels[sample_idx])
    y_cat = to_categorical(y_int, num_classes=self.n_classes)

    # Insert a singleton axis right after the batch axis, e.g.
    # (batch_size, 1, time, freq) for a channels_first CNN input.
    features = self.features[indexes, np.newaxis]
    return features, y_cat
示例4
def __iter__(self, random=False):
    """Yield batches of ``([token_ids, segment_ids], labels)``.

    Samples come from ``self.sample(random)``. Each text is encoded with the
    free name ``tokenizer`` (called with ``maxlen=maxlen``); token and
    segment ids are padded with ``sequence_padding`` and labels are one-hot
    encoded over ``num_classes``. A batch is emitted once it holds
    ``self.batch_size`` samples or when the sample stream flags its end.

    :param random: forwarded unchanged to ``self.sample``.
    """
    token_batch, segment_batch, label_batch = [], [], []
    for is_end, (text, label) in self.sample(random):
        token_ids, segment_ids = tokenizer.encode(text, maxlen=maxlen)
        token_batch.append(token_ids)
        segment_batch.append(segment_ids)
        label_batch.append(label)

        # Keep accumulating until the batch is full or the data ran out.
        if not (len(token_batch) == self.batch_size or is_end):
            continue

        padded_tokens = sequence_padding(token_batch)
        padded_segments = sequence_padding(segment_batch)
        one_hot_labels = to_categorical(label_batch, num_classes)
        yield [padded_tokens, padded_segments], one_hot_labels
        token_batch, segment_batch, label_batch = [], [], []
# 转换数据集
示例5
def get_train_data(input_img_h5, input_ques_h5, data_limit=215359):
    """Load training inputs and one-hot answers from two HDF5 files.

    :param input_img_h5: path of the HDF5 file holding ``images_train``.
    :param input_ques_h5: path of the HDF5 file holding ``img_pos_train``,
        ``ques_train``, ``ques_length_train`` and ``answers``.
    :param data_limit: maximum number of samples to load.
    :return: ``(train_X, train_y)`` where ``train_X`` is
        ``[image_features, questions]`` and ``train_y`` holds the one-hot
        encoded answers.
    """
    # Open read-only and close both files deterministically. Everything
    # needed later is copied into memory before the handles are released.
    with h5py.File(input_img_h5, 'r') as img_file, \
            h5py.File(input_ques_h5, 'r') as ques_file:
        img_data = np.array(img_file['images_train'])
        img_pos_train = ques_file['img_pos_train'][:data_limit]
        ques_train = np.array(ques_file['ques_train'])[:data_limit, :]
        ques_length_train = np.array(ques_file['ques_length_train'])[:data_limit]
        # NOTE: the one-hots should have been constructed from an exhaustive
        # list of answers, because some answers may not be in the dataset.
        # To temporarily rectify this, all those answer indices are set to 1
        # in the validation set.
        train_y = to_categorical(ques_file['answers'])[:data_limit, :]

    # Positions are shifted down by one before indexing the image features.
    train_img_data = np.array([img_data[pos - 1, :] for pos in img_pos_train])

    # Normalise each image row by its L2 norm. Broadcasting against a column
    # of norms works for any feature width instead of only 4096.
    norms = np.sqrt(np.sum(np.multiply(train_img_data, train_img_data), axis=1))
    train_img_data = np.divide(train_img_data, norms[:, np.newaxis])

    # Shift the padding to the left side.
    ques_train = right_align(ques_train, ques_length_train)

    train_X = [train_img_data, ques_train]
    return train_X, train_y
示例6
def get_train_data(input_img_h5, input_ques_h5, data_limit=215359):
    """Load training inputs and one-hot answers from two HDF5 files.

    :param input_img_h5: path of the HDF5 file holding ``images_train``.
    :param input_ques_h5: path of the HDF5 file holding ``img_pos_train``,
        ``ques_train``, ``ques_length_train`` and ``answers``.
    :param data_limit: maximum number of samples to load.
    :return: ``(train_X, train_y)`` where ``train_X`` is
        ``[image_features, questions]`` and ``train_y`` holds the one-hot
        encoded answers.
    """
    # Open read-only and close both files deterministically. Everything
    # needed later is copied into memory before the handles are released.
    with h5py.File(input_img_h5, 'r') as img_file, \
            h5py.File(input_ques_h5, 'r') as ques_file:
        img_data = np.array(img_file['images_train'])
        img_pos_train = ques_file['img_pos_train'][:data_limit]
        ques_train = np.array(ques_file['ques_train'])[:data_limit, :]
        ques_length_train = np.array(ques_file['ques_length_train'])[:data_limit]
        # NOTE: the one-hots should have been constructed from an exhaustive
        # list of answers, because some answers may not be in the dataset.
        # To temporarily rectify this, all those answer indices are set to 1
        # in the validation set.
        train_y = to_categorical(ques_file['answers'])[:data_limit, :]

    # Positions are shifted down by one before indexing the image features.
    train_img_data = np.array([img_data[pos - 1, :] for pos in img_pos_train])

    # Normalise each image row by its L2 norm. Broadcasting against a column
    # of norms works for any feature width instead of only 4096.
    norms = np.sqrt(np.sum(np.multiply(train_img_data, train_img_data), axis=1))
    train_img_data = np.divide(train_img_data, norms[:, np.newaxis])

    # Shift the padding to the left side.
    ques_train = right_align(ques_train, ques_length_train)

    train_X = [train_img_data, ques_train]
    return train_X, train_y
示例7
def load_sents_from_file(self, file_path, encoding):
    """Yield ``(sent, chunk)`` batches read from a delimited text file.

    Each line holds a sentence and its tags separated by
    ``self.sent_delimiter``; both halves are split on
    ``self.word_delimiter``. Once ``self.batch_size`` lines are collected
    they are converted by the source/target tokenizers, padded by
    ``self._pad_seq`` and, unless ``self.sparse_target`` is set, the tags
    are one-hot encoded over ``self.tgt_vocab_size + 1`` classes.

    :param file_path: path of the text file to read.
    :param encoding: text encoding used to open the file.
    :return: generator of ``(sent, chunk)`` batches.
    """
    with open(file_path, encoding=encoding) as f:
        sent, chunk = [], []
        for line in f:
            # Strip only the line terminator. Blindly dropping the last
            # character corrupts the final line of a file that does not
            # end with a newline.
            line = line.rstrip('\n')
            chars, tags = line.split(self.sent_delimiter)
            sent.append(chars.split(self.word_delimiter))
            chunk.append(tags.split(self.word_delimiter))
            if len(sent) >= self.batch_size:
                sent = self.src_tokenizer.texts_to_sequences(sent)
                chunk = self.tgt_tokenizer.texts_to_sequences(chunk)
                sent, chunk = self._pad_seq(sent, chunk)
                if not self.sparse_target:
                    chunk = to_categorical(chunk, num_classes=self.tgt_vocab_size + 1)
                yield sent, chunk
                sent, chunk = [], []
        # NOTE(review): lines left over after the last full batch are not
        # yielded (unchanged from the original) -- confirm this is intended.
示例8
def generator_from_data(self, X, Y):
    """Endlessly yield random contiguous batches drawn from ``X`` and ``Y``.

    A window of ``self.batch_size`` consecutive rows is taken at a random
    offset on every step. After ``self.shuffle_batch`` steps both arrays are
    re-shuffled with one shared permutation. Targets are one-hot encoded
    over ``self.tgt_vocab_size + 1`` classes unless ``self.sparse_target``
    is set, in which case a new axis is inserted at position 2 instead.

    :param X: input array, indexed along its first axis.
    :param Y: target array aligned with ``X`` along the first axis.
    :return: infinite generator of ``(ret_x, ret_y)`` batches.
    """
    steps = 0
    total_size = X.shape[0]
    # Largest valid start offset for a full window. Clamped at 0 so a data
    # set no larger than one batch is yielded whole instead of making
    # randint fail on an empty range.
    last_start = max(total_size - self.batch_size, 0)
    while True:
        if steps >= self.shuffle_batch:
            order = list(range(total_size))
            np.random.shuffle(order)
            X = X[order]
            Y = Y[order]
            steps = 0
        # randint's upper bound is exclusive; +1 keeps the final window
        # reachable.
        sample_index = np.random.randint(0, last_start + 1)
        ret_x = X[sample_index:sample_index + self.batch_size]
        ret_y = Y[sample_index:sample_index + self.batch_size]
        if not self.sparse_target:
            ret_y = to_categorical(ret_y, num_classes=self.tgt_vocab_size + 1)
        else:
            ret_y = np.expand_dims(ret_y, 2)
        yield ret_x, ret_y
        steps += 1
示例9
def test_to_categorical():
    """Check ``to_categorical`` output shape and one-hot validity.

    Random integer labels of several shapes are encoded over five classes;
    each result must have the expected shape, contain only 0s and 1s with
    exactly one 1 along the last axis, and decode back to the labels.
    """
    num_classes = 5
    # (label shape, expected one-hot shape) pairs.
    cases = [
        ((1,), (1, num_classes)),
        ((3,), (3, num_classes)),
        ((4, 3), (4, 3, num_classes)),
        ((5, 4, 3), (5, 4, 3, num_classes)),
        ((3, 1), (3, num_classes)),
        ((3, 2, 1), (3, 2, num_classes)),
    ]
    labels = [np.random.randint(0, num_classes, shape) for shape, _ in cases]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for (_, expected_shape), label, one_hot in zip(cases, labels, one_hots):
        # Shape matches the table above.
        assert one_hot.shape == expected_shape
        # Only 0s and 1s survive a round trip through bool.
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Exactly one 1 along the class axis.
        assert np.all(one_hot.sum(axis=-1) == 1)
        # argmax recovers the original labels.
        decoded = np.argmax(one_hot, -1).reshape(label.shape)
        assert np.all(decoded == label)
示例10
def test_to_categorical():
    """Check ``to_categorical`` output shape and one-hot validity.

    Random integer labels of several shapes are encoded over five classes;
    each result must have the expected shape, contain only 0s and 1s with
    exactly one 1 along the last axis, and decode back to the labels.
    """
    num_classes = 5
    # (label shape, expected one-hot shape) pairs.
    cases = [
        ((1,), (1, num_classes)),
        ((3,), (3, num_classes)),
        ((4, 3), (4, 3, num_classes)),
        ((5, 4, 3), (5, 4, 3, num_classes)),
        ((3, 1), (3, num_classes)),
        ((3, 2, 1), (3, 2, num_classes)),
    ]
    labels = [np.random.randint(0, num_classes, shape) for shape, _ in cases]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for (_, expected_shape), label, one_hot in zip(cases, labels, one_hots):
        # Shape matches the table above.
        assert one_hot.shape == expected_shape
        # Only 0s and 1s survive a round trip through bool.
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Exactly one 1 along the class axis.
        assert np.all(one_hot.sum(axis=-1) == 1)
        # argmax recovers the original labels.
        decoded = np.argmax(one_hot, -1).reshape(label.shape)
        assert np.all(decoded == label)
示例11
def test_to_categorical():
    """Check ``to_categorical`` output shape and one-hot validity.

    Random integer labels of several shapes are encoded over five classes;
    each result must have the expected shape, contain only 0s and 1s with
    exactly one 1 along the last axis, and decode back to the labels.
    """
    num_classes = 5
    # (label shape, expected one-hot shape) pairs.
    cases = [
        ((1,), (1, num_classes)),
        ((3,), (3, num_classes)),
        ((4, 3), (4, 3, num_classes)),
        ((5, 4, 3), (5, 4, 3, num_classes)),
        ((3, 1), (3, num_classes)),
        ((3, 2, 1), (3, 2, num_classes)),
    ]
    labels = [np.random.randint(0, num_classes, shape) for shape, _ in cases]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for (_, expected_shape), label, one_hot in zip(cases, labels, one_hots):
        # Shape matches the table above.
        assert one_hot.shape == expected_shape
        # Only 0s and 1s survive a round trip through bool.
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Exactly one 1 along the class axis.
        assert np.all(one_hot.sum(axis=-1) == 1)
        # argmax recovers the original labels.
        decoded = np.argmax(one_hot, -1).reshape(label.shape)
        assert np.all(decoded == label)
示例12
def test_to_categorical():
    """Check ``to_categorical`` output shape and one-hot validity.

    Random integer labels of several shapes are encoded over five classes;
    each result must have the expected shape, contain only 0s and 1s with
    exactly one 1 along the last axis, and decode back to the labels.
    """
    num_classes = 5
    # (label shape, expected one-hot shape) pairs.
    cases = [
        ((1,), (1, num_classes)),
        ((3,), (3, num_classes)),
        ((4, 3), (4, 3, num_classes)),
        ((5, 4, 3), (5, 4, 3, num_classes)),
        ((3, 1), (3, num_classes)),
        ((3, 2, 1), (3, 2, num_classes)),
    ]
    labels = [np.random.randint(0, num_classes, shape) for shape, _ in cases]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for (_, expected_shape), label, one_hot in zip(cases, labels, one_hots):
        # Shape matches the table above.
        assert one_hot.shape == expected_shape
        # Only 0s and 1s survive a round trip through bool.
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Exactly one 1 along the class axis.
        assert np.all(one_hot.sum(axis=-1) == 1)
        # argmax recovers the original labels.
        decoded = np.argmax(one_hot, -1).reshape(label.shape)
        assert np.all(decoded == label)
示例13
def test_to_categorical():
    """Check ``to_categorical`` output shape and one-hot validity.

    Random integer labels of several shapes are encoded over five classes;
    each result must have the expected shape, contain only 0s and 1s with
    exactly one 1 along the last axis, and decode back to the labels.
    """
    num_classes = 5
    # (label shape, expected one-hot shape) pairs.
    cases = [
        ((1,), (1, num_classes)),
        ((3,), (3, num_classes)),
        ((4, 3), (4, 3, num_classes)),
        ((5, 4, 3), (5, 4, 3, num_classes)),
        ((3, 1), (3, num_classes)),
        ((3, 2, 1), (3, 2, num_classes)),
    ]
    labels = [np.random.randint(0, num_classes, shape) for shape, _ in cases]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for (_, expected_shape), label, one_hot in zip(cases, labels, one_hots):
        # Shape matches the table above.
        assert one_hot.shape == expected_shape
        # Only 0s and 1s survive a round trip through bool.
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Exactly one 1 along the class axis.
        assert np.all(one_hot.sum(axis=-1) == 1)
        # argmax recovers the original labels.
        decoded = np.argmax(one_hot, -1).reshape(label.shape)
        assert np.all(decoded == label)
示例14
def test_to_categorical():
    """Check ``to_categorical`` output shape and one-hot validity.

    Random integer labels of several shapes are encoded over five classes;
    each result must have the expected shape, contain only 0s and 1s with
    exactly one 1 along the last axis, and decode back to the labels.
    """
    num_classes = 5
    # (label shape, expected one-hot shape) pairs.
    cases = [
        ((1,), (1, num_classes)),
        ((3,), (3, num_classes)),
        ((4, 3), (4, 3, num_classes)),
        ((5, 4, 3), (5, 4, 3, num_classes)),
        ((3, 1), (3, num_classes)),
        ((3, 2, 1), (3, 2, num_classes)),
    ]
    labels = [np.random.randint(0, num_classes, shape) for shape, _ in cases]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for (_, expected_shape), label, one_hot in zip(cases, labels, one_hots):
        # Shape matches the table above.
        assert one_hot.shape == expected_shape
        # Only 0s and 1s survive a round trip through bool.
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Exactly one 1 along the class axis.
        assert np.all(one_hot.sum(axis=-1) == 1)
        # argmax recovers the original labels.
        decoded = np.argmax(one_hot, -1).reshape(label.shape)
        assert np.all(decoded == label)
示例15
def test_to_categorical():
    """Check ``to_categorical`` output shape and one-hot validity.

    Random integer labels of several shapes are encoded over five classes;
    each result must have the expected shape, contain only 0s and 1s with
    exactly one 1 along the last axis, and decode back to the labels.
    """
    num_classes = 5
    # (label shape, expected one-hot shape) pairs.
    cases = [
        ((1,), (1, num_classes)),
        ((3,), (3, num_classes)),
        ((4, 3), (4, 3, num_classes)),
        ((5, 4, 3), (5, 4, 3, num_classes)),
        ((3, 1), (3, num_classes)),
        ((3, 2, 1), (3, 2, num_classes)),
    ]
    labels = [np.random.randint(0, num_classes, shape) for shape, _ in cases]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for (_, expected_shape), label, one_hot in zip(cases, labels, one_hots):
        # Shape matches the table above.
        assert one_hot.shape == expected_shape
        # Only 0s and 1s survive a round trip through bool.
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Exactly one 1 along the class axis.
        assert np.all(one_hot.sum(axis=-1) == 1)
        # argmax recovers the original labels.
        decoded = np.argmax(one_hot, -1).reshape(label.shape)
        assert np.all(decoded == label)
示例16
def label_tagging(data_x_s, tag_label2index, len_max=32):
    """Map tag sequences to padded, one-hot encoded label arrays.

    Every sequence is translated through ``tag_label2index``, prefixed with
    the ``'O'`` label and then filled (short sequences) or closed (long
    sequences) with ``'O'``. The result is padded/truncated at the end to
    ``len_max`` and one-hot encoded over ``len(tag_label2index)`` classes.

    :param data_x_s: list of tag sequences.
    :param tag_label2index: dict mapping each tag, including ``'O'``, to its
        integer index.
    :param len_max: target sequence length.
    :return: ``(one_hot_y, label_num)``.
    """
    outside = tag_label2index['O']
    tag_labels = []
    for tags in data_x_s:
        if len(tags) > len_max - 2:
            # Long sequence: keep the first len_max - 1 tags and close with 'O'.
            kept, tail = tags[:len_max - 1], [outside]
        else:
            # Short sequence: fill the remaining positions with 'O'.
            kept, tail = tags, [outside] * (len_max - len(tags) - 1)
        tag_labels.append([outside] + [tag_label2index[tag] for tag in kept] + tail)

    tag_labels_pad = pad_sequences(sequences=tag_labels, maxlen=len_max, dtype='int32',
                                   padding='post', truncating='post', value=outside)
    one_hot_y = to_categorical(tag_labels_pad, num_classes=len(tag_label2index))
    # NOTE(review): this counts distinct label *sequences* (by their string
    # form), not distinct labels -- confirm that is what callers expect.
    label_num = len({str(sequence) for sequence in tag_labels})
    return one_hot_y, label_num
示例17
def _data_generator(self, batch_samples: List[dict]) -> Tuple[np.array, np.array]:
    """Build the image tensor and one-hot label matrix for one batch.

    :param batch_samples: sample dicts providing ``'image_id'`` and
        ``'label'``.
    :return: ``(X, y)`` -- images after ``self.basenet_preprocess`` and
        one-hot labels over ``self.n_classes`` classes.
    """
    is_training = self.train == True
    # Training images are randomly cropped, so they use the crop size.
    dims = self.img_crop_dims if is_training else self.img_load_dims
    batch_len = len(batch_samples)

    # Pre-allocate both tensors and fill them row by row.
    X = np.empty((batch_len, *dims, 3))
    y = np.empty((batch_len, self.n_classes))

    for row, sample in enumerate(batch_samples):
        img_file = self.image_dir / sample['image_id']
        img = np.asarray(load_image(img_file, self.img_load_dims))
        if is_training:
            img = random_crop(img, self.img_crop_dims)
        X[row] = img
        # TODO: more efficient by preprocessing
        y[row] = to_categorical([sample['label']], num_classes=self.n_classes)

    # Apply the base-network specific preprocessing to the 4D image array.
    return self.basenet_preprocess(X), y
示例18
def sample_generator_input(self, batch_size):
    """Draw random generator inputs: Gaussian noise plus one-hot labels.

    :param batch_size: number of samples to draw.
    :return: ``(sampled_noise, sampled_labels)`` -- standard-normal noise of
        shape ``(batch_size, 62)`` and one-hot labels over
        ``self.num_classes`` classes.
    """
    noise_shape = (batch_size, 62)
    sampled_noise = np.random.normal(0, 1, noise_shape)
    # Uniform random class ids, arranged as a column before encoding.
    label_ids = np.random.randint(0, self.num_classes, batch_size)
    label_column = label_ids.reshape(-1, 1)
    one_hot_labels = to_categorical(label_column, num_classes=self.num_classes)
    return sampled_noise, one_hot_labels
示例19
def sample_images(self, epoch):
    """Save a 10x10 grid of generated images to ``images/<epoch>.png``.

    Column ``i`` of the grid shows images the generator produces for class
    label ``i`` combined with freshly sampled noise.

    :param epoch: integer used to name the output file.
    """
    rows, cols = 10, 10
    fig, axs = plt.subplots(rows, cols)
    for class_id in range(cols):
        sampled_noise, _ = self.sample_generator_input(cols)
        class_column = np.full(fill_value=class_id, shape=(rows, 1))
        label = to_categorical(class_column, num_classes=self.num_classes)
        gen_input = np.concatenate((sampled_noise, label), axis=1)
        # Affine rescale of the generator output (presumably from [-1, 1]
        # to [0, 1] -- confirm against the generator's output activation).
        gen_imgs = 0.5 * self.generator.predict(gen_input) + 0.5
        for row in range(rows):
            cell = axs[row, class_id]
            cell.imshow(gen_imgs[row, :, :, 0], cmap='gray')
            cell.axis('off')
    fig.savefig("images/%d.png" % epoch)
    plt.close()
示例20
def __getitem__(self, idx):
    """Return batch ``idx`` in the form required by ``self.task``.

    * ``"category"``   -> ``(inputs, one-hot targets)``
    * ``"binary"``     -> ``(inputs, int targets)``
    * ``"regression"`` -> ``(inputs, float targets)``
    * ``"input_only"`` -> ``inputs``

    Any other task value yields ``None``. ``inputs`` is
    ``self.tensorize(batch_x, batch_c)``.
    """
    window = slice(idx * self.batch, (idx + 1) * self.batch)
    batch_x, batch_c, batch_y = self.x[window], self.c[window], self.y[window]

    task = self.task
    if task not in ("category", "binary", "regression", "input_only"):
        # Unknown task: nothing is built, mirroring the implicit None.
        return None

    inputs = self.tensorize(batch_x, batch_c)
    if task == "input_only":
        return inputs
    if task == "category":
        return inputs, to_categorical(batch_y)
    target_dtype = int if task == "binary" else float
    return inputs, np.array(batch_y, dtype=target_dtype)
示例21
def ValAcc():
    """Print what ``checkAcc`` reports for ``model`` on the validation data.

    The validation labels are one-hot encoded over 49 classes first.
    """
    val_data = getValData()
    one_hot_labels = to_categorical(getValLabels(), num_classes=49)
    result = checkAcc(model, val_data, one_hot_labels)
    print(result)
示例22
def ValAcc():
    """Print what ``checkAcc`` reports for ``model`` on the validation data.

    The validation labels are one-hot encoded over 49 classes first.
    """
    val_data = getValData()
    one_hot_labels = to_categorical(getValLabels(), num_classes=49)
    result = checkAcc(model, val_data, one_hot_labels)
    print(result)
示例23
def ValAcc():
    """Print what ``checkAcc`` reports for ``model`` on the validation data.

    The validation labels are one-hot encoded over 49 classes first.
    """
    val_data = getValData()
    one_hot_labels = to_categorical(getValLabels(), num_classes=49)
    result = checkAcc(model, val_data, one_hot_labels)
    print(result)
示例24
def ValAcc():
    """Print what ``checkAcc`` reports for ``model`` on the validation data.

    The validation data is reshaped to ``(n_samples, 300, -1)`` and the
    labels are one-hot encoded over 49 classes before the check.
    """
    val_data = getValData()
    raw_labels = getValLabels()
    # Keep the sample axis, force the second axis to 300 and let NumPy
    # infer the last one.
    n_samples = val_data.shape[0]
    val_data = val_data.reshape(n_samples, 300, -1)
    one_hot_labels = to_categorical(raw_labels, num_classes=49)
    result = checkAcc(model, val_data, one_hot_labels)
    print(result)
示例25
def ValAcc():
    """Print what ``checkAcc`` reports for ``model`` on the validation data.

    The validation labels are one-hot encoded over 49 classes first.
    """
    val_data = getValData()
    one_hot_labels = to_categorical(getValLabels(), num_classes=49)
    result = checkAcc(model, val_data, one_hot_labels)
    print(result)
示例26
def ValAcc():
    """Print what ``checkAcc`` reports for ``model`` on the validation data.

    The validation labels are one-hot encoded over 49 classes first.
    """
    val_data = getValData()
    one_hot_labels = to_categorical(getValLabels(), num_classes=49)
    result = checkAcc(model, val_data, one_hot_labels)
    print(result)
示例27
def split(self, random_state=0):
    """Stratified train/test split of ``self._data`` with one-hot targets.

    The last column of ``self._data`` is used as the class label, both for
    stratification and as the target; the remaining columns are features.

    :param random_state: seed forwarded to ``train_test_split``.
    :return: ``(self._data, x_train, x_test, y_train_b, y_test_b)`` where
        the ``*_b`` arrays are one-hot encoded targets of equal width.
    """
    labels = self._data[:, -1]
    (data_train, data_test) = train_test_split(
        self._data, stratify=labels, random_state=random_state)
    x_train = data_train[:, 0:-1]
    x_test = data_test[:, 0:-1]
    y_train = data_train[:, -1]
    y_test = data_test[:, -1]
    # Fix the class count from the full label column. Left to infer it from
    # each subset separately, to_categorical could produce train and test
    # targets of different widths when one subset lacks the highest class.
    num_classes = int(labels.max()) + 1
    y_train_b = to_categorical(y_train, num_classes=num_classes)
    y_test_b = to_categorical(y_test, num_classes=num_classes)
    return (self._data, x_train, x_test, y_train_b, y_test_b)
示例28
def test_keras_callback(self):
    """Check that ``F1Metrics`` decodes sequences and logs the expected F1.

    For both padding sides the callback is attached to a fake model that
    always outputs the one-hot encoded predictions; its decoded sequences
    and the ``'f1'`` value stored in the training history are compared with
    the reference labels and ``f1_score``.
    """
    expected_score = f1_score(self.y_true, self.y_pred)
    tokenizer = Tokenizer(lower=False)
    tokenizer.fit_on_texts(self.y_true)
    maxlen = max(len(row) for row in self.y_true)

    def prepare(y, padding):
        """Tokenize ``y``, pad/truncate on ``padding`` side, one-hot encode."""
        padded = pad_sequences(tokenizer.texts_to_sequences(y), maxlen=maxlen,
                               padding=padding, truncating=padding)
        return to_categorical(padded)

    for padding in ('pre', 'post'):
        callback = F1Metrics(id2label=tokenizer.index_word)
        y_true_cat = prepare(self.y_true, padding)
        y_pred_cat = prepare(self.y_pred, padding)
        n_samples = y_true_cat.shape[0]

        # A model that ignores its input and returns the prepared predictions.
        constant_layer = Lambda(lambda _: constant(y_pred_cat), input_shape=(1,))
        fake_model = Sequential(layers=[constant_layer])
        callback.set_model(fake_model)

        X = numpy.zeros((n_samples, 1))

        # The callback must translate sequences correctly by itself.
        y_true_cb, y_pred_cb = callback.predict(X, y_true_cat)
        self.assertEqual(y_pred_cb, self.y_pred)
        self.assertEqual(y_true_cb, self.y_true)

        # The callback must store the correct number in the logs.
        fake_model.compile(optimizer='adam', loss='categorical_crossentropy')
        history = fake_model.fit(
            x=X,
            batch_size=n_samples,
            y=y_true_cat,
            validation_data=(X, y_true_cat),
            callbacks=[callback],
        )
        actual_score = history.history['f1'][0]
        self.assertAlmostEqual(actual_score, expected_score)
示例29
def load_train_data(self):
    """Load training images and masks, one-hot encoding masks into 2 classes.

    :return: ``(imgs_train, imgs_mask_train)`` as provided by
        ``DataProcess.load_my_train_data``, with the masks passed through
        ``to_categorical(..., num_classes=2)``.
    """
    loader = DataProcess(self.img_rows, self.img_cols)
    images, masks = loader.load_my_train_data()
    return images, to_categorical(masks, num_classes=2)
示例30
def geometric_transform(image):
    """Create ``proxy_labels`` transformed copies of a 32x32x3 image.

    Copy ``i`` is the image rotated by ``i`` quarter turns for ``i <= 3``,
    flipped left-right for ``i == 4`` and flipped up-down for any larger
    ``i``; the index of the transform doubles as its label.

    :param image: array reshapeable to ``(32, 32, 3)``.
    :return: ``(images, one_hot_labels)``.
    """
    image = np.reshape(image, (32, 32, 3))
    images = np.empty((proxy_labels, 32, 32, 3), dtype='float32')
    labels = np.empty((proxy_labels,), dtype='uint8')

    def _apply(kind):
        # Select the geometric transform that belongs to this label index.
        if kind <= 3:
            return np.rot90(image, kind)
        if kind == 4:
            return np.fliplr(image)
        return np.flipud(image)

    for kind in range(proxy_labels):
        images[kind] = _apply(kind)
        labels[kind] = kind
    return (images, to_categorical(labels))