Python source code examples: keras.utils.to_categorical()
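keras.utils.to_categorical converts a vector (or array) of integer class indices into a one-hot encoded matrix, the label format expected by a categorical_crossentropy loss. A minimal sketch of the basic call, independent of the examples below (the classic keras import path is shown; under tf.keras the same function lives at tensorflow.keras.utils.to_categorical):

import numpy as np
from keras.utils import to_categorical

labels = np.array([0, 2, 1, 2])                  # integer class indices
one_hot = to_categorical(labels, num_classes=3)  # float32 array of shape (4, 3)
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]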

Example 1
def create_sequences(tokenizer, max_length, captions_list, image):
	# X1 : input for image features
	# X2 : input for text features
	# y  : output word
	X1, X2, y = list(), list(), list()
	vocab_size = len(tokenizer.word_index) + 1
	# Walk through each caption for the image
	for caption in captions_list:
		# Encode the sequence
		seq = tokenizer.texts_to_sequences([caption])[0]
		# Split one sequence into multiple X,y pairs
		for i in range(1, len(seq)):
			# Split into input and output pair
			in_seq, out_seq = seq[:i], seq[i]
			# Pad input sequence
			in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
			# Encode output sequence
			out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
			# Store
			X1.append(image)
			X2.append(in_seq)
			y.append(out_seq)
	return X1, X2, y
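
A hypothetical invocation of create_sequences, assuming pad_sequences and to_categorical are imported at module level (as the function body requires) and using a zero vector as a stand-in for a real CNN image feature:

from keras.preprocessing.text import Tokenizer
import numpy as np

captions = ['startseq a dog runs endseq', 'startseq a dog jumps endseq']
tokenizer = Tokenizer()
tokenizer.fit_on_texts(captions)
max_length = max(len(c.split()) for c in captions)
image_features = np.zeros(4096)  # placeholder feature vector, not real CNN output
X1, X2, y = create_sequences(tokenizer, max_length, captions, image_features)
# X1: repeated image features, X2: padded input word ids, y: one-hot next-word targets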

Example 2
def load_dataset():
    # Load the dataset from Keras
    from keras.datasets import cifar10
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    # Preprocessing the dataset
    # preprocess_input is assumed to be imported from the matching
    # keras.applications module (e.g. keras.applications.vgg16.preprocess_input)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train = preprocess_input(x_train)
    x_test = preprocess_input(x_test)
    x_train = x_train.reshape(-1, 32, 32, 3).astype('float32') 
    x_test = x_test.reshape(-1, 32, 32, 3).astype('float32')
    y_train = to_categorical(y_train.astype('float32'))
    y_test = to_categorical(y_test.astype('float32'))

    return (x_train, y_train), (x_test, y_test) 
Example 3
def __getitem__(self, index):
        """
        takes an index (batch number) and returns one batch of self.batch_size
        :param index:
        :return:
        """
        # index is taken care of by the Sequencer inherited
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # fetch labels for the batch
        y_int = np.empty((self.batch_size, 1), dtype='int')
        for tt in np.arange(self.batch_size):
            y_int[tt] = int(self.labels[indexes[tt]])
        y_cat = to_categorical(y_int, num_classes=self.n_classes)

        # fetch features for the batch and adjust format to input CNN
        # (batch_size, 1, time, freq) for channels_first
        features = self.features[indexes, np.newaxis]
        return features, y_cat 
Example 4
def __iter__(self, random=False):
        batch_token_ids, batch_segment_ids, batch_labels = [], [], []
        for is_end, (text, label) in self.sample(random):
            token_ids, segment_ids = tokenizer.encode(text, maxlen=maxlen)
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)
            batch_labels.append(label)
            if len(batch_token_ids) == self.batch_size or is_end:
                batch_token_ids = sequence_padding(batch_token_ids)
                batch_segment_ids = sequence_padding(batch_segment_ids)
                batch_labels = to_categorical(batch_labels, num_classes)
                yield [batch_token_ids, batch_segment_ids], batch_labels
                batch_token_ids, batch_segment_ids, batch_labels = [], [], []


Example 5
def get_train_data(input_img_h5, input_ques_h5, data_limit=215359):
    img_data = h5py.File(input_img_h5, 'r')
    ques_data = h5py.File(input_ques_h5, 'r')
  
    img_data = np.array(img_data['images_train'])
    img_pos_train = ques_data['img_pos_train'][:data_limit]
    train_img_data = np.array([img_data[_-1,:] for _ in img_pos_train])
    # Normalizing images
    tem = np.sqrt(np.sum(np.multiply(train_img_data, train_img_data), axis=1))
    train_img_data = np.divide(train_img_data, np.transpose(np.tile(tem,(4096,1))))

    # Shift the padding to the left side
    ques_train = np.array(ques_data['ques_train'])[:data_limit, :]
    ques_length_train = np.array(ques_data['ques_length_train'])[:data_limit]
    ques_train = right_align(ques_train, ques_length_train)

    train_X = [train_img_data, ques_train]
    # NOTE: one-hots should have been constructed from an exhaustive list of answers,
    # because some answers may not appear in this dataset.
    # To temporarily rectify this, all such answer indices are set to 1 in the validation set.
    train_y = to_categorical(ques_data['answers'])[:data_limit, :]

    return train_X, train_y 
Example 6
def load_sents_from_file(self, file_path, encoding):
        with open(file_path, encoding=encoding) as f:
            sent, chunk = [], []
            for line in f:
                line = line[:-1]
                chars, tags = line.split(self.sent_delimiter)
                sent.append(chars.split(self.word_delimiter))
                chunk.append(tags.split(self.word_delimiter))
                if len(sent) >= self.batch_size:
                    sent = self.src_tokenizer.texts_to_sequences(sent)
                    chunk = self.tgt_tokenizer.texts_to_sequences(chunk)
                    sent, chunk = self._pad_seq(sent, chunk)
                    if not self.sparse_target:
                        chunk = to_categorical(chunk, num_classes=self.tgt_vocab_size + 1)
                    yield sent, chunk
                    sent, chunk = [], [] 
Example 7
def generator_from_data(self, X, Y):
        steps = 0
        total_size = X.shape[0]
        while True:
            if steps >= self.shuffle_batch:
                indicates = list(range(total_size))
                np.random.shuffle(indicates)
                X = X[indicates]
                Y = Y[indicates]
                steps = 0
            sample_index = np.random.randint(0, total_size - self.batch_size)
            ret_x = X[sample_index:sample_index + self.batch_size]
            ret_y = Y[sample_index:sample_index + self.batch_size]

            if not self.sparse_target:
                ret_y = to_categorical(ret_y, num_classes=self.tgt_vocab_size + 1)
            else:
                ret_y = np.expand_dims(ret_y, 2)
            yield ret_x, ret_y
            steps += 1 
Example 8
def test_to_categorical():
    num_classes = 5
    shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
    expected_shapes = [(1, num_classes),
                       (3, num_classes),
                       (4, 3, num_classes),
                       (5, 4, 3, num_classes),
                       (3, num_classes),
                       (3, 2, num_classes)]
    labels = [np.random.randint(0, num_classes, shape) for shape in shapes]
    one_hots = [to_categorical(label, num_classes) for label in labels]
    for label, one_hot, expected_shape in zip(labels,
                                              one_hots,
                                              expected_shapes):
        # Check shape
        assert one_hot.shape == expected_shape
        # Make sure there are only 0s and 1s
        assert np.array_equal(one_hot, one_hot.astype(bool))
        # Make sure there is exactly one 1 in a row
        assert np.all(one_hot.sum(axis=-1) == 1)
        # Get original labels back from one hots
        assert np.all(np.argmax(one_hot, -1).reshape(label.shape) == label) 
Example 9
def label_tagging(data_x_s, tag_label2index, len_max=32):
    """
        Pad the tag sequences and one-hot encode them with to_categorical, given the tag-to-index dict, the tag corpus, and the maximum text length.
    :param data_x_s: list
    :param tag_label2index:dict 
    :param len_max: int
    :return: list
    """
    tag_labels = []
    for data_x in data_x_s:
        if len(data_x) <= len_max-2:
            tag_labels.append([tag_label2index['O']] + [tag_label2index[i] for i in data_x] + [tag_label2index['O'] for i in range(len_max - len(data_x) - 1)])
        else:
            tag_labels.append([tag_label2index['O']] + [tag_label2index[i] for i in data_x[:len_max-1]] + [tag_label2index['O']])

    tag_labels_pad = pad_sequences(sequences=tag_labels, maxlen=len_max, dtype='int32',
                                padding='post', truncating='post', value=tag_label2index['O'])
    one_hot_y = to_categorical(tag_labels_pad, num_classes=len(tag_label2index))

    label_num = len(set(["".join(str(i)) for i in tag_labels]))
    # tag_labels_pad_to = to_categorical(y=tag_labels_pad.tolist(), num_classes=label_num)
    return one_hot_y, label_num 
Example 10
def _data_generator(self, batch_samples: List[dict]) -> Tuple[np.array, np.array]:
        """Generates data from samples in specified batch."""
        #  initialize images and labels tensors for faster processing
        dims = self.img_crop_dims if self.train == True else self.img_load_dims
        X = np.empty((len(batch_samples), *dims, 3))
        y = np.empty((len(batch_samples), self.n_classes))

        for i, sample in enumerate(batch_samples):
            # load and randomly augment image
            img_file = self.image_dir / sample['image_id']
            img = np.asarray(load_image(img_file, self.img_load_dims))
            if self.train == True:
                img = random_crop(img, self.img_crop_dims)
            X[i,] = img

            # TODO: more efficient by preprocessing
            y[i,] = to_categorical([sample['label']], num_classes=self.n_classes)

        # apply basenet specific preprocessing
        # input is 4D numpy array of RGB values within [0, 255]
        X = self.basenet_preprocess(X)

        return X, y 
Example 11
def sample_generator_input(self, batch_size):
        # Generator inputs
        sampled_noise = np.random.normal(0, 1, (batch_size, 62))
        sampled_labels = np.random.randint(0, self.num_classes, batch_size).reshape(-1, 1)
        sampled_labels = to_categorical(sampled_labels, num_classes=self.num_classes)

        return sampled_noise, sampled_labels 
Example 12
def sample_images(self, epoch):
        r, c = 10, 10

        fig, axs = plt.subplots(r, c)
        for i in range(c):
            sampled_noise, _ = self.sample_generator_input(c)
            label = to_categorical(np.full(fill_value=i, shape=(r,1)), num_classes=self.num_classes)
            gen_input = np.concatenate((sampled_noise, label), axis=1)
            gen_imgs = self.generator.predict(gen_input)
            gen_imgs = 0.5 * gen_imgs + 0.5
            for j in range(r):
                axs[j,i].imshow(gen_imgs[j,:,:,0], cmap='gray')
                axs[j,i].axis('off')
        fig.savefig("images/%d.png" % epoch)
        plt.close() 
Example 13
def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch:(idx + 1) * self.batch]
        batch_c = self.c[idx * self.batch:(idx + 1) * self.batch]
        batch_y = self.y[idx * self.batch:(idx + 1) * self.batch]

        if self.task == "category":
            return self.tensorize(batch_x, batch_c), to_categorical(batch_y)
        elif self.task == "binary":
            return self.tensorize(batch_x, batch_c), np.array(batch_y, dtype=int)
        elif self.task == "regression":
            return self.tensorize(batch_x, batch_c), np.array(batch_y, dtype=float)
        elif self.task == "input_only":
            return self.tensorize(batch_x, batch_c) 
Example 14
def ValAcc():
	trainingData = getValData()
	labels = getValLabels()
	labels = to_categorical(labels,num_classes=49)
	print(checkAcc(model,trainingData,labels)) 
Example 15
def ValAcc():
	trainingData = getValData()
	labels = getValLabels()
	temp_shape = trainingData.shape 
	trainingData = trainingData.reshape(temp_shape[0],300,-1)
	labels = to_categorical(labels,num_classes=49)
	print(checkAcc(model,trainingData,labels)) 
Example 16
def split(self, random_state=0):
        (data_train, data_test) = train_test_split(self._data, stratify=self._data[:,-1], random_state=random_state)

        x_train = data_train[:,0:-1]
        x_test = data_test[:, 0:-1]
        y_train = data_train[:, -1]
        y_test = data_test[:, -1]

        y_train_b = to_categorical(y_train)
        y_test_b = to_categorical(y_test)

        return (self._data, x_train, x_test, y_train_b, y_test_b) 
Example 17
def test_keras_callback(self):
        expected_score = f1_score(self.y_true, self.y_pred)
        tokenizer = Tokenizer(lower=False)
        tokenizer.fit_on_texts(self.y_true)
        maxlen = max((len(row) for row in self.y_true))

        def prepare(y, padding):
            indexes = tokenizer.texts_to_sequences(y)
            padded = pad_sequences(indexes, maxlen=maxlen, padding=padding, truncating=padding)
            categorical = to_categorical(padded)
            return categorical

        for padding in ('pre', 'post'):
            callback = F1Metrics(id2label=tokenizer.index_word)
            y_true_cat = prepare(self.y_true, padding)
            y_pred_cat = prepare(self.y_pred, padding)

            input_shape = (1,)
            layer = Lambda(lambda _: constant(y_pred_cat), input_shape=input_shape)
            fake_model = Sequential(layers=[layer])
            callback.set_model(fake_model)

            X = numpy.zeros((y_true_cat.shape[0], 1))

            # Verify that the callback translates sequences correctly by itself
            y_true_cb, y_pred_cb = callback.predict(X, y_true_cat)
            self.assertEqual(y_pred_cb, self.y_pred)
            self.assertEqual(y_true_cb, self.y_true)

            # Verify that the callback stores the correct number in logs
            fake_model.compile(optimizer='adam', loss='categorical_crossentropy')
            history = fake_model.fit(x=X, batch_size=y_true_cat.shape[0], y=y_true_cat,
                                     validation_data=(X, y_true_cat),
                                     callbacks=[callback])
            actual_score = history.history['f1'][0]
            self.assertAlmostEqual(actual_score, expected_score) 
Example 18
def load_train_data(self):
        mydata = DataProcess(self.img_rows, self.img_cols)
        imgs_train, imgs_mask_train = mydata.load_my_train_data()
        imgs_mask_train = to_categorical(imgs_mask_train, num_classes=2)
        return imgs_train, imgs_mask_train 
Example 19
def geometric_transform(image):
    image = np.reshape(image, (32, 32, 3))
    labels = np.empty((proxy_labels,), dtype='uint8')
    images = np.empty((proxy_labels, 32, 32, 3), dtype='float32')
    for i in range(proxy_labels):
        if i <= 3:
            t = np.rot90(image, i)
        elif i == 4:
            t = np.fliplr(image)
        else:
            t = np.flipud(image)
        images[i] = t
        labels[i] = i
    return (images, to_categorical(labels))