python - Why does ANN validation accuracy oscillate?

Tags: python tensorflow keras neural-network

The following training curves were generated by the same TensorFlow + Keras script, written in Python:

[Training-curve plot: validation accuracy vs. epoch for the three runs below]

  1. The red line uses five features.
  2. The green line uses seven features.
  3. The blue line uses nine features.

Can anyone tell me the possible reasons for the oscillation of the green line, so that I can troubleshoot my script?

Source code:

import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
#os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Use both gpus for training.


import sys, random
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
import numpy as np
from lxml import etree, objectify


# <editor-fold desc="GPU">
# resolve GPU related issues.
try:
    physical_devices = tf.config.list_physical_devices('GPU') 
    for gpu_instance in physical_devices: 
        tf.config.experimental.set_memory_growth(gpu_instance, True)
except Exception:
    # set_memory_growth must be called before the GPUs are initialized;
    # ignore the error if that has already happened
    pass
# END of try
# </editor-fold>


# <editor-fold desc="Lxml helper">
class LxmlHelper:
    @classmethod
    def objectify_xml(cls, input_path_dir):
        file_dom = etree.parse(input_path_dir)  # parse xml and convert it into DOM
        file_xml_bin = etree.tostring(file_dom, pretty_print=False, encoding="ascii")  # encode DOM into ASCII object
        file_xml_text = file_xml_bin.decode()  # convert binary ASCII object into ASCII text
        objectified_xml = objectify.fromstring(file_xml_text)  # convert text into an lxml objectify tree
        return objectified_xml
# </editor-fold>


# <editor-fold desc="def encode(letter)">
def encode(letter: str):
    if letter == 'H':
        return [1.0, 0.0, 0.0]
    elif letter == 'E':
        return [0.0, 1.0, 0.0]
    elif letter == 'C':
        return [0.0, 0.0, 1.0]
    elif letter == '-':
        return [0.0, 0.0, 0.0]
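    # any other character falls through and returns None;
    # encode_string_1 then catches the resulting TypeError and prints a diagnostic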
# END of function

def encode_string_1(pattern_str: str):
    # Iterate over the string
    one_hot_binary_str = []
    for ch in pattern_str:
        try:
            one_hot_binary_str = one_hot_binary_str + encode(ch)
        except Exception as e:
            print(pattern_str, one_hot_binary_str, ch)
    # END of for loop
    return one_hot_binary_str
# END of function

def encode_string_2(pattern_str: str):
    # Iterate over the string
    one_hot_binary_str = []
    for ch in pattern_str:
        temp_encoded_vect = [encode(ch)]
        one_hot_binary_str = one_hot_binary_str + temp_encoded_vect
    # END of for loop
    return one_hot_binary_str
# END of function
# </editor-fold>


# <editor-fold desc="def load_data()">
def load_data_k(fname: str, class_index: int, feature_start_index: int, **selection):
    """Loads data for training and validation

    :param fname: (``string``) - name of the file with the data
    :param selection: (``kwargs``) - see below
    :return: six TensorFlow tensors: training input, training class output, training pattern output, and their validation counterparts

    :Keyword Arguments:
        * *top_n_lines* (``number``) --
          take top N lines of the input and disregard the rest
        * *random_n_lines* (``number``) --
          take random N lines of the input and disregard the rest
        * *validation_part* (``float``) --
          separate N_lines * given_fraction of the input lines from the training set and use
          them for validation. When the given_fraction = 1.0, then the same input set of
          N_lines is used both for training and validation (this is the default)
    """
    with open(fname) as file:
        if "top_n_lines" in selection:
            lines = [next(file) for _ in range(int(selection["top_n_lines"]))]
        elif "random_n_lines" in selection:
            tmp_lines = file.readlines()
            lines = random.sample(tmp_lines, int(selection["random_n_lines"]))
        else:
            lines = file.readlines()

    data_x, data_y, data_z = [], [], []
    for l in lines:
        row = l.strip().split()  # split the line into whitespace-separated words
        x = [float(ix) for ix in row[feature_start_index:]]  # convert the feature columns into floats
        y = encode(row[class_index])  # one-hot encode the class letter
        z = encode_string_1(row[class_index+1])  # one-hot encode the whole pattern string
        data_x.append(x)  # append the vector into 'data_x'
        data_y.append(y)  # append the vector into 'data_y'
        data_z.append(z)  # append the vector into 'data_z'
    # END for l in lines

    num_rows = len(data_x)
    given_fraction = selection.get("validation_part", 1.0)
    if given_fraction > 0.9999:
        valid_x, valid_y, valid_z = data_x, data_y, data_z
    else:
        n = int(num_rows * given_fraction)
        # take the validation slice *before* truncating the training set;
        # the original order made the validation rows a subset of the
        # remaining training rows
        valid_x, valid_y, valid_z = data_x[:n], data_y[:n], data_z[:n]
        data_x, data_y, data_z = data_x[n:], data_y[n:], data_z[n:]
    # END of if-else block

    tx = tf.convert_to_tensor(data_x, np.float32)
    ty = tf.convert_to_tensor(data_y, np.float32)
    tz = tf.convert_to_tensor(data_z, np.float32)
    vx = tf.convert_to_tensor(valid_x, np.float32)
    vy = tf.convert_to_tensor(valid_y, np.float32)
    vz = tf.convert_to_tensor(valid_z, np.float32)

    return tx, ty, tz, vx, vy, vz
# END of the function
# </editor-fold>


# <editor-fold desc="def create_model()">
def create_model(n_hidden_1, n_hidden_2, num_classes, num_features):
    # create the model
    model = Sequential()
    model.add(tf.keras.layers.InputLayer(input_shape=(num_features,)))
    model.add(tf.keras.layers.Dense(n_hidden_1, activation='sigmoid'))
    model.add(tf.keras.layers.Dense(n_hidden_2, activation='sigmoid'))
    ###model.add(tf.keras.layers.Dense(n_hidden_3, activation='sigmoid'))
    model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

    # instantiate the optimizer
    opt = keras.optimizers.SGD(learning_rate=LEARNING_RATE)

    # compile the model
    model.compile(
        optimizer=opt,
        loss="categorical_crossentropy",
        metrics="categorical_accuracy"
    )

    # return model
    return model
# </editor-fold>


if __name__ == "__main__":
    # <editor-fold desc="(input/output parameters)">
    my_project_routine = LxmlHelper.objectify_xml("my_project_evaluate.xml")

    # input data
    INPUT_DATA_FILE = str(my_project_routine.input.input_data_file)
    INPUT_PATH = str(my_project_routine.input.input_path)
    CLASS_INDEX = int(my_project_routine.input.class_index)
    FEATURE_INDEX = int(my_project_routine.input.feature_index)

    # output data
    OUTPUT_PATH = str(my_project_routine.output.output_path)
    MODEL_FILE = str(my_project_routine.output.model_file)
    TRAINING_PROGRESS_FILE = str(my_project_routine.output.training_progress_file)

    # Learning parameters
    LEARNING_RATE = float(my_project_routine.training_params.learning_rate)
    EPOCH_SIZE = int(my_project_routine.training_params.epoch_size)
    BATCH_SIZE = int(my_project_routine.training_params.batch_size)
    INPUT_LINES_COUNT = int(my_project_routine.input.input_lines_count)
    VALIDATION_PART = float(my_project_routine.training_params.validation_part)
    SAVE_PERIOD = str(my_project_routine.output.save_period)

    # NN parameters
    HIDDEN_LAYER_1_NEURON_COUNT = int(my_project_routine.hidden_layers.one)
    HIDDEN_LAYER_2_NEURON_COUNT = int(my_project_routine.hidden_layers.two)
    ###HIDDEN_LAYER_3_NEURON_COUNT = int(my_project_routine.hidden_layers.three)
    CLASS_COUNT = int(my_project_routine.class_count)
    FEATURES_COUNT = int(my_project_routine.features_count)

    input_file_path_str = os.path.join(INPUT_PATH, INPUT_DATA_FILE)
    training_progress_file_path_str = os.path.join(OUTPUT_PATH, TRAINING_PROGRESS_FILE)
    model_file_path = os.path.join(OUTPUT_PATH, MODEL_FILE)

    # command-line arg processing
    input_file_name_str = None
    if len(sys.argv) > 1:
        input_file_name_str = sys.argv[1]
    else:
        input_file_name_str = input_file_path_str
    # END of if-else
    # </editor-fold>

    # <editor-fold desc="(load data from file)">
    # load training data from the disk
    train_x, train_y, _, validate_x, validate_y, _ = \
        load_data_k(
            fname=input_file_name_str,
            class_index=CLASS_INDEX,
            feature_start_index=FEATURE_INDEX,
            random_n_lines=INPUT_LINES_COUNT,
            validation_part=VALIDATION_PART
        )

    print("training data size : ", len(train_x))
    print("validation data size : ", len(validate_x))
    # </editor-fold>

    ### STEPS_PER_EPOCH = len(train_x) // BATCH_SIZE
    ### VALIDATION_STEPS = len(validate_x) // BATCH_SIZE

    # <editor-fold desc="(model creation)">
    # load previously saved NN model
    model = None
    try:
        model = keras.models.load_model(model_file_path)
        print("Loading NN model from file.")
        model.summary()
    except Exception as ex:
        print("No NN model found for loading.")
    # END of try-except
    # </editor-fold>

    # <editor-fold desc="(model run)">
    # # if there is no model loaded, create a new model
    if model is None:
        csv_logger = keras.callbacks.CSVLogger(training_progress_file_path_str)

        checkpoint = ModelCheckpoint(
            model_file_path,
            monitor='loss',
            verbose=1,
            save_best_only=True,
            mode='auto',
            save_freq='epoch'
        )

        callbacks_vector = [
            csv_logger,
            checkpoint
        ]

        # Set mirror strategy
        #strategy = tf.distribute.MirroredStrategy(devices=["/device:GPU:0","/device:GPU:1"])

        #with strategy.scope():
        print("New NN model created.")
        # create sequential NN model
        model = create_model(
            n_hidden_1=HIDDEN_LAYER_1_NEURON_COUNT,
            n_hidden_2=HIDDEN_LAYER_2_NEURON_COUNT,
            ##n_hidden_3=HIDDEN_LAYER_3_NEURON_COUNT,
            num_classes=CLASS_COUNT,
            num_features=FEATURES_COUNT
        )

        # Train the model with the new callback
        history = model.fit(
                train_x, train_y,
                validation_data=(validate_x, validate_y),
                batch_size=BATCH_SIZE,
                epochs=EPOCH_SIZE,
                callbacks=callbacks_vector,
                shuffle=True,
                verbose=2
            )

        print(history.history.keys())
    # END of ... if
    # </editor-fold>

Plotting script:

import os
import datetime
from argparse import ArgumentParser
from typing import List

import matplotlib.pyplot as plt
import numpy as np


class Quad:
    def __init__(self, x_vector, y_vector, color_char, label_str):
        self.__x_vector = x_vector
        self.__y_vector = y_vector
        self.__color_char = color_char
        self.__label_str = label_str

    def get_x_vector(self):
        return self.__x_vector

    def get_y_vector(self):
        return self.__y_vector

    def get_color_char(self):
        return self.__color_char

    def get_label_str(self):
        return self.__label_str


class HecaPlotClass:
    def __init__(self):
        self.__x_label_str: str = None
        self.__y_label_str: str = None
        self.__title_str: str = None
        self.__trio_vector: List[Quad] = []
        self.__plotter = plt

    @property
    def x_label_str(self):
        return self.__x_label_str

    @x_label_str.setter
    def x_label_str(self, t):
        self.__x_label_str = t

    @property
    def y_label_str(self):
        return self.__y_label_str

    @y_label_str.setter
    def y_label_str(self, t):
        self.__y_label_str = t

    @property
    def title_str(self):
        return self.__title_str

    @title_str.setter
    def title_str(self, t):
        self.__title_str = t

    def add_y_axes(self, trio_obj: Quad):
        self.__trio_vector.append(trio_obj)

    def generate_plot(self):
        for obj in self.__trio_vector:
            x_vector = obj.get_x_vector()
            y_vector = obj.get_y_vector()
            label_str = obj.get_label_str()
            # print(label_str)
            # print(len(x_vector))
            # print(len(y_vector))
            self.__plotter.plot(
                x_vector,
                y_vector,
                color=obj.get_color_char(),
                label=label_str
            )
        # END of ... for loop

        # Naming the x-axis, y_1_vector-axis and the whole graph
        self.__plotter.xlabel(self.__x_label_str)
        self.__plotter.ylabel(self.__y_label_str)
        self.__plotter.title(self.__title_str)

        # Adding a legend, which helps us recognize each curve by its color
        self.__plotter.legend()

        # To load the display window
        #self.__plotter.show()

    def save_png(self, output_directory_str):
        output_file_str = os.path.join(output_directory_str, self.__title_str + '.png')
        self.__plotter.savefig(output_file_str)

    def save_pdf(self, output_directory_str):
        output_file_str = os.path.join(output_directory_str, self.__title_str + '.pdf')
        self.__plotter.savefig(output_file_str)



class MainClass(object):
    __colors_vector = ['red', 'green', 'blue', 'cyan', 'magenta', 'yellow', 'orange', 'lightgreen', 'crimson']
    __working_dir = r"."
    __file_names_vector = ["training_progress-32.txt", "training_progress-64.txt", "training_progress-128.txt"]
    __input_files_vector = []
    __output_directory = None
    __column_no_int = 0
    __split_percentage_at_tail_int = 100
    __is_pdf_output = False
    __is_png_output = False

    # <editor-fold desc="def load_data()">
    @classmethod
    def __load_data(cls, fname: str, percentage_int: int, column_no_int: int):
        np_array = np.loadtxt(
            fname,
            # usecols=range(1,11),
            dtype=np.float32, 
            skiprows=1,
            delimiter=","
        )
        size_vector = np_array.shape
        array_len_int = size_vector[0]
        rows_count_int = int(percentage_int * array_len_int / 100)
        np_array = np_array[-rows_count_int:]
        x = np_array[:, 0]
        y = np_array[:, column_no_int]
        return x, y
    # END of the function
    # </editor-fold>

    # <editor-fold desc="(__parse_args())">
    @classmethod
    def __parse_args(cls):
        # initialize argument parser
        my_parser = ArgumentParser()
        my_parser.add_argument("-c", help="column no.", type=int)
        my_parser.add_argument('-i', nargs='+', help='a list of input files', required=True)
        my_parser.add_argument("-o", help="output directory", type=str)
        my_parser.add_argument("-n", help="percentage of data to split from tail", type=float)
        my_parser.add_argument("--pdf", help="PDF output", action='store_true')
        my_parser.add_argument("--png", help="PNG output", action='store_true')

        # parse the argument
        args = my_parser.parse_args()

        cls.__input_files_vector = args.i
        cls.__output_directory = args.o
        cls.__split_percentage_at_tail_int = args.n
        cls.__column_no_int = args.c
        cls.__is_pdf_output = args.pdf
        cls.__is_png_output = args.png
    # </editor-fold>

    @classmethod
    def main(cls):
        cls.__parse_args()

        if cls.__input_files_vector is None:
            cls.__input_files_vector = cls.__file_names_vector

        if cls.__output_directory is None:
            cls.__output_directory = cls.__working_dir

        if cls.__split_percentage_at_tail_int is None:
            cls.__split_percentage_at_tail_int = 100

        if cls.__column_no_int is None:
            cls.__column_no_int = 1

        my_project_plot_obj = HecaPlotClass()
        i = 0
        for file_path_str in cls.__input_files_vector:
            print(file_path_str)
            x_vector, y_vector = cls.__load_data(os.path.join(cls.__working_dir, file_path_str), cls.__split_percentage_at_tail_int, cls.__column_no_int)
            my_project_plot_obj.x_label_str = "Epoch"
            my_project_plot_obj.y_label_str = "Accuracy"
            my_project_plot_obj.title_str = "training_plot-{date:%Y-%m-%d_%H-%M-%S}".format(date=datetime.datetime.now())  # avoid ':' in file names (invalid on Windows)

            # pick the plot color deterministically, cycling through the palette
            color_index_int = i % len(cls.__colors_vector)
            my_project_plot_obj.add_y_axes(Quad(x_vector, y_vector, cls.__colors_vector[color_index_int], file_path_str))
            i = i + 1
        # END of ... for loop
        my_project_plot_obj.generate_plot()
        my_project_plot_obj.save_png(cls.__output_directory)
        my_project_plot_obj.save_pdf(cls.__output_directory)


if __name__ == "__main__":
    MainClass.main()

Best Answer

The main reason is probably improper data distribution (non-random, i.e., ordered).

If you look at the accuracy beyond epoch 180, it switches back and forth in an orderly way between roughly 0.43 and 0.33, occasionally dropping to about 0.23. More importantly, the accuracy trends downward as the epochs increase: the validation accuracy is not improving.

In this case, the accuracy may improve if you (1) decrease the batch size, or (2) use a better optimizer such as Adam. Also double-check the learning rate.
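As a minimal sketch of both suggestions, reusing `model`, `train_x`/`train_y`, and the constants from the script above (the concrete numbers here are placeholders to tune, not values taken from the question):

from tensorflow import keras

SMALLER_BATCH_SIZE = 16    # placeholder: try half or a quarter of the current BATCH_SIZE
ADAM_LEARNING_RATE = 1e-3  # Adam's default step size is a common starting point

# Adam adapts the step size per parameter, which often damps the
# epoch-to-epoch swings that plain SGD shows on noisy gradients.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=ADAM_LEARNING_RATE),
    loss="categorical_crossentropy",
    metrics=["categorical_accuracy"]
)
history = model.fit(
    train_x, train_y,
    validation_data=(validate_x, validate_y),
    batch_size=SMALLER_BATCH_SIZE,
    epochs=EPOCH_SIZE,
    shuffle=True,
    verbose=2
)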

These changes should also help with the drift and the oscillation.

In addition, you can plot a running average of the accuracy to hide the oscillation. Again, this is a mitigation rather than a correction; what it does is remove the ordering (the partitioning of the data) and blend neighboring points.
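A minimal sketch of that smoothing with NumPy, suitable for the plotting script above (the window length is an arbitrary choice, not something from the question):

import numpy as np

def running_average(y_vector, window=10):
    # Convolve with a flat window; mode="valid" keeps only the fully
    # covered positions, so the result has len(y_vector) - window + 1 points.
    return np.convolve(y_vector, np.ones(window) / window, mode="valid")

In `MainClass.main()`, the smoothed series could be passed to `Quad` in place of the raw `y_vector`, with the x-axis trimmed to match, e.g. `x_vector[window - 1:]`.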

Finally, I would also reshuffle the data and apply normalization after each layer, and see whether that helps.
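A sketch of both ideas, reusing the layer sizes from `create_model` in the question; BatchNormalization is assumed here as the per-layer normalization, since the answer does not name a specific technique:

import tensorflow as tf
from tensorflow.keras.models import Sequential

# In load_data_k, shuffling the lines before the train/validation split
# turns the held-out head into a random sample instead of an ordered block:
#     random.shuffle(lines)

def create_model_normalized(n_hidden_1, n_hidden_2, num_classes, num_features):
    model = Sequential()
    model.add(tf.keras.layers.InputLayer(input_shape=(num_features,)))
    model.add(tf.keras.layers.Dense(n_hidden_1, activation='sigmoid'))
    model.add(tf.keras.layers.BatchNormalization())  # normalize activations between layers
    model.add(tf.keras.layers.Dense(n_hidden_2, activation='sigmoid'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
    return model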

Regarding "python - Why does ANN validation accuracy oscillate?", a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/70378719/
