我目前正在学习数据挖掘课程。在其中一个项目中，我们需要先在已提供类标签的训练数据集上构建分类器，再用它来预测未知数据集的类标签。
我们只需要获得 80% 的准确率即可在作业中获得满分。我已经使用 J48 决策树算法 (acc=84.08%) 实现了这一点。
此外还有一场比赛，看谁能获得最高的准确率（由我们看不到的评测系统判定）。
我有两个问题:
- 我该如何使用集成方法（ensemble methods）来进一步提高准确率？
- 有没有办法优化每个分类器的参数?
import java.io.*;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.*;
import weka.classifiers.trees.*;
import weka.classifiers.Evaluation;
public class CompClassifier {
public static FileOutputStream Output;
public static PrintStream file;
/**
 * Trains a J48 decision tree on the labelled training set and writes one
 * predicted class index per instance of the unlabelled set, both to standard
 * output and to the result file (each file line is prefixed with "O").
 *
 * <p>Both data sets go through the same pre-processing chain
 * (ReplaceMissingValues, then Normalize). The filters are initialised on the
 * training data only and are reused as-is for the unknown data, so the unknown
 * data is transformed with the statistics learned from the training data.
 *
 * @param args unused
 * @throws Exception if an input file cannot be read, the output file cannot be
 *         written, or Weka fails while filtering, training or classifying
 */
public static void main(String[] args) throws Exception {
    Instances trainingData = loadArff("/Users//Weka/training.arff");
    Instances testData = loadArff("/Users//Weka/unknown.arff");

    // Declare the class attribute (last column) BEFORE filtering so that the
    // filters leave it alone; otherwise missing labels would be imputed like
    // any other attribute.
    trainingData.setClassIndex(trainingData.numAttributes() - 1);
    testData.setClassIndex(testData.numAttributes() - 1);

    // Fit the missing-value filter on the training data.
    ReplaceMissingValues replace = new ReplaceMissingValues();
    replace.setInputFormat(trainingData);
    Instances trainingNoMissing = Filter.useFilter(trainingData, replace);

    // Fit the normalisation filter on the cleaned training data.
    Normalize norm = new Normalize();
    norm.setInputFormat(trainingNoMissing);
    Instances processedTraining = Filter.useFilter(trainingNoMissing, norm);

    J48 tree = new J48();
    tree.buildClassifier(processedTraining);

    // Reuse the already-initialised filters for the unknown data. Calling
    // setInputFormat() again would reset them and make them recompute their
    // statistics from the unknown data, so the values fed to the tree would no
    // longer be on the scale the tree was trained on.
    Instances testNoMissing = Filter.useFilter(testData, replace);
    Instances processedTest = Filter.useFilter(testNoMissing, norm);

    try (FileOutputStream out = new FileOutputStream("/Users//Desktop/CLASSIFICATION/test.txt");
         PrintStream printer = new PrintStream(out)) {
        // The public static fields are still assigned for backward
        // compatibility; the streams are closed when this block exits.
        Output = out;
        file = printer;

        for (int i = 0; i < processedTest.numInstances(); i++) {
            weka.core.Instance currentInst = processedTest.instance(i);
            // classifyInstance returns the index of the predicted class value.
            int predictedClass = (int) tree.classifyInstance(currentInst);
            System.out.println(predictedClass);
            printer.println("O" + predictedClass);
        }
    }
}

/** Reads a whole ARFF file into memory and closes the underlying reader. */
private static Instances loadArff(String path) throws IOException {
    try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
        return new Instances(reader);
    }
}