Java KNN算法
“算法”不能算是“模型”,更不能说是“python模型”,因为python能实现的,c++、java等通用语言也能实现。
2. 你好,请问你现在有基于MapReduce的knn算法的Java代码么?谢谢~
14/09/03 08:08:01 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
14/09/03 08:08:01 WARN mapred.JobClient: ..
14/09/03 08:08:01 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
14/09/03 08:08:02 INFO input.FileInputFormat: Total input paths to process: 1
14/09/03 08:08:02 INFO mapred.JobClient: Running job: job_local_0001
14/09/03 08:08:02 INFO input.FileInputFormat: Total input paths to process: 1
14/09/03 08:08:02 INFO mapred.MapTask: io.sort.mb = 100
14/09/03 08:08:03 INFO mapred.MapTask: data buffer = 79691776/99614720
14/09/03 08:08:03 INFO mapred.MapTask: record buffer = 262144/327680
14/09/03 08:08:03 WARN mapred.LocalJobRunner: job_local_0001
java.lang.ClassCastException: class Point2D
    at java.lang.Class.asSubclass(Class.java:3018)
    at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:599)
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:791)
    at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:524)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:613)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305)
    at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:177)
14/09/03 08:08:03 INFO mapred.JobClient: map 0% reduce 0%
14/09/03 08:08:03 INFO mapred.JobClient: Job complete: job_local_0001
14/09/03 08:08:03 INFO mapred.JobClient: Counters: 0
3. 你好,关于KNN算法的MapReduce化
==================cluster.txt===========================
A 2 2
B 2 4
C 4 2
D 4 4
E 6 6
F 6 8
G 8 6
H 8 8
==================cluster.center.conf===========================
K1 3 2
K2 6 2
====================================================================================
package com.mahout.cluster;
//二维坐标的点
public class DmRecord {
private String name;
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
private double xpodouble;
private double ypodouble;
public DmRecord(){
}
public DmRecord(String name,double x,double y){
this.name = name;
this.xpodouble = x;
this.ypodouble = y;
}
public double getXpoint() {
return xpodouble;
}
public void setXpoint(double xpodouble) {
this.xpodouble = xpodouble;
}
public double getYpoint() {
return ypodouble;
}
public void setYpoint(double ypodouble) {
this.ypodouble = ypodouble;
}
public double distance(DmRecord record){
return Math.sqrt(Math.pow(this.xpodouble-record.xpodouble, 2)+Math.pow(this.ypodouble-record.ypodouble, 2));
}
}
==============================================================================
package com.mahout.cluster;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.io.IOUtils;
/**
 * Loads named two-dimensional points from a tab-separated text file and
 * serves them by name.
 *
 * Every non-blank line must have the form {@code name TAB x TAB y}.
 * Coordinates are parsed as doubles, so both integer values ("3") and
 * decimal values ("3.0") are accepted.
 */
public class DmRecordParser {
    /** Minimum number of tab-separated fields a record line must contain. */
    private static final int FIELD_COUNT = 3;

    /** Records loaded so far, keyed by the name found in the first column. */
    private final Map<String, DmRecord> urlMap = new HashMap<String, DmRecord>();

    /**
     * Reads the given file line by line and stores one record per non-blank line.
     * A later line with the same name replaces an earlier one.
     *
     * @param file tab-separated file to read
     * @throws IOException              if the file cannot be opened or read
     * @throws IllegalArgumentException if a non-blank line has fewer than three fields
     * @throws NumberFormatException    if a coordinate is not a valid number
     */
    public void initialize(File file) throws IOException {
        BufferedReader in = null;
        try {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            String line;
            while ((line = in.readLine()) != null) {
                // Blank lines (for example a trailing newline) carry no record.
                if (line.trim().length() == 0) {
                    continue;
                }
                DmRecord record = parse(line);
                urlMap.put(record.getName(), record);
            }
        } finally {
            IOUtils.closeStream(in);
        }
    }

    /**
     * Builds a point from one {@code name TAB x TAB y} line.
     *
     * @param line the text line to parse
     * @return the record described by the line
     * @throws IllegalArgumentException if the line has fewer than three fields
     * @throws NumberFormatException    if a coordinate is not a valid number
     */
    public DmRecord parse(String line) {
        String[] fields = line.split("\t");
        if (fields.length < FIELD_COUNT) {
            throw new IllegalArgumentException(
                    "Expected name<TAB>x<TAB>y but got: " + line);
        }
        // Doubles, not ints: DmRecord stores doubles and computed centers are fractional.
        double x = Double.parseDouble(fields[1]);
        double y = Double.parseDouble(fields[2]);
        return new DmRecord(fields[0], x, y);
    }

    /**
     * Looks up a previously loaded record by name.
     *
     * @param cluster name of the record, as it appeared in the first column
     * @return the matching record, or {@code null} if no record has that name
     */
    public DmRecord getUrlCode(String cluster) {
        return urlMap.get(cluster);
    }
}
==============================================================================
package com.mahout.cluster;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Recer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.mahout.test.StringStringPairAsce;
/**
 * Runs a single k-means iteration over two-dimensional points as one job.
 *
 * The mapper labels every input point ({@code name TAB x TAB y}) with the
 * index of its nearest cluster center, and the reducer averages the points of
 * each cluster to produce the new center ({@code K<i> TAB avgX TAB avgY}).
 *
 * NOTE(review): the identifiers MapReceBase, Recer, rece and setRecerClass are
 * kept exactly as they appear in this file and its import lines. They look like
 * extraction-garbled forms of Hadoop's MapReduceBase, Reducer, reduce and
 * setReducerClass; confirm against the original source before compiling.
 */
public class Kmeans extends Configured implements Tool {

    /** Assigns each input point to the nearest of the current cluster centers. */
    public static class KmeansMapper extends MapReceBase implements
            Mapper<LongWritable, Text, Text, Text> {
        /** Local file holding the current centers, one {@code K<i> TAB x TAB y} per line. */
        private static final String CENTER_FILE = "cluster.center.conf";
        /** Name prefix of a center in the center file; centers are K1, K2, ... */
        private static final String CENTER_PREFIX = "K";
        /** Prefix of the cluster label emitted as the map output key. */
        private static final String CLUSTER_PREFIX = "C";
        /** Number of tab-separated fields expected on an input line. */
        private static final int FIELD_COUNT = 3;

        private DmRecordParser drp;
        private Text tKey = new Text();

        /**
         * Loads the current cluster centers from {@code cluster.center.conf}.
         *
         * @throws RuntimeException wrapping the IOException if the file cannot be read
         */
        @Override
        public void configure(JobConf conf) {
            drp = new DmRecordParser();
            try {
                drp.initialize(new File(CENTER_FILE));
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }

        /**
         * Emits {@code (C<i>, original line)} where {@code i} is the index of the
         * center closest to the point described by the line. Blank lines are skipped.
         *
         * @param key    position of the line in the input (unused)
         * @param value  one input line of the form {@code name TAB x TAB y}
         * @param output collector receiving the labelled point
         * @param arg3   progress reporter (unused)
         * @throws IOException           if the line has fewer than three fields or
         *                               the collector fails
         * @throws IllegalStateException if no center could be matched to the point
         */
        @Override
        public void map(LongWritable key, Text value,
                OutputCollector<Text, Text> output, Reporter arg3)
                throws IOException {
            String line = value.toString();
            if (line.trim().length() == 0) {
                return;
            }
            String[] fields = line.split("\t");
            if (fields.length < FIELD_COUNT) {
                throw new IOException("Expected name<TAB>x<TAB>y but got: " + line);
            }
            DmRecord point = new DmRecord(fields[0],
                    Double.parseDouble(fields[1]),
                    Double.parseDouble(fields[2]));

            // Search state is local so that one record never influences the next.
            int nearest = 0;
            double minDistance = Double.POSITIVE_INFINITY;
            // Centers are named K1, K2, ...; stop at the first index that is not defined.
            for (int i = 1; ; i++) {
                DmRecord center = drp.getUrlCode(CENTER_PREFIX + i);
                if (center == null) {
                    break;
                }
                double distance = center.distance(point);
                // Strict comparison: on a tie the lower-numbered center wins.
                if (distance < minDistance) {
                    nearest = i;
                    minDistance = distance;
                }
            }
            if (nearest == 0) {
                throw new IllegalStateException("No usable cluster center (K1, K2, ...) in "
                        + CENTER_FILE + " for point: " + line);
            }

            tKey.set(CLUSTER_PREFIX + nearest);
            output.collect(tKey, value);
        }
    }

    /** Computes the new center of each cluster as the mean of its points. */
    public static class KmeansRecer extends MapReceBase implements
            Recer<Text, Text, Text, Text> {
        private Text tKey = new Text();
        private Text tValue = new Text();

        /**
         * Averages the x and y coordinates of all points of one cluster and emits
         * {@code (K<i>, avgX TAB avgY)}.
         *
         * @param key    cluster label of the form {@code C<i>}
         * @param value  the cluster's points, each as {@code name TAB x TAB y}
         * @param output collector receiving the new center
         * @param arg3   progress reporter (unused)
         * @throws IOException if the collector fails
         */
        @Override
        public void rece(Text key, Iterator<Text> value,
                OutputCollector<Text, Text> output, Reporter arg3)
                throws IOException {
            double sumX = 0;
            double sumY = 0;
            int count = 0;
            while (value.hasNext()) {
                String[] fields = value.next().toString().split("\t");
                sumX += Double.parseDouble(fields[1]);
                sumY += Double.parseDouble(fields[2]);
                count++;
            }
            if (count == 0) {
                // Nothing to average; avoid emitting a NaN center.
                return;
            }
            double avgX = sumX / count;
            double avgY = sumY / count;

            // Keep everything after the one-character prefix so indexes >= 10 survive.
            String centerName = "K" + key.toString().substring(1);
            tKey.set(centerName);
            tValue.set(avgX + "\t" + avgY);
            System.out.println(centerName + "\t" + avgX + "\t" + avgY);
            output.collect(tKey, tValue);
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 when the job was run, -1 when the arguments are missing
     * @throws Exception if job submission or execution fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: Kmeans <input path> <output path>");
            return -1;
        }

        JobConf conf = new JobConf(getConf(), Kmeans.class);
        conf.setJobName("Kmeans");
        //conf.setNumMapTasks(200);

        // Key and value types of the map output.
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(Text.class);

        // Key and value types of the final output.
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        // Mapper and reducer implementations.
        conf.setMapperClass(KmeansMapper.class);
        conf.setRecerClass(KmeansRecer.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // Input and output directories.
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
        return 0;
    }

    /** Command-line entry point; exits with the code returned by {@link #run(String[])}. */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Kmeans(), args);
        System.exit(exitCode);
    }
}
4. knn算法是什么
KNN(K-Nearest Neighbor)法即K最邻近法,最初由Cover和Hart于1968年提出,是一个理论上比较成熟的方法,也是最简单的机器学习算法之一。
作为一种非参数的分类算法,K-近邻(KNN)算法是非常有效和容易实现的。它已经广泛应用于分类、回归和模式识别等。
介绍
KNN算法本身简单有效,它是一种lazy-learning算法,分类器不需要使用训练集进行训练,训练时间复杂度为0。KNN分类的计算复杂度和训练集中的文档数目成正比,也就是说,如果训练集中文档总数为n,那么KNN的分类时间复杂度为O(n)。
KNN方法虽然从原理上也依赖于极限定理,但在类别决策时,只与极少量的相邻样本有关。由于KNN方法主要靠周围有限的邻近的样本,而不是靠判别类域的方法来确定所属类别的,因此对于类域的交叉或重叠较多的待分样本集来说,KNN方法较其他方法更为适合。
5. knn分类算法 怎么处理定性数据
knn算法(k-Nearest Neighbor algorithm)是一种经典的分类算法.
注意,不是聚类算法.所以这种分类算法必然包括了训练过程.
然而和一般性的分类算法不同,knn算法是一种 懒惰算法 .它并非
像其他的分类算法先通过训练建立分类模型,而是一种被动的分类
过程.它是边测试边训练建立分类模型.
算法的一般描述过程如下:
1.首先计算每个测试样本点到其他每个点的距离.
这个距离可以是欧氏距离,余弦距离等.