找回密码
 立即注册
查看: 5674|回复: 1

求教:如何做聚类分析

[复制链接]
发表于 2014-12-31 19:20:06 | 显示全部楼层 |阅读模式
在做一个聚类分析,输入以下代码后:
#### Enter the data as a data frame: eight numeric indicators (v1..v8),
#### 21 observations, labelled through row.names.
V <- data.frame(
  v1 = c(11.26, 24.1, 21.38, 11.12, 12.23, 2.74, 16.74, 19.08, 18.27, 33.89, 13.32, 6.42, 40.27, 11.02, 10.13, 15.6, 10.46, 48.64, 7.03, 18.65, 13.1),
  v2 = c(28.64, 41.99, 48.23, 26.73, 13.64, 4.64, 2.83, 41.62, 21.05, 65.79, 23.78, 4.85, 34.67, 4.67, 9.73, 16.56, 26.73, 48.31, 5.7, 9.1, 11.04),
  v3 = c(14.61, 16.23, 29.10, 18.04, 12.33, 23.16, 19.53, 28.57, 19.94, 7.27, 12.22, 14.90, 22.02, 26.25, 32.47, 17.60, 16.18, 15.94, 32.84, 13.74, 12.25),
  v4 = c(12.16, 19.25, 22.16, 11.12, 13.01, 7.51, 9.29, 20.07, 20.72, 14.59, 15.68, 5.66, 16.02, 23.51, 16.29, 19.88, 9.78, 13.96, 9.00, 12.99, 7.19),
  v5 = c(23.65, 18.94, 21.90, 26.68, 26.47, 29.70, 22.72, 20.14, 18.67, 23.50, 19.14, 18.03, 22.74, 38.63, 29.28, 38.32, 24.02, 20.26, 31.87, 39.74, 28.89),
  v6 = c(7.70, 10.79, 3.59, 3.97, 3.01, -3.77, 15.94, 17.13, 14.14, 34.14, 2.34, 1.67, 32.03, 0.11, 13.14, 18.87, 8.91, 18.19, 1.15, 12.45, -1.19),
  v7 = c(9.35, 9.16, 10.78, 8.12, 9.42, 15.83, 9.87, 8.37, 8.67, 5.25, 6.14, 0.00, 13.40, 10.03, 8.85, 11.20, 7.01, 8.75, 9.83, 14.07, 11.60),
  v8 = c(5.96, 16.55, 7.73, 15.14, 10.46, 1.55, 4.12, 4.20, 0.85, 0.16, 18.72, 0.00, 0.17, 1.01, 1.31, 5.75, 10.19, 0.98, 11.53, 7.36, 13.13),
  row.names = c("全国", "北京", "天津", "沈阳", "大连", "长春", "哈尔滨", "上海", "南京", "苏州", "杭州", "宁波", "厦门", "济南", "青岛", "武汉", "广州", "深圳", "重庆", "成都", "西安")
)  # this closing parenthesis was missing; without it the data.frame() call never completes
#### Build the distance structure and run hierarchical clustering (Ward's method)
# scale() standardizes every column before dist() computes Euclidean distances.
d <- dist(scale(V))
# The method formerly called "ward" is now "ward.D" (same algorithm as before).
# "ward.D2" is a separate, newer option; see ?hclust before switching to it.
hc <- hclust(d, method = "ward.D")
#### Draw the dendrogram and mark the clusters (Ward's method)
opar <- par(mfrow = c(2, 1), mar = c(5.2, 4, 0, 0))
# plclust()/rect.plclust() are deprecated; plot() and rect.hclust() replace them.
# main = "" suppresses the default title, which would not fit in the zero top margin.
plot(hc, hang = -1, main = ""); re2 <- rect.hclust(hc, k = 5, border = "red")
par(opar)  # restore the graphics parameters saved above
出现如下结果:
> #### 用数据框形式输入数据
> V <- data.frame(
+ v1=c(11.26, 24.1, 21.38, 11.12, 12.23, 2.74, 16.74, 19.08, 18.27, 33.89, 13.32, 6.42, 40.27, 11.02, 10.13, 15.6, 10.46, 48.64, 7.03, 18.65, 13.1),
+ v2=c(28.64, 41.99, 48.23, 26.73, 13.64, 4.64, 2.83, 41.62, 21.05, 65.79, 23.78, 4.85, 34.67, 4.67, 9.73, 16.56, 26.73, 48.31, 5.7, 9.1, 11.04),
+ v3=c(14.61, 16.23, 29.10, 18.04, 12.33, 23.16, 19.53, 28.57, 19.94, 7.27,12.22, 14.90, 22.02, 26.25, 32.47, 17.60, 16.18, 15.94, 32.84, 13.74, 12.25),
+ v4=c(12.16, 19.25, 22.16, 11.12, 13.01, 7.51, 9.29, 20.07, 20.72, 14.59, 15.68, 5.66, 16.02, 23.51, 16.29, 19.88, 9.78, 13.96, 9.00, 12.99, 7.19),
+ v5=c(23.65, 18.94, 21.90, 26.68, 26.47, 29.70, 22.72, 20.14, 18.67, 23.50, 19.14, 18.03, 22.74, 38.63, 29.28, 38.32, 24.02, 20.26, 31.87, 39.74, 28.89),
+ v6=c(7.70, 10.79, 3.59, 3.97, 3.01, -3.77, 15.94, 17.13, 14.14, 34.14, 2.34, 1.67, 32.03, 0.11, 13.14, 18.87, 8.91, 18.19, 1.15, 12.45, -1.19),
+ v7=c(9.35, 9.16, 10.78, 8.12, 9.42, 15.83, 9.87, 8.37, 8.67, 5.25, 6.14, 0.00, 13.40, 10.03, 8.85, 11.20, 7.01, 8.75, 9.83, 14.07, 11.60),
+ v8=c(5.96, 16.55, 7.73, 15.14, 10.46, 1.55, 4.12, 4.20, 0.85, 0.16, 18.72, 0.00, 0.17, 1.01, 1.31, 5.75, 10.19, 0.98, 11.53, 7.36, 13.13), row.names=c("全国", "北京", "天津", "沈阳", "大连", "长春", "哈尔滨", "上海", "南京", "苏州", "杭州", "宁波", "厦门", "济南", "青岛", "武汉", "广州", "深圳", "重庆", "成都", "西安")
+ )
> #### 生成距离结构,做系统聚类(离差平方和法)
> d <- dist(scale(V))
错误于scale(V) : 找不到对象'V'
> hc=hclust(d, "ward")
The "ward" method has been renamed to "ward.D"; note new "ward.D2"
错误于if (is.na(n) || n > 65536L) stop("size cannot be NA nor exceed 65536") :
  需要TRUE/FALSE值的地方不可以用缺少值
> #### 绘出谱系图和聚类情况(离差平方和法)
> opar=par(mfrow=c(2,1), mar=c(5.2, 4, 0, 0))
> plclust(hc, hang=-1); re2=rect.plclust(hc, k=5, border="red")
错误于plot.hclust(x = tree, labels = labels, hang = hang, axes = axes,  :
  找不到对象'hc'
此外: 警告信息:
'plclust' is deprecated.
Use 'plot' instead.
See help("Deprecated")
> par(opar)
求教,如何修改,以实现聚类分析!万分感谢!
回复

使用道具 举报

发表于 2015-1-16 17:06:51 | 显示全部楼层
你把建立数据框的语句分成2部分
# Step 1: build the data frame on its own, without row names.
# (The leading "+" console continuation prompts were removed so the snippet can be pasted into R.)
V <- data.frame(
  v1 = c(11.26, 24.1, 21.38, 11.12, 12.23, 2.74, 16.74, 19.08, 18.27, 33.89, 13.32, 6.42, 40.27, 11.02, 10.13, 15.6, 10.46, 48.64, 7.03, 18.65, 13.1),
  v2 = c(28.64, 41.99, 48.23, 26.73, 13.64, 4.64, 2.83, 41.62, 21.05, 65.79, 23.78, 4.85, 34.67, 4.67, 9.73, 16.56, 26.73, 48.31, 5.7, 9.1, 11.04),
  v3 = c(14.61, 16.23, 29.10, 18.04, 12.33, 23.16, 19.53, 28.57, 19.94, 7.27, 12.22, 14.90, 22.02, 26.25, 32.47, 17.60, 16.18, 15.94, 32.84, 13.74, 12.25),
  v4 = c(12.16, 19.25, 22.16, 11.12, 13.01, 7.51, 9.29, 20.07, 20.72, 14.59, 15.68, 5.66, 16.02, 23.51, 16.29, 19.88, 9.78, 13.96, 9.00, 12.99, 7.19),
  v5 = c(23.65, 18.94, 21.90, 26.68, 26.47, 29.70, 22.72, 20.14, 18.67, 23.50, 19.14, 18.03, 22.74, 38.63, 29.28, 38.32, 24.02, 20.26, 31.87, 39.74, 28.89),
  v6 = c(7.70, 10.79, 3.59, 3.97, 3.01, -3.77, 15.94, 17.13, 14.14, 34.14, 2.34, 1.67, 32.03, 0.11, 13.14, 18.87, 8.91, 18.19, 1.15, 12.45, -1.19),
  v7 = c(9.35, 9.16, 10.78, 8.12, 9.42, 15.83, 9.87, 8.37, 8.67, 5.25, 6.14, 0.00, 13.40, 10.03, 8.85, 11.20, 7.01, 8.75, 9.83, 14.07, 11.60),
  v8 = c(5.96, 16.55, 7.73, 15.14, 10.46, 1.55, 4.12, 4.20, 0.85, 0.16, 18.72, 0.00, 0.17, 1.01, 1.31, 5.75, 10.19, 0.98, 11.53, 7.36, 13.13)
)
然后检查一下数据框是否建立
如有,再运行
# Step 2: attach the 21 labels to the existing data frame V as its row names.
rownames(V) <- c(
  "全国", "北京", "天津", "沈阳", "大连", "长春", "哈尔滨",
  "上海", "南京", "苏州", "杭州", "宁波", "厦门", "济南",
  "青岛", "武汉", "广州", "深圳", "重庆", "成都", "西安"
)

再看看行名有没有改。
如果有,
hc=hclust(d, "ward") 中的 "ward" 应该改成 "ward.D"(新版 R 已把该方法改名,并新增了 "ward.D2"),该参数可以看 hclust 的帮助文件。不知道你要做什么,所以无法确定该选哪一个;但单单写一个 "ward" 已经是过时的写法。
下边的,几个函数,好像有点错误,我也刚学,说不出什么错。
但结果好像可以看。

结果

结果
回复

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

Archiver|手机版|小黑屋|R语言中文网

GMT+8, 2024-11-25 18:27 , Processed in 0.254715 second(s), 20 queries .

Powered by Discuz! X3.5

© 2001-2024 Discuz! Team.

快速回复 返回顶部 返回列表