[R]ネットワーク分析 - ネットワークの比較
先日の第1回 にこにこテキストマイニング勉強会(#nicoTextMining)に参加してきた - yokkunsの日記の中で、@bob3bob3さんの、ことばネットワークが印象的だったので、とりあえず、ネットワーク分析を復習してみる。
今回は、次数中心性とPageRankを使ったネットワークの比較をやってみた
データは、ネットワーク分析 (Rで学ぶデータサイエンス 8)の6章に出てくる、「ハイテク企業の管理職21人の社会ネットワーク」
- 「アドバイスを求める」のネットワーク
- 「友人である」のネットワーク
- 「報告をする」のネットワーク
隣接行列の作成
ADVICE <- matrix(c( 0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1, 0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1, 1,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1, 1,1,0,0,0,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,1, 1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, 0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1, 0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,1, 1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1, 1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,1,0, 1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1, 1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0, 0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1, 1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0, 1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1, 1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1, 1,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,0,1,0, 1,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,0,1, 0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,0,1,0), nrow = 21, byrow = TRUE) FRIEND <- matrix(c( 0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0, 1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0, 0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,1,0,1,0,1, 0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0, 1,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,0, 1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1, 0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, 1,0,1,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0, 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,0, 0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0, 0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0), nrow = 21, byrow = TRUE) REPORT <- matrix(c( 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), nrow = 21, byrow = TRUE)
次数中心性
次数は頂点に接続している辺の数のこと。次数中心性は、ネットワーク内で多くの関係を持つ頂点を高く評価する指標。
library(sna) d1 <- degree(ADVICE) d2 <- degree(FRIEND) d3 <- degree(REPORT) data.degree <- data.frame(ADVICE =d1 ,FRIEND = d2, REPORT = d3) data.degree
ADVICE FRIEND REPORT 1 19 13 1 2 21 13 4 3 20 7 1 4 20 11 1 5 20 13 1 6 11 8 1 7 21 3 4 8 18 6 1 9 17 6 1 10 23 8 1 11 14 19 1 12 9 12 1 13 10 3 1 14 14 7 8 15 24 12 1 16 12 6 1 17 14 24 1 18 32 5 3 19 15 14 1 20 20 5 1 21 26 9 5
PageRank
Googleがウェブページの評価法として開発したもの。
他のページからのリンク数が多いページほどランキングが高く、ランキングの高いページからのリンクは高く評価するという指標
library(igraph) g.advice <- graph.adjacency(ADVICE, mode="directed") g.friend <- graph.adjacency(FRIEND, mode="directed") g.report <- graph.adjacency(REPORT, mode="directed") d1 <- page.rank(g.advice,directed=T)$vector d2 <- page.rank(g.friend,directed=T)$vector d3 <- page.rank(g.report,directed=T)$vector data.page.rank <- data.frame(ADVICE=d1,FRIEND=d2,REPORT=d3)
ADVICE FRIEND REPORT 1 0.04940998 0.114971023 0.01207780 2 0.09391770 0.163241396 0.06415455 3 0.02697010 0.019422852 0.01207780 4 0.04605231 0.090058309 0.01207780 5 0.01631746 0.023604796 0.01207780 6 0.07160099 0.013433368 0.01207780 7 0.10404817 0.024633315 0.48070136 8 0.04188507 0.047371756 0.01207780 9 0.01495868 0.021730625 0.01207780 10 0.02954916 0.010410908 0.01207780 11 0.03882723 0.029620548 0.01207780 12 0.03961603 0.069462375 0.01207780 13 0.01414518 0.009452086 0.01207780 14 0.04398129 0.027667883 0.13359021 15 0.01595085 0.027325617 0.01207780 16 0.02770023 0.045355571 0.01207780 17 0.04233474 0.060517356 0.01207780 18 0.07211404 0.081417669 0.04679563 19 0.01414518 0.027026814 0.01207780 20 0.03268510 0.014384208 0.01207780 21 0.16379049 0.078891525 0.08151346
コレスポンデンス分析
caパッケージを用いて、コレスポンデンス分析を行う。
library(ca) #次数中心性 data.ca <- ca(data.degree) plot(data.ca, arrows=c(F,T)) #PageRank data.page.rank.ca <- ca(data.page.rank) plot(data.page.rank.ca, arrows=c(F,T))
- 次数中心性
所感
あとで書く