[R]ネットワーク分析 - ネットワークの比較

先日の第1回 にこにこテキストマイニング勉強会(#nicoTextMining)に参加してきた - yokkunsの日記の中で、@bob3bob3さんの、ことばネットワークが印象的だったので、とりあえず、ネットワーク分析を復習してみる。
今回は、次数中心性とPageRankを使ったネットワークの比較をやってみた

データは、ネットワーク分析 (Rで学ぶデータサイエンス 8)の6章に出てくる、「ハイテク企業の管理職21人の社会ネットワーク」

  • 「アドバイスを求める」のネットワーク


  • 「友人である」のネットワーク


  • 「報告をする」のネットワーク

隣接行列の作成

ADVICE <- matrix(c(
				0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,
				0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
				1,1,0,1,0,1,1,1,1,1,1,1,0,1,0,0,1,1,0,1,1,
				1,1,0,0,0,1,0,1,0,1,1,1,0,0,0,1,1,1,0,1,1,
				1,1,0,0,0,1,1,1,0,1,1,0,1,1,0,1,1,1,1,1,1,
				0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
				0,1,0,0,0,1,0,0,0,0,1,1,0,1,0,0,1,1,0,0,1,
				0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,1,
				1,1,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,1,0,0,1,
				1,1,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,1,1,1,0,
				1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
				0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
				1,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,
				0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,
				1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,
				1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,
				1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
				1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,
				1,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,1,0,1,0,
				1,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,0,1,
				0,1,1,1,0,1,1,1,0,0,0,1,0,1,0,0,1,1,0,1,0),
		nrow = 21, byrow = TRUE)

FRIEND <- matrix(c(
				0,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,
				1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,
				0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,
				1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,
				0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,1,0,1,0,1,
				0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,
				0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
				0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
				0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
				0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,0,
				1,1,1,1,1,0,0,1,1,0,0,1,1,0,1,0,1,1,1,0,0,
				1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,
				0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
				0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
				1,0,1,0,1,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
				1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
				1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1,
				0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
				1,1,1,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,0,1,0,
				0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,
				0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0),
		nrow = 21, byrow = TRUE)

REPORT <- matrix(c(
				0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
				0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
				0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
				0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
				0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
				0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
				0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
				0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
				0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
				0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
				0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
				0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
				0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
				0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
				0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
				0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
				0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
				0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
				0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
				0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
				0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
		nrow = 21, byrow = TRUE)

次数中心性

次数は頂点に接続している辺の数のこと。次数中心性は、ネットワーク内で多くの関係を持つ頂点を高く評価する指標。

library(sna)
d1 <- degree(ADVICE)
d2 <- degree(FRIEND)
d3 <- degree(REPORT)
data.degree <- data.frame(ADVICE =d1 ,FRIEND = d2, REPORT = d3)
data.degree
   ADVICE FRIEND REPORT
1      19     13      1
2      21     13      4
3      20      7      1
4      20     11      1
5      20     13      1
6      11      8      1
7      21      3      4
8      18      6      1
9      17      6      1
10     23      8      1
11     14     19      1
12      9     12      1
13     10      3      1
14     14      7      8
15     24     12      1
16     12      6      1
17     14     24      1
18     32      5      3
19     15     14      1
20     20      5      1
21     26      9      5

PageRank

Googleがウェブページの評価法として開発したもの。
他のページからのリンク数が多いページほどランキングが高く、ランキングの高いページからのリンクは高く評価するという指標

library(igraph)
g.advice <- graph.adjacency(ADVICE, mode="directed")
g.friend <- graph.adjacency(FRIEND, mode="directed")
g.report <- graph.adjacency(REPORT, mode="directed")

d1 <- page.rank(g.advice,directed=T)$vector
d2 <- page.rank(g.friend,directed=T)$vector
d3 <- page.rank(g.report,directed=T)$vector

data.page.rank <- data.frame(ADVICE=d1,FRIEND=d2,REPORT=d3)
       ADVICE      FRIEND     REPORT
1  0.04940998 0.114971023 0.01207780
2  0.09391770 0.163241396 0.06415455
3  0.02697010 0.019422852 0.01207780
4  0.04605231 0.090058309 0.01207780
5  0.01631746 0.023604796 0.01207780
6  0.07160099 0.013433368 0.01207780
7  0.10404817 0.024633315 0.48070136
8  0.04188507 0.047371756 0.01207780
9  0.01495868 0.021730625 0.01207780
10 0.02954916 0.010410908 0.01207780
11 0.03882723 0.029620548 0.01207780
12 0.03961603 0.069462375 0.01207780
13 0.01414518 0.009452086 0.01207780
14 0.04398129 0.027667883 0.13359021
15 0.01595085 0.027325617 0.01207780
16 0.02770023 0.045355571 0.01207780
17 0.04233474 0.060517356 0.01207780
18 0.07211404 0.081417669 0.04679563
19 0.01414518 0.027026814 0.01207780
20 0.03268510 0.014384208 0.01207780
21 0.16379049 0.078891525 0.08151346

コレスポンデンス分析

caパッケージを用いて、コレスポンデンス分析を行う。

library(ca)
#次数中心性
data.ca <- ca(data.degree)
plot(data.ca, arrows=c(F,T))

#PageRank			
data.page.rank.ca <- ca(data.page.rank)
plot(data.page.rank.ca, arrows=c(F,T))
  • 次数中心性



所感

あとで書く