Kinship Networks

The first step is to load the edge list (representing kinship ties) and the node list (containing students’ and relatives’ attributes). For a detailed description of the kinship data and the method for extracting it from the directory, please refer to the relevant documentation.

# load packages

library(readr)
library(tidyverse)
library(igraph)

# Kinship ties (edge list), stored as a semicolon-delimited file

library(readr)
kinship <- read_delim(
  file = "data/kinship.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)

# Kinship node list: its attributes are used later to tell the subjects of a
# biography apart from the relatives mentioned in it

kinship_nodes <- read_csv(file = "data/kinship_nodes.csv")
head(kinship_nodes)

Build the Network

# Build a directed graph: ties come from `kinship`, vertices and their
# attributes from `kinship_nodes`

g <- graph_from_data_frame(
  d = kinship,
  vertices = kinship_nodes,
  directed = TRUE
)
print(g)

## IGRAPH 119964f DN-- 376 216 -- 
## + attr: name (v/c), Type (v/c), DocId (v/c), RelationMain (e/c),
## | Relation2 (e/c)
## + edges from 119964f (vertex names):
##  [1] 余日章      ->余日宣 兪慶恩      ->兪慶堯 卞壽孫      ->卞福孫
##  [4] 張景芬      ->張福生 方伯樑      ->方伯麟 曹芳芸      ->曹錫庚
##  [7] 朱兆莘      ->朱兆燮 朱復        ->朱維民 歐陽祺      ->歐陽庚
## [10] 沈寳善      ->沈寳勳 盧壽汶      ->盧壽澂 羅泮忱      ->羅忠在
## [13] 胡詒穀      ->胡詒芳 諶湛溪      ->諶石勳 謝學瀛      ->謝學濤
## [16] 謝恩隆      ->謝欣榮 鄒應藼      ->鄒應歡 鄺孫謀      ->鄺芹生
## [19] 鍾文邦      ->鍾文鰲 陳同壽      ->陳明壽 陳宗良      ->陳宗賢
## + ... omitted several edges

# components(g)

The kinship network contains 376 nodes and 216 ties, representing 172 family components, with sizes ranging from 2 to 6 members in the case of Shi Zhaoji’s (施肇基) family.

Visualize the Network

In the next step, we visualize the kinship network. We use the color and shape of nodes to differentiate between the students who are the subject of a biography (red squares) and the relatives mentioned in their biographies (blue circles). The color of ties represents the different types of relations:

Grey ties represent father-son relations
Yellow ties represent uncle-nephew relations
Green ties represent siblings
Blue ties represent spouse

# Vertex shape and colour depend on the node type: "ego" (subject of a
# biography) -> red square, "relative" -> light-blue circle. Any other type
# is left as NA.
V(g)$shape <- case_when(
  kinship_nodes$Type == "ego" ~ "square",
  kinship_nodes$Type == "relative" ~ "circle"
)
V(g)$color <- case_when(
  kinship_nodes$Type == "ego" ~ "red",
  kinship_nodes$Type == "relative" ~ "light blue"
)

# Edge colour depends on the main relation type; unlisted relations stay NA
E(g)$color <- case_when(
  kinship$RelationMain == "Father" ~ "grey",
  kinship$RelationMain == "Uncle" ~ "gold",
  kinship$RelationMain == "Brother" ~ "yellowgreen",
  kinship$RelationMain == "Spouse" ~ "light blue"
)

# Draw the full network with small labels and no arrow heads
plot(
  g,
  main = "Kinship Network of Early Liumei",
  vertex.size = 3,
  vertex.label.cex = 0.3,
  vertex.label.color = "black",
  edge.width = 0.5,
  edge.curved = 0.1,
  edge.arrow.size = 0
)

To improve legibility, we will remove dyads and isolated nodes by selecting the largest components (size > 2):

# Get components
cl <- components(g)

# Extract membership data: one row per vertex, with the id of its component
# (comp_no) and the number of vertices in that component (size).
# add_count() is used instead of group_by() + add_tally() so that the result
# is not left grouped, which would silently carry over into the join below.

family_components <- data.frame(comp_no = cl$membership) %>%
  rownames_to_column("name") %>%
  as_tibble() %>%
  add_count(comp_no, name = "size") %>%
  relocate(name, .after = size)

# join with node attributes
# NOTE(review): this is a natural join on whatever columns the two tables
# share (presumably `name`) -- confirm and pass `by =` explicitly.

family_components_attributes <- inner_join(family_components, kinship_nodes)

Remove isolated nodes

# Collect, component by component, the names of the vertices that belong to
# a component with more than one member (i.e. drop isolated nodes)
family_filtered <- lapply(which(cl$csize > 1), function(comp_id) {
  V(g)$name[cl$membership == comp_id]
})

subv <- unlist(family_filtered)
kin1 <- as.data.frame(subv) # convert into dataframe

# induced_subgraph() replaces the deprecated induced.subgraph() alias
g1 <- induced_subgraph(graph = g, vids = subv)

# g1 has 371 nodes and 213 ties

# plot reduced graph; the vertex and edge attributes of g (including any
# shape/color styling) carry over to the subgraph

plot.igraph(g1, vertex.size = 3,
            vertex.label.color = "black",
            vertex.label.cex = 0.3,
            edge.width = 2,
            edge.arrow.size = 0,
            edge.curved = 0,
            main = "Kinship Network of Early Liumei (no isolates)")

# remove dyads: keep only the vertices that belong to a component with more
# than two members

family_filtered2 <- lapply(which(cl$csize > 2), function(comp_id) {
  V(g)$name[cl$membership == comp_id]
})

subv2 <- unlist(family_filtered2)
kin2 <- as.data.frame(subv2) # convert into dataframe
write.csv(kin2, "kin2.csv") # export the retained names

# induced_subgraph() replaces the deprecated induced.subgraph() alias
g2 <- induced_subgraph(graph = g, vids = subv2)

# g2 has 83 nodes and 64 ties

# plot the reduced graph (title typo "Liuei" corrected to "Liumei")
plot.igraph(g2, vertex.size = 5,
            vertex.label.color = "black",
            vertex.label.cex = 0.5,
            edge.width = 2.5,
            edge.arrow.size = 0,
            edge.curved = 0,
            main = "Kinship Network of Early Liumei (cluster > 2)")

Affiliation Networks

All Affiliations

We load the affiliation data retrieved using Named Entity Recognition (NER), as described in the Data Extraction Script.

# load packages

library(readr)
library(tidyverse)

# Read the affiliation table

library(readr)
affiliation_eng <- read_csv(file = "data/affiliation_eng.csv")

# Preview the first rows
head(affiliation_eng)

The dataset contains a total of 3,431 affiliations retrieved from the 401 English biographies. For each affiliation, the table provides the following information:

DocId: unique identifier of the biography in the “imh” collection
name : the name of the person
Organization: the name of the organization he/she was affiliated to
Position: the timing of the affiliation in the person’s life, according to the following typology: PRE = before going to the U.S., US = during his/her studies in the U.S., POST = after his/her return to China
category, category_main: the nature of the organization, with two levels of granularity

There is a total of 1,305 unique organizations, with frequency ranging from 1 to 96 (in the case of St John’s University). The educational sector represents the largest number (536, 40%), followed by associations (254, 19.5%), and media (newspapers, journals) (108, 8%). The largest number of affiliations refer to their period of studies in the US (1460, 42%) (mostly educational institutions, 810, 55%), followed by their post-return career (1154, 33.6%), their life prior to going to the US (803), and other (non-dated) affiliations.

# distribution of affiliations
# count(var, sort = TRUE) replaces group_by(var) %>% count(sort = TRUE): same
# counts and ordering, but the result is not left grouped.

# number of affiliations per organization, most frequent first
affiliation_eng %>%
  count(Organization, sort = TRUE)

# number of distinct organizations per main category
affiliation_eng %>%
  distinct(category_main, Organization) %>%
  count(category_main, sort = TRUE)

# number of affiliations per life period (Position)
affiliation_eng %>%
  count(Position, sort = TRUE)

Create the Network

To build the network, we need to create the edge list (list of links between students and organizations) and the node list (category of node, persons or organizations):

# Edge list: one row per person-organization link. The person identifier
# (DocName) combines the document id with the name, so that homonyms
# (wife/husband) remain distinct.

affiliation_edge <- affiliation_eng %>%
  mutate(DocName = paste(DocId, name, sep = "_")) %>%
  select(DocName, Organization, Position)

# Node list, persons: both type columns are set to "Person"

person_node <- affiliation_edge %>%
  distinct(Name = DocName) %>%
  mutate(Type = "Person", Type2 = "Person")

# Node list, organizations: Type2 holds the organization's main category

org_node <- affiliation_eng %>%
  distinct(Name = Organization, Type2 = category_main) %>%
  mutate(Type = "Organization", .after = Name)

# Stack both node tables (columns: Name, Type, Type2)

affiliation_node <- bind_rows(person_node, org_node)

Next, we create the bipartite network with igraph:

library(igraph)

# Build the undirected person-organization network from the edge list: the
# first two columns (DocName, Organization) define the ties and the remaining
# column (Position) is stored as an edge attribute.
Net <- graph_from_data_frame(affiliation_edge, directed = FALSE)

## Transformation into a 2-mode network
# The mode of each vertex is taken from the edge list itself instead of from
# a 2-colouring of the graph (bipartite_mapping), so that it does not depend
# on vertex order. It is also stored as a `Type` vertex attribute
# ("Person" / "Organization"), which the plotting code reads.
V(Net)$Type <- ifelse(
  V(Net)$name %in% affiliation_edge$DocName,
  "Person",
  "Organization"
)
# igraph's bipartite flag: FALSE = persons, TRUE = organizations
V(Net)$type <- V(Net)$Type == "Organization"

# Projection (multiplicity = TRUE keeps the number of shared neighbours as
# the edge weight)
projNet <- bipartite_projection(Net, multiplicity = TRUE)
Net1 <- projNet$proj1  # Network of persons (vertices with type == FALSE)
Net2 <- projNet$proj2  # Network of organizations (vertices with type == TRUE)

Visualize the Networks

# Assign shape and colour by node mode, using the bipartite `type` flag set
# when the 2-mode network was created. FALSE is the mode whose projection is
# used as the network of persons; TRUE is the organizations.
# (Styling previously read V(Net)$Type, which graph_from_data_frame() does
# not create when only an edge list is supplied.)
V(Net)$shape <- ifelse(V(Net)$type, "circle", "square")
V(Net)$color <- ifelse(V(Net)$type, "blue", "tomato")

plot(Net, vertex.size = 3,
     vertex.color = V(Net)$color,
     vertex.shape = V(Net)$shape,
     vertex.label.color = "black",
     vertex.label.cex = 0.3,
     main = "Early Liumei Affiliation Network")

# remove labels and make size proportionate to degree

layout <- layout_nicely(Net)

plot(Net, vertex.size = degree(Net) * 0.15,
     vertex.color = V(Net)$color,
     vertex.shape = V(Net)$shape,
     vertex.label = NA,
     layout = layout,
     main = "Early Liumei Affiliation Network")

Plot projections

# One-mode projection on persons: two persons are tied when they share an
# organization
plot(
  Net1,
  main = "Network of persons linked by organizations",
  vertex.color = "orange",
  vertex.size = 3,
  vertex.label.cex = 0.3,
  vertex.label.color = "black"
)

# One-mode projection on organizations: two organizations are tied when they
# share a person
plot(
  Net2,
  main = "Network of organizations linked by persons",
  vertex.color = "steel blue",
  vertex.size = 3,
  vertex.label.cex = 0.3,
  vertex.label.color = "black"
)

Network Analysis of the Who’s Who of American Returned Students (1917)

Cécile Armand

2024-08-20