-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPopulation_Analysis.R
219 lines (146 loc) · 8.37 KB
/
Population_Analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# Population analysis for India ----
# Show the current working directory; the CSV inputs read by this script are
# given as bare file names, so they are looked up relative to it.
getwd()
# Load the table that holds one `States` column plus the population measures.
db <- read.csv(file = "india-pop.csv")
# Inspect the table: data viewer, per-column summary, then the first and the
# last ten rows.
View(db)
summary(object = db)
head(x = db, n = 10)
tail(x = db, n = 10)
#-------------------------
# Add a "Regions" column that groups the states into broader regions.
# Each list entry maps a region name to the `States` values that belong to it.
region_states <- list(
  "Northern India" = c(
    "Delhi", "J & K", "Punjab", "Himachal Pradesh", "Uttarakhand", "Haryana"
  ),
  "Western India" = c(
    "Rajasthan", "Goa", "Gujarat", "Maharashtra", "Madhya Pradesh"
  ),
  "Southern India" = c(
    "Karnataka", "Telangana", "Andra Pradesh", "Tamil Nadu", "Kerala"
  ),
  "Central India" = c(
    "Uttar Pradesh", "Chhattisgarh", "Odisha", "Jharkhand", "Bihar",
    "West Bengal"
  ),
  "Eastern India" = c(
    "Assam", "Meghalaya", "Tripura", "Mizoram", "Manipur", "Nagaland",
    "Arunachal Pradesh", "Sikkim"
  )
)
# Rows whose state is not listed above keep the placeholder string "NA"
# (a character value, not a missing value).
db$Regions <- "NA"
for (region_name in names(region_states)) {
  in_region <- db$States %in% region_states[[region_name]]
  db$Regions[in_region] <- region_name
}
View(db)
#--------------------------
# Aggregate the data: `db` is replaced by the per-region mean of columns 3-11.
db <- aggregate(x = db[3:11], by = list(db$Regions), FUN = mean)
View(db)
# Some columns could not be averaged (symbols etc. in the raw data), so the
# values below were worked out by hand in Excel and are written over the
# first five rows of the aggregated table.
# NOTE(review): the Rural.Population entries 9424969 / 9424970 look like a
# copy-paste slip - confirm against the spreadsheet.
#-------------------------
manual_rows <- 1:5
# Population:
db$Population[manual_rows] <- c(
  82615910, 5685848, 14676727, 50245833, 78497317
)
# Decadal growth:
db$Decadal.growth[manual_rows] <- c(19.73, 17.67, 18.39, 9.42, 19.23)
# Rural population:
db$Rural.Population[manual_rows] <- c(
  64912570, 4591230, 9424969, 9424970, 50077110
)
# Urban population:
db$Urban.population[manual_rows] <- c(
  17703339, 1043205, 7091808, 20539480, 2842020
)
# Area (km2):
db$Area.km2[manual_rows] <- c(132409, 32772, 55124, 150442, 231584)
# Density (per km2):
db$Density..km2[manual_rows] <- c(638, 141, 2138, 469, 300)
View(db)
# Data manipulation for this table is complete.
#-------------------------------
# Plots: correlation between columns 2-9 of the aggregated table.
# Requires the corrplot package: install.packages("corrplot")
library(corrplot)
# Compute the correlation matrix once and draw it twice.
region_cor <- cor(db[2:9])
# Full matrix, drawn as squares.
corrplot(region_cor, method = "square")
# Upper triangle only, drawn as numbers.
corrplot(region_cor, method = "number", type = "upper")
#--------------------------------
# Re-read the state-level file: `db` was overwritten by the aggregated table.
db <- read.csv(file = "india-pop.csv")
View(db)
# Rebuild the "Regions" column on the fresh table.
# Each list entry maps a region name to the `States` values that belong to it.
region_states <- list(
  "Northern India" = c(
    "Delhi", "J & K", "Punjab", "Himachal Pradesh", "Uttarakhand", "Haryana"
  ),
  "Western India" = c(
    "Rajasthan", "Goa", "Gujarat", "Maharashtra", "Madhya Pradesh"
  ),
  "Southern India" = c(
    "Karnataka", "Telangana", "Andra Pradesh", "Tamil Nadu", "Kerala"
  ),
  "Central India" = c(
    "Uttar Pradesh", "Chhattisgarh", "Odisha", "Jharkhand", "Bihar",
    "West Bengal"
  ),
  "Eastern India" = c(
    "Assam", "Meghalaya", "Tripura", "Mizoram", "Manipur", "Nagaland",
    "Arunachal Pradesh", "Sikkim"
  )
)
# Rows whose state is not listed above keep the placeholder string "NA"
# (a character value, not a missing value).
db$Regions <- "NA"
for (region_name in names(region_states)) {
  in_region <- db$States %in% region_states[[region_name]]
  db$Regions[in_region] <- region_name
}
View(db)
# Plot the factors that can affect population.
# ggplot2 has to be attached before the first ggplot() call; the script's
# other library(ggplot2) calls all come later in the file.
library(ggplot2)

# Sex ratio per region: box plot plus jittered points coloured by state.
box <- ggplot(db, aes(x = Regions, y = Sex.Ratio, fill = Regions))
# Box plot with legend:
box +
  geom_boxplot() +
  geom_jitter(aes(color = States), size = 1.0) +
  ggtitle("Sex Ratio of states in Sub-Region") +
  coord_flip() +
  ylab("Sex Ratio in terms (1:1000) women to mens") +
  xlab("Regions in India")
# Box plot without legend:
box +
  geom_boxplot() +
  geom_jitter(aes(color = States), size = 1.0) +
  ggtitle("Sex Ratio of states in Sub-Region") +
  coord_flip() +
  ylab("Sex Ratio in terms (1:1000) women to mens") +
  xlab("Regions in India") +
  theme(legend.position = "none")

# Area against density, one facet per region with a linear fit.
# NOTE(review): the fit used to be grouped by States; assuming one row per
# state that leaves lm nothing to fit, so it is grouped by Regions here, the
# same way as the birth/death plot below - confirm this is the intent.
ggplot(db, aes(y = Area.km2, x = Density..km2)) +
  geom_point(aes(color = Regions), size = 3, alpha = 0.8) +
  geom_smooth(
    aes(color = Regions, fill = Regions),
    method = "lm",
    fullrange = TRUE
  ) +
  facet_wrap(~Regions) +
  theme_bw() +
  ggtitle("Area(km2) vs Density(km2) rate in Regions of India") +
  ylab("Area (in km2)") +
  xlab("Density (in km2)")

# Rural and urban population per state.
# Coloured points are the rural population, black points the urban one.
# The urban layer maps Urban.population to y (it was mapped to x, the state
# axis) and no longer passes `legend`, which ggplot2 layers do not accept.
ggplot(db, aes(x = States, y = Rural.Population, color = States)) +
  geom_point(size = 2) +
  ggtitle("Plots for Urban and Rural Population in each States") +
  geom_jitter(aes(y = Urban.population), color = "black") +
  xlab("States") +
  ylab("Population (Rural and Urban")

# Birth rate against death rate, one facet per region with a linear fit.
ggplot(db, aes(y = Birth.Rate, x = Death.Rate)) +
  geom_point(aes(color = Regions), size = 3, alpha = 0.8) +
  geom_smooth(
    aes(color = Regions, fill = Regions),
    method = "lm",
    fullrange = TRUE
  ) +
  facet_wrap(~Regions) +
  theme_bw() +
  ggtitle("Birth vs Death rate in Regions of India") +
  ylab("Birth Rates") +
  xlab("Death Rates")

# Smoothed birth/death curves, one line per region.
# `method` (not `model`) selects the fitting function, and the axis labels
# now follow the mapping: birth rate on x, death rate on y.
ggplot(db, aes(x = Birth.Rate, y = Death.Rate)) +
  geom_smooth(
    aes(color = Regions),
    method = "glm",
    alpha = 0.8,
    size = 1,
    position = "identity",
    se = FALSE
  ) +
  ggtitle("Birth vs Death rate in Regions of India") +
  xlab("Birth Rates") +
  ylab("Death Rates")
# The factor plots are complete.
#------------------------------------------------
# Past-year population records for India.
# Load the data set that holds the yearly records.
db <- read.csv("india.csv")
View(db)
library(ggplot2)
# Base plot: year on x, total population on y. The per-point label is set in
# geom_text() below, so no label aesthetic is declared here.
plot <- ggplot(db, aes(x = Year, y = TotalPopulation))
# Points labelled with their year, plus a linear trend line. `method` (not
# `model`) is the geom_smooth() argument that selects the fitting function.
plot +
  geom_text(aes(label = Year), hjust = -0.1, angle = 80) +
  geom_point() +
  geom_smooth(method = lm) +
  ggtitle("Past Year Records in India") +
  xlab("Years") +
  ylab("Population (in Crore)")
#-----------------------------------------------------
# Split the records into two segments so each gets its own facet.
# Rows are labelled by position: years found in rows 1-9 become "Segment 2",
# years found in rows 10-19 become "Segment 1"; anything else keeps the
# placeholder string "NA".
db$Segment <- "NA"
db$Segment[which(db$Year %in% db$Year[1:9])] <- "Segment 2"
db$Segment[which(db$Year %in% db$Year[10:19])] <- "Segment 1"
View(db)
# Growth rate against density for the past years, with a linear fit per
# segment. The points are drawn in constant black, so no colour aesthetic is
# mapped on that layer.
ggplot(db, aes(x = Density, y = GrowthRate)) +
  geom_point(size = 2, alpha = 0.8, color = "black") +
  geom_smooth(
    aes(color = Year, fill = Year),
    method = "lm",
    fullrange = TRUE
  ) +
  facet_wrap(~Segment) +
  theme_bw() +
  ggtitle("Past Year Records (1950-2020)") +
  ylab("Growth Rates") +
  xlab("Density")
# segment 1 = 1950-2000
# segment 2 = 2001-2020
#-----------------------------------------------
# Continental graphs.
# Load the per-continent table.
db <- read.csv(file = "continent.csv")
View(db)
library(rworldmap)
library(ggplot2)
# Bar graph: one bar per `Name`, bar height taken directly from `Pop`.
ggplot(db, aes(x = Name, y = Pop, fill = Name)) +
  geom_bar(stat = "identity") +
  ggtitle("World Population (on basis of Continents)") +
  ylab("Population (in Billion)") +
  xlab("Continents")
#----------------------------------------------------
#
# Sub-continental graphs.
# Load the per-sub-continent table and drop its first row.
population <- read.csv(file = "sub-continental.csv")[-1, ]
View(population)
library(ggplot2)
# Bar graph: one bar per `Name`, filled by name.
ggplot(population, aes(x = Name, y = Pop, fill = Name)) +
  geom_bar(stat = "identity") +
  ggtitle("World Population (on basis of Sub-Continents)") +
  ylab("Population (in Billion)") +
  xlab(" Sub-Continents")
# Same bars, filled by growth rate instead.
ggplot(population, aes(x = Name, y = Pop, fill = GrowthRate)) +
  geom_bar(stat = "identity") +
  ggtitle("World Population (on basis of Sub-Continents)") +
  ylab("Population (in Billion)") +
  xlab(" Sub-Continents")
#--------------------------------------------------------
#
# World maps built from the country-level table.

# Draw one world map coloured by a per-country value.
#   countries: country names, joined to the map by its "NAME" code
#   values:    values to plot, parallel to `countries`
#   title:     map title
# Relies on rworldmap being attached (done earlier in this script).
# Returns the result of mapCountryData() invisibly.
plot_world_map <- function(countries, values, title) {
  country_values <- data.frame(country = countries, value = values)
  joined_map <- joinCountryData2Map(
    country_values,
    joinCode = "NAME",
    nameJoinColumn = "country"
  )
  invisible(
    mapCountryData(
      joined_map,
      nameColumnToPlot = "value",
      mapTitle = title,
      colourPalette = "terrain"
    )
  )
}

# The table is read once and reused for all three maps.
db1 <- read.csv("country_wise_population.csv")
View(db1)
# Population, 2020 projection:
plot_world_map(db1$name, db1$pop2020, "World Population Map 2020")
# Growth rate:
plot_world_map(db1$name, db1$GrowthRate, "Growth Rate of Population in World")
# Population in 2000 (other years can be plotted the same way):
plot_world_map(db1$name, db1$pop2000, "World Population Map 2000")