-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlyricsMining.R
111 lines (77 loc) · 3.52 KB
/
lyricsMining.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
devtools::install_github("mjockers/syuzhet")
install.packages("textcat")
library(ggplot2)
library(tm)
library(wordcloud)
library(syuzhet)
library(SnowballC)
library(textcat)
texts <-file("/Users/PP/Desktop/vs/basicData-A~Z/song_labels_R.txt","r")##texts is the whole file
outfile <- file("/Users/PP/Desktop/vs/Output_R/songR_labels_R.txt", "w")
line<-readLines(texts,n=1)
##paste(line)
while( length(line) != 0) {
line1 <- strsplit(line,",")
LyricsLine <- line1[[1]][3]##find the lyrics part
ArtistLine <- line1[[1]][1]
SongNameLine <- line1[[1]][2]
if((!is.na(textcat(LyricsLine)))&&(textcat(LyricsLine) == 'english')&&(length(LyricsLine)>=1)){
##print(LyricsLine)
docs <- Corpus(VectorSource(LyricsLine))##read the text
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, tolower)
docs <- tm_map(docs, removeWords, stopwords("english"))
## docs <- tm_map(docs, removeWords, c("black", "bones"))
docs <- tm_map(docs, stemDocument)
docs <- tm_map(docs, stripWhitespace)
docs <- tm_map(docs, PlainTextDocument)
######build TDM
dtm <- TermDocumentMatrix(docs)
mat <- as.matrix(dtm)
v <- sort(rowSums(mat),decreasing = TRUE)
d <- data.frame(word = names(v),freq = v)
##head (d,10)##get top 10 frequent words
topWords <- as.character(head(d,10)$word)##get the first row
#
#paste0(topWords)
##while(length(topWords)>1){
Sentiment <- get_nrc_sentiment(LyricsLine)
text <- cbind(LyricsLine,Sentiment)
TotalSentiment <- data.frame(colSums(text[,c(2:11)]))
names(TotalSentiment) <- "count"
TotalSentiment <- cbind("sentiment" = rownames(TotalSentiment),TotalSentiment)
rownames(TotalSentiment) <- NULL
##ggplot(data = TotalSentiment, aes(x = sentiment, y = count))+
##geom_bar(aes(fill = sentiment), stat = "identity")+
##theme(legend.position = "none") +
##xlab("Sentiment") + ylab("Total Count") + ggtitle("Total Sentiment Score")
sentiText <- text[,c(2:11)]
DescSentiment <- TotalSentiment[order(TotalSentiment[,2],decreasing=T),]
topSentiment <- head(DescSentiment,5)[,1]
##print(topSentiment)
##score
totolScore <- sentiText[,1]+sentiText[,2]+sentiText[,3]+sentiText[,4]+sentiText[,5]+sentiText[,6]+sentiText[,7]+sentiText[,8]+sentiText[,9]+sentiText[,10]
positiveScore <- sentiText[,2]+sentiText[,5]+sentiText[,7]+sentiText[,8]+sentiText[,10]
SentiScore<- floor((positiveScore/totolScore*10))
joySentiment <- sentiText[,2]+sentiText[,5]+sentiText[,7]+sentiText[,8]
joyScore<- floor(joySentiment/totolScore*10)
sorrowSentiment <- sentiText[,6]+sentiText[,4]
sorrowScore<- floor(sorrowSentiment/totolScore*10)
surpriseScore <- floor(sentiText[,7]/totolScore*10)
angryScore <- floor(sentiText[,1]/totolScore*10)
######
totalResult <- c(paste0(ArtistLine),sep=",",paste0(SongNameLine),sep=",",
SentiScore,sep=",",
joyScore,sep=",",sorrowScore,sep=",",angryScore,sep=",",surpriseScore,sep=",",
paste0(topWords),sep="\n")
##paste0(totalResult)
##write.table(paste(totalResult, sep=" ", collapse=", "), "/Users/kein/Desktop/tryResult/Rdata1.txt", sep="\t")
cat(totalResult,file=outfile)
line<-readLines(texts,n=1)
}else{
line<-readLines(texts,n=1)
}
}
print ("Done")
##outfile <- file("/Users/PP/Desktop/vs/basicData-A~J/Done.txt", "w")