Initial import: Music_Server, MusicFree, catalog-sync
This commit is contained in:
@@ -0,0 +1,118 @@
|
||||
'''
Function:
    Implementation of SingerLyricsAnalysis
Author:
    Zhenchao Jin
WeChat Official Account (微信公众号):
    Charles的皮卡丘
'''
|
||||
import os
|
||||
import jieba
|
||||
import pickle
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from snownlp import SnowNLP
|
||||
from wordcloud import WordCloud
|
||||
from musicdl.modules.sources import MiguMusicClient
|
||||
|
||||
|
||||
'''SingerLyricsAnalysis'''
|
||||
class SingerLyricsAnalysis():
    '''Crawl a singer's songs and analyze the lyrics.

    The pipeline searches songs through ``MiguMusicClient``, pickles the raw
    search result, then renders a word cloud, a top-10 word-frequency bar
    chart and a sentiment pie chart. Resources are read from, and results
    written under, ``self.root_dir`` (the directory containing this file).
    '''

    # Sentiment bucket labels, ordered from most negative to most positive.
    _SENTIMENT_LABELS = ('内容极度负面', '内容较为负面', '内容中性', '内容较为正面', '内容非常正面')
    # Substrings that mark a lyric line as metadata rather than sung text.
    _NOISE_TOKENS = (':', '[', ']', '歌曲')

    def __init__(self):
        # Directory of this source file; used as the base for all file I/O.
        self.root_dir = os.path.split(os.path.abspath(__file__))[0]
        self.music_client = MiguMusicClient(search_size_per_source=2000)

    '''start'''
    def start(self):
        '''Prompt for singer names in an endless loop, crawling and analyzing each.'''
        while True:
            singer_name = input('Input singer to analyze: ')
            print(f'[INFO]: Searching {singer_name}')
            infos = self.crawler(singer_name)
            print(f'[INFO]: Analyzing {singer_name}')
            # Pass the name along so chart titles and filtering refer to the singer.
            self.analysis(infos, singer_name=singer_name)

    '''crawler'''
    def crawler(self, singer_name):
        '''Search songs for ``singer_name``, pickle the raw result and return it.'''
        song_infos = self.music_client.search(keyword=singer_name)
        self.save(singer_name=singer_name, song_infos=song_infos)
        return song_infos

    '''analysis'''
    def analysis(self, song_infos, singer_name=''):
        '''Render the word cloud, top-10 bar chart and sentiment pie chart.

        Args:
            song_infos: iterable of records indexable by ``'lyric'``.
            singer_name: name used in chart titles and to drop lyric lines
                that mention the singer. Optional for backward compatibility;
                when empty, titles carry no name prefix and no name filtering
                is applied.

        Returns:
            None. Output files are produced by ``generatewordcloud``,
            ``drawbar`` and ``drawpie``.
        '''
        lyrics = self._clean_lyric_lines(song_infos, singer_name)
        if not lyrics:
            # Nothing to analyze; the word cloud cannot be built from no words.
            print('[INFO]: No usable lyric lines found, skip analysis')
            return
        # word frequencies (tokens shorter than two characters are ignored)
        words_dict = {}
        for sentence in lyrics:
            for word in jieba.cut(sentence):
                word = word.strip()
                if len(word) < 2: continue
                words_dict[word] = words_dict.get(word, 0) + 1
        if words_dict:
            # Ascending sort, so the last ten entries are the most frequent.
            words_freq_sorted = sorted(words_dict.items(), key=lambda item: item[1])
            words_freq_top10 = words_freq_sorted[-10:]
            self.generatewordcloud(words_dict)
            self.drawbar('%s歌曲中的词语TOP10' % singer_name, words_freq_top10)
        # nlp analysis: bucket every lyric line by its sentiment score
        nlp_dict = {label: 0 for label in self._SENTIMENT_LABELS}
        for sentence in lyrics:
            score = SnowNLP(sentence).sentiments
            nlp_dict[self._sentiment_label(score)] += 1
        self.drawpie('%s的歌词情感分析' % singer_name, nlp_dict)

    '''clean lyric lines'''
    @staticmethod
    def _clean_lyric_lines(song_infos, singer_name=''):
        '''Return the lyric sentences kept for analysis, in input order.'''
        sentences = []
        for song_info in song_infos:
            lyric = song_info['lyric']
            if not lyric: continue
            for sentence in lyric.split('\r\n'):
                # Drop the first 10 characters of each line
                # (presumably an LRC timestamp such as "[00:01.00]" - TODO confirm).
                sentence = sentence[10:]
                if not sentence: continue
                if any(token in sentence for token in SingerLyricsAnalysis._NOISE_TOKENS): continue
                # Guard on a non-empty name: '' is a substring of every string.
                if singer_name and singer_name in sentence: continue
                sentences.append(sentence)
        return sentences

    '''sentiment label'''
    @staticmethod
    def _sentiment_label(score):
        '''Map a sentiment score to one of the five bucket labels (0.2-wide bands).'''
        labels = SingerLyricsAnalysis._SENTIMENT_LABELS
        if score < 0.2: return labels[0]
        if score < 0.4: return labels[1]
        if score < 0.6: return labels[2]
        if score < 0.8: return labels[3]
        return labels[4]

    '''drawbar'''
    def drawbar(self, title, infos):
        '''Render ``infos`` (sequence of ``(word, freq)`` pairs) as ``<title>.html``.'''
        from pyecharts.charts import Bar
        from pyecharts import options as opts
        from pyecharts.globals import ThemeType
        bar = Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
        bar.add_xaxis([item[0] for item in infos])
        bar.add_yaxis('freq', [item[1] for item in infos])
        bar.set_global_opts(title_opts=opts.TitleOpts(title=title))
        bar.render(os.path.join(self.root_dir, title+'.html'))

    '''drawpie'''
    def drawpie(self, title, infos):
        '''Render ``infos`` (mapping of label to count) as a pie chart in ``<title>.html``.'''
        from pyecharts.charts import Pie
        from pyecharts import options as opts
        pie = Pie(init_opts=dict(theme='westeros', page_title=title)).add(title, data_pair=tuple(zip(infos.keys(), infos.values())), rosetype='area')
        pie.set_global_opts(title_opts=opts.TitleOpts(title=title))
        pie.render(os.path.join(self.root_dir, '%s.html' % title))

    '''generatewordcloud'''
    def generatewordcloud(self, infos):
        '''Build a word cloud from ``infos`` (mapping of word to frequency).

        Reads ``resources/mask.jpg`` and ``resources/font_cn.TTF`` under
        ``self.root_dir`` and writes ``wordcloud.png`` there.
        '''
        mask = Image.open(os.path.join(self.root_dir, 'resources/mask.jpg'))
        mask = np.array(mask)
        wc = WordCloud(background_color='white', font_path=os.path.join(self.root_dir, 'resources/font_cn.TTF'), mask=mask)
        result = wc.generate_from_frequencies(infos)
        result.to_file(os.path.join(self.root_dir, 'wordcloud.png'))

    '''save'''
    def save(self, song_infos, singer_name):
        '''Pickle ``song_infos`` to ``song_infos_<singer_name>.pkl`` under ``self.root_dir``.'''
        data_save_path = os.path.join(self.root_dir, f'song_infos_{singer_name}.pkl')
        with open(data_save_path, 'wb') as fp:
            pickle.dump(song_infos, fp)

    '''load'''
    def load(self, singer_name):
        '''Load and return the object previously written by ``save`` for ``singer_name``.'''
        data_save_path = os.path.join(self.root_dir, f'song_infos_{singer_name}.pkl')
        # NOTE: pickle can execute arbitrary code; only load files this tool wrote itself.
        with open(data_save_path, 'rb') as fp:
            return pickle.load(fp)
|
||||
|
||||
|
||||
'''tests'''
|
||||
if __name__ == '__main__':
    # Script entry point: build the analyzer and enter its interactive loop.
    SingerLyricsAnalysis().start()
|
||||
Reference in New Issue
Block a user