12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788 |
- #!/usr/bin/env python3
- #------------------------------------------------------------------------#
- # "THE BEER-WARE LICENSE" (Revision 42): #
- # <xry111@mengyan1223.wang> wrote this file. As long as you retain this #
- # notice you can do whatever you want with this stuff. If we meet some #
- # day, and you think this stuff is worth it, you can buy me a beer in #
- # return. #
- # Xi Ruoyao #
- #------------------------------------------------------------------------#
- import argparse
- import jieba
- import jieba.posseg as posseg
- import sys
- from collections import Counter
- from wordcloud import WordCloud
def readfile(fn):
    """Return the entire contents of the text file *fn* as one string.

    The file is opened in text mode with the platform's default encoding,
    exactly as a bare ``open(fn)`` does.

    Args:
        fn: Path of the file to read.

    Returns:
        The full file contents as a ``str`` (empty string for an empty file).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed even when reading
    # fails, and a single read() avoids line-by-line string concatenation.
    with open(fn) as f:
        return f.read()
def cnt(words, parts):
    """Count word occurrences, optionally keeping only some parts of speech.

    Args:
        words: Iterable of objects exposing ``.word`` (the token text) and
            ``.flag`` (its part-of-speech tag).
        parts: Comma-separated part-of-speech tags to keep, or ``None`` to
            keep every word. Whitespace around each tag is ignored, so
            ``"n, v"`` is treated the same as ``"n,v"``.

    Returns:
        A ``collections.Counter`` mapping each kept word to its frequency.
    """
    if parts is None:
        return Counter(x.word for x in words)

    # Build the lookup set once; strip so "n, v" does not silently filter
    # out everything but the first tag.
    wanted = {p.strip() for p in parts.split(',')}
    return Counter(x.word for x in words if x.flag in wanted)
def _parse_bool(text):
    """Interpret a command-line string as a boolean flag value.

    ``type=bool`` must not be used with argparse because ``bool("False")`` is
    ``True``. Here the usual "false" spellings (and the empty string) map to
    ``False``; every other value maps to ``True``.
    """
    return text.strip().lower() not in ("", "0", "false", "no", "off", "n", "f")


def main():
    """Build a word-cloud image from a text file, driven by command-line options.

    The input text is segmented with jieba's part-of-speech tagger, words are
    counted (optionally restricted to the parts of speech given by
    ``--parts`` and to the ``--top`` most common words), and the frequencies
    are rendered with ``WordCloud`` and saved to ``--output``.

    Returns:
        ``1`` after printing the help text when ``--input`` or ``--output``
        is missing, ``0`` on success.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", help="input file name")
    parser.add_argument("-p", "--parts",
                        help="the parts you want of speech, seperated by comma")
    parser.add_argument("-j", "--jobs", type=int,
                        help="the number of parallel jobs")
    parser.add_argument("-t", "--top", type=int,
                        help="the number of words you want to show on the cloud")
    parser.add_argument("-o", "--output", help="output image file name")
    parser.add_argument("-f", "--fontpath",
                        help="the path to the TTF/OTF fonts to use")
    # nargs="?" keeps "-d True" / "-d False" working while also allowing a
    # bare "-d"; _parse_bool makes "-d False" actually mean False.
    parser.add_argument("-d", "--debug", type=_parse_bool, nargs="?",
                        const=True, default=False,
                        help="if you want to dump debug information")
    parser.add_argument("-m", "--mask",
                        help="the path to the mask image")
    parser.add_argument("-c", "--contour", type=int, default=0,
                        help="the width of contour if you want it")
    args = parser.parse_args()

    # Validate required arguments before doing any work: without an output
    # name the run would only fail at the very end, when saving the image.
    if args.input is None or args.output is None:
        parser.print_help()
        return 1

    if args.jobs is not None:
        jieba.enable_parallel(args.jobs)

    s = readfile(args.input)
    words = posseg.cut(s)
    c = cnt(words, args.parts)
    if args.top is not None:
        c = dict(c.most_common(args.top))
    if args.debug:
        print(c)

    mask = None
    if args.mask is not None:
        # Imported lazily so PIL/numpy are only needed when a mask is used.
        from PIL import Image
        import numpy as np
        mask = np.array(Image.open(args.mask))

    wc = WordCloud(font_path=args.fontpath,
                   mask=mask,
                   contour_width=args.contour,
                   contour_color="steelblue").generate_from_frequencies(c)
    wc.to_image().save(args.output)
    return 0
# Run the command-line tool only when executed as a script; the value
# returned by main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|