1 | # -*- coding: utf-8 -*- |
---|
2 | """ |
---|
3 | Sistema de Modelado de Tópicos |
---|
4 | |
---|
5 | Copyleft (@) 2014 CENDITEL nodo Mérida - https://planificacion.cenditel.gob.ve/trac/ |
---|
6 | """ |
---|
7 | ## @package django_topic_explorer.utils |
---|
8 | # |
---|
9 | # Métodos para el trato de json |
---|
10 | # @author Jorge Redondo (jredondo at cenditel.gob.ve) |
---|
11 | # @author <a href='http://www.cenditel.gob.ve'>Centro Nacional de Desarrollo e Investigación en Tecnologías Libres |
---|
12 | # (CENDITEL) nodo Mérida - Venezuela</a> |
---|
13 | # @copyright <a href='http://www.gnu.org/licenses/gpl-2.0.html'>GNU Public License versión 2 (GPLv2)</a> |
---|
14 | # @version 1.3 |
---|
15 | from utils import colorlib |
---|
16 | import itertools |
---|
17 | |
---|
def label(x):
    """!
    Return the label to show for a document identifier.

    This default implementation is the identity: the value passed in is
    returned unchanged. doc_json() calls it to fill the 'label' field of
    every entry it produces.

    @param x <b>{object}</b> Document identifier
    @return The same object that was received
    """
    return x
---|
19 | |
---|
def populateJson(lda_v):
    """!
    Build the per-topic dictionary that is later serialized as JSON.

    Every (topic, H) pair returned by lda_v.topic_oscillations() produces an
    entry keyed by str(topic) holding its "H" value and a "color". Each entry
    is then extended with a "words" mapping built from the first 20
    (word, probability) pairs of the matching item of lda_v.topics().

    @author Jorge Redondo (jredondo at cenditel.gob.ve)
    @copyright GNU/GPLv2
    @param lda_v <b>{object}</b> Model object exposing topic_oscillations()
        and topics()
    @return Dictionary of the form
        {topic_id: {"H": ..., "color": ..., "words": {word: prob}}}
    """
    # H values per topic (the original comment calls these entropy values).
    data = lda_v.topic_oscillations()

    # brew() output is transposed with zip(*...) and every resulting tuple is
    # wrapped in an endless cycle, so a color class never runs out of colors.
    # NOTE(review): presumably each tuple is one shade class across several
    # hues -- confirm against colorlib.brew.
    colors = [itertools.cycle(cs) for cs in zip(*colorlib.brew(3, n_cls=4))]
    last_class = len(colors) - 1

    # Number of consecutive ranks that share one color class. Floor division
    # keeps this an integer on both Python 2 and Python 3, and the clamp to 1
    # avoids a ZeroDivisionError when there are fewer topics than classes.
    factor = max(1, len(data) // len(colors))

    js = {}
    for rank, (topic, H) in enumerate(data):
        # Ranks past the last full group all fall into the last color class.
        color_class = min(rank // factor, last_class)
        js[str(topic)] = {
            "H": H,
            "color": next(colors[color_class]),
        }

    # Attach the word distribution; entries are looked up by the position of
    # the topic in lda_v.topics(), so that position must match a topic id
    # created above.
    for i, topic in enumerate(lda_v.topics()):
        js[str(i)].update({'words': {w: p for w, p in topic[:20]}})
    return js
---|
48 | |
---|
49 | |
---|
def doc_json(lda_v, doc_id, N=40):
    """!
    Build the list of documents related to a document, with their topics.

    The (doc, prob) pairs returned by lda_v.dist_doc_doc(doc_id) are sliced
    according to N, the topics of the selected documents are fetched with
    lda_v.doc_topics(), and one dictionary per document is produced.

    @author Jorge Redondo (jredondo at cenditel.gob.ve)
    @copyright GNU/GPLv2
    @param lda_v <b>{object}</b> Model object exposing dist_doc_doc(doc_id)
        and doc_topics(docs)
    @param doc_id <b>{string}</b> Document id; the literal string 'null'
        means that no document was selected
    @param N <b>{int}</b> When positive, the first N pairs are used in their
        original order; otherwise the slice [N:] is taken and reversed
    @return A list of dictionaries with the keys 'doc', 'label', 'prob' and
        'topics'; the string "error" when anything fails while building it;
        None when doc_id is 'null'
    """
    if doc_id == 'null':
        return None

    try:
        data = lda_v.dist_doc_doc(doc_id)
        if N > 0:
            data = data[:N]
        else:
            # reversed() returns a one-shot iterator, but the pairs are
            # walked twice below (once for the ids, once for the output), so
            # they must be materialized or the result would always be empty.
            data = list(reversed(data[N:]))

        docs = [doc for doc, prob in data]
        doc_topics_mat = lda_v.doc_topics(docs)

        js = []
        for (doc, prob), topics in zip(data, doc_topics_mat):
            js.append({
                'doc': doc,
                'label': label(doc),
                # NOTE(review): presumably turns a distance into a
                # similarity -- confirm against dist_doc_doc.
                'prob': 1 - prob,
                'topics': {str(t): p for t, p in topics},
            })
        return js
    except Exception:
        # Best effort: report the failure on stdout and hand the caller the
        # "error" marker instead of propagating. KeyboardInterrupt and
        # SystemExit are deliberately no longer swallowed.
        import sys
        import traceback
        exc_type, exc_value, exc_traceback = sys.exc_info()
        sys.stdout.write("*** print_tb:" + "\n")
        traceback.print_tb(exc_traceback, limit=1, file=sys.stdout)
        sys.stdout.write("*** print_exception:" + "\n")
        traceback.print_exception(exc_type, exc_value, exc_traceback,
                                  limit=2, file=sys.stdout)
        return "error"
---|
86 | |
---|
87 | |
---|
88 | |
---|
89 | |
---|