[0ff122b] | 1 | # -*- coding: utf-8 -*- |
---|
| 2 | """ |
---|
| 3 | Sistema de Modelado de Tópicos |
---|
| 4 | |
---|
| 5 | Copyleft (@) 2014 CENDITEL nodo Mérida - https://planificacion.cenditel.gob.ve/trac/ |
---|
| 6 | """ |
---|
| 7 | ## @package django_topic_explorer.utils |
---|
| 8 | # |
---|
| 9 | # Métodos para el trato de json |
---|
| 10 | # @author Jorge Redondo (jredondo at cenditel.gob.ve) |
---|
| 11 | # @author <a href='http://www.cenditel.gob.ve'>Centro Nacional de Desarrollo e Investigación en Tecnologías Libres |
---|
| 12 | # (CENDITEL) nodo Mérida - Venezuela</a> |
---|
| 13 | # @copyright <a href='http://www.gnu.org/licenses/gpl-2.0.html'>GNU Public License versión 2 (GPLv2)</a> |
---|
| 14 | # @version 1.3 |
---|
| 15 | from utils import colorlib |
---|
| 16 | import itertools |
---|
| 17 | |
---|
def label(x):
    """!
    Default labelling hook: returns the value it receives, unchanged.

    doc_json() calls this module-level name to fill the 'label' field of
    every entry it builds.

    @param x Value to label
    @return The same value x
    """
    return x
---|
| 19 | |
---|
def populateJson(lda_v):
    """!
    Build a dictionary describing every topic: its H value, a colour and
    its leading words.

    @author Jorge Redondo (jredondo at cenditel.gob.ve)
    @copyright GNU/GPLv2
    @param lda_v <b>{object}</b> LDA object; must provide topic_oscillations() and topics()
    @return Dictionary keyed by str(topic); each value holds "H", "color" and "words"
    """
    # topic_oscillations() is consumed as (topic, H) pairs; the position of a
    # pair in that sequence is its rank. The original comment describes H as
    # an entropy value -- confirm against the lda_v implementation.
    data = lda_v.topic_oscillations()

    # One endless cycle per tuple obtained by transposing colorlib.brew(3, n_cls=4).
    colors = [itertools.cycle(cs) for cs in zip(*colorlib.brew(3, n_cls=4))]
    last_cycle = len(colors) - 1

    # Number of consecutive ranks that draw from the same colour cycle.
    # Floor division keeps the value an integer on Python 2 and 3; the lower
    # bound of 1 avoids a ZeroDivisionError when there are fewer topics than
    # colour cycles.
    factor = max(len(data) // len(colors), 1)

    js = {}
    for rank, (topic, H) in enumerate(data):
        # Ranks past the last full group are clamped onto the final cycle.
        cycle_index = min(rank // factor, last_cycle)
        js[str(topic)] = {
            "H": H,
            "color": next(colors[cycle_index]),
        }

    # Attach up to the first 20 (word, value) pairs of each topic. The entry
    # for str(i) must already exist from the loop above, otherwise KeyError
    # is raised (same as the original behaviour).
    for i, topic in enumerate(lda_v.topics()):
        js[str(i)]['words'] = dict((w, p) for w, p in topic[:20])
    return js
---|
| 48 | |
---|
| 49 | |
---|
def doc_json(lda_v, doc_id, N=40):
    """!
    Build the list of documents related to doc_id together with their topics.

    @author Jorge Redondo (jredondo at cenditel.gob.ve)
    @copyright GNU/GPLv2
    @param lda_v <b>{object}</b> LDA object; must provide dist_doc_doc() and doc_topics()
    @param doc_id <b>{string}</b> Document id; the literal string 'null' means "no document"
    @param N <b>{int}</b> If positive, keep the first N entries of dist_doc_doc(doc_id);
             otherwise keep the slice [N:] of it, in reverse order
    @return List of dictionaries with the keys 'doc', 'label', 'prob' and 'topics';
            the string "error" if anything fails; None when doc_id is 'null'
    """
    if doc_id == 'null':
        return None

    try:
        distances = lda_v.dist_doc_doc(doc_id)
        if N > 0:
            data = distances[:N]
        else:
            # Materialise the reversed slice: a bare reversed() iterator would
            # be exhausted by the comprehension below, leaving nothing for the
            # zip() and always producing an empty result.
            data = list(reversed(distances[N:]))

        docs = [doc for doc, prob in data]
        doc_topics_mat = lda_v.doc_topics(docs)

        js = []
        for (doc, prob), topics in zip(data, doc_topics_mat):
            js.append({
                'doc': doc,
                'label': label(doc),
                # Reported value is the complement of the one returned by
                # dist_doc_doc().
                'prob': 1 - prob,
                'topics': dict((str(t), p) for t, p in topics),
            })
        return js
    except Exception:
        # Best effort: dump a short traceback to stdout and hand the caller
        # the "error" marker instead of propagating. KeyboardInterrupt and
        # SystemExit are deliberately not intercepted.
        import sys
        import traceback
        exc_type, exc_value, exc_traceback = sys.exc_info()
        print("*** print_tb:")
        traceback.print_tb(exc_traceback, limit=1, file=sys.stdout)
        print("*** print_exception:")
        traceback.print_exception(exc_type, exc_value, exc_traceback,
                                  limit=2, file=sys.stdout)
        return "error"
---|
| 86 | |
---|
| 87 | |
---|
| 88 | |
---|
| 89 | |
---|