source: consulta_publica/vsm/examples/church-corpus-beagle.ipynb @ 8edb1df

baseconstituyenteestudiantesgeneralplan_patriasala
Last change on this file since 8edb1df was 0ff122b, checked in by rudmanmrrod <rudman22@…>, 7 años ago

Agregado módulo de gestión de perfiles de procesamiento, incorporado el módulo de visualización de modelado de tópicos

  • Propiedad mode establecida a 100644
File size: 7.8 KB
Línea 
1{
2 "metadata": {
3  "name": "",
4  "signature": "sha256:64ae4d65acccf80ba7ddc89b3b5c48be726e91cc3d5ec52f15071cae5adbd93a"
5 },
6 "nbformat": 3,
7 "nbformat_minor": 0,
8 "worksheets": [
9  {
10   "cells": [
11    {
12     "cell_type": "code",
13     "collapsed": false,
14     "input": [
15      "from vsm import *\n",
16      "from vsm.extensions.corpusbuilders import toy_corpus\n",
17      "import numpy as np  # explicit: a later cell calls np.around; do not rely on the star import for it\n",
18      "\n",
19      "plain_corpus = \"\"\"\n",
20      "His theology challenged the Pope of the Roman Catholic Church by\n",
21      "teaching that the Bible is the only source of divinely revealed\n",
22      "knowledge.\n",
23      "\n",
24      "Augustine is held in the Catholic Church to be the model teacher.\n",
25      "\n",
26      "Augustine was recognized as a Doctor of the Church by Pope Boniface\n",
27      "VIII.\n",
28      "\n",
29      "Roman Catholic theology stated that faith alone cannot justify man.\n",
30      "\n",
31      "In the Catholic Church the Pope is regarded as the successor of Saint\n",
32      "Peter.\n",
33      "\n",
34      "Alonzo Church was an American mathematician and logician who made\n",
35      "major contributions to mathematical logic and the foundations of\n",
36      "theoretical computer science.\n",
37      "\n",
38      "The lambda calculus was introduced by mathematician Alonzo Church as\n",
39      "an investigation into the foundations of mathematics.\n",
40      "\n",
41      "The Church Turing thesis states that a function is algorithmically\n",
42      "computable if and only if it is computable by a Turing machine.\n",
43      "\n",
44      "Mathematical logic has close connections to the foundations of\n",
45      "mathematics, theoretical computer science.\n",
46      "\n",
47      "A Turing machine can be adapted to simulate the logic of any computer\n",
48      "algorithm.\n",
49      "\"\"\"\n",
50      "# Ten labels for the ten paragraphs above (presumably one per document -- confirm against toy_corpus).\n",
51      "metadata = ['Ecclesiastical ' + str(i) for i in range(1, 6)]\n",
52      "\n",
53      "metadata += ['Logic ' + str(i) for i in range(1, 6)]\n",
54      "# Two corpora over the same text and labels; they differ only in the nltk_stop flag.\n",
55      "env_c = toy_corpus(plain_corpus, nltk_stop=False, metadata=metadata)\n",
56      "c = toy_corpus(plain_corpus, nltk_stop=True, metadata=metadata)\n",
57      "# Train the environment on env_c; its matrix is handed to the context/sequence model below.\n",
58      "e = BeagleEnvironment(env_c, context_type='document')\n",
59      "e.train()\n",
60      "\n",
61      "m = BeagleContextSeq(c, env_c, e.matrix, context_type='document')\n",
62      "m.train()\n",
63      "\n",
64      "v = BeagleViewer(c, m)"
65     ],
66     "language": "python",
67     "metadata": {},
68     "outputs": [],
69     "prompt_number": 1
70    },
71    {
72     "cell_type": "code",
73     "collapsed": false,
74     "input": [
75      "v.dist_word_word(\"logic\", print_len=24)  # rendered table lists the 24 closest words"
76     ],
77     "language": "python",
78     "metadata": {},
79     "outputs": [
80      {
81       "html": [
82        "<table style=\"margin: 0\"><tr><th style=\"text-align: center; background: #CEE3F6\" colspan                    =\"4\">Words: logic</th></tr><tr><th style=\"text-align: center; background: #EFF2FB; \">Word                    </th><th style=\"text-align: center; background: #EFF2FB; \">Distance                    </th><th style=\"text-align: center; background: #EFF2FB; \">Word                    </th><th style=\"text-align: center; background: #EFF2FB; \">Distance                    </th></tr><tr><td>logic               </td><td>0.00000   </td><td>connections         </td><td>0.76702   </td></tr><tr><td>mathematical        </td><td>0.61011   </td><td>alonzo              </td><td>0.80445   </td></tr><tr><td>theoretical         </td><td>0.61208   </td><td>mathematician       </td><td>0.80459   </td></tr><tr><td>science             </td><td>0.62316   </td><td>mathematics         </td><td>0.83869   </td></tr><tr><td>major               </td><td>0.66616   </td><td>church              </td><td>1.10103   </td></tr><tr><td>logician            </td><td>0.67041   </td><td>adapted             </td><td>1.10351   </td></tr><tr><td>computer            </td><td>0.67150   </td><td>algorithm           </td><td>1.11369   </td></tr><tr><td>contributions       </td><td>0.67310   </td><td>simulate            </td><td>1.11534   </td></tr><tr><td>american            </td><td>0.67989   </td><td>introduced          </td><td>1.21524   </td></tr><tr><td>made                </td><td>0.68400   </td><td>lambda              </td><td>1.22073   </td></tr><tr><td>foundations         </td><td>0.71923   </td><td>calculus            </td><td>1.22433   </td></tr><tr><td>close               </td><td>0.75850   </td><td>investigation       </td><td>1.22615   </td></tr></table>"
83       ],
84       "metadata": {},
85       "output_type": "pyout",
86       "prompt_number": 2,
87       "text": [
88        "LabeledColumn([('logic', 0.0), ('mathematical', 0.6101055862083946),\n",
89        "       ('theoretical', 0.6120753393846052),\n",
90        "       ('science', 0.6231596204347992), ('major', 0.6661561305085367),\n",
91        "       ('logician', 0.6704136760448762), ('computer', 0.6714984183058912),\n",
92        "       ('contributions', 0.6731025516642305),\n",
93        "       ('american', 0.6798934692261327), ('made', 0.6839999625506846),\n",
94        "       ('foundations', 0.719229991642062), ('close', 0.7585014156843009),\n",
95        "       ('connections', 0.767018583921025), ('alonzo', 0.8044512289909166),\n",
96        "       ('mathematician', 0.8045906177632307),\n",
97        "       ('mathematics', 0.8386881603326829), ('church', 1.1010315856023223),\n",
98        "       ('adapted', 1.1035113489503063), ('algorithm', 1.1136939203005587),\n",
99        "       ('simulate', 1.1153373791273127),\n",
100        "       ('introduced', 1.2152425500248623), ('lambda', 1.220733368628201),\n",
101        "       ('calculus', 1.2243342122688632),\n",
102        "       ('investigation', 1.2261465306164703),\n",
103        "       ('machine', 1.269417682514073), ('turing', 1.3211064581969558),\n",
104        "       ('computable', 1.429471397103359),\n",
105        "       ('algorithmically', 1.438249529963765),\n",
106        "       ('thesis', 1.4407465300952085), ('function', 1.4428796239962542),\n",
107        "       ('states', 1.451198974678721), ('regarded', 1.48288688714894),\n",
108        "       ('augustine', 1.4839206355080299), ('pope', 1.4901360299448079),\n",
109        "       ('held', 1.4955110549432036), ('successor', 1.4994001138838537),\n",
110        "       ('teacher', 1.5002618508131433), ('model', 1.508414402427858),\n",
111        "       ('peter', 1.5097737324875613), ('catholic', 1.5182696911494529),\n",
112        "       ('saint', 1.5200635742191289), ('recognized', 1.5298638726650464),\n",
113        "       ('doctor', 1.5299486476988773), ('viii', 1.5313652271508718),\n",
114        "       ('source', 1.5358331193775343), ('boniface', 1.537475215267472),\n",
115        "       ('revealed', 1.5390566920932474), ('bible', 1.539912865371827),\n",
116        "       ('challenged', 1.5416800657102432),\n",
117        "       ('divinely', 1.5443999441008813), ('teaching', 1.545039019845761),\n",
118        "       ('knowledge', 1.549882372801595), ('theology', 1.5685637414507483),\n",
119        "       ('roman', 1.577005531262446), ('justify', 1.5849890563012603),\n",
120        "       ('alone', 1.5874528497618008), ('stated', 1.5911502226284824),\n",
121        "       ('man', 1.596215557394277), ('faith', 1.5978268253734298)], \n",
122        "      dtype=[('word', '|S15'), ('value', '<f8')])"
123       ]
124      }
125     ],
126     "prompt_number": 2
127    },
128    {
129     "cell_type": "code",
130     "collapsed": false,
131     "input": [
132      "np.around(v.dismat_word(['logic', 'church', 'catholic', 'pope']), 2)  # pairwise distances, 2 decimals"
133     ],
134     "language": "python",
135     "metadata": {},
136     "outputs": [
137      {
138       "metadata": {},
139       "output_type": "pyout",
140       "prompt_number": 3,
141       "text": [
142        "array([[ 0.  ,  1.1 ,  1.52,  1.49],\n",
143        "       [ 1.1 ,  0.  ,  1.11,  1.06],\n",
144        "       [ 1.52,  1.11,  0.  ,  0.8 ],\n",
145        "       [ 1.49,  1.06,  0.8 ,  0.  ]])"
146       ]
147      }
148     ],
149     "prompt_number": 3
150    }
151   ],
152   "metadata": {}
153  }
154 ]
155}
Nota: Vea TracBrowser para ayuda de uso del navegador del repositorio.