1 | { |
---|
2 | "metadata": { |
---|
3 | "name": "", |
---|
4 | "signature": "sha256:64ae4d65acccf80ba7ddc89b3b5c48be726e91cc3d5ec52f15071cae5adbd93a" |
---|
5 | }, |
---|
6 | "nbformat": 3, |
---|
7 | "nbformat_minor": 0, |
---|
8 | "worksheets": [ |
---|
9 | { |
---|
10 | "cells": [ |
---|
11 | { |
---|
12 | "cell_type": "code", |
---|
13 | "collapsed": false, |
---|
14 | "input": [ |
---|
15 | "from vsm import *\n", |
---|
16 | "from vsm.extensions.corpusbuilders import toy_corpus\n", |
---|
17 | "\n", |
---|
18 | "\n", |
---|
19 | "plain_corpus = \"\"\"\n", |
---|
20 | "His theology challenged the Pope of the Roman Catholic Church by\n", |
---|
21 | "teaching that the Bible is the only source of divinely revealed\n", |
---|
22 | "knowledge.\n", |
---|
23 | "\n", |
---|
24 | "Augustine is held in the Catholic Church to be the model teacher.\n", |
---|
25 | "\n", |
---|
26 | "Augustine was recognized as a Doctor of the Church by Pope Boniface\n", |
---|
27 | "VIII.\n", |
---|
28 | "\n", |
---|
29 | "Roman Catholic theology stated that faith alone cannot justify man.\n", |
---|
30 | "\n", |
---|
31 | "In the Catholic Church the Pope is regarded as the successor of Saint\n", |
---|
32 | "Peter.\n", |
---|
33 | "\n", |
---|
34 | "Alonzo Church was an American mathematician and logician who made\n", |
---|
35 | "major contributions to mathematical logic and the foundations of\n", |
---|
36 | "theoretical computer science.\n", |
---|
37 | "\n", |
---|
38 | "The lambda calculus was introduced by mathematician Alonzo Church as\n", |
---|
39 | "an investigation into the foundations of mathematics.\n", |
---|
40 | "\n", |
---|
41 | "The Church Turing thesis states that a function is algorithmically\n", |
---|
42 | "computable if and only if it is computable by a Turing machine.\n", |
---|
43 | "\n", |
---|
44 | "Mathematical logic has close connections to the foundations of\n", |
---|
45 | "mathematics, theoretical computer science.\n", |
---|
46 | "\n", |
---|
47 | "A Turing machine can be adapted to simulate the logic of any computer\n", |
---|
48 | "algorithm.\n", |
---|
49 | "\"\"\"\n", |
---|
50 | "\n", |
---|
51 | "metadata = ['Ecclesiastical ' + str(i) for i in xrange(1, 6)]\n", |
---|
52 | "\n", |
---|
53 | "metadata += ['Logic ' + str(i) for i in xrange(1, 6)]\n", |
---|
54 | "\n", |
---|
55 | "env_c = toy_corpus(plain_corpus, nltk_stop=False, metadata=metadata)\n", |
---|
56 | "c = toy_corpus(plain_corpus, nltk_stop=True, metadata=metadata)\n", |
---|
57 | "\n", |
---|
58 | "e = BeagleEnvironment(env_c, context_type='document')\n", |
---|
59 | "e.train()\n", |
---|
60 | "\n", |
---|
61 | "m = BeagleContextSeq(c, env_c, e.matrix, context_type='document')\n", |
---|
62 | "m.train()\n", |
---|
63 | "\n", |
---|
64 | "v = BeagleViewer(c, m)" |
---|
65 | ], |
---|
66 | "language": "python", |
---|
67 | "metadata": {}, |
---|
68 | "outputs": [], |
---|
69 | "prompt_number": 1 |
---|
70 | }, |
---|
71 | { |
---|
72 | "cell_type": "code", |
---|
73 | "collapsed": false, |
---|
74 | "input": [ |
---|
75 | "v.dist_word_word('logic', print_len=24)" |
---|
76 | ], |
---|
77 | "language": "python", |
---|
78 | "metadata": {}, |
---|
79 | "outputs": [ |
---|
80 | { |
---|
81 | "html": [ |
---|
82 | "<table style=\"margin: 0\"><tr><th style=\"text-align: center; background: #CEE3F6\" colspan =\"4\">Words: logic</th></tr><tr><th style=\"text-align: center; background: #EFF2FB; \">Word </th><th style=\"text-align: center; background: #EFF2FB; \">Distance </th><th style=\"text-align: center; background: #EFF2FB; \">Word </th><th style=\"text-align: center; background: #EFF2FB; \">Distance </th></tr><tr><td>logic </td><td>0.00000 </td><td>connections </td><td>0.76702 </td></tr><tr><td>mathematical </td><td>0.61011 </td><td>alonzo </td><td>0.80445 </td></tr><tr><td>theoretical </td><td>0.61208 </td><td>mathematician </td><td>0.80459 </td></tr><tr><td>science </td><td>0.62316 </td><td>mathematics </td><td>0.83869 </td></tr><tr><td>major </td><td>0.66616 </td><td>church </td><td>1.10103 </td></tr><tr><td>logician </td><td>0.67041 </td><td>adapted </td><td>1.10351 </td></tr><tr><td>computer </td><td>0.67150 </td><td>algorithm </td><td>1.11369 </td></tr><tr><td>contributions </td><td>0.67310 </td><td>simulate </td><td>1.11534 </td></tr><tr><td>american </td><td>0.67989 </td><td>introduced </td><td>1.21524 </td></tr><tr><td>made </td><td>0.68400 </td><td>lambda </td><td>1.22073 </td></tr><tr><td>foundations </td><td>0.71923 </td><td>calculus </td><td>1.22433 </td></tr><tr><td>close </td><td>0.75850 </td><td>investigation </td><td>1.22615 </td></tr></table>" |
---|
83 | ], |
---|
84 | "metadata": {}, |
---|
85 | "output_type": "pyout", |
---|
86 | "prompt_number": 2, |
---|
87 | "text": [ |
---|
88 | "LabeledColumn([('logic', 0.0), ('mathematical', 0.6101055862083946),\n", |
---|
89 | " ('theoretical', 0.6120753393846052),\n", |
---|
90 | " ('science', 0.6231596204347992), ('major', 0.6661561305085367),\n", |
---|
91 | " ('logician', 0.6704136760448762), ('computer', 0.6714984183058912),\n", |
---|
92 | " ('contributions', 0.6731025516642305),\n", |
---|
93 | " ('american', 0.6798934692261327), ('made', 0.6839999625506846),\n", |
---|
94 | " ('foundations', 0.719229991642062), ('close', 0.7585014156843009),\n", |
---|
95 | " ('connections', 0.767018583921025), ('alonzo', 0.8044512289909166),\n", |
---|
96 | " ('mathematician', 0.8045906177632307),\n", |
---|
97 | " ('mathematics', 0.8386881603326829), ('church', 1.1010315856023223),\n", |
---|
98 | " ('adapted', 1.1035113489503063), ('algorithm', 1.1136939203005587),\n", |
---|
99 | " ('simulate', 1.1153373791273127),\n", |
---|
100 | " ('introduced', 1.2152425500248623), ('lambda', 1.220733368628201),\n", |
---|
101 | " ('calculus', 1.2243342122688632),\n", |
---|
102 | " ('investigation', 1.2261465306164703),\n", |
---|
103 | " ('machine', 1.269417682514073), ('turing', 1.3211064581969558),\n", |
---|
104 | " ('computable', 1.429471397103359),\n", |
---|
105 | " ('algorithmically', 1.438249529963765),\n", |
---|
106 | " ('thesis', 1.4407465300952085), ('function', 1.4428796239962542),\n", |
---|
107 | " ('states', 1.451198974678721), ('regarded', 1.48288688714894),\n", |
---|
108 | " ('augustine', 1.4839206355080299), ('pope', 1.4901360299448079),\n", |
---|
109 | " ('held', 1.4955110549432036), ('successor', 1.4994001138838537),\n", |
---|
110 | " ('teacher', 1.5002618508131433), ('model', 1.508414402427858),\n", |
---|
111 | " ('peter', 1.5097737324875613), ('catholic', 1.5182696911494529),\n", |
---|
112 | " ('saint', 1.5200635742191289), ('recognized', 1.5298638726650464),\n", |
---|
113 | " ('doctor', 1.5299486476988773), ('viii', 1.5313652271508718),\n", |
---|
114 | " ('source', 1.5358331193775343), ('boniface', 1.537475215267472),\n", |
---|
115 | " ('revealed', 1.5390566920932474), ('bible', 1.539912865371827),\n", |
---|
116 | " ('challenged', 1.5416800657102432),\n", |
---|
117 | " ('divinely', 1.5443999441008813), ('teaching', 1.545039019845761),\n", |
---|
118 | " ('knowledge', 1.549882372801595), ('theology', 1.5685637414507483),\n", |
---|
119 | " ('roman', 1.577005531262446), ('justify', 1.5849890563012603),\n", |
---|
120 | " ('alone', 1.5874528497618008), ('stated', 1.5911502226284824),\n", |
---|
121 | " ('man', 1.596215557394277), ('faith', 1.5978268253734298)], \n", |
---|
122 | " dtype=[('word', '|S15'), ('value', '<f8')])" |
---|
123 | ] |
---|
124 | } |
---|
125 | ], |
---|
126 | "prompt_number": 2 |
---|
127 | }, |
---|
128 | { |
---|
129 | "cell_type": "code", |
---|
130 | "collapsed": false, |
---|
131 | "input": [ |
---|
132 | "np.around(v.dismat_word(['logic','church','catholic','pope']), decimals=2)" |
---|
133 | ], |
---|
134 | "language": "python", |
---|
135 | "metadata": {}, |
---|
136 | "outputs": [ |
---|
137 | { |
---|
138 | "metadata": {}, |
---|
139 | "output_type": "pyout", |
---|
140 | "prompt_number": 3, |
---|
141 | "text": [ |
---|
142 | "array([[ 0. , 1.1 , 1.52, 1.49],\n", |
---|
143 | " [ 1.1 , 0. , 1.11, 1.06],\n", |
---|
144 | " [ 1.52, 1.11, 0. , 0.8 ],\n", |
---|
145 | " [ 1.49, 1.06, 0.8 , 0. ]])" |
---|
146 | ] |
---|
147 | } |
---|
148 | ], |
---|
149 | "prompt_number": 3 |
---|
150 | } |
---|
151 | ], |
---|
152 | "metadata": {} |
---|
153 | } |
---|
154 | ] |
---|
155 | } |
---|