{ "metadata": { "name": "", "signature": "sha256:64ae4d65acccf80ba7ddc89b3b5c48be726e91cc3d5ec52f15071cae5adbd93a" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [
 "from vsm import *\n",
 "from vsm.extensions.corpusbuilders import toy_corpus\n",
 "\n",
 "\n",
 "# Ten short passages separated by blank lines: the first five are about\n",
 "# the Catholic Church and theology, the last five about mathematical\n",
 "# logic and computation.\n",
 "plain_corpus = \"\"\"\n",
 "His theology challenged the Pope of the Roman Catholic Church by\n",
 "teaching that the Bible is the only source of divinely revealed\n",
 "knowledge.\n",
 "\n",
 "Augustine is held in the Catholic Church to be the model teacher.\n",
 "\n",
 "Augustine was recognized as a Doctor of the Church by Pope Boniface\n",
 "VIII.\n",
 "\n",
 "Roman Catholic theology stated that faith alone cannot justify man.\n",
 "\n",
 "In the Catholic Church the Pope is regarded as the successor of Saint\n",
 "Peter.\n",
 "\n",
 "Alonzo Church was an American mathematician and logician who made\n",
 "major contributions to mathematical logic and the foundations of\n",
 "theoretical computer science.\n",
 "\n",
 "The lambda calculus was introduced by mathematician Alonzo Church as\n",
 "an investigation into the foundations of mathematics.\n",
 "\n",
 "The Church Turing thesis states that a function is algorithmically\n",
 "computable if and only if it is computable by a Turing machine.\n",
 "\n",
 "Mathematical logic has close connections to the foundations of\n",
 "mathematics, theoretical computer science.\n",
 "\n",
 "A Turing machine can be adapted to simulate the logic of any computer\n",
 "algorithm.\n",
 "\"\"\"\n",
 "\n",
 "# One label per passage, in order: 'Ecclesiastical 1'..'Ecclesiastical 5'\n",
 "# followed by 'Logic 1'..'Logic 5'. range() is used instead of xrange()\n",
 "# so the cell runs on both Python 2 and Python 3 kernels.\n",
 "metadata = ['Ecclesiastical ' + str(i) for i in range(1, 6)]\n",
 "\n",
 "metadata += ['Logic ' + str(i) for i in range(1, 6)]\n",
 "\n",
 "# Build the corpus twice from the same text: env_c with nltk_stop=False\n",
 "# and c with nltk_stop=True (presumably NLTK stop-word filtering --\n",
 "# confirm against toy_corpus).\n",
 "env_c = toy_corpus(plain_corpus, nltk_stop=False, metadata=metadata)\n",
 "c = toy_corpus(plain_corpus, nltk_stop=True, metadata=metadata)\n",
 "\n",
 "# Train the environment model on env_c; its matrix is handed to the\n",
 "# context model below.\n",
 "e = BeagleEnvironment(env_c, context_type='document')\n",
 "e.train()\n",
 "\n",
 "m = BeagleContextSeq(c, env_c, e.matrix, context_type='document')\n",
 "m.train()\n",
 "\n",
 "# Viewer over corpus c and the trained context model; used by the next cell.\n",
 "v = BeagleViewer(c, m)"
 ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "v.dist_word_word('logic', print_len=24)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
Words: logic
Word Distance Word Distance
logic 0.00000 connections 0.76702
mathematical 0.61011 alonzo 0.80445
theoretical 0.61208 mathematician 0.80459
science 0.62316 mathematics 0.83869
major 0.66616 church 1.10103
logician 0.67041 adapted 1.10351
computer 0.67150 algorithm 1.11369
contributions 0.67310 simulate 1.11534
american 0.67989 introduced 1.21524
made 0.68400 lambda 1.22073
foundations 0.71923 calculus 1.22433
close 0.75850 investigation 1.22615
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 2, "text": [ "LabeledColumn([('logic', 0.0), ('mathematical', 0.6101055862083946),\n", " ('theoretical', 0.6120753393846052),\n", " ('science', 0.6231596204347992), ('major', 0.6661561305085367),\n", " ('logician', 0.6704136760448762), ('computer', 0.6714984183058912),\n", " ('contributions', 0.6731025516642305),\n", " ('american', 0.6798934692261327), ('made', 0.6839999625506846),\n", " ('foundations', 0.719229991642062), ('close', 0.7585014156843009),\n", " ('connections', 0.767018583921025), ('alonzo', 0.8044512289909166),\n", " ('mathematician', 0.8045906177632307),\n", " ('mathematics', 0.8386881603326829), ('church', 1.1010315856023223),\n", " ('adapted', 1.1035113489503063), ('algorithm', 1.1136939203005587),\n", " ('simulate', 1.1153373791273127),\n", " ('introduced', 1.2152425500248623), ('lambda', 1.220733368628201),\n", " ('calculus', 1.2243342122688632),\n", " ('investigation', 1.2261465306164703),\n", " ('machine', 1.269417682514073), ('turing', 1.3211064581969558),\n", " ('computable', 1.429471397103359),\n", " ('algorithmically', 1.438249529963765),\n", " ('thesis', 1.4407465300952085), ('function', 1.4428796239962542),\n", " ('states', 1.451198974678721), ('regarded', 1.48288688714894),\n", " ('augustine', 1.4839206355080299), ('pope', 1.4901360299448079),\n", " ('held', 1.4955110549432036), ('successor', 1.4994001138838537),\n", " ('teacher', 1.5002618508131433), ('model', 1.508414402427858),\n", " ('peter', 1.5097737324875613), ('catholic', 1.5182696911494529),\n", " ('saint', 1.5200635742191289), ('recognized', 1.5298638726650464),\n", " ('doctor', 1.5299486476988773), ('viii', 1.5313652271508718),\n", " ('source', 1.5358331193775343), ('boniface', 1.537475215267472),\n", " ('revealed', 1.5390566920932474), ('bible', 1.539912865371827),\n", " ('challenged', 1.5416800657102432),\n", " ('divinely', 1.5443999441008813), ('teaching', 1.545039019845761),\n", " ('knowledge', 
1.549882372801595), ('theology', 1.5685637414507483),\n", " ('roman', 1.577005531262446), ('justify', 1.5849890563012603),\n", " ('alone', 1.5874528497618008), ('stated', 1.5911502226284824),\n", " ('man', 1.596215557394277), ('faith', 1.5978268253734298)], \n", " dtype=[('word', '|S15'), ('value', '