You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
37 KiB
37 KiB
In [1]:
# The transcript fused two import statements onto one line, which is a
# SyntaxError; each import must stand on its own line.
from pattern.search import STRICT, search
from pattern.en import parsetree
https://github.com/clips/pattern/wiki/pattern-search (inspired by videogrep's search feature)
In [2]:
# Read the whole essay into memory.  A context manager closes the file
# handle deterministically; the original open(...).read() left the
# handle open until garbage collection.
with open("../txt/words-for-the-future/OTHERNESS.txt") as f:
    text = f.read()
In [3]:
text[:100]
Out[3]:
'Otherness | Daniel L. Everett\n\nWhen I was 26, I moved to the Amazon, from California, in order to st'
In [3]:
# Parse the text into a pattern Text: a list of tagged Sentence objects.
# NOTE(review): the traceback below shows this failing on Python 3.7 —
# pattern's lazy lexicon loader raises StopIteration inside a generator,
# which PEP 479 turns into RuntimeError.  A patched pattern install is
# needed for this cell (and the later Out[24] result) to succeed.
tree = parsetree(text)
--------------------------------------------------------------------------- StopIteration Traceback (most recent call last) ~/.local/lib/python3.7/site-packages/pattern/text/__init__.py in _read(path, encoding, comment) 608 yield line --> 609 raise StopIteration 610 StopIteration: The above exception was the direct cause of the following exception: RuntimeError Traceback (most recent call last) <ipython-input-3-ac287bca8f52> in <module> ----> 1 tree = parsetree(text) ~/.local/lib/python3.7/site-packages/pattern/text/en/__init__.py in parsetree(s, *args, **kwargs) 173 """ Returns a parsed Text from the given string. 174 """ --> 175 return Text(parse(s, *args, **kwargs)) 176 177 ~/.local/lib/python3.7/site-packages/pattern/text/en/__init__.py in parse(s, *args, **kwargs) 167 """ Returns a tagged Unicode string. 168 """ --> 169 return parser.parse(s, *args, **kwargs) 170 171 ~/.local/lib/python3.7/site-packages/pattern/text/__init__.py in parse(self, s, tokenize, tags, chunks, relations, lemmata, encoding, **kwargs) 1170 # Tagger (required by chunker, labeler & lemmatizer). 
1171 if tags or chunks or relations or lemmata: -> 1172 s[i] = self.find_tags(s[i], **kwargs) 1173 else: 1174 s[i] = [[w] for w in s[i]] ~/.local/lib/python3.7/site-packages/pattern/text/en/__init__.py in find_tags(self, tokens, **kwargs) 112 if kwargs.get("tagset") == UNIVERSAL: 113 kwargs.setdefault("map", lambda token, tag: penntreebank2universal(token, tag)) --> 114 return _Parser.find_tags(self, tokens, **kwargs) 115 116 ~/.local/lib/python3.7/site-packages/pattern/text/__init__.py in find_tags(self, tokens, **kwargs) 1111 # ["The", "cat", "purs"] => [["The", "DT"], ["cat", "NN"], ["purs", "VB"]] 1112 return find_tags(tokens, -> 1113 lexicon = kwargs.get("lexicon", self.lexicon or {}), 1114 model = kwargs.get("model", self.model), 1115 morphology = kwargs.get("morphology", self.morphology), ~/.local/lib/python3.7/site-packages/pattern/text/__init__.py in __len__(self) 374 375 def __len__(self): --> 376 return self._lazy("__len__") 377 378 def __iter__(self): ~/.local/lib/python3.7/site-packages/pattern/text/__init__.py in _lazy(self, method, *args) 366 """ 367 if dict.__len__(self) == 0: --> 368 self.load() 369 setattr(self, method, types.MethodType(getattr(dict, method), self)) 370 return getattr(dict, method)(self, *args) ~/.local/lib/python3.7/site-packages/pattern/text/__init__.py in load(self) 623 def load(self): 624 # Arnold NNP x --> 625 dict.update(self, (x.split(" ")[:2] for x in _read(self._path) if len(x.split(" ")) > 1)) 626 627 #--- FREQUENCY ------------------------------------------------------------------------------------- ~/.local/lib/python3.7/site-packages/pattern/text/__init__.py in <genexpr>(.0) 623 def load(self): 624 # Arnold NNP x --> 625 dict.update(self, (x.split(" ")[:2] for x in _read(self._path) if len(x.split(" ")) > 1)) 626 627 #--- FREQUENCY ------------------------------------------------------------------------------------- RuntimeError: generator raised StopIteration
In [5]:
tree
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-5-7a1e081e78d4> in <module> ----> 1 tree NameError: name 'tree' is not defined
In [24]:
tree[7]
Out[24]:
Sentence("This/DT/O/O encounter/RB/B-ADVP/O with/IN/B-PP/O these/DT/O/O ‘/''/O/O others/NNS/B-NP/O ,/,/O/O ’/''/O/O so/RB/B-ADVP/O unlike/IN/B-PP/B-PNP myself/PRP/B-NP/I-PNP ,/,/O/O was/VBD/B-VP/O to/TO/I-VP/O be/VB/I-VP/O the/DT/O/O defining/VBG/B-VP/O experience/NN/B-NP/O for/IN/B-PP/B-PNP the/DT/B-NP/I-PNP rest/NN/I-NP/I-PNP of/IN/B-PP/B-PNP my/PRP$/B-NP/I-PNP life/NN/I-NP/I-PNP ././O/O")
In [16]:
search("JJ", tree)
Out[16]:
[Match(words=[Word('unrelated/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('small/JJ')]), Match(words=[Word('missionary/JJ')]), Match(words=[Word('bumpy/JJ')]), Match(words=[Word('first/JJ')]), Match(words=[Word('weak/JJ')]), Match(words=[Word('taut/JJ')]), Match(words=[Word('unrelated/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('many/JJ')]), Match(words=[Word('little/JJ')]), Match(words=[Word('easy/JJ')]), Match(words=[Word('enough/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('uncomfortable/JJ')]), Match(words=[Word('suspicious/JJ')]), Match(words=[Word('new/JJ')]), Match(words=[Word('different/JJ')]), Match(words=[Word('simple/JJ')]), Match(words=[Word('binary/JJ')]), Match(words=[Word('old/JJ')]), Match(words=[Word('religious/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('different/JJ')]), Match(words=[Word('unintelligible/JJ')]), Match(words=[Word('different-looking/JJ')]), Match(words=[Word('otherness/JJ')]), Match(words=[Word('live/JJ')]), Match(words=[Word('unacceptable/JJ')]), Match(words=[Word('missionary/JJ')]), Match(words=[Word('eternal/JJ')]), Match(words=[Word('encounter/JJ')]), Match(words=[Word('uneasy/JJ')]), Match(words=[Word('dangerous/JJ')]), Match(words=[Word('insufficient/JJ')]), Match(words=[Word('ethno-centric/JJ')]), Match(words=[Word('own/JJ')]), Match(words=[Word('fortunate/JJ')]), Match(words=[Word('gentle/JJ')]), Match(words=[Word('many/JJ')]), Match(words=[Word('silly/JJ')]), Match(words=[Word('years-long/JJ')]), Match(words=[Word('first/JJ')]), Match(words=[Word('first/JJ')]), Match(words=[Word('young/JJ')]), Match(words=[Word('large/JJ')]), Match(words=[Word('small/JJ')]), Match(words=[Word('fresh/JJ')]), Match(words=[Word('young/JJ')]), Match(words=[Word('then-unintelligible/JJ')]), Match(words=[Word('easy/JJ')]), Match(words=[Word('polite/JJ')]), Match(words=[Word('Many/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('Western/JJ')]), 
Match(words=[Word('polite/JJ')]), Match(words=[Word('Western/JJ')]), Match(words=[Word('first/JJ')]), Match(words=[Word('close/JJ')]), Match(words=[Word('first/JJ')]), Match(words=[Word('small/JJ')]), Match(words=[Word('early/JJ')]), Match(words=[Word('subsequent/JJ')]), Match(words=[Word('individual/JJ')]), Match(words=[Word('normal/JJ')]), Match(words=[Word('comfortable/JJ')]), Match(words=[Word('new/JJ')]), Match(words=[Word('new/JJ')]), Match(words=[Word('dissonant/JJ')]), Match(words=[Word('steady/JJ')]), Match(words=[Word('familiar/JJ')]), Match(words=[Word('Comfort/JJ')]), Match(words=[Word('acquired/JJ')]), Match(words=[Word('different/JJ')]), Match(words=[Word('different/JJ')]), Match(words=[Word('sexual/JJ')]), Match(words=[Word('different/JJ')]), Match(words=[Word('biological/JJ')]), Match(words=[Word('little/JJ')]), Match(words=[Word('possible/JJ')]), Match(words=[Word('worthwhile/JJ')]), Match(words=[Word('obvious/JJ')]), Match(words=[Word('first/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('behavioral/JJ')]), Match(words=[Word('earliest/JJ')]), Match(words=[Word('normal/JJ')]), Match(words=[Word('correct/JJ')]), Match(words=[Word('crucial/JJ')]), Match(words=[Word('in-group/JJ')]), Match(words=[Word('social/JJ')]), Match(words=[Word('else/JJ')]), Match(words=[Word('own/JJ')]), Match(words=[Word('own/JJ')]), Match(words=[Word('familiar/JJ')]), Match(words=[Word('fit/JJ')]), Match(words=[Word('several/JJ')]), Match(words=[Word('Pirahã/JJ')]), Match(words=[Word('full/JJ')]), Match(words=[Word('noticed/JJ')]), Match(words=[Word('old/JJ')]), Match(words=[Word('sharp/JJ')]), Match(words=[Word('30cm/JJ')]), Match(words=[Word('dangerous/JJ')]), Match(words=[Word('handed/JJ')]), Match(words=[Word('non-life-threatening/JJ')]), Match(words=[Word('necessary/JJ')]), Match(words=[Word('Dutch/JJ')]), Match(words=[Word('sharp/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('m)otherness/JJ')]), Match(words=[Word('sure/JJ')]), 
Match(words=[Word('able/JJ')]), Match(words=[Word('occasional/JJ')]), Match(words=[Word('interesting/JJ')]), Match(words=[Word('crooked/JJ')]), Match(words=[Word('straight/JJ')]), Match(words=[Word('bizarre/JJ')]), Match(words=[Word('American/JJ')]), Match(words=[Word('missionary/JJ')]), Match(words=[Word('excited/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('Pirahã/JJ')]), Match(words=[Word('native/JJ')]), Match(words=[Word('native/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('local/JJ')]), Match(words=[Word('comfortable/JJ')]), Match(words=[Word('similar/JJ')]), Match(words=[Word('many/JJ')]), Match(words=[Word('favorite/JJ')]), Match(words=[Word('American/JJ')]), Match(words=[Word('different/JJ')]), Match(words=[Word('irrelevant/JJ')]), Match(words=[Word('brilliant/JJ')]), Match(words=[Word('boring/JJ')]), Match(words=[Word('adjacent/JJ')]), Match(words=[Word('full/JJ')]), Match(words=[Word('brilliant/JJ')]), Match(words=[Word('good/JJ')]), Match(words=[Word('human/JJ')]), Match(words=[Word('independent/JJ')]), Match(words=[Word('natural/JJ')]), Match(words=[Word('solitary/JJ')]), Match(words=[Word('strange/JJ')]), Match(words=[Word('slow/JJ')]), Match(words=[Word('otherness/JJ')]), Match(words=[Word('original/JJ')]), Match(words=[Word('paradoxical/JJ')]), Match(words=[Word('otherness/JJ')]), Match(words=[Word('otherness/JJ')]), Match(words=[Word('panoramic/JJ')]), Match(words=[Word('own/JJ')]), Match(words=[Word('unique/JJ')]), Match(words=[Word('important/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('individual/JJ')]), Match(words=[Word('s/JJ')]), Match(words=[Word('small/JJ')]), Match(words=[Word('read/JJ')]), Match(words=[Word('possible/JJ')]), Match(words=[Word('own/JJ')]), Match(words=[Word('i]The/JJ')]), Match(words=[Word('poor/JJ')]), Match(words=[Word('good/JJ')]), Match(words=[Word('measurable/JJ')]), Match(words=[Word('daily/JJ')]), Match(words=[Word('social/JJ')]), Match(words=[Word('little/JJ')]), 
Match(words=[Word('new/JJ')]), Match(words=[Word('conceptual/JJ')]), Match(words=[Word('cultural/JJ')]), Match(words=[Word('social/JJ')]), Match(words=[Word('predictable/JJ')]), Match(words=[Word('predictable/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('thinking/JJ')]), Match(words=[Word('strong/JJ')]), Match(words=[Word('desirable/JJ')]), Match(words=[Word('unexpected/JJ')]), Match(words=[Word('constant/JJ')]), Match(words=[Word('useful/JJ')]), Match(words=[Word('biological/JJ')]), Match(words=[Word('cognitive/JJ')]), Match(words=[Word('cultural/JJ')]), Match(words=[Word('unsuccessful/JJ')]), Match(words=[Word('strange/JJ')]), Match(words=[Word('successful/JJ')]), Match(words=[Word('own/JJ')]), Match(words=[Word('little/JJ')]), Match(words=[Word('such/JJ')]), Match(words=[Word('political/JJ')]), Match(words=[Word('important/JJ')]), Match(words=[Word('new/JJ')]), Match(words=[Word('new/JJ')]), Match(words=[Word('unable/JJ')]), Match(words=[Word('little/JJ')]), Match(words=[Word('little/JJ')]), Match(words=[Word('18th/JJ')]), Match(words=[Word('identical/JJ')]), Match(words=[Word('light/JJ')]), Match(words=[Word('multiple/JJ')]), Match(words=[Word('familiar/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('new/JJ')]), Match(words=[Word('new/JJ')]), Match(words=[Word('same/JJ')]), Match(words=[Word('same/JJ')]), Match(words=[Word('same/JJ')]), Match(words=[Word('same/JJ')]), Match(words=[Word('same/JJ')]), Match(words=[Word('same/JJ')]), Match(words=[Word('new/JJ')]), Match(words=[Word('anti-immigration/JJ')]), Match(words=[Word('political/JJ')]), Match(words=[Word('otherness/JJ')]), Match(words=[Word('otherness/JJ')]), Match(words=[Word('motivated/JJ')]), Match(words=[Word('ultimate/JJ')]), Match(words=[Word('otherness/JJ')]), Match(words=[Word('cognitive/JJ')]), Match(words=[Word('new/JJ')]), Match(words=[Word('new/JJ')]), Match(words=[Word('own/JJ')]), Match(words=[Word('only/JJ')]), Match(words=[Word('otherness/JJ')]), 
Match(words=[Word('otherness/JJ')]), Match(words=[Word('invented/JJ')]), Match(words=[Word('communal/JJ')]), Match(words=[Word('cultural/JJ')]), Match(words=[Word('cultural/JJ')]), Match(words=[Word('human/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('distinct/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('s/JJ')]), Match(words=[Word('other/JJ')]), Match(words=[Word('Amazonian/JJ')]), Match(words=[Word('doomed/JJ')]), Match(words=[Word('Greek/JJ')]), Match(words=[Word('repetitive/JJ')]), Match(words=[Word('daily/JJ')]), Match(words=[Word('huge/JJ')]), Match(words=[Word('only/JJ')]), Match(words=[Word('same/JJ')]), Match(words=[Word('next/JJ')])]
In [17]:
search('VB DT NN', tree)
Out[17]:
[Match(words=[Word('study/VB'), Word('the/DT'), Word('language/NN')]), Match(words=[Word('be/VB'), Word('a/DT'), Word('prostitute/NN')]), Match(words=[Word('seem/VB'), Word('that/DT'), Word('way/NN')]), Match(words=[Word('conduct/VB'), Word('a/DT'), Word('pilot/NN')]), Match(words=[Word('let/VB'), Word('the/DT'), Word('stick/NN')]), Match(words=[Word('remove/VB'), Word('the/DT'), Word('otherness/NN')]), Match(words=[Word('occupy/VB'), Word('a/DT'), Word('part/NN')])]
In [22]:
# Print the surface string of every verb–determiner–noun sequence
# (e.g. "study the language").  The loop variable m deliberately leaks:
# a later cell inspects m.string after the loop.
for m in search ("VB DT NN", tree): print (f"{m.string}")
study the language be a prostitute seem that way conduct a pilot let the stick remove the otherness occupy a part
In [20]:
m.string
Out[20]:
'occupy a part'
In [25]:
# Print every possessive-pronoun phrase: PRP$ (my/our/his/...) followed
# by any token, per pattern's wildcard syntax.
for m in search ("PRP$ *", tree): print (f"{m.string}")
My body my brain My task my life our species our child their differences my belief my encounter my own my silly beliefs my life my first day his hut its tongue our mother our mother our father our first experiences our values our mother and the select our subsequent lives Our earliest associations our narrow range our in-group my own writings.[1 our family or our village our own identity our identity our family our norm our experience our expectations its occupants their beliefs and children his face his mother her toddler her child his quasi-stabbing her child her child a sharp knife its contribution our lives their language their translations their comments my request their language our behavior their language their culture their language Our sense our enveloping our childhood development our conversations and the structures our interactions their phrases my favorite book his year His year its institutions our senses our sense his lessons our sense our own unique identity our oneness my life his own question his essay our behavior His example his case our lives our lives our expectations our environments our own our Homo ancestors our environment their language their culture and language our familiar environment our world Our preference our fear then itself Our languages and cognitive abilities their relationships our own our species ability our human our greatest fears our greatest treasure his efforts
In [9]:
from pattern.en import wordnet
In [19]:
# Take the first (most common) WordNet sense of "language".
sense = wordnet.synsets("language")[0]
In [20]:
sense.hypernym
Out[20]:
Synset('communication.n.02')
In [45]:
# Collect every word in the parse tree whose first WordNet sense is a
# (recursive) hyponym of a query word.  Query words are '|'-separated,
# e.g. "person|animal".
output = []
query = "person"  # renamed: the original reused the name search_word
                  # for both the full query and the per-word loop variable
for search_word in query.split('|'):
    query_synsets = wordnet.synsets(search_word)
    if not query_synsets:
        continue  # unknown query word — original raised IndexError here
    # Restrict the tree search to tokens sharing the part of speech of
    # the query word's first sense (e.g. NN for "person").
    pos = query_synsets[0].pos
    possible_words = search(pos, tree)
    for match in possible_words:
        word = match[0].string
        synsets = wordnet.synsets(word)
        if len(synsets) > 0:
            # Walk the full hypernym chain of the word's first sense and
            # keep the word if the query appears anywhere along it.
            hypernyms = synsets[0].hypernyms(recursive=True)
            if any(search_word == h.senses[0] for h in hypernyms):
                print(f"matching {word}")
                output.append(word)
matching neighbor matching friend matching child matching woman matching prostitute matching man matching man matching guest matching host matching mother matching mother matching father matching mother matching professor matching cowboy matching psychologist matching pilot matching toddler matching mother matching mother matching toddler matching woman matching baby matching child matching mother matching baby matching mother matching child matching mother matching child matching child matching child matching speaker matching speaker matching foreigner matching tourist matching friend matching man matching foreigner matching handyman matching stranger matching Homo matching Homo matching Homo
In [31]:
output
Out[31]:
['phrase']
In [ ]: