bo-graduation/venv/lib/python3.7/site-packages/nltk/test/dependency.doctest

.. Copyright (C) 2001-2019 NLTK Project
.. For license information, see LICENSE.TXT

===================
Dependency Grammars
===================

    >>> from nltk.grammar import DependencyGrammar
    >>> from nltk.parse import (
    ...     DependencyGraph,
    ...     ProjectiveDependencyParser,
    ...     NonprojectiveDependencyParser,
    ... )

CoNLL Data
----------

    >>> treebank_data = """Pierre  NNP     2       NMOD
    ... Vinken  NNP     8       SUB
    ... ,       ,       2       P
    ... 61      CD      5       NMOD
    ... years   NNS     6       AMOD
    ... old     JJ      2       NMOD
    ... ,       ,       2       P
    ... will    MD      0       ROOT
    ... join    VB      8       VC
    ... the     DT      11      NMOD
    ... board   NN      9       OBJ
    ... as      IN      9       VMOD
    ... a       DT      15      NMOD
    ... nonexecutive    JJ      15      NMOD
    ... director        NN      12      PMOD
    ... Nov.    NNP     9       VMOD
    ... 29      CD      16      NMOD
    ... .       .       9       VMOD
    ... """

    >>> dg = DependencyGraph(treebank_data)
    >>> dg.tree().pprint()
    (will
      (Vinken Pierre , (old (years 61)) ,)
      (join (board the) (as (director a nonexecutive)) (Nov. 29) .))
    >>> for head, rel, dep in dg.triples():
    ...     print(
    ...         '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})'
    ...         .format(h=head, r=rel, d=dep)
    ...     )
    (will, MD), SUB, (Vinken, NNP)
    (Vinken, NNP), NMOD, (Pierre, NNP)
    (Vinken, NNP), P, (,, ,)
    (Vinken, NNP), NMOD, (old, JJ)
    (old, JJ), AMOD, (years, NNS)
    (years, NNS), NMOD, (61, CD)
    (Vinken, NNP), P, (,, ,)
    (will, MD), VC, (join, VB)
    (join, VB), OBJ, (board, NN)
    (board, NN), NMOD, (the, DT)
    (join, VB), VMOD, (as, IN)
    (as, IN), PMOD, (director, NN)
    (director, NN), NMOD, (a, DT)
    (director, NN), NMOD, (nonexecutive, JJ)
    (join, VB), VMOD, (Nov., NNP)
    (Nov., NNP), NMOD, (29, CD)
    (join, VB), VMOD, (., .)

Using a custom cell extractor.

    >>> def custom_extractor(cells):
    ...     _, tag, head, rel = cells
    ...     return 'spam', 'spam', tag, tag, '', head, rel
    >>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor)
    >>> dg.tree().pprint()
    (spam
      (spam spam spam (spam (spam spam)) spam)
      (spam (spam spam) (spam (spam spam spam)) (spam spam) spam))

Custom cell extractors can take in and return an index.

    >>> def custom_extractor(cells, index):
    ...     word, tag, head, rel = cells
    ...     return (index, '{}-{}'.format(word, index), word,
    ...             tag, tag, '', head, rel)
    >>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor)
    >>> dg.tree().pprint()
    (will-8
      (Vinken-2 Pierre-1 ,-3 (old-6 (years-5 61-4)) ,-7)
      (join-9
        (board-11 the-10)
        (as-12 (director-15 a-13 nonexecutive-14))
        (Nov.-16 29-17)
        .-18))

Using the dependency-parsed version of the Penn Treebank corpus sample.

    >>> from nltk.corpus import dependency_treebank
    >>> t = dependency_treebank.parsed_sents()[0]
    >>> print(t.to_conll(3))  # doctest: +NORMALIZE_WHITESPACE
    Pierre      NNP     2
    Vinken      NNP     8
    ,   ,       2
    61  CD      5
    years       NNS     6
    old JJ      2
    ,   ,       2
    will        MD      0
    join        VB      8
    the DT      11
    board       NN      9
    as  IN      9
    a   DT      15
    nonexecutive        JJ      15
    director    NN      12
    Nov.        NNP     9
    29  CD      16
    .   .       8

Using the output of zpar (like Malt-TAB, but with zero-based indexing).

    >>> zpar_data = """
    ... Pierre	NNP	1	NMOD
    ... Vinken	NNP	7	SUB
    ... ,	,	1	P
    ... 61	CD	4	NMOD
    ... years	NNS	5	AMOD
    ... old	JJ	1	NMOD
    ... ,	,	1	P
    ... will	MD	-1	ROOT
    ... join	VB	7	VC
    ... the	DT	10	NMOD
    ... board	NN	8	OBJ
    ... as	IN	8	VMOD
    ... a	DT	14	NMOD
    ... nonexecutive	JJ	14	NMOD
    ... director	NN	11	PMOD
    ... Nov.	NNP	8	VMOD
    ... 29	CD	15	NMOD
    ... .	.	7	P
    ... """

    >>> zdg = DependencyGraph(zpar_data, zero_based=True)
    >>> print(zdg.tree())
    (will
      (Vinken Pierre , (old (years 61)) ,)
      (join (board the) (as (director a nonexecutive)) (Nov. 29))
      .)


Projective Dependency Parsing
-----------------------------

    >>> grammar = DependencyGrammar.fromstring("""
    ... 'fell' -> 'price' | 'stock'
    ... 'price' -> 'of' 'the'
    ... 'of' -> 'stock'
    ... 'stock' -> 'the'
    ... """)
    >>> print(grammar)
    Dependency grammar with 5 productions
      'fell' -> 'price'
      'fell' -> 'stock'
      'price' -> 'of' 'the'
      'of' -> 'stock'
      'stock' -> 'the'

    >>> dp = ProjectiveDependencyParser(grammar)
    >>> for t in sorted(dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])):
    ...     print(t)
    (fell (price the (of (stock the))))
    (fell (price the of) (stock the))
    (fell (price the of the) stock)

Non-Projective Dependency Parsing
---------------------------------

    >>> grammar = DependencyGrammar.fromstring("""
    ... 'taught' -> 'play' | 'man'
    ... 'man' -> 'the'
    ... 'play' -> 'golf' | 'dog' | 'to'
    ... 'dog' -> 'his'
    ... """)
    >>> print(grammar)
    Dependency grammar with 7 productions
      'taught' -> 'play'
      'taught' -> 'man'
      'man' -> 'the'
      'play' -> 'golf'
      'play' -> 'dog'
      'play' -> 'to'
      'dog' -> 'his'

    >>> dp = NonprojectiveDependencyParser(grammar)
    >>> g, = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf'])

    >>> print(g.root['word'])
    taught

    >>> for _, node in sorted(g.nodes.items()):
    ...     if node['word'] is not None:
    ...         print('{address} {word}: {d}'.format(d=node['deps'][''], **node))
    1 the: []
    2 man: [1]
    3 taught: [2, 7]
    4 his: []
    5 dog: [4]
    6 to: []
    7 play: [5, 6, 8]
    8 golf: []

    >>> print(g.tree())
    (taught (man the) (play (dog his) to golf))

Integration with MALT parser
============================

If the top relation label differs from the default, it can be set explicitly
via ``top_relation_label``. The MALT parser, for example, uses ``'null'``.

>>> dg_str = """1       I       _       NN      NN      _       2       nn      _       _
... 2   shot    _       NN      NN      _       0       null    _       _
... 3   an      _       AT      AT      _       2       dep     _       _
... 4   elephant        _       NN      NN      _       7       nn      _       _
... 5   in      _       NN      NN      _       7       nn      _       _
... 6   my      _       NN      NN      _       7       nn      _       _
... 7   pajamas _       NNS     NNS     _       3       dobj    _       _
... """
>>> dg = DependencyGraph(dg_str, top_relation_label='null')

>>> len(dg.nodes)
8

>>> dg.root['word'], dg.root['address']
('shot', 2)

>>> print(dg.to_conll(10))  # doctest: +NORMALIZE_WHITESPACE
1   I       _       NN      NN      _       2       nn      _       _
2   shot    _       NN      NN      _       0       null    _       _
3   an      _       AT      AT      _       2       dep     _       _
4   elephant        _       NN      NN      _       7       nn      _       _
5   in      _       NN      NN      _       7       nn      _       _
6   my      _       NN      NN      _       7       nn      _       _
7   pajamas _       NNS     NNS     _       3       dobj    _       _

.. readme check 5 years ago
			`.. Copyright (C) 2001-2019 NLTK Project`
			`.. For license information, see LICENSE.TXT`

			`===================`
			`Dependency Grammars`
			`===================`

			`>>> from nltk.grammar import DependencyGrammar`
			`>>> from nltk.parse import (`
			`... DependencyGraph,`
			`... ProjectiveDependencyParser,`
			`... NonprojectiveDependencyParser,`
			`... )`

			`CoNLL Data`
			`----------`

			`>>> treebank_data = """Pierre NNP 2 NMOD`
			`... Vinken NNP 8 SUB`
			`... , , 2 P`
			`... 61 CD 5 NMOD`
			`... years NNS 6 AMOD`
			`... old JJ 2 NMOD`
			`... , , 2 P`
			`... will MD 0 ROOT`
			`... join VB 8 VC`
			`... the DT 11 NMOD`
			`... board NN 9 OBJ`
			`... as IN 9 VMOD`
			`... a DT 15 NMOD`
			`... nonexecutive JJ 15 NMOD`
			`... director NN 12 PMOD`
			`... Nov. NNP 9 VMOD`
			`... 29 CD 16 NMOD`
			`... . . 9 VMOD`
			`... """`

			`>>> dg = DependencyGraph(treebank_data)`
			`>>> dg.tree().pprint()`
			`(will`
			`(Vinken Pierre , (old (years 61)) ,)`
			`(join (board the) (as (director a nonexecutive)) (Nov. 29) .))`
			`>>> for head, rel, dep in dg.triples():`
			`... print(`
			`... '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})'`
			`... .format(h=head, r=rel, d=dep)`
			`... )`
			`(will, MD), SUB, (Vinken, NNP)`
			`(Vinken, NNP), NMOD, (Pierre, NNP)`
			`(Vinken, NNP), P, (,, ,)`
			`(Vinken, NNP), NMOD, (old, JJ)`
			`(old, JJ), AMOD, (years, NNS)`
			`(years, NNS), NMOD, (61, CD)`
			`(Vinken, NNP), P, (,, ,)`
			`(will, MD), VC, (join, VB)`
			`(join, VB), OBJ, (board, NN)`
			`(board, NN), NMOD, (the, DT)`
			`(join, VB), VMOD, (as, IN)`
			`(as, IN), PMOD, (director, NN)`
			`(director, NN), NMOD, (a, DT)`
			`(director, NN), NMOD, (nonexecutive, JJ)`
			`(join, VB), VMOD, (Nov., NNP)`
			`(Nov., NNP), NMOD, (29, CD)`
			`(join, VB), VMOD, (., .)`

			`Using a custom cell extractor.`

			`>>> def custom_extractor(cells):`
			`... _, tag, head, rel = cells`
			`... return 'spam', 'spam', tag, tag, '', head, rel`
			`>>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor)`
			`>>> dg.tree().pprint()`
			`(spam`
			`(spam spam spam (spam (spam spam)) spam)`
			`(spam (spam spam) (spam (spam spam spam)) (spam spam) spam))`

			`Custom cell extractors can take in and return an index.`

			`>>> def custom_extractor(cells, index):`
			`... word, tag, head, rel = cells`
			`... return (index, '{}-{}'.format(word, index), word,`
			`... tag, tag, '', head, rel)`
			`>>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor)`
			`>>> dg.tree().pprint()`
			`(will-8`
			`(Vinken-2 Pierre-1 ,-3 (old-6 (years-5 61-4)) ,-7)`
			`(join-9`
			`(board-11 the-10)`
			`(as-12 (director-15 a-13 nonexecutive-14))`
			`(Nov.-16 29-17)`
			`.-18))`

			`Using the dependency-parsed version of the Penn Treebank corpus sample.`

			`>>> from nltk.corpus import dependency_treebank`
			`>>> t = dependency_treebank.parsed_sents()[0]`
			`>>> print(t.to_conll(3)) # doctest: +NORMALIZE_WHITESPACE`
			`Pierre NNP 2`
			`Vinken NNP 8`
			`, , 2`
			`61 CD 5`
			`years NNS 6`
			`old JJ 2`
			`, , 2`
			`will MD 0`
			`join VB 8`
			`the DT 11`
			`board NN 9`
			`as IN 9`
			`a DT 15`
			`nonexecutive JJ 15`
			`director NN 12`
			`Nov. NNP 9`
			`29 CD 16`
			`. . 8`

			`Using the output of zpar (like Malt-TAB but with zero-based indexing)`

			`>>> zpar_data = """`
			`... Pierre NNP 1 NMOD`
			`... Vinken NNP 7 SUB`
			`... , , 1 P`
			`... 61 CD 4 NMOD`
			`... years NNS 5 AMOD`
			`... old JJ 1 NMOD`
			`... , , 1 P`
			`... will MD -1 ROOT`
			`... join VB 7 VC`
			`... the DT 10 NMOD`
			`... board NN 8 OBJ`
			`... as IN 8 VMOD`
			`... a DT 14 NMOD`
			`... nonexecutive JJ 14 NMOD`
			`... director NN 11 PMOD`
			`... Nov. NNP 8 VMOD`
			`... 29 CD 15 NMOD`
			`... . . 7 P`
			`... """`

			`>>> zdg = DependencyGraph(zpar_data, zero_based=True)`
			`>>> print(zdg.tree())`
			`(will`
			`(Vinken Pierre , (old (years 61)) ,)`
			`(join (board the) (as (director a nonexecutive)) (Nov. 29))`
			`.)`


			`Projective Dependency Parsing`
			`-----------------------------`

			`>>> grammar = DependencyGrammar.fromstring("""`
			`... 'fell' -> 'price' \| 'stock'`
			`... 'price' -> 'of' 'the'`
			`... 'of' -> 'stock'`
			`... 'stock' -> 'the'`
			`... """)`
			`>>> print(grammar)`
			`Dependency grammar with 5 productions`
			`'fell' -> 'price'`
			`'fell' -> 'stock'`
			`'price' -> 'of' 'the'`
			`'of' -> 'stock'`
			`'stock' -> 'the'`

			`>>> dp = ProjectiveDependencyParser(grammar)`
			`>>> for t in sorted(dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])):`
			`... print(t)`
			`(fell (price the (of (stock the))))`
			`(fell (price the of) (stock the))`
			`(fell (price the of the) stock)`

			`Non-Projective Dependency Parsing`
			`---------------------------------`

			`>>> grammar = DependencyGrammar.fromstring("""`
			`... 'taught' -> 'play' \| 'man'`
			`... 'man' -> 'the'`
			`... 'play' -> 'golf' \| 'dog' \| 'to'`
			`... 'dog' -> 'his'`
			`... """)`
			`>>> print(grammar)`
			`Dependency grammar with 7 productions`
			`'taught' -> 'play'`
			`'taught' -> 'man'`
			`'man' -> 'the'`
			`'play' -> 'golf'`
			`'play' -> 'dog'`
			`'play' -> 'to'`
			`'dog' -> 'his'`

			`>>> dp = NonprojectiveDependencyParser(grammar)`
			`>>> g, = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf'])`

			`>>> print(g.root['word'])`
			`taught`

			`>>> for _, node in sorted(g.nodes.items()):`
			`... if node['word'] is not None:`
			`... print('{address} {word}: {d}'.format(d=node['deps'][''], **node))`
			`1 the: []`
			`2 man: [1]`
			`3 taught: [2, 7]`
			`4 his: []`
			`5 dog: [4]`
			`6 to: []`
			`7 play: [5, 6, 8]`
			`8 golf: []`

			`>>> print(g.tree())`
			`(taught (man the) (play (dog his) to golf))`

			`Integration with MALT parser`
			`============================`

			`In case the top relation is different from the default, we can set it. In case`
			of MALT parser, it's set to `'null'`.

			`>>> dg_str = """1 I _ NN NN _ 2 nn _ _`
			`... 2 shot _ NN NN _ 0 null _ _`
			`... 3 an _ AT AT _ 2 dep _ _`
			`... 4 elephant _ NN NN _ 7 nn _ _`
			`... 5 in _ NN NN _ 7 nn _ _`
			`... 6 my _ NN NN _ 7 nn _ _`
			`... 7 pajamas _ NNS NNS _ 3 dobj _ _`
			`... """`
			`>>> dg = DependencyGraph(dg_str, top_relation_label='null')`

			`>>> len(dg.nodes)`
			`8`

			`>>> dg.root['word'], dg.root['address']`
			`('shot', 2)`

			`>>> print(dg.to_conll(10)) # doctest: +NORMALIZE_WHITESPACE`
			`1 I _ NN NN _ 2 nn _ _`
			`2 shot _ NN NN _ 0 null _ _`
			`3 an _ AT AT _ 2 dep _ _`
			`4 elephant _ NN NN _ 7 nn _ _`
			`5 in _ NN NN _ 7 nn _ _`
			`6 my _ NN NN _ 7 nn _ _`
			`7 pajamas _ NNS NNS _ 3 dobj _ _`