# search, a bzr plugin for searching within bzr branches/repositories.
# Copyright (C) 2008 Robert Collins
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as published
# by the Free Software Foundation.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
# 

"""Tests for the index layer."""

from bzrlib import version_info as bzrlib_version
from bzrlib.errors import NotBranchError, UnknownFormatError
from bzrlib.btree_index import BTreeGraphIndex, BTreeBuilder
from bzrlib.index import InMemoryGraphIndex, GraphIndex
from bzrlib import log
from bzrlib.plugins import search
from bzrlib.plugins.search import errors, index
from bzrlib.tests import (
    condition_isinstance,
    multiply_tests,
    split_suite_by_condition,
    TestCaseWithTransport,
    )


def load_tests(basic_tests, module, test_loader):
    """Multiply the format-dependent test classes across their scenarios.

    The component builder/combiner tests are run once for every entry in
    index._FORMATS, and the graph index suggestion tests once for each
    (builder class, suggestable index class) pair. Each scenario sets the
    'format' attribute on the tests it produces.

    :param basic_tests: The tests loaded from this module.
    :param module: Unused here.
    :param test_loader: Unused here.
    :return: The suite holding the unparameterised tests, which is also
        handed to multiply_tests as the destination for the multiplied ones.
    """
    is_component_test = condition_isinstance(
        (TestComponentIndexBuilder, TestComponentCombiner))
    component_tests, remaining = split_suite_by_condition(
        basic_tests, is_component_test)
    is_suggestion_test = condition_isinstance(TestGraphIndexSuggestions)
    suggestion_tests, remaining = split_suite_by_condition(
        remaining, is_suggestion_test)
    component_scenarios = []
    for signature, search_format in index._FORMATS.items():
        # The scenario name is the signature without its final character
        # (presumably the trailing newline of the format string).
        component_scenarios.append(
            (signature[:-1], {'format': search_format}))
    multiply_tests(component_tests, component_scenarios, remaining)
    suggestion_scenarios = [
        ("GraphIndex", {
            'format': (InMemoryGraphIndex, index.SuggestableGraphIndex)}),
        ("BTree", {
            'format': (BTreeBuilder, index.SuggestableBTreeGraphIndex)}),
        ]
    multiply_tests(suggestion_tests, suggestion_scenarios, remaining)
    return remaining


class TestIndex(TestCaseWithTransport):
    """Tests for creating, opening and updating a search index on a branch."""

    def assertFreshIndexLayout(self, search_index, format_bytes, names_class):
        """Check the on-disk layout of an index just initialised for 'foo'.

        :param search_index: The object returned by index.init_index.
        :param format_bytes: The expected content of the 'format' file.
        :param names_class: The graph index class used to read the 'names'
            file (GraphIndex for format 1, BTreeGraphIndex for format 2).
        """
        # We should have some basic files on disk, and a valid index returned.
        self.assertIsInstance(search_index, index.Index)
        transport = self.get_transport('foo/.bzr/bzr-search')
        # We expect two files:
        # - format, containing the format signature
        # - a names file, which is an empty graph index
        self.assertEqual(format_bytes, transport.get_bytes('format'))
        names_list = names_class(transport, 'names', None)
        self.assertEqual([], list(names_list.iter_all_entries()))
        # And the working directories (only their presence is checked).
        for directory in ('obsolete', 'upload', 'indices'):
            self.assertTrue(transport.has(directory))

    def test_init_index_default(self):
        """Without an explicit format, init_index creates a format 2 index."""
        branch = self.make_branch('foo')
        search_index = index.init_index(branch)
        self.assertFreshIndexLayout(search_index,
            'bzr-search search folder 2\n', BTreeGraphIndex)

    def test_init_index_1(self):
        """Format 1 uses a plain GraphIndex for its names file."""
        branch = self.make_branch('foo')
        search_index = index.init_index(branch, 1)
        self.assertFreshIndexLayout(search_index,
            'bzr-search search folder 1\n', GraphIndex)

    def test_init_index_2(self):
        """Format 2 uses a BTreeGraphIndex for its names file."""
        branch = self.make_branch('foo')
        search_index = index.init_index(branch, 2)
        self.assertFreshIndexLayout(search_index,
            'bzr-search search folder 2\n', BTreeGraphIndex)

    def test_init_index_unindexable(self):
        """init_index raises CannotIndex for a non-metadir branch."""
        # any non-metadir will do here:
        branch = self.make_branch('foo', format='weave')
        self.assertRaises(errors.CannotIndex, index.init_index, branch)

    def test_open_no_index_error(self):
        """Opening a url with no index raises NoSearchIndex carrying the url."""
        err = self.assertRaises(errors.NoSearchIndex, index.open_index_url,
            self.get_url())
        self.assertEqual(self.get_url(), err.url)

    def test_open_index_wrong_format_errors(self):
        """An unrecognised format file makes Index raise UnknownFormatError."""
        branch = self.make_branch('foo', format='pack-0.92')
        # Only the on-disk side effect is wanted; the returned index is not.
        index.init_index(branch)
        transport = self.get_transport('foo/.bzr/bzr-search')
        transport.put_bytes('format', 'garbage\n')
        self.assertRaises(UnknownFormatError, index.Index, transport, branch)

    def test_open_index_missing_format_raises_NoSearchIndex(self):
        """An index directory without a format file is treated as no index."""
        branch = self.make_branch('foo', format='pack-0.92')
        transport = self.get_transport('foo/.bzr/bzr-search')
        transport.mkdir('.')
        self.assertRaises(errors.NoSearchIndex, index.Index, transport, branch)

    def test_index_url_not_branch(self):
        """index_url on a location without a branch raises NotBranchError."""
        self.assertRaises(NotBranchError, index.index_url,
            self.get_url())

    def test_index_url_returns_index(self):
        """index_url returns an Index for a branch that had no index yet."""
        # The branch only needs to exist on disk; it is reopened via its url.
        self.make_branch('foo')
        search_index = index.index_url(self.get_url('foo'))
        self.assertIsInstance(search_index, index.Index)

    def test_index_url_does_index(self):
        """index_url indexes the revisions already present in the branch."""
        tree = self.make_branch_and_tree('foo')
        revid = tree.commit('first post')
        rev_index = index.index_url(self.get_url('foo'))
        self.assertEqual(set([(revid,)]), set(rev_index.indexed_revisions()))

    def test_index_url_is_incremental(self):
        """A second index_url call only indexes the revisions added since."""
        tree = self.make_branch_and_tree('foo')
        # two initial commits (as we want to avoid the first autopack)
        revid1 = tree.commit('1')
        revid2 = tree.commit('2')
        rev_index = index.index_url(self.get_url('foo'))
        self.assertEqual(set([(revid1,), (revid2,)]),
            set(rev_index.indexed_revisions()))
        base_names = rev_index._current_names.keys()
        self.assertEqual(1, len(base_names))
        revid3 = tree.commit('3')
        rev_index = index.index_url(self.get_url('foo'))
        self.assertEqual(set([(revid1,), (revid2,), (revid3,)]),
            set(rev_index.indexed_revisions()))
        new_names = rev_index._current_names.keys()
        self.assertSubset(base_names, new_names)
        self.assertEqual(2, len(new_names))
        # The new index should only have revid3 in it.
        new_name = list(set(new_names) - set(base_names))[0]
        new_component = rev_index._current_names[new_name][1]
        self.assertEqual([(revid3,)], [node[1] for node in
            new_component.revision_index.iter_all_entries()])

    def test_index_combining(self):
        """Indexing revisions one at a time combines components as expected."""
        # After inserting 1 revision, we get one pack,
        # After 2 we should still have 1, but also two discards
        # 3 should map to 2 packs, as should 4 (but with 2 discard)
        # To test: we create four revisions:
        tree = self.make_branch_and_tree('foo')
        tree.add(['README.txt'], ['an-id'], ['file'])
        tree.put_file_bytes_non_atomic('an-id', "file")
        revid1 = tree.commit('1')
        revid2 = tree.commit('2')
        revid3 = tree.commit('3')
        revid4 = tree.commit('4')
        rev_index = index.init_index(tree.branch)
        def get_names():
            return [name + '.pack' for name in rev_index._current_names]
        # index one revision
        rev_index.index_revisions(tree.branch, [revid1])
        self.assertEqual(1, len(rev_index._current_names))
        names = get_names()
        # index the second, should pack
        rev_index.index_revisions(tree.branch, [revid2])
        self.assertEqual(1, len(rev_index._current_names))
        obsolete_names = rev_index._obsolete_transport.list_dir('.')
        # There should be two - the old name and one more.
        self.assertSubset(names, obsolete_names)
        self.assertEqual(2, len(obsolete_names))
        names = get_names()
        # index the third, should not pack, and not clean obsoletes, and leave
        # the existing pack in place.
        rev_index.index_revisions(tree.branch, [revid3])
        self.assertEqual(2, len(rev_index._current_names))
        self.assertEqual(obsolete_names,
            rev_index._obsolete_transport.list_dir('.'))
        # new names should be the pack for revid3
        new_names = set(get_names()) - set(names)
        self.assertEqual(1, len(new_names))
        # index the fourth, which should pack the new name and the fourth one
        # still leaving the previous one untouched, should clean obsoletes and
        # put what was new on three into it
        rev_index.index_revisions(tree.branch, [revid4])
        self.assertEqual(2, len(rev_index._current_names))
        obsolete_names = rev_index._obsolete_transport.list_dir('.')
        # the revid3 pack should have been obsoleted
        self.assertSubset(new_names, obsolete_names)
        self.assertEqual(2, len(obsolete_names))
        new_names = set(get_names()) - set(names)
        self.assertEqual(1, len(new_names))
        self.assertEqual({
            ("1",):set([('r', '', revid1)]),
            ("2",):set([('r', '', revid2)]),
            ("3",):set([('r', '', revid3)]),
            ("4",):set([('r', '', revid4)]),
            ('jrandom@example.com',): set([('r', '', revid1),
                ('r', '', revid2), ('r', '', revid3), ('r', '', revid4)]),
            ('an-id', revid1):set([('p', '', 'README.txt')]),
            ("file",):set([('f', 'an-id', revid1)]),
            }, dict(rev_index.all_terms()))
        self.assertEqual(set([(revid1,), (revid2,), (revid3,), (revid4,)]),
            set(rev_index.indexed_revisions()))


class TestIndexRevisions(TestCaseWithTransport):
    """Tests for indexing of a set of revisions."""

    def get_all_terms(self, rev_index):
        """Return rev_index's terms as a dict of term -> set of postings.

        :param rev_index: An index whose all_terms() yields
            (term, posting_list) pairs.
        """
        return dict((term, set(posting_list))
            for term, posting_list in rev_index.all_terms())

    def test_empty_one_revision(self):
        """Indexing one commit records its message, people, bugs and text."""
        # Hugish smoke test - really want smaller units of testing...
        tree = self.make_branch_and_tree('')
        tree.add(['README.txt'], ['an-id'], ['file'])
        tree.put_file_bytes_non_atomic('an-id',
            "This is the first commit to this working tree.\n"
            )
        rev_index = index.init_index(tree.branch)
        # The double-space is a cheap smoke test for the tokeniser.
        bugs = "http://bugtrack.org/1234\nhttp://bugtrack.org/5678"
        revid = tree.commit('first  post', committer="Joe Soap <joe@acme.com>",
                            authors=["Foo Baa <foo@example.com>"],
                            revprops={'bugs':bugs})
        rev_index.index_revisions(tree.branch, [revid])
        self.assertEqual(set([(revid,)]), set(rev_index.indexed_revisions()))
        # reopen - it should retain the indexed revisions.
        rev_index = index.open_index_url('')
        self.assertEqual(set([(revid,)]), set(rev_index.indexed_revisions()))
        # The terms posting-lists for a simple commit should be:
        # The date (TODO, needs some thought on how to represent a date term)
        # The committer name, email, commit message, bug fixes, properties
        # paths present
        # content of documents.
        expected_terms = {
            ('first',): set([('r', '', revid), ('f', 'an-id', revid)]),
            ('post',): set([('r', '', revid)]),
            ("This",): set([('f', 'an-id', revid)]),
            ("is",): set([('f', 'an-id', revid)]),
            ("the",): set([('f', 'an-id', revid)]),
            ("commit",): set([('f', 'an-id', revid)]),
            ("to",): set([('f', 'an-id', revid)]),
            ("this",): set([('f', 'an-id', revid)]),
            ("working",): set([('f', 'an-id', revid)]),
            ("tree",): set([('f', 'an-id', revid)]),
            ('an-id', revid): set([('p', '', 'README.txt')]),
            ('Baa',): set([('r', '', revid)]),
            ('Foo',): set([('r', '', revid)]),
            ('Joe',): set([('r', '', revid)]),
            ('Soap',): set([('r', '', revid)]),
            ('foo@example.com',): set([('r', '', revid)]),
            ('joe@acme.com',): set([('r', '', revid)]),
            ('http://bugtrack.org/1234',): set([('r', '', revid)]),
            ('http://bugtrack.org/5678',): set([('r', '', revid)]),
            }
        self.assertEqual(expected_terms, self.get_all_terms(rev_index))

    def test_deleted_path_not_indexed_format_1(self):
        """Format 1 records no path posting for the revision deleting a file."""
        tree = self.make_branch_and_tree('')
        rev_index = index.init_index(tree.branch, 1)
        tree.add(['README.txt'], ['an-id'], ['file'])
        tree.put_file_bytes_non_atomic('an-id', "content.\n")
        revid = tree.commit('add')
        tree.remove(['README.txt'])
        revid2 = tree.commit('delete')
        rev_index.index_revisions(tree.branch, [revid, revid2])
        self.assertEqual(set([(revid,), (revid2,)]),
            set(rev_index.indexed_revisions()))
        rev_index = index.open_index_url('')
        all_terms = self.get_all_terms(rev_index)
        # A deleted path is indexed at the point of deletion, and format one
        # does not support this, so must not have a posting list for it.
        self.assertFalse(('an-id', revid2) in all_terms)
        # For a format that does support it, presence would be tested with:
        #   self.assertSubset([('an-id', revid2)], all_terms)
        #   self.assertEqual(set([('p', '', 'README.txt')]),
        #       all_terms[('an-id', revid2)])

    def test_knit_snapshots_not_indexed(self):
        """Content carried into a knit fulltext snapshot is not re-indexed."""
        # knit snapshots are a contributing factor to getting too-many hits.
        # instead only new lines should really be considered.
        # Setup - knits do not expose where snapshots occur, so to test
        # this we create three versions of a file, which differ nearly entirely
        # between serial versions. This should trigger the heuristics on
        # aggregate size causing the third one to be a snapshot; it should not
        # be indexed with content matching the lines carried across from the
        # first or second commits.
        # Need a knit-compression using format to test:
        tree = self.make_branch_and_tree('', format="1.9")
        rev_index, revid3 = self.make_indexes_deltas_fixture(tree)
        tree.lock_read()
        try:
            self.assertEqual('fulltext',
                tree.branch.repository.texts._index.get_method(
                    ('an-id', revid3)))
        finally:
            # Release the lock even when the assertion fails, so the failure
            # is not compounded by a lock left held on the tree.
            tree.unlock()
        self.assertIndexedDeltas(tree, rev_index, revid3)

    def test_2a_indexes_deltas(self):
        """Only changed lines are indexed for the 2a format as well."""
        # With 2a formats we should be indexing deltas.
        # Setup - All 2a commits are full text, so its pretty simple, we just
        # reuse the setup for test_knit_snapshots_not_indexed but do not make
        # any assertions about the storage of the texts.
        tree = self.make_branch_and_tree('', format="2a")
        rev_index, revid3 = self.make_indexes_deltas_fixture(tree)
        self.assertIndexedDeltas(tree, rev_index, revid3)

    def make_indexes_deltas_fixture(self, tree):
        """Setup a tree with three commits to be indexed.

        The single file shares only the line 'content' between versions.

        :param tree: An empty working tree to commit into.
        :return: A tuple (rev_index, revid3) of the initialised (still
            empty) index and the revision id of the third commit.
        """
        tree.add(['README.txt'], ['an-id'], ['file'])
        tree.put_file_bytes_non_atomic('an-id',
            "small\ncontent\n")
        rev_index = index.init_index(tree.branch)
        tree.commit('')
        tree.put_file_bytes_non_atomic('an-id',
            "more\nmore\ncontent\nmore\nmore\nmore\n")
        tree.commit('')
        tree.put_file_bytes_non_atomic('an-id',
            "other\nother\ncontent\nother\nother\nother\n")
        revid3 = tree.commit('')
        return rev_index, revid3

    def assertIndexedDeltas(self, tree, rev_index, revid3):
        """Assert that tree's text get indexed using deltas not full texts.

        Only revid3 is indexed, and of the file's words only 'other' (new
        in that revision) may appear; 'content', which was carried over from
        the earlier versions, must not.
        """
        rev_index.index_revisions(tree.branch, [revid3])
        self.assertEqual(set([(revid3,)]), set(rev_index.indexed_revisions()))
        rev_index = index.open_index_url('')
        expected_terms = {
            ('an-id', revid3): set([('p', '', 'README.txt')]),
            ('jrandom@example.com',): set([('r', '', revid3)]),
            ('other',): set([('f', 'an-id', revid3)]),
            }
        self.assertEqual(expected_terms, self.get_all_terms(rev_index))


class TestSearching(TestCaseWithTransport):
    """Tests for the search and suggest methods of an index."""

    def test_search_no_hits(self):
        """Searching an empty index for a term yields no results."""
        tree = self.make_branch_and_tree('')
        rev_index = index.init_index(tree.branch)
        # No exception because it's a generator (and thus not guaranteed to
        # run to completion).
        self.assertEqual([], list(rev_index.search([('missing_term',)])))

    def test_search_trivial(self):
        """A word of the commit message finds that revision."""
        tree = self.make_branch_and_tree('tree')
        rev_index = index.init_index(tree.branch)
        # The double-space is a cheap smoke test for the tokeniser.
        revid = tree.commit('first  post')
        rev_index.index_revisions(tree.branch, [revid])
        results = list(rev_index.search([('post',)]))
        self.assertEqual(1, len(results))
        self.assertIsInstance(results[0], index.RevisionHit)
        self.assertEqual((revid,), results[0].revision_key)

    def test_search_trivial_exclude(self):
        """A '-' prefixed term removes the documents matching it."""
        tree = self.make_branch_and_tree('tree')
        rev_index = index.init_index(tree.branch)
        revid1 = tree.commit('first post')
        revid2 = tree.commit('second post')
        rev_index.index_revisions(tree.branch, [revid1, revid2])
        results = list(rev_index.search([('post',), ('-first',)]))
        self.assertEqual(1, len(results))
        self.assertIsInstance(results[0], index.RevisionHit)
        self.assertEqual((revid2,), results[0].revision_key)

    def test_search_only_exclude(self):
        """A query of only excluded terms currently raises TypeError."""
        tree = self.make_branch_and_tree('tree')
        rev_index = index.init_index(tree.branch)
        revid1 = tree.commit('first post')
        revid2 = tree.commit('second post')
        rev_index.index_revisions(tree.branch, [revid1, revid2])
        self.assertRaises(TypeError, list, rev_index.search([('-first',)]))
        self.knownFailure('exclude-only searches not implemented')
        # knownFailure ends the test above; the assertions below are kept as
        # the intended behaviour once exclude-only searches are implemented.
        results = list(rev_index.search([('-first',)]))
        self.assertEqual(1, len(results))
        self.assertIsInstance(results[0], index.RevisionHit)
        self.assertEqual((revid2,), results[0].revision_key)

    def test_suggestions_trivial(self):
        """Every prefix of an indexed term suggests it; longer text does not."""
        tree = self.make_branch_and_tree('tree')
        rev_index = index.init_index(tree.branch)
        revid = tree.commit('first')
        rev_index.index_branch(tree.branch, revid)
        # Each prefix of 'first', and 'first' itself, matches.
        self.assertEqual([('first',)], list(rev_index.suggest([('f',)])))
        self.assertEqual([('first',)], list(rev_index.suggest([('fi',)])))
        self.assertEqual([('first',)], list(rev_index.suggest([('fir',)])))
        self.assertEqual([('first',)], list(rev_index.suggest([('firs',)])))
        self.assertEqual([('first',)], list(rev_index.suggest([('first',)])))
        # Text that extends past the indexed term matches nothing.
        self.assertEqual([], list(rev_index.suggest([('firste',)])))

    def test_suggestions_two_terms(self):
        """With two terms only matching suggestions are made."""
        tree = self.make_branch_and_tree('tree')
        rev_index = index.init_index(tree.branch)
        revid = tree.commit('term suggestion')
        rev_index.index_branch(tree.branch, revid)
        # suggesting ('term',), ('suggest',) matches suggestion,
        # and suggestion ('missing',), ('suggest',) matches nothing.
        self.assertEqual([('suggestion',)],
            list(rev_index.suggest([('term',), ('suggest',)])))
        self.assertEqual([],
            list(rev_index.suggest([('missing',), ('suggest',)])))


class TestResults(TestCaseWithTransport):
    """Tests for the hit objects describing individual search results."""

    def test_TextHit(self):
        """A FileTextHit reports its name, its text key and a summary line."""
        wt = self.make_branch_and_tree('tree')
        the_index = index.init_index(wt.branch)
        wt.add(['README.txt'], ['an-id'], ['file'])
        file_text = "This is the \nfirst commit \nto this working tree.\n"
        wt.put_file_bytes_non_atomic('an-id', file_text)
        committed = wt.commit('commit')
        the_index.index_branch(wt.branch, committed)
        text_key = ('an-id', committed)
        terms = [('commit',)]
        hit = index.FileTextHit(the_index, wt.branch.repository, text_key,
            terms)
        # The hit is interrogated under a read lock held until test cleanup.
        wt.lock_read()
        self.addCleanup(wt.unlock)
        expected_name = u"README.txt in revision '%s'." % (committed)
        self.assertEqualDiff(expected_name, hit.document_name())
        self.assertEqual(text_key, hit.text_key)
        # The summary is the file line holding the queried word.
        self.assertEqual('first commit ', hit.summary())

    def test_RevisionHit(self):
        """A RevisionHit reports its name, key and first message line."""
        wt = self.make_branch_and_tree('tree')
        committed = wt.commit('a multi\nline message')
        revision_key = (committed,)
        hit = index.RevisionHit(wt.branch.repository, revision_key)
        wt.lock_read()
        self.addCleanup(wt.unlock)
        expected_name = u"Revision id '%s'." % committed
        self.assertEqualDiff(expected_name, hit.document_name())
        self.assertEqual(revision_key, hit.revision_key)
        self.assertEqual('a multi', hit.summary())


class TestComponentIndexBuilder(TestCaseWithTransport):
    """Tests for ComponentIndexBuilder; self.format is set per scenario."""

    def assertDocuments(self, doc_index, revids):
        """Assert doc_index holds exactly one 'r' document per revid.

        Documents are expected to be keyed "0", "1", ... in the order the
        revids are given, each with the value "r  <revid>".
        """
        expected = []
        for doc_number, revid in enumerate(revids):
            expected.append((doc_index, (str(doc_number),), "r  " + revid))
        self.assertEqual(expected, sorted(doc_index.iter_all_entries()))

    def assertPostings(self, builder, term, doc_ids):
        """Assert builder's posting list for term is exactly doc_ids."""
        self.assertEqual(set(doc_ids), set(builder.posting_list(term)))

    def _check_posting_lists(self, first_term, second_term):
        """Add postings under two terms and check documents and postings.

        :param first_term: Term key that receives postings twice.
        :param second_term: Term key that receives postings once.
        :return: The builder, for any further assertions by the caller.
        """
        builder = index.ComponentIndexBuilder(self.format)
        # adding a term adds its documents
        builder.add_term(first_term,
            [('r', '', 'revid'), ('r', '', 'other-revid')])
        doc_index = builder.document_index
        self.assertDocuments(doc_index, ['revid', 'other-revid'])
        # and the term refers to document ids
        self.assertPostings(builder, first_term, ["0", "1"])
        # adding a new term adds unique documents, and refers to the
        # correct ids
        builder.add_term(second_term,
            [('r', '', 'revid'), ('r', '', 'third-revid')])
        self.assertDocuments(doc_index,
            ['revid', 'other-revid', 'third-revid'])
        self.assertPostings(builder, first_term, ["0", "1"])
        self.assertPostings(builder, second_term, ["0", "2"])
        # adding a term twice extends the posting list rather than replacing
        # it or erroring.
        builder.add_term(first_term,
            [('r', '', 'revid'), ('r', '', 'fourth-revid')])
        self.assertDocuments(doc_index,
            ['revid', 'other-revid', 'third-revid', 'fourth-revid'])
        self.assertPostings(builder, first_term, ["0", "1", "3"])
        self.assertPostings(builder, second_term, ["0", "2"])
        return builder

    def test_documents(self):
        """add_document numbers new documents and reuses existing numbers."""
        builder = index.ComponentIndexBuilder(self.format)
        first_id = builder.add_document(('r', '', 'revid'))
        self.assertEqual("0", first_id)
        second_id = builder.add_document(('r', '', 'other-revid'))
        self.assertEqual("1", second_id)
        # A document that was already added keeps its original id.
        repeated_id = builder.add_document(('r', '', 'revid'))
        self.assertEqual("0", repeated_id)
        self.assertDocuments(builder.document_index,
            ['revid', 'other-revid'])

    def test_posting_list(self):
        """Posting lists for single-element terms accumulate document ids."""
        self._check_posting_lists(("term1",), ("term2",))

    def test_2_term_posting_list(self):
        """Two-element terms get posting lists distinct from one-element."""
        builder = self._check_posting_lists(
            ("term1", "term12"), ("term2", "term12"))
        # Single-element terms are not erroneously being used
        self.assertPostings(builder, ("term1",), [])
        self.assertPostings(builder, ("term2",), [])

    def test_add_revision(self):
        """add_revision touches only the revision index."""
        builder = index.ComponentIndexBuilder(self.format)
        # adding a revision lists the revision, does not alter document keys
        # etc.
        builder.add_revision('foo')
        document_nodes = sorted(builder.document_index.iter_all_entries())
        self.assertEqual([], document_nodes)
        self.assertEqual({}, builder.terms)
        revision_nodes = sorted(builder.revision_index.iter_all_entries())
        expected = [(builder.revision_index, ("foo",), "")]
        self.assertEqual(expected, revision_nodes)


class TestComponentCombiner(TestCaseWithTransport):
    """Tests for ComponentCombiner; self.format is set per scenario."""

    def _upload_component(self, builder, transport):
        """Upload builder's content and open it as a ComponentIndex."""
        name, value, _elements = builder.upload_index(transport)
        return index.ComponentIndex(self.format, name, value, transport)

    def _combine(self, components, transport):
        """Combine components and open the result as a ComponentIndex."""
        combiner = index.ComponentCombiner(self.format, components, transport)
        name, value, _elements = combiner.combine()
        return index.ComponentIndex(self.format, name, value, transport)

    def test_combine_two_components_overlapping_data(self):
        """Postings and revisions shared by two components are merged."""
        transport = self.get_transport()
        # The first component: rev1 and rev-common.
        first_builder = index.ComponentIndexBuilder(self.format)
        first_builder.add_revision('rev1')
        first_builder.add_revision('rev-common')
        rev1_and_common = [('r', '', 'rev1'), ('r', '', 'rev-common')]
        first_builder.add_term(("term1",), rev1_and_common)
        first_builder.add_term(("term-common",), rev1_and_common)
        first_builder.add_term(("term", "complex"), [('f', 'foo', 'rev1')])
        first = self._upload_component(first_builder, transport)
        # The second component: rev-common again, plus rev2.
        second_builder = index.ComponentIndexBuilder(self.format)
        second_builder.add_revision('rev-common')
        second_builder.add_revision('rev2')
        second_builder.add_term(("term-common",),
            [('r', '', 'rev2'), ('r', '', 'rev-common')])
        second_builder.add_term(("term2",),
            [('r', '', 'rev2'), ('r', '', 'other-revid')])
        second = self._upload_component(second_builder, transport)
        combined = self._combine([first, second], transport)
        expected_terms = {
            ('term-common',): set([('r', '', 'rev-common'),
                ('r', '', 'rev1'), ('r', '', 'rev2')]),
            ('term1',): set([('r', '', 'rev-common'), ('r', '', 'rev1')]),
            ('term2',): set([('r', '', 'other-revid'), ('r', '', 'rev2')]),
            ('term', 'complex'): set([('f', 'foo', 'rev1')]),
            }
        self.assertEqual(expected_terms, combined.all_terms())
        expected_revisions = set([('rev1',), ('rev2',), ('rev-common',)])
        self.assertEqual(expected_revisions,
            set(combined.indexed_revisions()))

    def test_combine_two_components_path_spaces(self):
        """A path document containing a space survives combining."""
        transport = self.get_transport()
        # The first component carries the path posting with a space in it.
        first_builder = index.ComponentIndexBuilder(self.format)
        first_builder.add_revision('revid')
        first_builder.add_term(("file", "revid"), [('p', '', 'file path')])
        first = self._upload_component(first_builder, transport)
        # The second component only contributes a revision.
        second_builder = index.ComponentIndexBuilder(self.format)
        second_builder.add_revision('revid1')
        second = self._upload_component(second_builder, transport)
        combined = self._combine([first, second], transport)
        expected_terms = {('file', 'revid'): set([('p', '', 'file path')])}
        self.assertEqual(expected_terms, combined.all_terms())
        expected_revisions = set([('revid',), ('revid1',)])
        self.assertEqual(expected_revisions,
            set(combined.indexed_revisions()))


class TestAutoIndex(TestCaseWithTransport):
    """Tests for the hooks installed by search._install_hooks."""

    def test_no_index_no_error(self):
        """Committing to a branch that has no search index does not raise."""
        wt = self.make_branch_and_tree("foo")
        search._install_hooks()
        wt.commit('foo')

    def test_index_is_populated(self):
        """A commit to an indexed branch is indexed with no explicit call."""
        search._install_hooks()
        wt = self.make_branch_and_tree("foo")
        the_index = index.init_index(wt.branch)
        committed = wt.commit('foo')
        expected = set([(committed,)])
        self.assertEqual(expected, set(the_index.indexed_revisions()))


class TestGraphIndexSuggestions(TestCaseWithTransport):
    """Tests for iter_entries_starts_with on the suggestable index classes.

    self.format is indexed as (builder factory, query index factory).
    """

    def _make_query_index(self, key_elements, keys):
        """Build an index holding keys, upload it, and open it for querying.

        :param key_elements: Second argument given to the builder factory;
            the tests pass the number of elements in each key.
        :param keys: Key tuples; each one is added via
            add_node(key, '', ()), in the order given.
        :return: self.format[1] opened on the uploaded 'index' file.
        """
        make_builder = self.format[0]
        open_index = self.format[1]
        builder = make_builder(0, key_elements)
        for key in keys:
            builder.add_node(key, '', ())
        transport = self.get_transport()
        length = transport.put_file('index', builder.finish())
        return open_index(transport, 'index', length)

    def test_key_length_1_no_hits(self):
        # 'pre' and 'prep' are the nodes before and after where suggestions
        # for 'pref' would be, so the boundaries get checked.
        query_index = self._make_query_index(1, [('pre',), ('prep',)])
        # Searching for suggestions for 'pref' should find nothing.
        hits = list(query_index.iter_entries_starts_with(('pref',)))
        self.assertEqual([], hits)

    def test_key_length_1_iteration(self):
        query_index = self._make_query_index(1, [
            # Boundary nodes before and after the suggestions.
            ('pre',),
            ('prep',),
            # The entries that should be found.
            ('pref',),
            ('preferential',),
            ])
        # Suggestions for 'pref' are 'pref' itself and 'preferential'.
        expected = [
            (query_index, ('pref',), ''),
            (query_index, ('preferential',), ''),
            ]
        hits = list(query_index.iter_entries_starts_with(('pref',)))
        self.assertEqual(expected, hits)

    def test_key_length_2_no_hits(self):
        # With two elements per key the boundaries have to be checked for
        # each element, which gives four boundary nodes.
        query_index = self._make_query_index(2, [
            ('pre', 'pref'),
            ('pref', 'pre'),
            ('pref', 'prep'),
            ('prep', 'pref'),
            ])
        # Searching for suggestions for ('pref', 'pref') should find nothing.
        hits = list(query_index.iter_entries_starts_with(('pref', 'pref')))
        self.assertEqual([], hits)

    def test_key_length_2_iteration(self):
        query_index = self._make_query_index(2, [
            # Boundary nodes. The first key element must match exactly and
            # the second is a startswith match, so include non-matching
            # entries whose other element does match, in case of bugs there.
            ('pre', 'pref'),
            ('pref', 'pre'),
            ('pref', 'prep'),
            ('prep', 'pref'),
            # The entries that should be found.
            ('pref', 'pref'),
            ('pref', 'preferential'),
            ])
        # Suggestions for ('pref', 'pref') are ('pref', 'pref') and
        # ('pref', 'preferential'); the results are sorted before comparing.
        expected = [
            (query_index, ('pref', 'pref'), ''),
            (query_index, ('pref', 'preferential'), ''),
            ]
        hits = sorted(query_index.iter_entries_starts_with(('pref', 'pref')))
        self.assertEqual(expected, hits)


class TestLogFilter(TestCaseWithTransport):
    """Tests for the log search filters provided by the index module."""

    def _match(self, regex):
        """Return regex in the form the log filters take on this bzrlib.

        From bzrlib 2.5 on the filters are given a dict of patterns (these
        tests use the '' key); on earlier versions they are given the bare
        regex string. Every test builds its match argument through this
        helper so they all agree on the running bzrlib's convention.

        :param regex: The regular expression source string.
        :return: {'': regex} on bzrlib >= 2.5, otherwise regex itself.
        """
        if bzrlib_version >= (2, 5):
            return {'': regex}
        return regex

    def test_registered(self):
        """Both plugin filters are registered; the original one is not."""
        self.assertTrue(index.make_disable_search_filter in log.log_adapters)
        self.assertTrue(index.make_log_search_filter in log.log_adapters)
        self.assertFalse(index._original_make_search_filter in log.log_adapters)

    def test_get_filter_no_index(self):
        """Without an index only the disabling wrapper wraps the iterator."""
        tree = self.make_branch_and_tree('foo')
        base_iterator = 'base'
        match = self._match("\\bword\\b")
        # bzr-search won't kick in
        self.assertEqual(base_iterator, index.make_log_search_filter(
            tree.branch, False, match, base_iterator))
        # so the disabling wrapper must.
        self.assertNotEqual(base_iterator, index.make_disable_search_filter(
            tree.branch, False, match, base_iterator))

    def test_get_filter_too_complex(self):
        """A too complex regex becomes a baseline search."""
        # We test this by searching for something that an index search would
        # miss but a regex search finds.
        tree = self.make_branch_and_tree('foo')
        revid = tree.commit('first post')
        index.index_url(self.get_url('foo'))
        rev = tree.branch.repository.get_revision(revid)
        input_iterator = [[((revid, 1, 0), rev, None)]]
        match = self._match("st po")
        rev_log_iterator = index.make_disable_search_filter(
            tree.branch, False, match, input_iterator)
        self.assertNotEqual(input_iterator, rev_log_iterator)
        # everything matches
        self.assertEqual(input_iterator, list(rev_log_iterator))
        # bzr-search won't kick in
        self.assertEqual(input_iterator, index.make_log_search_filter(
            tree.branch, False, match, input_iterator))

    def test_get_filter_searchable_regex(self):
        """A parsable regex becomes an index search."""
        # We test this by searching for something that an index search will
        # hit, and crippling the baseline search reference so that any use of
        # the baseline search would fail.
        saved_original = index._original_make_search_filter
        def restore():
            index._original_make_search_filter = saved_original
        self.addCleanup(restore)
        index._original_make_search_filter = None
        tree = self.make_branch_and_tree('foo')
        revid = tree.commit('first post')
        revid2 = tree.commit('second post')
        index.index_url(self.get_url('foo'))
        input_iterator = [
            [((revid2, 2, 0), None, None), ((revid, 1, 0), None, None)]]
        match = self._match("\\bfirst\\b")
        # the disabled filter must not kick in
        self.assertEqual(input_iterator, index.make_disable_search_filter(
            tree.branch, False, match, input_iterator))
        # we must get a functional search from the log search filter.
        rev_log_iterator = index.make_log_search_filter(
            tree.branch, False, match, input_iterator)
        self.assertNotEqual(input_iterator, rev_log_iterator)
        # rev id 2 should be filtered out.
        expected_result = [[((revid, 1, 0), None, None)]]
        self.assertEqual(expected_result, list(rev_log_iterator))

    def test_query_from_regex(self):
        """Only a single word wrapped in \\b markers converts to a query."""
        self.assertEqual(None, index.query_from_regex("foo"))
        self.assertEqual(None, index.query_from_regex("fo o"))
        self.assertEqual(None, index.query_from_regex("\\bfoo \\b"))
        self.assertEqual([("foo",)], index.query_from_regex("\\bfoo\\b"))
