From 8059db894d640946f580ef2af6c0933e2bfd7d9a Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 7 Sep 2017 08:33:20 -0700 Subject: [PATCH] Use the Ferret engine fulltext document table to drive auxiliary fulltext constraints Summary: Ref T12819. I started trying to get individual engines to drive these constraints (e.g., `ManiphestTaskQuery` can do most of the work) but this is a big pain, especially since most engines don't support "any owner" or "no owner", and not everything has an owner, and so on and so on. Going down this path would have meant a huge pile of stub functions everywhere, I think. Instead, drive these through the main engine using the fulltext document table, which already has everything we need to apply these constraints in a uniform way. Also tweak some parts of query construction and result ordering. Test Plan: Searched for documents by author, owner, unowned, any owner, tags, subscribers, fulltext in global search. Got sensible results without any application-specific code. Reviewers: chad Reviewed By: chad Maniphest Tasks: T12819 Differential Revision: https://secure.phabricator.com/D18550 --- .../DifferentialRevisionFerretEngine.php | 2 +- .../search/ManiphestTaskFerretEngine.php | 2 +- ...abricatorFerretFulltextEngineExtension.php | 32 ++++- .../ferret/PhabricatorFerretDocument.php | 12 ++ .../search/ferret/PhabricatorFerretEngine.php | 56 +-------- ...PhabricatorFerretFulltextStorageEngine.php | 30 +++-- ...PhabricatorCursorPagedPolicyAwareQuery.php | 114 ++++++++++++++++-- 7 files changed, 171 insertions(+), 77 deletions(-) diff --git a/src/applications/differential/search/DifferentialRevisionFerretEngine.php b/src/applications/differential/search/DifferentialRevisionFerretEngine.php index b51a486ef5..ebe59c2780 100644 --- a/src/applications/differential/search/DifferentialRevisionFerretEngine.php +++ b/src/applications/differential/search/DifferentialRevisionFerretEngine.php @@ -15,7 +15,7 @@ final class DifferentialRevisionFerretEngine return new DifferentialRevisionFerretField(); } - protected function newSearchEngine() { + public function newSearchEngine() { return new DifferentialRevisionSearchEngine(); } diff --git a/src/applications/maniphest/search/ManiphestTaskFerretEngine.php b/src/applications/maniphest/search/ManiphestTaskFerretEngine.php index 2eab7db74c..7de368402a 100644 --- a/src/applications/maniphest/search/ManiphestTaskFerretEngine.php +++ b/src/applications/maniphest/search/ManiphestTaskFerretEngine.php @@ -15,7 +15,7 @@ final class ManiphestTaskFerretEngine return new ManiphestTaskFerretField(); } - protected function newSearchEngine() { + public function newSearchEngine() { return new ManiphestTaskSearchEngine(); } diff --git a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php index 8d1b6070e6..27694e3a01 100644 --- a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php +++ b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php @@ -23,11 +23,37 @@ final class PhabricatorFerretFulltextEngineExtension $phid = $document->getPHID(); $engine = $object->newFerretEngine(); + $is_closed = 0; + $author_phid = null; + $owner_phid = null; + foreach ($document->getRelationshipData() as $relationship) { + list($related_type, $related_phid) = $relationship; + switch ($related_type) { + case PhabricatorSearchRelationship::RELATIONSHIP_OPEN: + $is_closed = 0; + break; + case PhabricatorSearchRelationship::RELATIONSHIP_CLOSED: + $is_closed = 1; + break; + case PhabricatorSearchRelationship::RELATIONSHIP_OWNER: + $owner_phid = $related_phid; + break; + case PhabricatorSearchRelationship::RELATIONSHIP_UNOWNED: + $owner_phid = null; + break; + case PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR: + $author_phid = $related_phid; + break; + } + } + $ferret_document = $engine->newDocumentObject() ->setObjectPHID($phid) - ->setIsClosed(0) - ->setEpochCreated(0) - ->setEpochModified(0); + ->setIsClosed($is_closed) + ->setEpochCreated($document->getDocumentCreated()) + ->setEpochModified($document->getDocumentModified()) + ->setAuthorPHID($author_phid) + ->setOwnerPHID($owner_phid); $stemmer = $engine->newStemmer(); diff --git a/src/applications/search/ferret/PhabricatorFerretDocument.php b/src/applications/search/ferret/PhabricatorFerretDocument.php index fa816c8d17..6f25eb93b6 100644 --- a/src/applications/search/ferret/PhabricatorFerretDocument.php +++ b/src/applications/search/ferret/PhabricatorFerretDocument.php @@ -27,6 +27,18 @@ abstract class PhabricatorFerretDocument 'columns' => array('objectPHID'), 'unique' => true, ), + 'key_author' => array( + 'columns' => array('authorPHID'), + ), + 'key_owner' => array( + 'columns' => array('ownerPHID'), + ), + 'key_created' => array( + 'columns' => array('epochCreated'), + ), + 'key_modified' => array( + 'columns' => array('epochModified'), + ), ), ) + parent::getConfiguration(); } diff --git a/src/applications/search/ferret/PhabricatorFerretEngine.php b/src/applications/search/ferret/PhabricatorFerretEngine.php index 7e30189fb8..7b8520ac71 100644 --- a/src/applications/search/ferret/PhabricatorFerretEngine.php +++ b/src/applications/search/ferret/PhabricatorFerretEngine.php @@ -5,7 +5,7 @@ abstract class PhabricatorFerretEngine extends Phobject { abstract public function newNgramsObject(); abstract public function newDocumentObject(); abstract public function newFieldObject(); - abstract protected function newSearchEngine(); + abstract public function newSearchEngine(); public function getDefaultFunctionKey() { return 'all'; @@ -70,60 +70,6 @@ abstract class PhabricatorFerretEngine extends Phobject { return new PhutilSearchStemmer(); } - public function newConfiguredFulltextQuery( - $object, - PhabricatorSavedQuery $query, - PhabricatorUser $viewer) { - - $local_query = new PhabricatorSavedQuery(); - $local_query->setParameter('query', $query->getParameter('query')); - - // TODO: Modularize this piece. - $project_phids = $query->getParameter('projectPHIDs'); - if ($project_phids) { - $local_query->setParameter('projectPHIDs', $project_phids); - } - - $subscriber_phids = $query->getParameter('subscriberPHIDs'); - if ($subscriber_phids) { - $local_query->setParameter('subscriberPHIDs', $subscriber_phids); - } - - $author_phids = $query->getParameter('authorPHIDs'); - if ($author_phids) { - $local_query->setParameter('authorPHIDs', $author_phids); - } - - $owner_phids = $query->getParameter('ownerPHIDs'); - if ($owner_phids) { - $local_query->setParameter('ownerPHIDs', $owner_phids); - } - - $rel_open = PhabricatorSearchRelationship::RELATIONSHIP_OPEN; - $rel_closed = PhabricatorSearchRelationship::RELATIONSHIP_CLOSED; - - $statuses = $query->getParameter('statuses'); - if ($statuses) { - $statuses = array_fuse($statuses); - if (count($statuses) == 1) { - if (isset($statuses[$rel_open])) { - $local_query->setParameter('statuses', array('open()')); - } - if (isset($statuses[$rel_closed])) { - $local_query->setParameter('statuses', array('closed()')); - } - } - } - - $search_engine = $this->newSearchEngine() - ->setViewer($viewer); - - $engine_query = $search_engine->buildQueryFromSavedQuery($local_query) - ->setViewer($viewer); - - return $engine_query; - } - public function tokenizeString($value) { $value = trim($value, ' '); $value = preg_split('/ +/', $value); diff --git a/src/applications/search/fulltextstorage/PhabricatorFerretFulltextStorageEngine.php b/src/applications/search/fulltextstorage/PhabricatorFerretFulltextStorageEngine.php index 0036ca928b..b67cd7d43c 100644 --- a/src/applications/search/fulltextstorage/PhabricatorFerretFulltextStorageEngine.php +++ b/src/applications/search/fulltextstorage/PhabricatorFerretFulltextStorageEngine.php @@ -47,6 +47,8 @@ final class PhabricatorFerretFulltextStorageEngine $offset = (int)$query->getParameter('offset', 0); $limit = (int)$query->getParameter('limit', 25); + // NOTE: For now, it's okay to query with the omnipotent viewer here + // because we're just returning PHIDs which we'll filter later. $viewer = PhabricatorUser::getOmnipotentUser(); $type_results = array(); @@ -54,19 +56,31 @@ final class PhabricatorFerretFulltextStorageEngine $engine = $spec['engine']; $object = $spec['object']; - // NOTE: For now, it's okay to query with the omnipotent viewer here - // because we're just returning PHIDs which we'll filter later. + $local_query = new PhabricatorSavedQuery(); + $local_query->setParameter('query', $query->getParameter('query')); - $type_query = $engine->newConfiguredFulltextQuery( - $object, - $query, - $viewer); + $project_phids = $query->getParameter('projectPHIDs'); + if ($project_phids) { + $local_query->setParameter('projectPHIDs', $project_phids); + } - $type_query + $subscriber_phids = $query->getParameter('subscriberPHIDs'); + if ($subscriber_phids) { + $local_query->setParameter('subscriberPHIDs', $subscriber_phids); + } + + $search_engine = $engine->newSearchEngine() + ->setViewer($viewer); + + $engine_query = $search_engine->buildQueryFromSavedQuery($local_query) + ->setViewer($viewer); + + $engine_query + ->withFerretQuery($engine, $query) ->setOrder('relevance') ->setLimit($offset + $limit); - $results = $type_query->execute(); + $results = $engine_query->execute(); $results = mpull($results, null, 'getPHID'); $type_results[$type] = $results; } diff --git a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php index 1411095f36..569f79d824 100644 --- a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php +++ b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php @@ -28,8 +28,9 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery private $spaceIsArchived; private $ngrams = array(); private $ferretEngine; - private $ferretTokens; - private $ferretTables; + private $ferretTokens = array(); + private $ferretTables = array(); + private $ferretQuery; protected function getPageCursors(array $page) { return array( @@ -772,7 +773,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery if ($this->supportsFerretEngine()) { $orders['relevance'] = array( - 'vector' => array('rank', 'id'), + 'vector' => array('rank', 'fulltext-modified', 'id'), 'name' => pht('Relevence'), ); } @@ -975,6 +976,16 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery 'column' => '_ft_rank', 'type' => 'int', ); + $columns['fulltext-created'] = array( + 'table' => 'ft_doc', + 'column' => 'epochCreated', + 'type' => 'int', + ); + $columns['fulltext-modified'] = array( + 'table' => 'ft_doc', + 'column' => 'epochModified', + 'type' => 'int', + ); } $cache->setKey($cache_key, $columns); @@ -1406,6 +1417,22 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery return ($object instanceof PhabricatorFerretInterface); } + public function withFerretQuery( + PhabricatorFerretEngine $engine, + PhabricatorSavedQuery $query) { + + if (!$this->supportsFerretEngine()) { + throw new Exception( + pht( + 'Query ("%s") does not support the Ferret fulltext engine.', + get_class($this))); + } + + $this->ferretEngine = $engine; + $this->ferretQuery = $query; + + return $this; + } public function withFerretConstraint( PhabricatorFerretEngine $engine, @@ -1538,10 +1565,10 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery $table_alias, $stem_value); } - - $parts[] = '0'; } + $parts[] = '0'; + $select[] = qsprintf( $conn, '%Q _ft_rank', @@ -1646,7 +1673,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery $joins = array(); $joins[] = qsprintf( $conn, - 'JOIN %T ftdoc ON ftdoc.objectPHID = %Q', + 'JOIN %T ft_doc ON ft_doc.objectPHID = %Q', $document_table->getTableName(), $phid_column); @@ -1655,11 +1682,11 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery $table = $spec['table']; $ngram = $spec['ngram']; - $alias = 'ft'.$idx++; + $alias = 'ftngram_'.$idx++; $joins[] = qsprintf( $conn, - 'JOIN %T %T ON %T.documentID = ftdoc.id AND %T.ngram = %s', + 'JOIN %T %T ON %T.documentID = ft_doc.id AND %T.ngram = %s', $table, $alias, $alias, @@ -1672,7 +1699,7 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery $joins[] = qsprintf( $conn, - 'JOIN %T %T ON ftdoc.id = %T.documentID + 'JOIN %T %T ON ft_doc.id = %T.documentID AND %T.fieldKey = %s', $field_table->getTableName(), $alias, @@ -1801,6 +1828,75 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery } } + if ($this->ferretQuery) { + $query = $this->ferretQuery; + + $author_phids = $query->getParameter('authorPHIDs'); + if ($author_phids) { + $where[] = qsprintf( + $conn, + 'ft_doc.authorPHID IN (%Ls)', + $author_phids); + } + + $with_unowned = $query->getParameter('withUnowned'); + $with_any = $query->getParameter('withAnyOwner'); + + if ($with_any && $with_unowned) { + throw new PhabricatorEmptyQueryException( + pht( + 'This query matches only unowned documents owned by anyone, '. + 'which is impossible.')); + } + + $owner_phids = $query->getParameter('ownerPHIDs'); + if ($owner_phids && !$with_any) { + if ($with_unowned) { + $where[] = qsprintf( + $conn, + 'ft_doc.ownerPHID IN (%Ls) OR ft_doc.ownerPHID IS NULL', + $owner_phids); + } else { + $where[] = qsprintf( + $conn, + 'ft_doc.ownerPHID IN (%Ls)', + $owner_phids); + } + } else if ($with_unowned) { + $where[] = qsprintf( + $conn, + 'ft_doc.ownerPHID IS NULL'); + } + + if ($with_any) { + $where[] = qsprintf( + $conn, + 'ft_doc.ownerPHID IS NOT NULL'); + } + + $rel_open = PhabricatorSearchRelationship::RELATIONSHIP_OPEN; + + $statuses = $query->getParameter('statuses'); + $is_closed = null; + if ($statuses) { + $statuses = array_fuse($statuses); + if (count($statuses) == 1) { + if (isset($statuses[$rel_open])) { + $is_closed = 0; + } else { + $is_closed = 1; + } + } + } + + if ($is_closed !== null) { + $where[] = qsprintf( + $conn, + 'ft_doc.isClosed = %d', + $is_closed); + } + } + return $where; }