From 9a1d59ad5bd51ef4bf56533db260cc9e11a54fe6 Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 10 Nov 2016 07:24:15 -0800 Subject: [PATCH] Separate sever-side typeahead queries into "prefix" and "content" phases Summary: Ref T8510. When users type "platypus" into a typeahead, they want "Platypus Playground" to be a higher-ranked match than "AAA Platypus", even though the latter is alphabetically first. Specifically, the rule is: results which match the query as a prefix of the result text should rank above results which do not. I believe we now always get this right on the client side. However, WMF has at least one case (described in T8510) where we do not get it right on the server side, and thus the user sees the wrong result. The remaining issue is that if "platypus" matches more than 100 results, the result "Platypus Playground" may not appear in the result set at all, beacuse there are 100 copies of "AAA Platypus 1", "AAA Platypus 2", etc., first. So even though the client will apply the correct sort, it doesn't have the result the user wants and can't show it to them. To fix this, split the server-side query into two phases: - In the first phase, the "prefix" phase, we find results that **start with** "platypus". - In the second phase, the "content" phase, we find results that contain "platypus" anywhere. We skip the "prefix" phase if the user has not typed a query (for example, in the browse view). Test Plan: This is a lot of stuff, but the new ranking here puts projects which start with "w" at the top of the list. Lower down the list, you can see some projects which contain "w" but do not appear at the top (like "Serious Work"). {F1913931} Reviewers: chad Reviewed By: chad Maniphest Tasks: T8510 Differential Revision: https://secure.phabricator.com/D16838 --- .../people/query/PhabricatorPeopleQuery.php | 17 +++ .../typeahead/PhabricatorPeopleDatasource.php | 11 +- .../project/query/PhabricatorProjectQuery.php | 17 +++ .../PhabricatorProjectDatasource.php | 5 +- ...orTypeaheadModularDatasourceController.php | 5 + ...habricatorTypeaheadCompositeDatasource.php | 140 +++++++++++++++++- .../PhabricatorTypeaheadDatasource.php | 24 +++ .../storage/PhabricatorTypeaheadResult.php | 11 ++ 8 files changed, 219 insertions(+), 11 deletions(-) diff --git a/src/applications/people/query/PhabricatorPeopleQuery.php b/src/applications/people/query/PhabricatorPeopleQuery.php index 88e2437dbd..32ae31859b 100644 --- a/src/applications/people/query/PhabricatorPeopleQuery.php +++ b/src/applications/people/query/PhabricatorPeopleQuery.php @@ -17,6 +17,7 @@ final class PhabricatorPeopleQuery private $isApproved; private $nameLike; private $nameTokens; + private $namePrefixes; private $needPrimaryEmail; private $needProfile; @@ -95,6 +96,11 @@ final class PhabricatorPeopleQuery return $this; } + public function withNamePrefixes(array $prefixes) { + $this->namePrefixes = $prefixes; + return $this; + } + public function needPrimaryEmail($need) { $this->needPrimaryEmail = $need; return $this; @@ -256,6 +262,17 @@ final class PhabricatorPeopleQuery $this->usernames); } + if ($this->namePrefixes) { + $parts = array(); + foreach ($this->namePrefixes as $name_prefix) { + $parts[] = qsprintf( + $conn, + 'user.username LIKE %>', + $name_prefix); + } + $where[] = '('.implode(' OR ', $parts).')'; + } + if ($this->emails !== null) { $where[] = qsprintf( $conn, diff --git a/src/applications/people/typeahead/PhabricatorPeopleDatasource.php b/src/applications/people/typeahead/PhabricatorPeopleDatasource.php index 494b68dbfb..df146808bb 100644 --- a/src/applications/people/typeahead/PhabricatorPeopleDatasource.php +++ b/src/applications/people/typeahead/PhabricatorPeopleDatasource.php @@ -17,13 +17,18 @@ final class PhabricatorPeopleDatasource public function loadResults() { $viewer = $this->getViewer(); - $tokens = $this->getTokens(); $query = id(new PhabricatorPeopleQuery()) ->setOrderVector(array('username')); - if ($tokens) { - $query->withNameTokens($tokens); + if ($this->getPhase() == self::PHASE_PREFIX) { + $prefix = $this->getPrefixQuery(); + $query->withNamePrefixes(array($prefix)); + } else { + $tokens = $this->getTokens(); + if ($tokens) { + $query->withNameTokens($tokens); + } } $users = $this->executeQuery($query); diff --git a/src/applications/project/query/PhabricatorProjectQuery.php b/src/applications/project/query/PhabricatorProjectQuery.php index ab41ecf0ac..87c6bb805e 100644 --- a/src/applications/project/query/PhabricatorProjectQuery.php +++ b/src/applications/project/query/PhabricatorProjectQuery.php @@ -12,6 +12,7 @@ final class PhabricatorProjectQuery private $slugMap; private $allSlugs; private $names; + private $namePrefixes; private $nameTokens; private $icons; private $colors; @@ -78,6 +79,11 @@ final class PhabricatorProjectQuery return $this; } + public function withNamePrefixes(array $prefixes) { + $this->namePrefixes = $prefixes; + return $this; + } + public function withNameTokens(array $tokens) { $this->nameTokens = array_values($tokens); return $this; @@ -464,6 +470,17 @@ final class PhabricatorProjectQuery $this->names); } + if ($this->namePrefixes) { + $parts = array(); + foreach ($this->namePrefixes as $name_prefix) { + $parts[] = qsprintf( + $conn, + 'name LIKE %>', + $name_prefix); + } + $where[] = '('.implode(' OR ', $parts).')'; + } + if ($this->icons !== null) { $where[] = qsprintf( $conn, diff --git a/src/applications/project/typeahead/PhabricatorProjectDatasource.php b/src/applications/project/typeahead/PhabricatorProjectDatasource.php index 03c2424f4a..03a6e39c33 100644 --- a/src/applications/project/typeahead/PhabricatorProjectDatasource.php +++ b/src/applications/project/typeahead/PhabricatorProjectDatasource.php @@ -28,7 +28,10 @@ final class PhabricatorProjectDatasource ->needImages(true) ->needSlugs(true); - if ($tokens) { + if ($this->getPhase() == self::PHASE_PREFIX) { + $prefix = $this->getPrefixQuery(); + $query->withNamePrefixes(array($prefix)); + } else if ($tokens) { $query->withNameTokens($tokens); } diff --git a/src/applications/typeahead/controller/PhabricatorTypeaheadModularDatasourceController.php b/src/applications/typeahead/controller/PhabricatorTypeaheadModularDatasourceController.php index a7c8381bfd..5c641b7c1f 100644 --- a/src/applications/typeahead/controller/PhabricatorTypeaheadModularDatasourceController.php +++ b/src/applications/typeahead/controller/PhabricatorTypeaheadModularDatasourceController.php @@ -325,6 +325,11 @@ final class PhabricatorTypeaheadModularDatasourceController pht('Icon'), pht('Closed'), pht('Sprite'), + pht('Color'), + pht('Type'), + pht('Unique'), + pht('Auto'), + pht('Phase'), )); $result_box = id(new PHUIObjectBoxView()) diff --git a/src/applications/typeahead/datasource/PhabricatorTypeaheadCompositeDatasource.php b/src/applications/typeahead/datasource/PhabricatorTypeaheadCompositeDatasource.php index 306b33b497..33f06e4ae5 100644 --- a/src/applications/typeahead/datasource/PhabricatorTypeaheadCompositeDatasource.php +++ b/src/applications/typeahead/datasource/PhabricatorTypeaheadCompositeDatasource.php @@ -4,6 +4,8 @@ abstract class PhabricatorTypeaheadCompositeDatasource extends PhabricatorTypeaheadDatasource { private $usable; + private $prefixString; + private $prefixLength; abstract public function getComponentDatasources(); @@ -22,9 +24,51 @@ abstract class PhabricatorTypeaheadCompositeDatasource } public function loadResults() { + $phases = array(); + + // We only need to do a prefix phase query if there's an actual query + // string. If the user didn't type anything, nothing can possibly match it. + if (strlen($this->getRawQuery())) { + $phases[] = self::PHASE_PREFIX; + } + + $phases[] = self::PHASE_CONTENT; + $offset = $this->getOffset(); $limit = $this->getLimit(); + $results = array(); + foreach ($phases as $phase) { + if ($limit) { + $phase_limit = ($offset + $limit) - count($results); + } else { + $phase_limit = 0; + } + + $phase_results = $this->loadResultsForPhase( + $phase, + $phase_limit); + + foreach ($phase_results as $result) { + $results[] = $result; + } + + if ($limit) { + if (count($results) >= $offset + $limit) { + break; + } + } + } + + return $results; + } + + protected function loadResultsForPhase($phase, $limit) { + if ($phase == self::PHASE_PREFIX) { + $this->prefixString = $this->getPrefixQuery(); + $this->prefixLength = strlen($this->prefixString); + } + // If the input query is a function like `members(platy`, and we can // parse the function, we strip the function off and hand the stripped // query to child sources. This makes it easier to implement function @@ -62,28 +106,110 @@ abstract class PhabricatorTypeaheadCompositeDatasource } $source + ->setPhase($phase) ->setFunctionStack($source_stack) ->setRawQuery($source_query) ->setQuery($this->getQuery()) ->setViewer($this->getViewer()); - if ($limit) { - $source->setLimit($offset + $limit); - } - if ($is_browse) { $source->setIsBrowse(true); } - $source_results = $source->loadResults(); - $source_results = $source->didLoadResults($source_results); + if ($limit) { + // If we are loading results from a source with a limit, it may return + // some results which belong to the wrong phase. We need an entire page + // of valid results in the correct phase AFTER any results for the + // wrong phase are filtered for pagination to work correctly. + + // To make sure we can get there, we fetch more and more results until + // enough of them survive filtering to generate a full page. + + // We start by fetching 150% of the results than we think we need, and + // double the amount we overfetch by each time. + $factor = 1.5; + while (true) { + $query_source = clone $source; + $total = (int)ceil($limit * $factor) + 1; + $query_source->setLimit($total); + + $source_results = $query_source->loadResultsForPhase( + $phase, + $limit); + + // If there are fewer unfiltered results than we asked for, we know + // this is the entire result set and we don't need to keep going. + if (count($source_results) < $total) { + $source_results = $query_source->didLoadResults($source_results); + $source_results = $this->filterPhaseResults( + $phase, + $source_results); + break; + } + + // Otherwise, this result set have everything we need, or may not. + // Filter the results that are part of the wrong phase out first... + $source_results = $query_source->didLoadResults($source_results); + $source_results = $this->filterPhaseResults($phase, $source_results); + + // Now check if we have enough results left. If we do, we're all set. + if (count($source_results) >= $total) { + break; + } + + // We filtered out too many results to have a full page left, so we + // need to run the query again, asking for even more results. We'll + // keep doing this until we get a full page or get all of the + // results. + $factor = $factor * 2; + } + } else { + $source_results = $source->loadResults(); + $source_results = $source->didLoadResults($source_results); + $source_results = $this->filterPhaseResults($phase, $source_results); + } + $results[] = $source_results; } $results = array_mergev($results); $results = msort($results, 'getSortKey'); - $count = count($results); + $results = $this->sliceResults($results); + + return $results; + } + + private function filterPhaseResults($phase, $source_results) { + foreach ($source_results as $key => $source_result) { + $result_phase = $this->getResultPhase($source_result); + + if ($result_phase != $phase) { + unset($source_results[$key]); + continue; + } + + $source_result->setPhase($result_phase); + } + + return $source_results; + } + + private function getResultPhase(PhabricatorTypeaheadResult $result) { + if ($this->prefixLength) { + $result_name = phutil_utf8_strtolower($result->getName()); + if (!strncmp($result_name, $this->prefixString, $this->prefixLength)) { + return self::PHASE_PREFIX; + } + } + + return self::PHASE_CONTENT; + } + + protected function sliceResults(array $results) { + $offset = $this->getOffset(); + $limit = $this->getLimit(); + if ($offset || $limit) { if (!$limit) { $limit = count($results); diff --git a/src/applications/typeahead/datasource/PhabricatorTypeaheadDatasource.php b/src/applications/typeahead/datasource/PhabricatorTypeaheadDatasource.php index 9fee4b2434..163b6c40b3 100644 --- a/src/applications/typeahead/datasource/PhabricatorTypeaheadDatasource.php +++ b/src/applications/typeahead/datasource/PhabricatorTypeaheadDatasource.php @@ -13,6 +13,10 @@ abstract class PhabricatorTypeaheadDatasource extends Phobject { private $parameters = array(); private $functionStack = array(); private $isBrowse; + private $phase = self::PHASE_CONTENT; + + const PHASE_PREFIX = 'prefix'; + const PHASE_CONTENT = 'content'; public function setLimit($limit) { $this->limit = $limit; @@ -46,6 +50,10 @@ abstract class PhabricatorTypeaheadDatasource extends Phobject { return $this; } + public function getPrefixQuery() { + return phutil_utf8_strtolower($this->getRawQuery()); + } + public function getRawQuery() { return $this->rawQuery; } @@ -81,6 +89,15 @@ abstract class PhabricatorTypeaheadDatasource extends Phobject { return $this->isBrowse; } + public function setPhase($phase) { + $this->phase = $phase; + return $this; + } + + public function getPhase() { + return $this->phase; + } + public function getDatasourceURI() { $uri = new PhutilURI('/typeahead/class/'.get_class($this).'/'); $uri->setQueryParams($this->parameters); @@ -106,6 +123,13 @@ abstract class PhabricatorTypeaheadDatasource extends Phobject { abstract public function getDatasourceApplicationClass(); abstract public function loadResults(); + protected function loadResultsForPhase($phase, $limit) { + // By default, sources just load all of their results in every phase and + // rely on filtering at a higher level to sequence phases correctly. + $this->setLimit($limit); + return $this->loadResults(); + } + protected function didLoadResults(array $results) { return $results; } diff --git a/src/applications/typeahead/storage/PhabricatorTypeaheadResult.php b/src/applications/typeahead/storage/PhabricatorTypeaheadResult.php index 4c5c079734..14cbe726dc 100644 --- a/src/applications/typeahead/storage/PhabricatorTypeaheadResult.php +++ b/src/applications/typeahead/storage/PhabricatorTypeaheadResult.php @@ -18,6 +18,7 @@ final class PhabricatorTypeaheadResult extends Phobject { private $unique; private $autocomplete; private $attributes = array(); + private $phase; public function setIcon($icon) { $this->icon = $icon; @@ -154,6 +155,7 @@ final class PhabricatorTypeaheadResult extends Phobject { $this->tokenType, $this->unique ? 1 : null, $this->autocomplete, + $this->phase, ); while (end($data) === null) { array_pop($data); @@ -211,4 +213,13 @@ final class PhabricatorTypeaheadResult extends Phobject { return $this; } + public function setPhase($phase) { + $this->phase = $phase; + return $this; + } + + public function getPhase() { + return $this->phase; + } + }