From 1dfa94e5712726aecc408feeffe14ed6b6fa16d7 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 1 Oct 2014 08:18:53 -0700 Subject: [PATCH] Use binary collations for most text Summary: Ref T1191. For most text columns, we either don't care if "a" and "A" are the same, or we expect them to be different (for example: keys, domains, secrets, etc). Default text columns to the `_bin` collation so they are compared by strict character value. This is safer in cases where we aren't sure. For some text columns, we allow the user to sort by the column in the UI (like Maniphest task titles) or we do care that "A" and "a" are the same (for example: project names). Introduce a new class of virtual data types, the "sort..." types, to cover these columns. These are like the "text..." types but use sorting collations which treat "A" and "a" the same. Test Plan: - Made an effort to identify all columns where the UI relies on database collation. - Ran `bin/storage adjust` and cleared all warnings. Reviewers: btrahan Reviewed By: btrahan Subscribers: beng, epriestley Maniphest Tasks: T1191 Differential Revision: https://secure.phabricator.com/D10602 --- .../schema/PhabricatorConfigSchemaQuery.php | 9 ++- .../schema/PhabricatorConfigSchemaSpec.php | 75 ++++++++++++++----- .../maniphest/storage/ManiphestNameIndex.php | 2 +- .../maniphest/storage/ManiphestTask.php | 5 +- src/applications/phame/storage/PhamePost.php | 2 +- .../phriction/storage/PhrictionContent.php | 2 +- .../phriction/storage/PhrictionDocument.php | 2 +- .../project/storage/PhabricatorProject.php | 2 +- .../storage/PhabricatorRepository.php | 4 +- ...abricatorCustomFieldStringIndexStorage.php | 2 +- 10 files changed, 73 insertions(+), 32 deletions(-) diff --git a/src/applications/config/schema/PhabricatorConfigSchemaQuery.php b/src/applications/config/schema/PhabricatorConfigSchemaQuery.php index 9fba6e03be..b830486e57 100644 --- a/src/applications/config/schema/PhabricatorConfigSchemaQuery.php +++ b/src/applications/config/schema/PhabricatorConfigSchemaQuery.php @@ -154,7 +154,8 @@ final class PhabricatorConfigSchemaQuery extends Phobject { // collation. This is most correct, and will sort properly. $utf8_charset = 'utf8mb4'; - $utf8_collation = 'utf8mb4_unicode_ci'; + $utf8_binary_collation = 'utf8mb4_bin'; + $utf8_sorting_collation = 'utf8mb4_unicode_ci'; } else { // If utf8mb4 is not available, we use binary. This allows us to store // 4-byte unicode characters. This has some tradeoffs: @@ -167,7 +168,8 @@ final class PhabricatorConfigSchemaQuery extends Phobject { // to prevent this. $utf8_charset = 'binary'; - $utf8_collation = 'binary'; + $utf8_binary_collation = 'binary'; + $utf8_sorting_collation = 'binary'; } $specs = id(new PhutilSymbolLoader()) @@ -177,8 +179,9 @@ final class PhabricatorConfigSchemaQuery extends Phobject { $server_schema = new PhabricatorConfigServerSchema(); foreach ($specs as $spec) { $spec - ->setUTF8Collation($utf8_collation) ->setUTF8Charset($utf8_charset) + ->setUTF8BinaryCollation($utf8_binary_collation) + ->setUTF8SortingCollation($utf8_sorting_collation) ->setServer($server_schema) ->buildSchemata($server_schema); } diff --git a/src/applications/config/schema/PhabricatorConfigSchemaSpec.php b/src/applications/config/schema/PhabricatorConfigSchemaSpec.php index 9915db0f61..cd68a97c4a 100644 --- a/src/applications/config/schema/PhabricatorConfigSchemaSpec.php +++ b/src/applications/config/schema/PhabricatorConfigSchemaSpec.php @@ -4,15 +4,25 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject { private $server; private $utf8Charset; - private $utf8Collation; + private $utf8BinaryCollation; + private $utf8SortingCollation; - public function setUTF8Collation($utf8_collation) { - $this->utf8Collation = $utf8_collation; + public function setUTF8SortingCollation($utf8_sorting_collation) { + $this->utf8SortingCollation = $utf8_sorting_collation; return $this; } - public function getUTF8Collation() { - return $this->utf8Collation; + public function getUTF8SortingCollation() { + return $this->utf8SortingCollation; + } + + public function setUTF8BinaryCollation($utf8_binary_collation) { + $this->utf8BinaryCollation = $utf8_binary_collation; + return $this; + } + + public function getUTF8BinaryCollation() { + return $this->utf8BinaryCollation; } public function setUTF8Charset($utf8_charset) { @@ -195,7 +205,7 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject { return id(new PhabricatorConfigDatabaseSchema()) ->setName($this->getNamespacedDatabase($name)) ->setCharacterSet($this->getUTF8Charset()) - ->setCollation($this->getUTF8Collation()); + ->setCollation($this->getUTF8BinaryCollation()); } protected function getNamespacedDatabase($name) { @@ -206,7 +216,7 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject { protected function newTable($name) { return id(new PhabricatorConfigTableSchema()) ->setName($name) - ->setCollation($this->getUTF8Collation()); + ->setCollation($this->getUTF8BinaryCollation()); } protected function newColumn($name) { @@ -276,70 +286,95 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject { case 'bytes': $column_type = 'longblob'; break; + case 'sort255': + $column_type = 'varchar(255)'; + $charset = $this->getUTF8Charset(); + $collation = $this->getUTF8SortingCollation(); + break; + case 'sort128': + $column_type = 'varchar(128)'; + $charset = $this->getUTF8Charset(); + $collation = $this->getUTF8SortingCollation(); + break; + case 'sort64': + $column_type = 'varchar(64)'; + $charset = $this->getUTF8Charset(); + $collation = $this->getUTF8SortingCollation(); + break; + case 'sort32': + $column_type = 'varchar(32)'; + $charset = $this->getUTF8Charset(); + $collation = $this->getUTF8SortingCollation(); + break; + case 'sort': + $column_type = 'longtext'; + $charset = $this->getUTF8Charset(); + $collation = $this->getUTF8SortingCollation(); + break; case 'text255': $column_type = 'varchar(255)'; $charset = $this->getUTF8Charset(); - $collation = $this->getUTF8Collation(); + $collation = $this->getUTF8BinaryCollation(); break; case 'text160': $column_type = 'varchar(160)'; $charset = $this->getUTF8Charset(); - $collation = $this->getUTF8Collation(); + $collation = $this->getUTF8BinaryCollation(); break; case 'text128': $column_type = 'varchar(128)'; $charset = $this->getUTF8Charset(); - $collation = $this->getUTF8Collation(); + $collation = $this->getUTF8BinaryCollation(); break; case 'text80': $column_type = 'varchar(80)'; $charset = $this->getUTF8Charset(); - $collation = $this->getUTF8Collation(); + $collation = $this->getUTF8BinaryCollation(); break; case 'text64': $column_type = 'varchar(64)'; $charset = $this->getUTF8Charset(); - $collation = $this->getUTF8Collation(); + $collation = $this->getUTF8BinaryCollation(); break; case 'text40': $column_type = 'varchar(40)'; $charset = $this->getUTF8Charset(); - $collation = $this->getUTF8Collation(); + $collation = $this->getUTF8BinaryCollation(); break; case 'text32': $column_type = 'varchar(32)'; $charset = $this->getUTF8Charset(); - $collation = $this->getUTF8Collation(); + $collation = $this->getUTF8BinaryCollation(); break; case 'text20': $column_type = 'varchar(20)'; $charset = $this->getUTF8Charset(); - $collation = $this->getUTF8Collation(); + $collation = $this->getUTF8BinaryCollation(); break; case 'text16': $column_type = 'varchar(16)'; $charset = $this->getUTF8Charset(); - $collation = $this->getUTF8Collation(); + $collation = $this->getUTF8BinaryCollation(); break; case 'text12': $column_type = 'varchar(12)'; $charset = $this->getUTF8Charset(); - $collation = $this->getUTF8Collation(); + $collation = $this->getUTF8BinaryCollation(); break; case 'text8': $column_type = 'varchar(8)'; $charset = $this->getUTF8Charset(); - $collation = $this->getUTF8Collation(); + $collation = $this->getUTF8BinaryCollation(); break; case 'text4': $column_type = 'varchar(4)'; $charset = $this->getUTF8Charset(); - $collation = $this->getUTF8Collation(); + $collation = $this->getUTF8BinaryCollation(); break; case 'text': $column_type = 'longtext'; $charset = $this->getUTF8Charset(); - $collation = $this->getUTF8Collation(); + $collation = $this->getUTF8BinaryCollation(); break; case 'bool': $column_type = 'tinyint(1)'; diff --git a/src/applications/maniphest/storage/ManiphestNameIndex.php b/src/applications/maniphest/storage/ManiphestNameIndex.php index 7988dc90f9..62e4e8a524 100644 --- a/src/applications/maniphest/storage/ManiphestNameIndex.php +++ b/src/applications/maniphest/storage/ManiphestNameIndex.php @@ -13,7 +13,7 @@ final class ManiphestNameIndex extends ManiphestDAO { return array( self::CONFIG_TIMESTAMPS => false, self::CONFIG_COLUMN_SCHEMA => array( - 'indexedObjectName' => 'text128', + 'indexedObjectName' => 'sort128', ), self::CONFIG_KEY_SCHEMA => array( 'key_phid' => array( diff --git a/src/applications/maniphest/storage/ManiphestTask.php b/src/applications/maniphest/storage/ManiphestTask.php index d85c2155f5..6661b13f97 100644 --- a/src/applications/maniphest/storage/ManiphestTask.php +++ b/src/applications/maniphest/storage/ManiphestTask.php @@ -71,7 +71,7 @@ final class ManiphestTask extends ManiphestDAO 'ownerPHID' => 'phid?', 'status' => 'text12', 'priority' => 'uint32', - 'title' => 'text', + 'title' => 'sort', 'originalTitle' => 'text', 'description' => 'text', 'mailKey' => 'bytes20', @@ -114,6 +114,9 @@ final class ManiphestTask extends ManiphestDAO 'key_dateModified' => array( 'columns' => array('dateModified'), ), + 'key_title' => array( + 'columns' => array('title(64)'), + ), ), ) + parent::getConfiguration(); } diff --git a/src/applications/phame/storage/PhamePost.php b/src/applications/phame/storage/PhamePost.php index 8c65ff26a3..ea6acbe8b4 100644 --- a/src/applications/phame/storage/PhamePost.php +++ b/src/applications/phame/storage/PhamePost.php @@ -90,7 +90,7 @@ final class PhamePost extends PhameDAO ), self::CONFIG_COLUMN_SCHEMA => array( 'title' => 'text255', - 'phameTitle' => 'text64', + 'phameTitle' => 'sort64', 'visibility' => 'uint32', // T6203/NULLABILITY diff --git a/src/applications/phriction/storage/PhrictionContent.php b/src/applications/phriction/storage/PhrictionContent.php index 5ae4e225fe..c41ec0f302 100644 --- a/src/applications/phriction/storage/PhrictionContent.php +++ b/src/applications/phriction/storage/PhrictionContent.php @@ -32,7 +32,7 @@ final class PhrictionContent extends PhrictionDAO return array( self::CONFIG_COLUMN_SCHEMA => array( 'version' => 'uint32', - 'title' => 'text', + 'title' => 'sort', 'slug' => 'text128', 'content' => 'text', 'changeType' => 'uint32', diff --git a/src/applications/phriction/storage/PhrictionDocument.php b/src/applications/phriction/storage/PhrictionDocument.php index 0ce1383d68..9f384ec610 100644 --- a/src/applications/phriction/storage/PhrictionDocument.php +++ b/src/applications/phriction/storage/PhrictionDocument.php @@ -25,7 +25,7 @@ final class PhrictionDocument extends PhrictionDAO self::CONFIG_AUX_PHID => true, self::CONFIG_TIMESTAMPS => false, self::CONFIG_COLUMN_SCHEMA => array( - 'slug' => 'text128', + 'slug' => 'sort128', 'depth' => 'uint32', 'contentID' => 'id?', 'status' => 'uint32', diff --git a/src/applications/project/storage/PhabricatorProject.php b/src/applications/project/storage/PhabricatorProject.php index d3538332ae..27db584e24 100644 --- a/src/applications/project/storage/PhabricatorProject.php +++ b/src/applications/project/storage/PhabricatorProject.php @@ -122,7 +122,7 @@ final class PhabricatorProject extends PhabricatorProjectDAO 'subprojectPHIDs' => self::SERIALIZATION_JSON, ), self::CONFIG_COLUMN_SCHEMA => array( - 'name' => 'text128', + 'name' => 'sort128', 'status' => 'text32', 'phrictionSlug' => 'text128?', 'isMembershipLocked' => 'bool', diff --git a/src/applications/repository/storage/PhabricatorRepository.php b/src/applications/repository/storage/PhabricatorRepository.php index e1a96149e5..bba53b0d9c 100644 --- a/src/applications/repository/storage/PhabricatorRepository.php +++ b/src/applications/repository/storage/PhabricatorRepository.php @@ -79,8 +79,8 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO 'details' => self::SERIALIZATION_JSON, ), self::CONFIG_COLUMN_SCHEMA => array( - 'name' => 'text255', - 'callsign' => 'text32', + 'name' => 'sort255', + 'callsign' => 'sort32', 'versionControlSystem' => 'text32', 'uuid' => 'text64?', 'pushPolicy' => 'policy', diff --git a/src/infrastructure/customfield/storage/PhabricatorCustomFieldStringIndexStorage.php b/src/infrastructure/customfield/storage/PhabricatorCustomFieldStringIndexStorage.php index 51d5449dc8..8d3d75286e 100644 --- a/src/infrastructure/customfield/storage/PhabricatorCustomFieldStringIndexStorage.php +++ b/src/infrastructure/customfield/storage/PhabricatorCustomFieldStringIndexStorage.php @@ -7,7 +7,7 @@ abstract class PhabricatorCustomFieldStringIndexStorage return array( self::CONFIG_COLUMN_SCHEMA => array( 'indexKey' => 'bytes12', - 'indexValue' => 'text', + 'indexValue' => 'sort', ), self::CONFIG_KEY_SCHEMA => array( 'key_join' => array(