From cb99396c64dca28d76e36f1180f0a9f252469316 Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 27 Jul 2018 11:26:42 -0700 Subject: [PATCH] Make the "Is this JSON?" DocumentEngine heuristic a little tighter Summary: See PHI749. Ref T13164. We currently misdetect files starting with `[submodule ...` as JSON. Make this a bit stricter: - If the file is short, just see if it's actually literally real JSON. - If the file is long, give up. This should get the right result in pretty much all the cases people care about, I think. We could make the long-file guesser better some day. Test Plan: Detected a `[submodule ...` file (no longer JSON) and a `{"duck": "quack"}` file (still JSON). Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13164 Differential Revision: https://secure.phabricator.com/D19544 --- .../files/document/PhabricatorDocumentRef.php | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/applications/files/document/PhabricatorDocumentRef.php b/src/applications/files/document/PhabricatorDocumentRef.php index 38e4491615..036656c00e 100644 --- a/src/applications/files/document/PhabricatorDocumentRef.php +++ b/src/applications/files/document/PhabricatorDocumentRef.php @@ -119,11 +119,23 @@ final class PhabricatorDocumentRef } $snippet = $this->getSnippet(); - if (!preg_match('/^\s*[{[]/', $snippet)) { + + // If the file is longer than the snippet, we don't detect the content + // as JSON. We could use some kind of heuristic here if we wanted, but + // see PHI749 for a false positive. + if (strlen($snippet) < $this->getByteLength()) { return false; } - return phutil_is_utf8($snippet); + // If the snippet is the whole file, just check if the snippet is valid + // JSON. Note that `phutil_json_decode()` only accepts arrays and objects + // as JSON, so this won't misfire on files with content like "3". + try { + phutil_json_decode($snippet); + return true; + } catch (Exception $ex) { + return false; + } } public function getSnippet() {