| 1 | //===--- SortJavaScriptImports.cpp - Sort ES6 Imports -----------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// This file implements a sort operation for JavaScript ES6 imports. |
| 11 | /// |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "SortJavaScriptImports.h" |
| 15 | #include "TokenAnalyzer.h" |
| 16 | #include "TokenAnnotator.h" |
| 17 | #include "clang/Basic/Diagnostic.h" |
| 18 | #include "clang/Basic/DiagnosticOptions.h" |
| 19 | #include "clang/Basic/LLVM.h" |
| 20 | #include "clang/Basic/SourceLocation.h" |
| 21 | #include "clang/Basic/SourceManager.h" |
| 22 | #include "clang/Format/Format.h" |
| 23 | #include "llvm/ADT/STLExtras.h" |
| 24 | #include "llvm/ADT/SmallVector.h" |
| 25 | #include "llvm/Support/Debug.h" |
| 26 | #include <algorithm> |
| 27 | #include <string> |
| 28 | |
| 29 | #define DEBUG_TYPE "format-formatter" |
| 30 | |
| 31 | namespace clang { |
| 32 | namespace format { |
| 33 | |
| 34 | class FormatTokenLexer; |
| 35 | |
| 36 | using clang::format::FormatStyle; |
| 37 | |
| 38 | // An imported symbol in a JavaScript ES6 import/export, possibly aliased. |
| 39 | struct JsImportedSymbol { |
| 40 | StringRef Symbol; |
| 41 | StringRef Alias; |
| 42 | SourceRange Range; |
| 43 | |
| 44 | bool operator==(const JsImportedSymbol &RHS) const { |
| 45 | // Ignore Range for comparison, it is only used to stitch code together, |
| 46 | // but imports at different code locations are still conceptually the same. |
| 47 | return Symbol == RHS.Symbol && Alias == RHS.Alias; |
| 48 | } |
| 49 | }; |
| 50 | |
| 51 | // An ES6 module reference. |
| 52 | // |
| 53 | // ES6 implements a module system, where individual modules (~= source files) |
| 54 | // can reference other modules, either importing symbols from them, or exporting |
| 55 | // symbols from them: |
| 56 | // import {foo} from 'foo'; |
| 57 | // export {foo}; |
| 58 | // export {bar} from 'bar'; |
| 59 | // |
| 60 | // `export`s with URLs are syntactic sugar for an import of the symbol from the |
| 61 | // URL, followed by an export of the symbol, allowing this code to treat both |
| 62 | // statements more or less identically, with the exception being that `export`s |
| 63 | // are sorted last. |
| 64 | // |
| 65 | // imports and exports support individual symbols, but also a wildcard syntax: |
| 66 | // import * as prefix from 'foo'; |
| 67 | // export * from 'bar'; |
| 68 | // |
| 69 | // This struct represents both exports and imports to build up the information |
| 70 | // required for sorting module references. |
| 71 | struct JsModuleReference { |
| 72 | bool IsExport = false; |
| 73 | // Module references are sorted into these categories, in order. |
| 74 | enum ReferenceCategory { |
| 75 | SIDE_EFFECT, // "import 'something';" |
| 76 | ABSOLUTE, // from 'something' |
| 77 | RELATIVE_PARENT, // from '../*' |
| 78 | RELATIVE, // from './*' |
| 79 | }; |
| 80 | ReferenceCategory Category = ReferenceCategory::SIDE_EFFECT; |
| 81 | // The URL imported, e.g. `import .. from 'url';`. Empty for `export {a, b};`. |
| 82 | StringRef URL; |
| 83 | // Prefix from "import * as prefix". Empty for symbol imports and `export *`. |
| 84 | // Implies an empty names list. |
| 85 | StringRef Prefix; |
| 86 | // Symbols from `import {SymbolA, SymbolB, ...} from ...;`. |
| 87 | SmallVector<JsImportedSymbol, 1> Symbols; |
| 88 | // Textual position of the import/export, including preceding and trailing |
| 89 | // comments. |
| 90 | SourceRange Range; |
| 91 | }; |
| 92 | |
| 93 | bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS) { |
| 94 | if (LHS.IsExport != RHS.IsExport) |
| 95 | return LHS.IsExport < RHS.IsExport; |
| 96 | if (LHS.Category != RHS.Category) |
| 97 | return LHS.Category < RHS.Category; |
| 98 | if (LHS.Category == JsModuleReference::ReferenceCategory::SIDE_EFFECT) |
| 99 | // Side effect imports might be ordering sensitive. Consider them equal so |
| 100 | // that they maintain their relative order in the stable sort below. |
| 101 | // This retains transitivity because LHS.Category == RHS.Category here. |
| 102 | return false; |
| 103 | // Empty URLs sort *last* (for export {...};). |
| 104 | if (LHS.URL.empty() != RHS.URL.empty()) |
| 105 | return LHS.URL.empty() < RHS.URL.empty(); |
| 106 | if (int Res = LHS.URL.compare_lower(RHS.URL)) |
| 107 | return Res < 0; |
| 108 | // '*' imports (with prefix) sort before {a, b, ...} imports. |
| 109 | if (LHS.Prefix.empty() != RHS.Prefix.empty()) |
| 110 | return LHS.Prefix.empty() < RHS.Prefix.empty(); |
| 111 | if (LHS.Prefix != RHS.Prefix) |
| 112 | return LHS.Prefix > RHS.Prefix; |
| 113 | return false; |
| 114 | } |
| 115 | |
| 116 | // JavaScriptImportSorter sorts JavaScript ES6 imports and exports. It is |
| 117 | // implemented as a TokenAnalyzer because ES6 imports have substantial syntactic |
| 118 | // structure, making it messy to sort them using regular expressions. |
| 119 | class JavaScriptImportSorter : public TokenAnalyzer { |
| 120 | public: |
| 121 | JavaScriptImportSorter(const Environment &Env, const FormatStyle &Style) |
| 122 | : TokenAnalyzer(Env, Style), |
| 123 | FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {} |
| 124 | |
| 125 | std::pair<tooling::Replacements, unsigned> |
| 126 | analyze(TokenAnnotator &Annotator, |
| 127 | SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, |
| 128 | FormatTokenLexer &Tokens) override { |
| 129 | tooling::Replacements Result; |
| 130 | AffectedRangeMgr.computeAffectedLines(AnnotatedLines); |
| 131 | |
| 132 | const AdditionalKeywords &Keywords = Tokens.getKeywords(); |
| 133 | SmallVector<JsModuleReference, 16> References; |
| 134 | AnnotatedLine *FirstNonImportLine; |
| 135 | std::tie(References, FirstNonImportLine) = |
| 136 | parseModuleReferences(Keywords, AnnotatedLines); |
| 137 | |
| 138 | if (References.empty()) |
| 139 | return {Result, 0}; |
| 140 | |
| 141 | SmallVector<unsigned, 16> Indices; |
| 142 | for (unsigned i = 0, e = References.size(); i != e; ++i) |
| 143 | Indices.push_back(i); |
| 144 | std::stable_sort(Indices.begin(), Indices.end(), |
| 145 | [&](unsigned LHSI, unsigned RHSI) { |
| 146 | return References[LHSI] < References[RHSI]; |
| 147 | }); |
| 148 | bool ReferencesInOrder = std::is_sorted(Indices.begin(), Indices.end()); |
| 149 | |
| 150 | std::string ReferencesText; |
| 151 | bool SymbolsInOrder = true; |
| 152 | for (unsigned i = 0, e = Indices.size(); i != e; ++i) { |
| 153 | JsModuleReference Reference = References[Indices[i]]; |
| 154 | if (appendReference(ReferencesText, Reference)) |
| 155 | SymbolsInOrder = false; |
| 156 | if (i + 1 < e) { |
| 157 | // Insert breaks between imports and exports. |
| 158 | ReferencesText += "\n"; |
| 159 | // Separate imports groups with two line breaks, but keep all exports |
| 160 | // in a single group. |
| 161 | if (!Reference.IsExport && |
| 162 | (Reference.IsExport != References[Indices[i + 1]].IsExport || |
| 163 | Reference.Category != References[Indices[i + 1]].Category)) |
| 164 | ReferencesText += "\n"; |
| 165 | } |
| 166 | } |
| 167 | |
| 168 | if (ReferencesInOrder && SymbolsInOrder) |
| 169 | return {Result, 0}; |
| 170 | |
| 171 | SourceRange InsertionPoint = References[0].Range; |
| 172 | InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd()); |
| 173 | |
| 174 | // The loop above might collapse previously existing line breaks between |
| 175 | // import blocks, and thus shrink the file. SortIncludes must not shrink |
| 176 | // overall source length as there is currently no re-calculation of ranges |
| 177 | // after applying source sorting. |
| 178 | // This loop just backfills trailing spaces after the imports, which are |
| 179 | // harmless and will be stripped by the subsequent formatting pass. |
| 180 | // FIXME: A better long term fix is to re-calculate Ranges after sorting. |
| 181 | unsigned PreviousSize = getSourceText(InsertionPoint).size(); |
| 182 | while (ReferencesText.size() < PreviousSize) { |
| 183 | ReferencesText += " "; |
| 184 | } |
| 185 | |
| 186 | // Separate references from the main code body of the file. |
| 187 | if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2) |
| 188 | ReferencesText += "\n"; |
| 189 | |
| 190 | LLVM_DEBUG(llvm::dbgs() << "Replacing imports:\n" |
| 191 | << getSourceText(InsertionPoint) << "\nwith:\n" |
| 192 | << ReferencesText << "\n"); |
| 193 | auto Err = Result.add(tooling::Replacement( |
| 194 | Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint), |
| 195 | ReferencesText)); |
| 196 | // FIXME: better error handling. For now, just print error message and skip |
| 197 | // the replacement for the release version. |
| 198 | if (Err) { |
| 199 | llvm::errs() << llvm::toString(std::move(Err)) << "\n"; |
| 200 | assert(false); |
| 201 | } |
| 202 | |
| 203 | return {Result, 0}; |
| 204 | } |
| 205 | |
| 206 | private: |
| 207 | FormatToken *Current; |
| 208 | FormatToken *LineEnd; |
| 209 | |
| 210 | FormatToken invalidToken; |
| 211 | |
| 212 | StringRef FileContents; |
| 213 | |
| 214 | void skipComments() { Current = skipComments(Current); } |
| 215 | |
| 216 | FormatToken *skipComments(FormatToken *Tok) { |
| 217 | while (Tok && Tok->is(tok::comment)) |
| 218 | Tok = Tok->Next; |
| 219 | return Tok; |
| 220 | } |
| 221 | |
| 222 | void nextToken() { |
| 223 | Current = Current->Next; |
| 224 | skipComments(); |
| 225 | if (!Current || Current == LineEnd->Next) { |
| 226 | // Set the current token to an invalid token, so that further parsing on |
| 227 | // this line fails. |
| 228 | invalidToken.Tok.setKind(tok::unknown); |
| 229 | Current = &invalidToken; |
| 230 | } |
| 231 | } |
| 232 | |
| 233 | StringRef getSourceText(SourceRange Range) { |
| 234 | return getSourceText(Range.getBegin(), Range.getEnd()); |
| 235 | } |
| 236 | |
| 237 | StringRef getSourceText(SourceLocation Begin, SourceLocation End) { |
| 238 | const SourceManager &SM = Env.getSourceManager(); |
| 239 | return FileContents.substr(SM.getFileOffset(Begin), |
| 240 | SM.getFileOffset(End) - SM.getFileOffset(Begin)); |
| 241 | } |
| 242 | |
| 243 | // Appends ``Reference`` to ``Buffer``, returning true if text within the |
| 244 | // ``Reference`` changed (e.g. symbol order). |
| 245 | bool appendReference(std::string &Buffer, JsModuleReference &Reference) { |
| 246 | // Sort the individual symbols within the import. |
| 247 | // E.g. `import {b, a} from 'x';` -> `import {a, b} from 'x';` |
| 248 | SmallVector<JsImportedSymbol, 1> Symbols = Reference.Symbols; |
| 249 | std::stable_sort( |
| 250 | Symbols.begin(), Symbols.end(), |
| 251 | [&](const JsImportedSymbol &LHS, const JsImportedSymbol &RHS) { |
| 252 | return LHS.Symbol.compare_lower(RHS.Symbol) < 0; |
| 253 | }); |
| 254 | if (Symbols == Reference.Symbols) { |
| 255 | // No change in symbol order. |
| 256 | StringRef ReferenceStmt = getSourceText(Reference.Range); |
| 257 | Buffer += ReferenceStmt; |
| 258 | return false; |
| 259 | } |
| 260 | // Stitch together the module reference start... |
| 261 | SourceLocation SymbolsStart = Reference.Symbols.front().Range.getBegin(); |
| 262 | SourceLocation SymbolsEnd = Reference.Symbols.back().Range.getEnd(); |
| 263 | Buffer += getSourceText(Reference.Range.getBegin(), SymbolsStart); |
| 264 | // ... then the references in order ... |
| 265 | for (auto I = Symbols.begin(), E = Symbols.end(); I != E; ++I) { |
| 266 | if (I != Symbols.begin()) |
| 267 | Buffer += ","; |
| 268 | Buffer += getSourceText(I->Range); |
| 269 | } |
| 270 | // ... followed by the module reference end. |
| 271 | Buffer += getSourceText(SymbolsEnd, Reference.Range.getEnd()); |
| 272 | return true; |
| 273 | } |
| 274 | |
| 275 | // Parses module references in the given lines. Returns the module references, |
| 276 | // and a pointer to the first "main code" line if that is adjacent to the |
| 277 | // affected lines of module references, nullptr otherwise. |
| 278 | std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine *> |
| 279 | parseModuleReferences(const AdditionalKeywords &Keywords, |
| 280 | SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { |
| 281 | SmallVector<JsModuleReference, 16> References; |
| 282 | SourceLocation Start; |
| 283 | AnnotatedLine *FirstNonImportLine = nullptr; |
| 284 | bool AnyImportAffected = false; |
| 285 | for (auto Line : AnnotatedLines) { |
| 286 | Current = Line->First; |
| 287 | LineEnd = Line->Last; |
| 288 | skipComments(); |
| 289 | if (Start.isInvalid() || References.empty()) |
| 290 | // After the first file level comment, consider line comments to be part |
| 291 | // of the import that immediately follows them by using the previously |
| 292 | // set Start. |
| 293 | Start = Line->First->Tok.getLocation(); |
| 294 | if (!Current) { |
| 295 | // Only comments on this line. Could be the first non-import line. |
| 296 | FirstNonImportLine = Line; |
| 297 | continue; |
| 298 | } |
| 299 | JsModuleReference Reference; |
| 300 | Reference.Range.setBegin(Start); |
| 301 | if (!parseModuleReference(Keywords, Reference)) { |
| 302 | if (!FirstNonImportLine) |
| 303 | FirstNonImportLine = Line; // if no comment before. |
| 304 | break; |
| 305 | } |
| 306 | FirstNonImportLine = nullptr; |
| 307 | AnyImportAffected = AnyImportAffected || Line->Affected; |
| 308 | Reference.Range.setEnd(LineEnd->Tok.getEndLoc()); |
| 309 | LLVM_DEBUG({ |
| 310 | llvm::dbgs() << "JsModuleReference: {" |
| 311 | << "is_export: " << Reference.IsExport |
| 312 | << ", cat: " << Reference.Category |
| 313 | << ", url: " << Reference.URL |
| 314 | << ", prefix: " << Reference.Prefix; |
| 315 | for (size_t i = 0; i < Reference.Symbols.size(); ++i) |
| 316 | llvm::dbgs() << ", " << Reference.Symbols[i].Symbol << " as " |
| 317 | << Reference.Symbols[i].Alias; |
| 318 | llvm::dbgs() << ", text: " << getSourceText(Reference.Range); |
| 319 | llvm::dbgs() << "}\n"; |
| 320 | }); |
| 321 | References.push_back(Reference); |
| 322 | Start = SourceLocation(); |
| 323 | } |
| 324 | // Sort imports if any import line was affected. |
| 325 | if (!AnyImportAffected) |
| 326 | References.clear(); |
| 327 | return std::make_pair(References, FirstNonImportLine); |
| 328 | } |
| 329 | |
| 330 | // Parses a JavaScript/ECMAScript 6 module reference. |
| 331 | // See http://www.ecma-international.org/ecma-262/6.0/#sec-scripts-and-modules |
| 332 | // for grammar EBNF (production ModuleItem). |
| 333 | bool parseModuleReference(const AdditionalKeywords &Keywords, |
| 334 | JsModuleReference &Reference) { |
| 335 | if (!Current || !Current->isOneOf(Keywords.kw_import, tok::kw_export)) |
| 336 | return false; |
| 337 | Reference.IsExport = Current->is(tok::kw_export); |
| 338 | |
| 339 | nextToken(); |
| 340 | if (Current->isStringLiteral() && !Reference.IsExport) { |
| 341 | // "import 'side-effect';" |
| 342 | Reference.Category = JsModuleReference::ReferenceCategory::SIDE_EFFECT; |
| 343 | Reference.URL = |
| 344 | Current->TokenText.substr(1, Current->TokenText.size() - 2); |
| 345 | return true; |
| 346 | } |
| 347 | |
| 348 | if (!parseModuleBindings(Keywords, Reference)) |
| 349 | return false; |
| 350 | |
| 351 | if (Current->is(Keywords.kw_from)) { |
| 352 | // imports have a 'from' clause, exports might not. |
| 353 | nextToken(); |
| 354 | if (!Current->isStringLiteral()) |
| 355 | return false; |
| 356 | // URL = TokenText without the quotes. |
| 357 | Reference.URL = |
| 358 | Current->TokenText.substr(1, Current->TokenText.size() - 2); |
| 359 | if (Reference.URL.startswith("..")) |
| 360 | Reference.Category = |
| 361 | JsModuleReference::ReferenceCategory::RELATIVE_PARENT; |
| 362 | else if (Reference.URL.startswith(".")) |
| 363 | Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE; |
| 364 | else |
| 365 | Reference.Category = JsModuleReference::ReferenceCategory::ABSOLUTE; |
| 366 | } else { |
| 367 | // w/o URL groups with "empty". |
| 368 | Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE; |
| 369 | } |
| 370 | return true; |
| 371 | } |
| 372 | |
| 373 | bool parseModuleBindings(const AdditionalKeywords &Keywords, |
| 374 | JsModuleReference &Reference) { |
| 375 | if (parseStarBinding(Keywords, Reference)) |
| 376 | return true; |
| 377 | return parseNamedBindings(Keywords, Reference); |
| 378 | } |
| 379 | |
| 380 | bool parseStarBinding(const AdditionalKeywords &Keywords, |
| 381 | JsModuleReference &Reference) { |
| 382 | // * as prefix from '...'; |
| 383 | if (Current->isNot(tok::star)) |
| 384 | return false; |
| 385 | nextToken(); |
| 386 | if (Current->isNot(Keywords.kw_as)) |
| 387 | return false; |
| 388 | nextToken(); |
| 389 | if (Current->isNot(tok::identifier)) |
| 390 | return false; |
| 391 | Reference.Prefix = Current->TokenText; |
| 392 | nextToken(); |
| 393 | return true; |
| 394 | } |
| 395 | |
| 396 | bool parseNamedBindings(const AdditionalKeywords &Keywords, |
| 397 | JsModuleReference &Reference) { |
| 398 | if (Current->is(tok::identifier)) { |
| 399 | nextToken(); |
| 400 | if (Current->is(Keywords.kw_from)) |
| 401 | return true; |
| 402 | if (Current->isNot(tok::comma)) |
| 403 | return false; |
| 404 | nextToken(); // eat comma. |
| 405 | } |
| 406 | if (Current->isNot(tok::l_brace)) |
| 407 | return false; |
| 408 | |
| 409 | // {sym as alias, sym2 as ...} from '...'; |
| 410 | while (Current->isNot(tok::r_brace)) { |
| 411 | nextToken(); |
| 412 | if (Current->is(tok::r_brace)) |
| 413 | break; |
| 414 | if (!Current->isOneOf(tok::identifier, tok::kw_default)) |
| 415 | return false; |
| 416 | |
| 417 | JsImportedSymbol Symbol; |
| 418 | Symbol.Symbol = Current->TokenText; |
| 419 | // Make sure to include any preceding comments. |
| 420 | Symbol.Range.setBegin( |
| 421 | Current->getPreviousNonComment()->Next->WhitespaceRange.getBegin()); |
| 422 | nextToken(); |
| 423 | |
| 424 | if (Current->is(Keywords.kw_as)) { |
| 425 | nextToken(); |
| 426 | if (!Current->isOneOf(tok::identifier, tok::kw_default)) |
| 427 | return false; |
| 428 | Symbol.Alias = Current->TokenText; |
| 429 | nextToken(); |
| 430 | } |
| 431 | Symbol.Range.setEnd(Current->Tok.getLocation()); |
| 432 | Reference.Symbols.push_back(Symbol); |
| 433 | |
| 434 | if (!Current->isOneOf(tok::r_brace, tok::comma)) |
| 435 | return false; |
| 436 | } |
| 437 | nextToken(); // consume r_brace |
| 438 | return true; |
| 439 | } |
| 440 | }; |
| 441 | |
| 442 | tooling::Replacements sortJavaScriptImports(const FormatStyle &Style, |
| 443 | StringRef Code, |
| 444 | ArrayRef<tooling::Range> Ranges, |
| 445 | StringRef FileName) { |
| 446 | // FIXME: Cursor support. |
| 447 | return JavaScriptImportSorter(Environment(Code, FileName, Ranges), Style) |
| 448 | .process() |
| 449 | .first; |
| 450 | } |
| 451 | |
| 452 | } // end namespace format |
| 453 | } // end namespace clang |
| 454 | |