Punycode.h 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. //===--- Punycode.h - UTF-8 to Punycode transcoding -------------*- C++ -*-===//
  2. //
  3. // This source file is part of the Swift.org open source project
  4. //
  5. // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
  6. // Licensed under Apache License v2.0 with Runtime Library Exception
  7. //
  8. // See https://swift.org/LICENSE.txt for license information
  9. // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
  10. //
  11. //===----------------------------------------------------------------------===//
  12. //
  13. // These functions implement a variant of the Punycode algorithm from RFC3492,
  14. // originally designed for encoding international domain names, for the purpose
  15. // of encoding Swift identifiers into mangled symbol names. This version differs
  16. // from RFC3492 in the following respects:
  17. // - '_' is used as the encoding delimiter instead of '-'.
  18. // - Encoding digits are represented using [a-zA-J] instead of [a-z0-9], because
  19. // symbol names are case-sensitive, and Swift mangled identifiers cannot begin
  20. // with a digit.
  21. // - Optionally, non-symbol ASCII characters (characters except [$_a-zA-Z0-9])
  22. // are mapped to the code range 0xD800 - 0xD880 and are also encoded like
  23. // non-ASCII unicode characters.
  24. //
  25. //===----------------------------------------------------------------------===//
  26. #ifndef SWIFT_DEMANGLING_PUNYCODE_H
  27. #define SWIFT_DEMANGLING_PUNYCODE_H
  28. #include "StringRef.h"
  29. #include <vector>
  30. #include <cstdint>
  31. namespace swift {
  32. namespace Punycode {
  33. using llvm::StringRef;
  34. /// Encodes a sequence of code points into Punycode.
  35. ///
  36. /// Returns false if input contains surrogate code points.
  37. bool encodePunycode(const std::vector<uint32_t> &InputCodePoints,
  38. std::string &OutPunycode);
  39. /// Decodes a Punycode string into a sequence of Unicode scalars.
  40. ///
  41. /// Returns false if decoding failed.
  42. bool decodePunycode(StringRef InputPunycode,
  43. std::vector<uint32_t> &OutCodePoints);
  44. /// Encodes an UTF8 string into Punycode.
  45. ///
  46. /// If \p mapNonSymbolChars is true, non-symbol ASCII characters (characters
  47. /// except [$_a-zA-Z0-9]) are also encoded like non-ASCII unicode characters.
  48. /// Returns false if \p InputUTF8 contains surrogate code points.
  49. bool encodePunycodeUTF8(StringRef InputUTF8, std::string &OutPunycode,
  50. bool mapNonSymbolChars = false);
  51. bool decodePunycodeUTF8(StringRef InputPunycode, std::string &OutUTF8);
  52. } // end namespace Punycode
  53. } // end namespace swift
  54. #endif // SWIFT_DEMANGLING_PUNYCODE_H