diff --git a/bench/src/main/scala/com/rossabaker/ci/bench/CaseFoldedStringBench.scala b/bench/src/main/scala/com/rossabaker/ci/bench/CaseFoldedStringBench.scala new file mode 100644 index 0000000..082a1ac --- /dev/null +++ b/bench/src/main/scala/com/rossabaker/ci/bench/CaseFoldedStringBench.scala @@ -0,0 +1,55 @@ +package org.typelevel.ci +package bench + +import org.scalacheck._ +import org.typelevel.ci.testing.arbitraries._ +import cats._ +import org.openjdk.jmh.annotations._ +import java.util.concurrent.TimeUnit + +/** JMH benchmarks which compare hashing and `foldMap` over + * [[CaseFoldedString]] with the same operations over plain `String`. + * + * Each benchmark method generates its own input with ScalaCheck from + * [[nextSeed]], so every measurement includes the cost of generating that + * input. + */ +@State(Scope.Thread) +@BenchmarkMode(Array(Mode.Throughput, Mode.AverageTime)) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +class CaseFoldedStringBench { + + /** Seed handed to the next generated input; starts at `Long.MinValue`. */ + var currentSeed: Long = Long.MinValue + + /** Returns the current seed and advances `currentSeed` by one. */ + def nextSeed: Long = { + val seed = currentSeed + currentSeed += 1L + seed + } + + /** Generates a `String` from the default parameters and the next seed. */ + def nextString: String = + Arbitrary.arbitrary[String].apply(Gen.Parameters.default, rng.Seed(nextSeed)).getOrElse(throw new AssertionError("Failed to generate String.")) + + /** Generates a `List[String]` from the default parameters and the next seed. */ + def nextListOfString: List[String] = + Gen.listOf(Arbitrary.arbitrary[String])(Gen.Parameters.default, rng.Seed(nextSeed)).getOrElse(throw new AssertionError("Failed to generate List[String].")) + + @Benchmark + def caseFoldedStringHash: Int = + CaseFoldedString(nextString).hashCode + + @Benchmark + def caseFoldedStringFoldMap: CaseFoldedString = + Foldable[List].foldMap(nextListOfString)(CaseFoldedString.apply) + + @Benchmark + def stringHash: Int = + nextString.hashCode + + @Benchmark + def stringFoldMap: String = + Foldable[List].foldMap(nextListOfString)(identity) +} diff --git a/build.sbt b/build.sbt index a3293f0..0f7204b 100644 --- a/build.sbt +++ b/build.sbt @@ -71,9 +71,12 @@ lazy val bench = project .enablePlugins(NoPublishPlugin) .enablePlugins(JmhPlugin) .settings( - name := "case-insensitive-bench" + name := "case-insensitive-bench", + libraryDependencies ++= List( + "org.scalacheck" %% "scalacheck" % scalacheckV + ) ) - .dependsOn(core.jvm) + .dependsOn(core.jvm, testing.jvm) lazy val docs = 
project .in(file("site")) diff --git a/core/src/main/scala/org/typelevel/ci/CIString.scala b/core/src/main/scala/org/typelevel/ci/CIString.scala index 13710be..af4ff7a 100644 --- a/core/src/main/scala/org/typelevel/ci/CIString.scala +++ b/core/src/main/scala/org/typelevel/ci/CIString.scala @@ -24,52 +24,46 @@ import scala.math.Ordered /** A case-insensitive String. * - * Two CI strings are equal if and only if they are the same length, and each corresponding - * character is equal after calling either `toUpper` or `toLower`. + * Comparisons are based on the case folded representation of the `String` + * as defined by the Unicode standard. See [[CaseFoldedString]] for a full + * discussion on those rules. * - * Ordering is based on a string comparison after folding each character to uppercase and then back - * to lowercase. - * - * All comparisons are insensitive to locales. + * @note This class differs from [[CaseFoldedString]] in that it keeps a + * reference to the original input `String` in whatever form it was + * given. This makes [[CIString]] useful if you wish to perform case + * insensitive operations on a `String`, but then recover the original, + * unaltered form. If you do not care about the original input form, + * and just want a single case insensitive `String` value, then + * [[CaseFoldedString]] is more efficient and you should consider using + * that directly. * * @param toString * The original value the CI String was constructed with. 
*/ -final class CIString private (override val toString: String) +final class CIString private (override val toString: String, val asCaseFoldedString: CaseFoldedString) extends Ordered[CIString] with Serializable { + + @deprecated(message = "Please provide a CaseFoldedString directly.", since = "1.3.0") + private def this(toString: String) = { + this(toString, CaseFoldedString(toString)) + } + override def equals(that: Any): Boolean = that match { case that: CIString => - this.toString.equalsIgnoreCase(that.toString) + // Note java.lang.String.equalsIgnoreCase _does not_ handle all title + // case unicode characters, so we can't use it here. See the tests for + // an example. + this.asCaseFoldedString == that.asCaseFoldedString case _ => false } - @transient private[this] var hash = 0 - override def hashCode(): Int = { - if (hash == 0) - hash = calculateHash - hash - } - - private[this] def calculateHash: Int = { - var h = 17 - var i = 0 - val len = toString.length - while (i < len) { - // Strings are equal igoring case if either their uppercase or lowercase - // forms are equal. Equality of one does not imply the other, so we need - // to go in both directions. A character is not guaranteed to make this - // round trip, but it doesn't matter as long as all equal characters - // hash the same. 
- h = h * 31 + toString.charAt(i).toUpper.toLower - i += 1 - } - h - } + override def hashCode(): Int = + asCaseFoldedString.hashCode override def compare(that: CIString): Int = - this.toString.compareToIgnoreCase(that.toString) + Ordering[CaseFoldedString].compare(asCaseFoldedString, that.asCaseFoldedString) def transform(f: String => String): CIString = CIString(f(toString)) @@ -87,7 +81,15 @@ final class CIString private (override val toString: String) @suppressUnusedImportWarningForCompat object CIString { - def apply(value: String): CIString = new CIString(value) + + def apply(value: String, useTurkicFolding: Boolean): CIString = + new CIString(value, CaseFoldedString(value, useTurkicFolding)) + + def apply(value: String): CIString = + apply(value, false) + + def fromCaseFoldedString(value: CaseFoldedString): CIString = + new CIString(value.toString, value) val empty = CIString("") diff --git a/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala new file mode 100644 index 0000000..00a980f --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala @@ -0,0 +1,162 @@ +package org.typelevel.ci + +import cats._ +import cats.kernel.LowerBounded +import org.typelevel.ci.compat._ +import scala.annotation.tailrec + +/** A case folded `String`. This is a `String` which has been converted into a + * state which is suitable for case insensitive matching under the Unicode + * standard. + * + * This type differs from [[CIString]] in that it does ''not'' retain the + * original input `String` value. That is, this is a destructive + * transformation. You should use [[CaseFoldedString]] instead of + * [[CIString]] when you only want the case insensitive `String` and you + * never want to return the `String` back into the input value. In such cases + * [[CaseFoldedString]] will be more efficient than [[CIString]] as it only + * has to keep around a single `String` in memory. 
+ * + * Case insensitive `String` values under Unicode are not always intuitive, + * especially on the JVM. There are three character cases to consider, lower + * case, upper case, and title case, and not all Unicode codePoints have all + * 3, some only have 2, some only 1. For some codePoints, the JRE standard + * operations don't always work as you'd expect. + * + * {{{ + * scala> val codePoint: Int = 8093 + * val codePoint: Int = 8093 + * + * scala> new String(Character.toChars(codePoint)) + * val res0: String = ᾝ + * + * scala> res0.toUpperCase + * val res1: String = ἭΙ + * + * scala> res0.toUpperCase.toLowerCase == res0.toLowerCase + * val res2: Boolean = false + * + * scala> Character.getName(res0.head) + * val res3: String = GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI + * + * scala> res0.toUpperCase.toLowerCase.equalsIgnoreCase(res0.toLowerCase) + * val res4: Boolean = false + * }}} + * + * In this example, given the Unicode character \u1f9d, converting it to + * upper case, then to lower case, is not equal under normal String + * equality. `String.equalsIgnoreCase` also does not work correctly by the + * Unicode standard. + * + * Making matters more complicated, for certain Turkic languages, the case + * folding rules change. See the Unicode standard for a full discussion of + * the topic. + * + * @note For most `String` values the `toString` form of this is lower case + * (when the given character has more than one case), but this is not + * always the case. Certain Unicode scripts have exceptions to this and + * will be case folded into upper case. If you want/need an only lower + * case `String`, you should call `.toString.toLowerCase`. 
+ * + * @see [[https://www.unicode.org/versions/Unicode14.0.0/ch05.pdf#G21790]] + */ +final case class CaseFoldedString private (override val toString: String) extends AnyVal { + + def isEmpty: Boolean = toString.isEmpty + + def nonEmpty: Boolean = !isEmpty + + def length: Int = toString.length + + def size: Int = length + + def trim: CaseFoldedString = + new CaseFoldedString(toString.trim) // value is already case folded; skip folding it a second time + + private final def copy(toString: String): CaseFoldedString = + CaseFoldedString(toString) +} + +object CaseFoldedString { + + /** Create a [[CaseFoldedString]] from a `String`. + * + * @param turkicFoldingRules if `true`, use the case folding rules + * applicable to some Turkic languages. + */ + def apply(value: String, turkicFoldingRules: Boolean): CaseFoldedString = { + val builder: java.lang.StringBuilder = new java.lang.StringBuilder(value.length * 3) + val foldCodePoint: Int => Array[Int] = + if (turkicFoldingRules) { + CaseFolding.turkicFullCaseFoldedCodePoints + } else { + CaseFolding.fullCaseFoldedCodePoints + } + + @tailrec + def loop(index: Int): String = + if (index >= value.length) { + builder.toString + } else { + val codePoint: Int = value.codePointAt(index) + foldCodePoint(codePoint).foreach(c => builder.appendCodePoint(c)) + val inc: Int = Character.charCount(codePoint) // 2 chars for a supplementary code point, otherwise 1 + loop(index + inc) + } + + new CaseFoldedString(loop(0)) + } + + /** Create a [[CaseFoldedString]] from a `String`. + * + * @note This factory method does ''not'' use the Turkic case folding + * rules. For the majority of languages this is the correct method of + * case folding. If you know your `String` is specific to one of the + * Turkic languages which use special case folding rules, you can use + * the secondary factory method to enable case folding under those + * rules. 
+ */ + def apply(value: String): CaseFoldedString = + apply(value, turkicFoldingRules = false) + + val empty: CaseFoldedString = + CaseFoldedString("") + + implicit val hashAndOrderForCaseFoldedString: Hash[CaseFoldedString] with Order[CaseFoldedString] = + new Hash[CaseFoldedString] with Order[CaseFoldedString] { + override def hash(x: CaseFoldedString): Int = + x.hashCode + + override def compare(x: CaseFoldedString, y: CaseFoldedString): Int = + x.toString.compare(y.toString) + } + + implicit val orderingForCaseFoldedString: Ordering[CaseFoldedString] = + hashAndOrderForCaseFoldedString.toOrdering + + implicit val showForCaseFoldedString: Show[CaseFoldedString] = + Show.fromToString + + implicit val lowerBoundForCaseFoldedString: LowerBounded[CaseFoldedString] = + new LowerBounded[CaseFoldedString] { + override val partialOrder: PartialOrder[CaseFoldedString] = + hashAndOrderForCaseFoldedString + + override val minBound: CaseFoldedString = + empty + } + + implicit val monoidForCaseFoldedString: Monoid[CaseFoldedString] = + new Monoid[CaseFoldedString] { + override val empty: CaseFoldedString = CaseFoldedString.empty + + override def combine(x: CaseFoldedString, y: CaseFoldedString): CaseFoldedString = + new CaseFoldedString(x.toString + y.toString) + + override def combineAll(xs: IterableOnce[CaseFoldedString]): CaseFoldedString = { + val sb: StringBuilder = new StringBuilder + xs.iterator.foreach(cfs => sb.append(cfs.toString)) + new CaseFoldedString(sb.toString) + } + } +} diff --git a/core/src/main/scala/org/typelevel/ci/CaseFolding.scala b/core/src/main/scala/org/typelevel/ci/CaseFolding.scala new file mode 100644 index 0000000..00a71de --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/CaseFolding.scala @@ -0,0 +1,1645 @@ +package org.typelevel.ci + +/** These are lookup tables for case folding. There are several different case + * folding algorithms which can be employed with different trade offs. 
+ * + * @note Some case folding, in particular full case folding, can yield more + * codePoints than the original value. That is, it can ''increase'' the + * size of `String` values once folded. + * + * @see [[https://www.unicode.org/versions/Unicode14.0.0/ch05.pdf#G21790 Caseless Matching]] + * @see [[https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt Unicode Case Folding Tables]] + */ +private[ci] object CaseFolding { + + // Note to library maintainers: These functions are intentionally written + // with int based case matching so that they will compile to a fast + // lookupswitch. Please keep this in mind when making changes. + // + // From `javap -v CaseFolding\$.class` on Scala 2.13. + // + // {{{ + // 3: lookupswitch { // 1530 + // 65: 12252 + // 66: 12263 + // }}} + + /** This function transforms a Unicode codePoint into its full case folded + * variant, with the rule changes which are applicable to ''some'' Turkic + * languages. + * + * For other languages these rules should not be applied. + */ + def turkicFullCaseFoldedCodePoints(codePoint: Int): Array[Int] = + codePoint match { + case 0x0049 => Array(0x0131) // LATIN CAPITAL LETTER I -> LATIN SMALL LETTER DOTLESS I + case 0x0130 => Array(0x0069) // LATIN CAPITAL LETTER I WITH DOT ABOVE -> LATIN SMALL LETTER I + case _ => + fullCaseFoldedCodePoints(codePoint) + } + + /** This function transforms a Unicode codePoint into its simple case folded + * variant, with the rule changes which are applicable to ''some'' Turkic + * languages. + * + * For other languages these rules should not be applied. + */ + def turkicSimpleCaseFoldedCodePoints(codePoint: Int): Int = + codePoint match { + case 0x0049 => 0x0131 // LATIN CAPITAL LETTER I -> LATIN SMALL LETTER DOTLESS I + case 0x0130 => 0x0069 // LATIN CAPITAL LETTER I WITH DOT ABOVE -> LATIN SMALL LETTER I + case _ => + simpleCaseFoldedCodePoints(codePoint) + } + + /** This function transforms a Unicode codePoint into its full case folded + * variant using the default rules. + * + * It is equivalent to the "C + F" rules from `CaseFolding.txt`. 
+ */ + def fullCaseFoldedCodePoints(codePoint: Int): Array[Int] = + codePoint match { + case 0x00DF => Array(0x0073, 0x0073) // LATIN SMALL LETTER SHARP S + case 0x0130 => Array(0x0069, 0x0307) // LATIN CAPITAL LETTER I WITH DOT ABOVE + case 0x0149 => Array(0x02BC, 0x006E) // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE + case 0x01F0 => Array(0x006A, 0x030C) // LATIN SMALL LETTER J WITH CARON + case 0x0390 => Array(0x03B9, 0x0308, 0x0301) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + case 0x03B0 => Array(0x03C5, 0x0308, 0x0301) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + case 0x0587 => Array(0x0565, 0x0582) // ARMENIAN SMALL LIGATURE ECH YIWN + case 0x1E96 => Array(0x0068, 0x0331) // LATIN SMALL LETTER H WITH LINE BELOW + case 0x1E97 => Array(0x0074, 0x0308) // LATIN SMALL LETTER T WITH DIAERESIS + case 0x1E98 => Array(0x0077, 0x030A) // LATIN SMALL LETTER W WITH RING ABOVE + case 0x1E99 => Array(0x0079, 0x030A) // LATIN SMALL LETTER Y WITH RING ABOVE + case 0x1E9A => Array(0x0061, 0x02BE) // LATIN SMALL LETTER A WITH RIGHT HALF RING + case 0x1E9E => Array(0x0073, 0x0073) // LATIN CAPITAL LETTER SHARP S + case 0x1F50 => Array(0x03C5, 0x0313) // GREEK SMALL LETTER UPSILON WITH PSILI + case 0x1F52 => Array(0x03C5, 0x0313, 0x0300) // GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA + case 0x1F54 => Array(0x03C5, 0x0313, 0x0301) // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA + case 0x1F56 => Array(0x03C5, 0x0313, 0x0342) // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI + case 0x1F80 => Array(0x1F00, 0x03B9) // GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI + case 0x1F81 => Array(0x1F01, 0x03B9) // GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI + case 0x1F82 => Array(0x1F02, 0x03B9) // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI + case 0x1F83 => Array(0x1F03, 0x03B9) // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI + case 0x1F84 => Array(0x1F04, 0x03B9) // GREEK SMALL LETTER ALPHA WITH 
PSILI AND OXIA AND YPOGEGRAMMENI + case 0x1F85 => Array(0x1F05, 0x03B9) // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI + case 0x1F86 => Array(0x1F06, 0x03B9) // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1F87 => Array(0x1F07, 0x03B9) // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1F88 => Array(0x1F00, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI + case 0x1F89 => Array(0x1F01, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI + case 0x1F8A => Array(0x1F02, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1F8B => Array(0x1F03, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1F8C => Array(0x1F04, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1F8D => Array(0x1F05, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1F8E => Array(0x1F06, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F8F => Array(0x1F07, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F90 => Array(0x1F20, 0x03B9) // GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI + case 0x1F91 => Array(0x1F21, 0x03B9) // GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI + case 0x1F92 => Array(0x1F22, 0x03B9) // GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI + case 0x1F93 => Array(0x1F23, 0x03B9) // GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI + case 0x1F94 => Array(0x1F24, 0x03B9) // GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI + case 0x1F95 => Array(0x1F25, 0x03B9) // GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI + case 0x1F96 => Array(0x1F26, 0x03B9) // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1F97 => Array(0x1F27, 0x03B9) // GREEK SMALL LETTER 
ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1F98 => Array(0x1F20, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI + case 0x1F99 => Array(0x1F21, 0x03B9) // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI + case 0x1F9A => Array(0x1F22, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1F9B => Array(0x1F23, 0x03B9) // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1F9C => Array(0x1F24, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1F9D => Array(0x1F25, 0x03B9) // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1F9E => Array(0x1F26, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F9F => Array(0x1F27, 0x03B9) // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FA0 => Array(0x1F60, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI + case 0x1FA1 => Array(0x1F61, 0x03B9) // GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI + case 0x1FA2 => Array(0x1F62, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI + case 0x1FA3 => Array(0x1F63, 0x03B9) // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI + case 0x1FA4 => Array(0x1F64, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI + case 0x1FA5 => Array(0x1F65, 0x03B9) // GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI + case 0x1FA6 => Array(0x1F66, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1FA7 => Array(0x1F67, 0x03B9) // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1FA8 => Array(0x1F60, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI + case 0x1FA9 => Array(0x1F61, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI + case 0x1FAA => Array(0x1F62, 0x03B9) // GREEK CAPITAL LETTER OMEGA 
WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1FAB => Array(0x1F63, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1FAC => Array(0x1F64, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1FAD => Array(0x1F65, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1FAE => Array(0x1F66, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FAF => Array(0x1F67, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FB2 => Array(0x1F70, 0x03B9) // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI + case 0x1FB3 => Array(0x03B1, 0x03B9) // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI + case 0x1FB4 => Array(0x03AC, 0x03B9) // GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI + case 0x1FB6 => Array(0x03B1, 0x0342) // GREEK SMALL LETTER ALPHA WITH PERISPOMENI + case 0x1FB7 => Array(0x03B1, 0x0342, 0x03B9) // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI + case 0x1FBC => Array(0x03B1, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + case 0x1FC2 => Array(0x1F74, 0x03B9) // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI + case 0x1FC3 => Array(0x03B7, 0x03B9) // GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI + case 0x1FC4 => Array(0x03AE, 0x03B9) // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI + case 0x1FC6 => Array(0x03B7, 0x0342) // GREEK SMALL LETTER ETA WITH PERISPOMENI + case 0x1FC7 => Array(0x03B7, 0x0342, 0x03B9) // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI + case 0x1FCC => Array(0x03B7, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI + case 0x1FD2 => Array(0x03B9, 0x0308, 0x0300) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA + case 0x1FD3 => Array(0x03B9, 0x0308, 0x0301) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA + case 0x1FD6 => Array(0x03B9, 0x0342) // GREEK SMALL LETTER IOTA WITH PERISPOMENI + case 0x1FD7 => 
Array(0x03B9, 0x0308, 0x0342) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI + case 0x1FE2 => Array(0x03C5, 0x0308, 0x0300) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA + case 0x1FE3 => Array(0x03C5, 0x0308, 0x0301) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA + case 0x1FE4 => Array(0x03C1, 0x0313) // GREEK SMALL LETTER RHO WITH PSILI + case 0x1FE6 => Array(0x03C5, 0x0342) // GREEK SMALL LETTER UPSILON WITH PERISPOMENI + case 0x1FE7 => Array(0x03C5, 0x0308, 0x0342) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI + case 0x1FF2 => Array(0x1F7C, 0x03B9) // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI + case 0x1FF3 => Array(0x03C9, 0x03B9) // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI + case 0x1FF4 => Array(0x03CE, 0x03B9) // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI + case 0x1FF6 => Array(0x03C9, 0x0342) // GREEK SMALL LETTER OMEGA WITH PERISPOMENI + case 0x1FF7 => Array(0x03C9, 0x0342, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI + case 0x1FFC => Array(0x03C9, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + case 0xFB00 => Array(0x0066, 0x0066) // LATIN SMALL LIGATURE FF + case 0xFB01 => Array(0x0066, 0x0069) // LATIN SMALL LIGATURE FI + case 0xFB02 => Array(0x0066, 0x006C) // LATIN SMALL LIGATURE FL + case 0xFB03 => Array(0x0066, 0x0066, 0x0069) // LATIN SMALL LIGATURE FFI + case 0xFB04 => Array(0x0066, 0x0066, 0x006C) // LATIN SMALL LIGATURE FFL + case 0xFB05 => Array(0x0073, 0x0074) // LATIN SMALL LIGATURE LONG S T + case 0xFB06 => Array(0x0073, 0x0074) // LATIN SMALL LIGATURE ST + case 0xFB13 => Array(0x0574, 0x0576) // ARMENIAN SMALL LIGATURE MEN NOW + case 0xFB14 => Array(0x0574, 0x0565) // ARMENIAN SMALL LIGATURE MEN ECH + case 0xFB15 => Array(0x0574, 0x056B) // ARMENIAN SMALL LIGATURE MEN INI + case 0xFB16 => Array(0x057E, 0x0576) // ARMENIAN SMALL LIGATURE VEW NOW + case 0xFB17 => Array(0x0574, 0x056D) // ARMENIAN SMALL LIGATURE MEN XEH + case _ => // The 
full rules defer to the common rules + Array(commonCaseFoldedCodePoints(codePoint)) + } + + /** This function transforms a Unicode codePoint into it's simple case folded + * variant using the default rules. + * + * It is equivalent to the "C + S" rules from `CaseFolding.txt`. + */ + def simpleCaseFoldedCodePoints(codePoint: Int): Int = + codePoint match { + case 0x1E9E => 0x00DF // LATIN CAPITAL LETTER SHARP S + case 0x1F88 => 0x1F80 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI + case 0x1F89 => 0x1F81 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI + case 0x1F8A => 0x1F82 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1F8B => 0x1F83 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1F8C => 0x1F84 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1F8D => 0x1F85 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1F8E => 0x1F86 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F8F => 0x1F87 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F98 => 0x1F90 // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI + case 0x1F99 => 0x1F91 // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI + case 0x1F9A => 0x1F92 // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1F9B => 0x1F93 // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1F9C => 0x1F94 // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1F9D => 0x1F95 // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1F9E => 0x1F96 // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F9F => 0x1F97 // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FA8 => 0x1FA0 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI + case 0x1FA9 => 0x1FA1 // GREEK 
CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI + case 0x1FAA => 0x1FA2 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1FAB => 0x1FA3 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1FAC => 0x1FA4 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1FAD => 0x1FA5 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1FAE => 0x1FA6 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FAF => 0x1FA7 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FBC => 0x1FB3 // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + case 0x1FCC => 0x1FC3 // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI + case 0x1FFC => 0x1FF3 // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + case _ => commonCaseFoldedCodePoints(codePoint) + } + + /** This function transforms a Unicode codePoint into it's common case folded + * form. + * + * This lookup can only be validly used in concert with either the simple + * or full case folding rules (with or without the special cases for some + * Turkic languages). This is why this function is `private`. 
+ */ + private def commonCaseFoldedCodePoints(codePoint: Int): Int = + codePoint match { + case 0x0041 => 0x0061 // LATIN CAPITAL LETTER A + case 0x0042 => 0x0062 // LATIN CAPITAL LETTER B + case 0x0043 => 0x0063 // LATIN CAPITAL LETTER C + case 0x0044 => 0x0064 // LATIN CAPITAL LETTER D + case 0x0045 => 0x0065 // LATIN CAPITAL LETTER E + case 0x0046 => 0x0066 // LATIN CAPITAL LETTER F + case 0x0047 => 0x0067 // LATIN CAPITAL LETTER G + case 0x0048 => 0x0068 // LATIN CAPITAL LETTER H + case 0x0049 => 0x0069 // LATIN CAPITAL LETTER I + case 0x004A => 0x006A // LATIN CAPITAL LETTER J + case 0x004B => 0x006B // LATIN CAPITAL LETTER K + case 0x004C => 0x006C // LATIN CAPITAL LETTER L + case 0x004D => 0x006D // LATIN CAPITAL LETTER M + case 0x004E => 0x006E // LATIN CAPITAL LETTER N + case 0x004F => 0x006F // LATIN CAPITAL LETTER O + case 0x0050 => 0x0070 // LATIN CAPITAL LETTER P + case 0x0051 => 0x0071 // LATIN CAPITAL LETTER Q + case 0x0052 => 0x0072 // LATIN CAPITAL LETTER R + case 0x0053 => 0x0073 // LATIN CAPITAL LETTER S + case 0x0054 => 0x0074 // LATIN CAPITAL LETTER T + case 0x0055 => 0x0075 // LATIN CAPITAL LETTER U + case 0x0056 => 0x0076 // LATIN CAPITAL LETTER V + case 0x0057 => 0x0077 // LATIN CAPITAL LETTER W + case 0x0058 => 0x0078 // LATIN CAPITAL LETTER X + case 0x0059 => 0x0079 // LATIN CAPITAL LETTER Y + case 0x005A => 0x007A // LATIN CAPITAL LETTER Z + case 0x00B5 => 0x03BC // MICRO SIGN + case 0x00C0 => 0x00E0 // LATIN CAPITAL LETTER A WITH GRAVE + case 0x00C1 => 0x00E1 // LATIN CAPITAL LETTER A WITH ACUTE + case 0x00C2 => 0x00E2 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX + case 0x00C3 => 0x00E3 // LATIN CAPITAL LETTER A WITH TILDE + case 0x00C4 => 0x00E4 // LATIN CAPITAL LETTER A WITH DIAERESIS + case 0x00C5 => 0x00E5 // LATIN CAPITAL LETTER A WITH RING ABOVE + case 0x00C6 => 0x00E6 // LATIN CAPITAL LETTER AE + case 0x00C7 => 0x00E7 // LATIN CAPITAL LETTER C WITH CEDILLA + case 0x00C8 => 0x00E8 // LATIN CAPITAL LETTER E WITH GRAVE + case 0x00C9 => 
0x00E9 // LATIN CAPITAL LETTER E WITH ACUTE + case 0x00CA => 0x00EA // LATIN CAPITAL LETTER E WITH CIRCUMFLEX + case 0x00CB => 0x00EB // LATIN CAPITAL LETTER E WITH DIAERESIS + case 0x00CC => 0x00EC // LATIN CAPITAL LETTER I WITH GRAVE + case 0x00CD => 0x00ED // LATIN CAPITAL LETTER I WITH ACUTE + case 0x00CE => 0x00EE // LATIN CAPITAL LETTER I WITH CIRCUMFLEX + case 0x00CF => 0x00EF // LATIN CAPITAL LETTER I WITH DIAERESIS + case 0x00D0 => 0x00F0 // LATIN CAPITAL LETTER ETH + case 0x00D1 => 0x00F1 // LATIN CAPITAL LETTER N WITH TILDE + case 0x00D2 => 0x00F2 // LATIN CAPITAL LETTER O WITH GRAVE + case 0x00D3 => 0x00F3 // LATIN CAPITAL LETTER O WITH ACUTE + case 0x00D4 => 0x00F4 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX + case 0x00D5 => 0x00F5 // LATIN CAPITAL LETTER O WITH TILDE + case 0x00D6 => 0x00F6 // LATIN CAPITAL LETTER O WITH DIAERESIS + case 0x00D8 => 0x00F8 // LATIN CAPITAL LETTER O WITH STROKE + case 0x00D9 => 0x00F9 // LATIN CAPITAL LETTER U WITH GRAVE + case 0x00DA => 0x00FA // LATIN CAPITAL LETTER U WITH ACUTE + case 0x00DB => 0x00FB // LATIN CAPITAL LETTER U WITH CIRCUMFLEX + case 0x00DC => 0x00FC // LATIN CAPITAL LETTER U WITH DIAERESIS + case 0x00DD => 0x00FD // LATIN CAPITAL LETTER Y WITH ACUTE + case 0x00DE => 0x00FE // LATIN CAPITAL LETTER THORN + case 0x0100 => 0x0101 // LATIN CAPITAL LETTER A WITH MACRON + case 0x0102 => 0x0103 // LATIN CAPITAL LETTER A WITH BREVE + case 0x0104 => 0x0105 // LATIN CAPITAL LETTER A WITH OGONEK + case 0x0106 => 0x0107 // LATIN CAPITAL LETTER C WITH ACUTE + case 0x0108 => 0x0109 // LATIN CAPITAL LETTER C WITH CIRCUMFLEX + case 0x010A => 0x010B // LATIN CAPITAL LETTER C WITH DOT ABOVE + case 0x010C => 0x010D // LATIN CAPITAL LETTER C WITH CARON + case 0x010E => 0x010F // LATIN CAPITAL LETTER D WITH CARON + case 0x0110 => 0x0111 // LATIN CAPITAL LETTER D WITH STROKE + case 0x0112 => 0x0113 // LATIN CAPITAL LETTER E WITH MACRON + case 0x0114 => 0x0115 // LATIN CAPITAL LETTER E WITH BREVE + case 0x0116 => 0x0117 // 
LATIN CAPITAL LETTER E WITH DOT ABOVE + case 0x0118 => 0x0119 // LATIN CAPITAL LETTER E WITH OGONEK + case 0x011A => 0x011B // LATIN CAPITAL LETTER E WITH CARON + case 0x011C => 0x011D // LATIN CAPITAL LETTER G WITH CIRCUMFLEX + case 0x011E => 0x011F // LATIN CAPITAL LETTER G WITH BREVE + case 0x0120 => 0x0121 // LATIN CAPITAL LETTER G WITH DOT ABOVE + case 0x0122 => 0x0123 // LATIN CAPITAL LETTER G WITH CEDILLA + case 0x0124 => 0x0125 // LATIN CAPITAL LETTER H WITH CIRCUMFLEX + case 0x0126 => 0x0127 // LATIN CAPITAL LETTER H WITH STROKE + case 0x0128 => 0x0129 // LATIN CAPITAL LETTER I WITH TILDE + case 0x012A => 0x012B // LATIN CAPITAL LETTER I WITH MACRON + case 0x012C => 0x012D // LATIN CAPITAL LETTER I WITH BREVE + case 0x012E => 0x012F // LATIN CAPITAL LETTER I WITH OGONEK + case 0x0132 => 0x0133 // LATIN CAPITAL LIGATURE IJ + case 0x0134 => 0x0135 // LATIN CAPITAL LETTER J WITH CIRCUMFLEX + case 0x0136 => 0x0137 // LATIN CAPITAL LETTER K WITH CEDILLA + case 0x0139 => 0x013A // LATIN CAPITAL LETTER L WITH ACUTE + case 0x013B => 0x013C // LATIN CAPITAL LETTER L WITH CEDILLA + case 0x013D => 0x013E // LATIN CAPITAL LETTER L WITH CARON + case 0x013F => 0x0140 // LATIN CAPITAL LETTER L WITH MIDDLE DOT + case 0x0141 => 0x0142 // LATIN CAPITAL LETTER L WITH STROKE + case 0x0143 => 0x0144 // LATIN CAPITAL LETTER N WITH ACUTE + case 0x0145 => 0x0146 // LATIN CAPITAL LETTER N WITH CEDILLA + case 0x0147 => 0x0148 // LATIN CAPITAL LETTER N WITH CARON + case 0x014A => 0x014B // LATIN CAPITAL LETTER ENG + case 0x014C => 0x014D // LATIN CAPITAL LETTER O WITH MACRON + case 0x014E => 0x014F // LATIN CAPITAL LETTER O WITH BREVE + case 0x0150 => 0x0151 // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + case 0x0152 => 0x0153 // LATIN CAPITAL LIGATURE OE + case 0x0154 => 0x0155 // LATIN CAPITAL LETTER R WITH ACUTE + case 0x0156 => 0x0157 // LATIN CAPITAL LETTER R WITH CEDILLA + case 0x0158 => 0x0159 // LATIN CAPITAL LETTER R WITH CARON + case 0x015A => 0x015B // LATIN CAPITAL LETTER 
S WITH ACUTE + case 0x015C => 0x015D // LATIN CAPITAL LETTER S WITH CIRCUMFLEX + case 0x015E => 0x015F // LATIN CAPITAL LETTER S WITH CEDILLA + case 0x0160 => 0x0161 // LATIN CAPITAL LETTER S WITH CARON + case 0x0162 => 0x0163 // LATIN CAPITAL LETTER T WITH CEDILLA + case 0x0164 => 0x0165 // LATIN CAPITAL LETTER T WITH CARON + case 0x0166 => 0x0167 // LATIN CAPITAL LETTER T WITH STROKE + case 0x0168 => 0x0169 // LATIN CAPITAL LETTER U WITH TILDE + case 0x016A => 0x016B // LATIN CAPITAL LETTER U WITH MACRON + case 0x016C => 0x016D // LATIN CAPITAL LETTER U WITH BREVE + case 0x016E => 0x016F // LATIN CAPITAL LETTER U WITH RING ABOVE + case 0x0170 => 0x0171 // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + case 0x0172 => 0x0173 // LATIN CAPITAL LETTER U WITH OGONEK + case 0x0174 => 0x0175 // LATIN CAPITAL LETTER W WITH CIRCUMFLEX + case 0x0176 => 0x0177 // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + case 0x0178 => 0x00FF // LATIN CAPITAL LETTER Y WITH DIAERESIS + case 0x0179 => 0x017A // LATIN CAPITAL LETTER Z WITH ACUTE + case 0x017B => 0x017C // LATIN CAPITAL LETTER Z WITH DOT ABOVE + case 0x017D => 0x017E // LATIN CAPITAL LETTER Z WITH CARON + case 0x017F => 0x0073 // LATIN SMALL LETTER LONG S + case 0x0181 => 0x0253 // LATIN CAPITAL LETTER B WITH HOOK + case 0x0182 => 0x0183 // LATIN CAPITAL LETTER B WITH TOPBAR + case 0x0184 => 0x0185 // LATIN CAPITAL LETTER TONE SIX + case 0x0186 => 0x0254 // LATIN CAPITAL LETTER OPEN O + case 0x0187 => 0x0188 // LATIN CAPITAL LETTER C WITH HOOK + case 0x0189 => 0x0256 // LATIN CAPITAL LETTER AFRICAN D + case 0x018A => 0x0257 // LATIN CAPITAL LETTER D WITH HOOK + case 0x018B => 0x018C // LATIN CAPITAL LETTER D WITH TOPBAR + case 0x018E => 0x01DD // LATIN CAPITAL LETTER REVERSED E + case 0x018F => 0x0259 // LATIN CAPITAL LETTER SCHWA + case 0x0190 => 0x025B // LATIN CAPITAL LETTER OPEN E + case 0x0191 => 0x0192 // LATIN CAPITAL LETTER F WITH HOOK + case 0x0193 => 0x0260 // LATIN CAPITAL LETTER G WITH HOOK + case 0x0194 => 0x0263 // 
LATIN CAPITAL LETTER GAMMA + case 0x0196 => 0x0269 // LATIN CAPITAL LETTER IOTA + case 0x0197 => 0x0268 // LATIN CAPITAL LETTER I WITH STROKE + case 0x0198 => 0x0199 // LATIN CAPITAL LETTER K WITH HOOK + case 0x019C => 0x026F // LATIN CAPITAL LETTER TURNED M + case 0x019D => 0x0272 // LATIN CAPITAL LETTER N WITH LEFT HOOK + case 0x019F => 0x0275 // LATIN CAPITAL LETTER O WITH MIDDLE TILDE + case 0x01A0 => 0x01A1 // LATIN CAPITAL LETTER O WITH HORN + case 0x01A2 => 0x01A3 // LATIN CAPITAL LETTER OI + case 0x01A4 => 0x01A5 // LATIN CAPITAL LETTER P WITH HOOK + case 0x01A6 => 0x0280 // LATIN LETTER YR + case 0x01A7 => 0x01A8 // LATIN CAPITAL LETTER TONE TWO + case 0x01A9 => 0x0283 // LATIN CAPITAL LETTER ESH + case 0x01AC => 0x01AD // LATIN CAPITAL LETTER T WITH HOOK + case 0x01AE => 0x0288 // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK + case 0x01AF => 0x01B0 // LATIN CAPITAL LETTER U WITH HORN + case 0x01B1 => 0x028A // LATIN CAPITAL LETTER UPSILON + case 0x01B2 => 0x028B // LATIN CAPITAL LETTER V WITH HOOK + case 0x01B3 => 0x01B4 // LATIN CAPITAL LETTER Y WITH HOOK + case 0x01B5 => 0x01B6 // LATIN CAPITAL LETTER Z WITH STROKE + case 0x01B7 => 0x0292 // LATIN CAPITAL LETTER EZH + case 0x01B8 => 0x01B9 // LATIN CAPITAL LETTER EZH REVERSED + case 0x01BC => 0x01BD // LATIN CAPITAL LETTER TONE FIVE + case 0x01C4 => 0x01C6 // LATIN CAPITAL LETTER DZ WITH CARON + case 0x01C5 => 0x01C6 // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON + case 0x01C7 => 0x01C9 // LATIN CAPITAL LETTER LJ + case 0x01C8 => 0x01C9 // LATIN CAPITAL LETTER L WITH SMALL LETTER J + case 0x01CA => 0x01CC // LATIN CAPITAL LETTER NJ + case 0x01CB => 0x01CC // LATIN CAPITAL LETTER N WITH SMALL LETTER J + case 0x01CD => 0x01CE // LATIN CAPITAL LETTER A WITH CARON + case 0x01CF => 0x01D0 // LATIN CAPITAL LETTER I WITH CARON + case 0x01D1 => 0x01D2 // LATIN CAPITAL LETTER O WITH CARON + case 0x01D3 => 0x01D4 // LATIN CAPITAL LETTER U WITH CARON + case 0x01D5 => 0x01D6 // LATIN CAPITAL LETTER U WITH 
DIAERESIS AND MACRON + case 0x01D7 => 0x01D8 // LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE + case 0x01D9 => 0x01DA // LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON + case 0x01DB => 0x01DC // LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE + case 0x01DE => 0x01DF // LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON + case 0x01E0 => 0x01E1 // LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON + case 0x01E2 => 0x01E3 // LATIN CAPITAL LETTER AE WITH MACRON + case 0x01E4 => 0x01E5 // LATIN CAPITAL LETTER G WITH STROKE + case 0x01E6 => 0x01E7 // LATIN CAPITAL LETTER G WITH CARON + case 0x01E8 => 0x01E9 // LATIN CAPITAL LETTER K WITH CARON + case 0x01EA => 0x01EB // LATIN CAPITAL LETTER O WITH OGONEK + case 0x01EC => 0x01ED // LATIN CAPITAL LETTER O WITH OGONEK AND MACRON + case 0x01EE => 0x01EF // LATIN CAPITAL LETTER EZH WITH CARON + case 0x01F1 => 0x01F3 // LATIN CAPITAL LETTER DZ + case 0x01F2 => 0x01F3 // LATIN CAPITAL LETTER D WITH SMALL LETTER Z + case 0x01F4 => 0x01F5 // LATIN CAPITAL LETTER G WITH ACUTE + case 0x01F6 => 0x0195 // LATIN CAPITAL LETTER HWAIR + case 0x01F7 => 0x01BF // LATIN CAPITAL LETTER WYNN + case 0x01F8 => 0x01F9 // LATIN CAPITAL LETTER N WITH GRAVE + case 0x01FA => 0x01FB // LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE + case 0x01FC => 0x01FD // LATIN CAPITAL LETTER AE WITH ACUTE + case 0x01FE => 0x01FF // LATIN CAPITAL LETTER O WITH STROKE AND ACUTE + case 0x0200 => 0x0201 // LATIN CAPITAL LETTER A WITH DOUBLE GRAVE + case 0x0202 => 0x0203 // LATIN CAPITAL LETTER A WITH INVERTED BREVE + case 0x0204 => 0x0205 // LATIN CAPITAL LETTER E WITH DOUBLE GRAVE + case 0x0206 => 0x0207 // LATIN CAPITAL LETTER E WITH INVERTED BREVE + case 0x0208 => 0x0209 // LATIN CAPITAL LETTER I WITH DOUBLE GRAVE + case 0x020A => 0x020B // LATIN CAPITAL LETTER I WITH INVERTED BREVE + case 0x020C => 0x020D // LATIN CAPITAL LETTER O WITH DOUBLE GRAVE + case 0x020E => 0x020F // LATIN CAPITAL LETTER O WITH INVERTED BREVE + case 0x0210 => 0x0211 // LATIN CAPITAL 
LETTER R WITH DOUBLE GRAVE + case 0x0212 => 0x0213 // LATIN CAPITAL LETTER R WITH INVERTED BREVE + case 0x0214 => 0x0215 // LATIN CAPITAL LETTER U WITH DOUBLE GRAVE + case 0x0216 => 0x0217 // LATIN CAPITAL LETTER U WITH INVERTED BREVE + case 0x0218 => 0x0219 // LATIN CAPITAL LETTER S WITH COMMA BELOW + case 0x021A => 0x021B // LATIN CAPITAL LETTER T WITH COMMA BELOW + case 0x021C => 0x021D // LATIN CAPITAL LETTER YOGH + case 0x021E => 0x021F // LATIN CAPITAL LETTER H WITH CARON + case 0x0220 => 0x019E // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG + case 0x0222 => 0x0223 // LATIN CAPITAL LETTER OU + case 0x0224 => 0x0225 // LATIN CAPITAL LETTER Z WITH HOOK + case 0x0226 => 0x0227 // LATIN CAPITAL LETTER A WITH DOT ABOVE + case 0x0228 => 0x0229 // LATIN CAPITAL LETTER E WITH CEDILLA + case 0x022A => 0x022B // LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON + case 0x022C => 0x022D // LATIN CAPITAL LETTER O WITH TILDE AND MACRON + case 0x022E => 0x022F // LATIN CAPITAL LETTER O WITH DOT ABOVE + case 0x0230 => 0x0231 // LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON + case 0x0232 => 0x0233 // LATIN CAPITAL LETTER Y WITH MACRON + case 0x023A => 0x2C65 // LATIN CAPITAL LETTER A WITH STROKE + case 0x023B => 0x023C // LATIN CAPITAL LETTER C WITH STROKE + case 0x023D => 0x019A // LATIN CAPITAL LETTER L WITH BAR + case 0x023E => 0x2C66 // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE + case 0x0241 => 0x0242 // LATIN CAPITAL LETTER GLOTTAL STOP + case 0x0243 => 0x0180 // LATIN CAPITAL LETTER B WITH STROKE + case 0x0244 => 0x0289 // LATIN CAPITAL LETTER U BAR + case 0x0245 => 0x028C // LATIN CAPITAL LETTER TURNED V + case 0x0246 => 0x0247 // LATIN CAPITAL LETTER E WITH STROKE + case 0x0248 => 0x0249 // LATIN CAPITAL LETTER J WITH STROKE + case 0x024A => 0x024B // LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL + case 0x024C => 0x024D // LATIN CAPITAL LETTER R WITH STROKE + case 0x024E => 0x024F // LATIN CAPITAL LETTER Y WITH STROKE + case 0x0345 => 0x03B9 // COMBINING GREEK 
YPOGEGRAMMENI + case 0x0370 => 0x0371 // GREEK CAPITAL LETTER HETA + case 0x0372 => 0x0373 // GREEK CAPITAL LETTER ARCHAIC SAMPI + case 0x0376 => 0x0377 // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA + case 0x037F => 0x03F3 // GREEK CAPITAL LETTER YOT + case 0x0386 => 0x03AC // GREEK CAPITAL LETTER ALPHA WITH TONOS + case 0x0388 => 0x03AD // GREEK CAPITAL LETTER EPSILON WITH TONOS + case 0x0389 => 0x03AE // GREEK CAPITAL LETTER ETA WITH TONOS + case 0x038A => 0x03AF // GREEK CAPITAL LETTER IOTA WITH TONOS + case 0x038C => 0x03CC // GREEK CAPITAL LETTER OMICRON WITH TONOS + case 0x038E => 0x03CD // GREEK CAPITAL LETTER UPSILON WITH TONOS + case 0x038F => 0x03CE // GREEK CAPITAL LETTER OMEGA WITH TONOS + case 0x0391 => 0x03B1 // GREEK CAPITAL LETTER ALPHA + case 0x0392 => 0x03B2 // GREEK CAPITAL LETTER BETA + case 0x0393 => 0x03B3 // GREEK CAPITAL LETTER GAMMA + case 0x0394 => 0x03B4 // GREEK CAPITAL LETTER DELTA + case 0x0395 => 0x03B5 // GREEK CAPITAL LETTER EPSILON + case 0x0396 => 0x03B6 // GREEK CAPITAL LETTER ZETA + case 0x0397 => 0x03B7 // GREEK CAPITAL LETTER ETA + case 0x0398 => 0x03B8 // GREEK CAPITAL LETTER THETA + case 0x0399 => 0x03B9 // GREEK CAPITAL LETTER IOTA + case 0x039A => 0x03BA // GREEK CAPITAL LETTER KAPPA + case 0x039B => 0x03BB // GREEK CAPITAL LETTER LAMDA + case 0x039C => 0x03BC // GREEK CAPITAL LETTER MU + case 0x039D => 0x03BD // GREEK CAPITAL LETTER NU + case 0x039E => 0x03BE // GREEK CAPITAL LETTER XI + case 0x039F => 0x03BF // GREEK CAPITAL LETTER OMICRON + case 0x03A0 => 0x03C0 // GREEK CAPITAL LETTER PI + case 0x03A1 => 0x03C1 // GREEK CAPITAL LETTER RHO + case 0x03A3 => 0x03C3 // GREEK CAPITAL LETTER SIGMA + case 0x03A4 => 0x03C4 // GREEK CAPITAL LETTER TAU + case 0x03A5 => 0x03C5 // GREEK CAPITAL LETTER UPSILON + case 0x03A6 => 0x03C6 // GREEK CAPITAL LETTER PHI + case 0x03A7 => 0x03C7 // GREEK CAPITAL LETTER CHI + case 0x03A8 => 0x03C8 // GREEK CAPITAL LETTER PSI + case 0x03A9 => 0x03C9 // GREEK CAPITAL LETTER OMEGA + case 0x03AA => 
0x03CA // GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + case 0x03AB => 0x03CB // GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + case 0x03C2 => 0x03C3 // GREEK SMALL LETTER FINAL SIGMA + case 0x03CF => 0x03D7 // GREEK CAPITAL KAI SYMBOL + case 0x03D0 => 0x03B2 // GREEK BETA SYMBOL + case 0x03D1 => 0x03B8 // GREEK THETA SYMBOL + case 0x03D5 => 0x03C6 // GREEK PHI SYMBOL + case 0x03D6 => 0x03C0 // GREEK PI SYMBOL + case 0x03D8 => 0x03D9 // GREEK LETTER ARCHAIC KOPPA + case 0x03DA => 0x03DB // GREEK LETTER STIGMA + case 0x03DC => 0x03DD // GREEK LETTER DIGAMMA + case 0x03DE => 0x03DF // GREEK LETTER KOPPA + case 0x03E0 => 0x03E1 // GREEK LETTER SAMPI + case 0x03E2 => 0x03E3 // COPTIC CAPITAL LETTER SHEI + case 0x03E4 => 0x03E5 // COPTIC CAPITAL LETTER FEI + case 0x03E6 => 0x03E7 // COPTIC CAPITAL LETTER KHEI + case 0x03E8 => 0x03E9 // COPTIC CAPITAL LETTER HORI + case 0x03EA => 0x03EB // COPTIC CAPITAL LETTER GANGIA + case 0x03EC => 0x03ED // COPTIC CAPITAL LETTER SHIMA + case 0x03EE => 0x03EF // COPTIC CAPITAL LETTER DEI + case 0x03F0 => 0x03BA // GREEK KAPPA SYMBOL + case 0x03F1 => 0x03C1 // GREEK RHO SYMBOL + case 0x03F4 => 0x03B8 // GREEK CAPITAL THETA SYMBOL + case 0x03F5 => 0x03B5 // GREEK LUNATE EPSILON SYMBOL + case 0x03F7 => 0x03F8 // GREEK CAPITAL LETTER SHO + case 0x03F9 => 0x03F2 // GREEK CAPITAL LUNATE SIGMA SYMBOL + case 0x03FA => 0x03FB // GREEK CAPITAL LETTER SAN + case 0x03FD => 0x037B // GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL + case 0x03FE => 0x037C // GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL + case 0x03FF => 0x037D // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL + case 0x0400 => 0x0450 // CYRILLIC CAPITAL LETTER IE WITH GRAVE + case 0x0401 => 0x0451 // CYRILLIC CAPITAL LETTER IO + case 0x0402 => 0x0452 // CYRILLIC CAPITAL LETTER DJE + case 0x0403 => 0x0453 // CYRILLIC CAPITAL LETTER GJE + case 0x0404 => 0x0454 // CYRILLIC CAPITAL LETTER UKRAINIAN IE + case 0x0405 => 0x0455 // CYRILLIC CAPITAL LETTER DZE + case 0x0406 => 0x0456 // CYRILLIC 
CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + case 0x0407 => 0x0457 // CYRILLIC CAPITAL LETTER YI + case 0x0408 => 0x0458 // CYRILLIC CAPITAL LETTER JE + case 0x0409 => 0x0459 // CYRILLIC CAPITAL LETTER LJE + case 0x040A => 0x045A // CYRILLIC CAPITAL LETTER NJE + case 0x040B => 0x045B // CYRILLIC CAPITAL LETTER TSHE + case 0x040C => 0x045C // CYRILLIC CAPITAL LETTER KJE + case 0x040D => 0x045D // CYRILLIC CAPITAL LETTER I WITH GRAVE + case 0x040E => 0x045E // CYRILLIC CAPITAL LETTER SHORT U + case 0x040F => 0x045F // CYRILLIC CAPITAL LETTER DZHE + case 0x0410 => 0x0430 // CYRILLIC CAPITAL LETTER A + case 0x0411 => 0x0431 // CYRILLIC CAPITAL LETTER BE + case 0x0412 => 0x0432 // CYRILLIC CAPITAL LETTER VE + case 0x0413 => 0x0433 // CYRILLIC CAPITAL LETTER GHE + case 0x0414 => 0x0434 // CYRILLIC CAPITAL LETTER DE + case 0x0415 => 0x0435 // CYRILLIC CAPITAL LETTER IE + case 0x0416 => 0x0436 // CYRILLIC CAPITAL LETTER ZHE + case 0x0417 => 0x0437 // CYRILLIC CAPITAL LETTER ZE + case 0x0418 => 0x0438 // CYRILLIC CAPITAL LETTER I + case 0x0419 => 0x0439 // CYRILLIC CAPITAL LETTER SHORT I + case 0x041A => 0x043A // CYRILLIC CAPITAL LETTER KA + case 0x041B => 0x043B // CYRILLIC CAPITAL LETTER EL + case 0x041C => 0x043C // CYRILLIC CAPITAL LETTER EM + case 0x041D => 0x043D // CYRILLIC CAPITAL LETTER EN + case 0x041E => 0x043E // CYRILLIC CAPITAL LETTER O + case 0x041F => 0x043F // CYRILLIC CAPITAL LETTER PE + case 0x0420 => 0x0440 // CYRILLIC CAPITAL LETTER ER + case 0x0421 => 0x0441 // CYRILLIC CAPITAL LETTER ES + case 0x0422 => 0x0442 // CYRILLIC CAPITAL LETTER TE + case 0x0423 => 0x0443 // CYRILLIC CAPITAL LETTER U + case 0x0424 => 0x0444 // CYRILLIC CAPITAL LETTER EF + case 0x0425 => 0x0445 // CYRILLIC CAPITAL LETTER HA + case 0x0426 => 0x0446 // CYRILLIC CAPITAL LETTER TSE + case 0x0427 => 0x0447 // CYRILLIC CAPITAL LETTER CHE + case 0x0428 => 0x0448 // CYRILLIC CAPITAL LETTER SHA + case 0x0429 => 0x0449 // CYRILLIC CAPITAL LETTER SHCHA + case 0x042A => 0x044A // CYRILLIC 
CAPITAL LETTER HARD SIGN + case 0x042B => 0x044B // CYRILLIC CAPITAL LETTER YERU + case 0x042C => 0x044C // CYRILLIC CAPITAL LETTER SOFT SIGN + case 0x042D => 0x044D // CYRILLIC CAPITAL LETTER E + case 0x042E => 0x044E // CYRILLIC CAPITAL LETTER YU + case 0x042F => 0x044F // CYRILLIC CAPITAL LETTER YA + case 0x0460 => 0x0461 // CYRILLIC CAPITAL LETTER OMEGA + case 0x0462 => 0x0463 // CYRILLIC CAPITAL LETTER YAT + case 0x0464 => 0x0465 // CYRILLIC CAPITAL LETTER IOTIFIED E + case 0x0466 => 0x0467 // CYRILLIC CAPITAL LETTER LITTLE YUS + case 0x0468 => 0x0469 // CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS + case 0x046A => 0x046B // CYRILLIC CAPITAL LETTER BIG YUS + case 0x046C => 0x046D // CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS + case 0x046E => 0x046F // CYRILLIC CAPITAL LETTER KSI + case 0x0470 => 0x0471 // CYRILLIC CAPITAL LETTER PSI + case 0x0472 => 0x0473 // CYRILLIC CAPITAL LETTER FITA + case 0x0474 => 0x0475 // CYRILLIC CAPITAL LETTER IZHITSA + case 0x0476 => 0x0477 // CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT + case 0x0478 => 0x0479 // CYRILLIC CAPITAL LETTER UK + case 0x047A => 0x047B // CYRILLIC CAPITAL LETTER ROUND OMEGA + case 0x047C => 0x047D // CYRILLIC CAPITAL LETTER OMEGA WITH TITLO + case 0x047E => 0x047F // CYRILLIC CAPITAL LETTER OT + case 0x0480 => 0x0481 // CYRILLIC CAPITAL LETTER KOPPA + case 0x048A => 0x048B // CYRILLIC CAPITAL LETTER SHORT I WITH TAIL + case 0x048C => 0x048D // CYRILLIC CAPITAL LETTER SEMISOFT SIGN + case 0x048E => 0x048F // CYRILLIC CAPITAL LETTER ER WITH TICK + case 0x0490 => 0x0491 // CYRILLIC CAPITAL LETTER GHE WITH UPTURN + case 0x0492 => 0x0493 // CYRILLIC CAPITAL LETTER GHE WITH STROKE + case 0x0494 => 0x0495 // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK + case 0x0496 => 0x0497 // CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER + case 0x0498 => 0x0499 // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER + case 0x049A => 0x049B // CYRILLIC CAPITAL LETTER KA WITH DESCENDER + case 0x049C => 0x049D // CYRILLIC CAPITAL 
LETTER KA WITH VERTICAL STROKE + case 0x049E => 0x049F // CYRILLIC CAPITAL LETTER KA WITH STROKE + case 0x04A0 => 0x04A1 // CYRILLIC CAPITAL LETTER BASHKIR KA + case 0x04A2 => 0x04A3 // CYRILLIC CAPITAL LETTER EN WITH DESCENDER + case 0x04A4 => 0x04A5 // CYRILLIC CAPITAL LIGATURE EN GHE + case 0x04A6 => 0x04A7 // CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK + case 0x04A8 => 0x04A9 // CYRILLIC CAPITAL LETTER ABKHASIAN HA + case 0x04AA => 0x04AB // CYRILLIC CAPITAL LETTER ES WITH DESCENDER + case 0x04AC => 0x04AD // CYRILLIC CAPITAL LETTER TE WITH DESCENDER + case 0x04AE => 0x04AF // CYRILLIC CAPITAL LETTER STRAIGHT U + case 0x04B0 => 0x04B1 // CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE + case 0x04B2 => 0x04B3 // CYRILLIC CAPITAL LETTER HA WITH DESCENDER + case 0x04B4 => 0x04B5 // CYRILLIC CAPITAL LIGATURE TE TSE + case 0x04B6 => 0x04B7 // CYRILLIC CAPITAL LETTER CHE WITH DESCENDER + case 0x04B8 => 0x04B9 // CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE + case 0x04BA => 0x04BB // CYRILLIC CAPITAL LETTER SHHA + case 0x04BC => 0x04BD // CYRILLIC CAPITAL LETTER ABKHASIAN CHE + case 0x04BE => 0x04BF // CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER + case 0x04C0 => 0x04CF // CYRILLIC LETTER PALOCHKA + case 0x04C1 => 0x04C2 // CYRILLIC CAPITAL LETTER ZHE WITH BREVE + case 0x04C3 => 0x04C4 // CYRILLIC CAPITAL LETTER KA WITH HOOK + case 0x04C5 => 0x04C6 // CYRILLIC CAPITAL LETTER EL WITH TAIL + case 0x04C7 => 0x04C8 // CYRILLIC CAPITAL LETTER EN WITH HOOK + case 0x04C9 => 0x04CA // CYRILLIC CAPITAL LETTER EN WITH TAIL + case 0x04CB => 0x04CC // CYRILLIC CAPITAL LETTER KHAKASSIAN CHE + case 0x04CD => 0x04CE // CYRILLIC CAPITAL LETTER EM WITH TAIL + case 0x04D0 => 0x04D1 // CYRILLIC CAPITAL LETTER A WITH BREVE + case 0x04D2 => 0x04D3 // CYRILLIC CAPITAL LETTER A WITH DIAERESIS + case 0x04D4 => 0x04D5 // CYRILLIC CAPITAL LIGATURE A IE + case 0x04D6 => 0x04D7 // CYRILLIC CAPITAL LETTER IE WITH BREVE + case 0x04D8 => 0x04D9 // CYRILLIC CAPITAL LETTER SCHWA + case 
0x04DA => 0x04DB // CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS + case 0x04DC => 0x04DD // CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS + case 0x04DE => 0x04DF // CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS + case 0x04E0 => 0x04E1 // CYRILLIC CAPITAL LETTER ABKHASIAN DZE + case 0x04E2 => 0x04E3 // CYRILLIC CAPITAL LETTER I WITH MACRON + case 0x04E4 => 0x04E5 // CYRILLIC CAPITAL LETTER I WITH DIAERESIS + case 0x04E6 => 0x04E7 // CYRILLIC CAPITAL LETTER O WITH DIAERESIS + case 0x04E8 => 0x04E9 // CYRILLIC CAPITAL LETTER BARRED O + case 0x04EA => 0x04EB // CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS + case 0x04EC => 0x04ED // CYRILLIC CAPITAL LETTER E WITH DIAERESIS + case 0x04EE => 0x04EF // CYRILLIC CAPITAL LETTER U WITH MACRON + case 0x04F0 => 0x04F1 // CYRILLIC CAPITAL LETTER U WITH DIAERESIS + case 0x04F2 => 0x04F3 // CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE + case 0x04F4 => 0x04F5 // CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS + case 0x04F6 => 0x04F7 // CYRILLIC CAPITAL LETTER GHE WITH DESCENDER + case 0x04F8 => 0x04F9 // CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS + case 0x04FA => 0x04FB // CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK + case 0x04FC => 0x04FD // CYRILLIC CAPITAL LETTER HA WITH HOOK + case 0x04FE => 0x04FF // CYRILLIC CAPITAL LETTER HA WITH STROKE + case 0x0500 => 0x0501 // CYRILLIC CAPITAL LETTER KOMI DE + case 0x0502 => 0x0503 // CYRILLIC CAPITAL LETTER KOMI DJE + case 0x0504 => 0x0505 // CYRILLIC CAPITAL LETTER KOMI ZJE + case 0x0506 => 0x0507 // CYRILLIC CAPITAL LETTER KOMI DZJE + case 0x0508 => 0x0509 // CYRILLIC CAPITAL LETTER KOMI LJE + case 0x050A => 0x050B // CYRILLIC CAPITAL LETTER KOMI NJE + case 0x050C => 0x050D // CYRILLIC CAPITAL LETTER KOMI SJE + case 0x050E => 0x050F // CYRILLIC CAPITAL LETTER KOMI TJE + case 0x0510 => 0x0511 // CYRILLIC CAPITAL LETTER REVERSED ZE + case 0x0512 => 0x0513 // CYRILLIC CAPITAL LETTER EL WITH HOOK + case 0x0514 => 0x0515 // CYRILLIC CAPITAL LETTER LHA + case 0x0516 => 0x0517 // CYRILLIC CAPITAL 
LETTER RHA + case 0x0518 => 0x0519 // CYRILLIC CAPITAL LETTER YAE + case 0x051A => 0x051B // CYRILLIC CAPITAL LETTER QA + case 0x051C => 0x051D // CYRILLIC CAPITAL LETTER WE + case 0x051E => 0x051F // CYRILLIC CAPITAL LETTER ALEUT KA + case 0x0520 => 0x0521 // CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK + case 0x0522 => 0x0523 // CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK + case 0x0524 => 0x0525 // CYRILLIC CAPITAL LETTER PE WITH DESCENDER + case 0x0526 => 0x0527 // CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER + case 0x0528 => 0x0529 // CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK + case 0x052A => 0x052B // CYRILLIC CAPITAL LETTER DZZHE + case 0x052C => 0x052D // CYRILLIC CAPITAL LETTER DCHE + case 0x052E => 0x052F // CYRILLIC CAPITAL LETTER EL WITH DESCENDER + case 0x0531 => 0x0561 // ARMENIAN CAPITAL LETTER AYB + case 0x0532 => 0x0562 // ARMENIAN CAPITAL LETTER BEN + case 0x0533 => 0x0563 // ARMENIAN CAPITAL LETTER GIM + case 0x0534 => 0x0564 // ARMENIAN CAPITAL LETTER DA + case 0x0535 => 0x0565 // ARMENIAN CAPITAL LETTER ECH + case 0x0536 => 0x0566 // ARMENIAN CAPITAL LETTER ZA + case 0x0537 => 0x0567 // ARMENIAN CAPITAL LETTER EH + case 0x0538 => 0x0568 // ARMENIAN CAPITAL LETTER ET + case 0x0539 => 0x0569 // ARMENIAN CAPITAL LETTER TO + case 0x053A => 0x056A // ARMENIAN CAPITAL LETTER ZHE + case 0x053B => 0x056B // ARMENIAN CAPITAL LETTER INI + case 0x053C => 0x056C // ARMENIAN CAPITAL LETTER LIWN + case 0x053D => 0x056D // ARMENIAN CAPITAL LETTER XEH + case 0x053E => 0x056E // ARMENIAN CAPITAL LETTER CA + case 0x053F => 0x056F // ARMENIAN CAPITAL LETTER KEN + case 0x0540 => 0x0570 // ARMENIAN CAPITAL LETTER HO + case 0x0541 => 0x0571 // ARMENIAN CAPITAL LETTER JA + case 0x0542 => 0x0572 // ARMENIAN CAPITAL LETTER GHAD + case 0x0543 => 0x0573 // ARMENIAN CAPITAL LETTER CHEH + case 0x0544 => 0x0574 // ARMENIAN CAPITAL LETTER MEN + case 0x0545 => 0x0575 // ARMENIAN CAPITAL LETTER YI + case 0x0546 => 0x0576 // ARMENIAN CAPITAL LETTER NOW + case 0x0547 => 0x0577 // 
ARMENIAN CAPITAL LETTER SHA + case 0x0548 => 0x0578 // ARMENIAN CAPITAL LETTER VO + case 0x0549 => 0x0579 // ARMENIAN CAPITAL LETTER CHA + case 0x054A => 0x057A // ARMENIAN CAPITAL LETTER PEH + case 0x054B => 0x057B // ARMENIAN CAPITAL LETTER JHEH + case 0x054C => 0x057C // ARMENIAN CAPITAL LETTER RA + case 0x054D => 0x057D // ARMENIAN CAPITAL LETTER SEH + case 0x054E => 0x057E // ARMENIAN CAPITAL LETTER VEW + case 0x054F => 0x057F // ARMENIAN CAPITAL LETTER TIWN + case 0x0550 => 0x0580 // ARMENIAN CAPITAL LETTER REH + case 0x0551 => 0x0581 // ARMENIAN CAPITAL LETTER CO + case 0x0552 => 0x0582 // ARMENIAN CAPITAL LETTER YIWN + case 0x0553 => 0x0583 // ARMENIAN CAPITAL LETTER PIWR + case 0x0554 => 0x0584 // ARMENIAN CAPITAL LETTER KEH + case 0x0555 => 0x0585 // ARMENIAN CAPITAL LETTER OH + case 0x0556 => 0x0586 // ARMENIAN CAPITAL LETTER FEH + case 0x10A0 => 0x2D00 // GEORGIAN CAPITAL LETTER AN + case 0x10A1 => 0x2D01 // GEORGIAN CAPITAL LETTER BAN + case 0x10A2 => 0x2D02 // GEORGIAN CAPITAL LETTER GAN + case 0x10A3 => 0x2D03 // GEORGIAN CAPITAL LETTER DON + case 0x10A4 => 0x2D04 // GEORGIAN CAPITAL LETTER EN + case 0x10A5 => 0x2D05 // GEORGIAN CAPITAL LETTER VIN + case 0x10A6 => 0x2D06 // GEORGIAN CAPITAL LETTER ZEN + case 0x10A7 => 0x2D07 // GEORGIAN CAPITAL LETTER TAN + case 0x10A8 => 0x2D08 // GEORGIAN CAPITAL LETTER IN + case 0x10A9 => 0x2D09 // GEORGIAN CAPITAL LETTER KAN + case 0x10AA => 0x2D0A // GEORGIAN CAPITAL LETTER LAS + case 0x10AB => 0x2D0B // GEORGIAN CAPITAL LETTER MAN + case 0x10AC => 0x2D0C // GEORGIAN CAPITAL LETTER NAR + case 0x10AD => 0x2D0D // GEORGIAN CAPITAL LETTER ON + case 0x10AE => 0x2D0E // GEORGIAN CAPITAL LETTER PAR + case 0x10AF => 0x2D0F // GEORGIAN CAPITAL LETTER ZHAR + case 0x10B0 => 0x2D10 // GEORGIAN CAPITAL LETTER RAE + case 0x10B1 => 0x2D11 // GEORGIAN CAPITAL LETTER SAN + case 0x10B2 => 0x2D12 // GEORGIAN CAPITAL LETTER TAR + case 0x10B3 => 0x2D13 // GEORGIAN CAPITAL LETTER UN + case 0x10B4 => 0x2D14 // GEORGIAN CAPITAL LETTER 
PHAR + case 0x10B5 => 0x2D15 // GEORGIAN CAPITAL LETTER KHAR + case 0x10B6 => 0x2D16 // GEORGIAN CAPITAL LETTER GHAN + case 0x10B7 => 0x2D17 // GEORGIAN CAPITAL LETTER QAR + case 0x10B8 => 0x2D18 // GEORGIAN CAPITAL LETTER SHIN + case 0x10B9 => 0x2D19 // GEORGIAN CAPITAL LETTER CHIN + case 0x10BA => 0x2D1A // GEORGIAN CAPITAL LETTER CAN + case 0x10BB => 0x2D1B // GEORGIAN CAPITAL LETTER JIL + case 0x10BC => 0x2D1C // GEORGIAN CAPITAL LETTER CIL + case 0x10BD => 0x2D1D // GEORGIAN CAPITAL LETTER CHAR + case 0x10BE => 0x2D1E // GEORGIAN CAPITAL LETTER XAN + case 0x10BF => 0x2D1F // GEORGIAN CAPITAL LETTER JHAN + case 0x10C0 => 0x2D20 // GEORGIAN CAPITAL LETTER HAE + case 0x10C1 => 0x2D21 // GEORGIAN CAPITAL LETTER HE + case 0x10C2 => 0x2D22 // GEORGIAN CAPITAL LETTER HIE + case 0x10C3 => 0x2D23 // GEORGIAN CAPITAL LETTER WE + case 0x10C4 => 0x2D24 // GEORGIAN CAPITAL LETTER HAR + case 0x10C5 => 0x2D25 // GEORGIAN CAPITAL LETTER HOE + case 0x10C7 => 0x2D27 // GEORGIAN CAPITAL LETTER YN + case 0x10CD => 0x2D2D // GEORGIAN CAPITAL LETTER AEN + case 0x13F8 => 0x13F0 // CHEROKEE SMALL LETTER YE + case 0x13F9 => 0x13F1 // CHEROKEE SMALL LETTER YI + case 0x13FA => 0x13F2 // CHEROKEE SMALL LETTER YO + case 0x13FB => 0x13F3 // CHEROKEE SMALL LETTER YU + case 0x13FC => 0x13F4 // CHEROKEE SMALL LETTER YV + case 0x13FD => 0x13F5 // CHEROKEE SMALL LETTER MV + case 0x1C80 => 0x0432 // CYRILLIC SMALL LETTER ROUNDED VE + case 0x1C81 => 0x0434 // CYRILLIC SMALL LETTER LONG-LEGGED DE + case 0x1C82 => 0x043E // CYRILLIC SMALL LETTER NARROW O + case 0x1C83 => 0x0441 // CYRILLIC SMALL LETTER WIDE ES + case 0x1C84 => 0x0442 // CYRILLIC SMALL LETTER TALL TE + case 0x1C85 => 0x0442 // CYRILLIC SMALL LETTER THREE-LEGGED TE + case 0x1C86 => 0x044A // CYRILLIC SMALL LETTER TALL HARD SIGN + case 0x1C87 => 0x0463 // CYRILLIC SMALL LETTER TALL YAT + case 0x1C88 => 0xA64B // CYRILLIC SMALL LETTER UNBLENDED UK + case 0x1C90 => 0x10D0 // GEORGIAN MTAVRULI CAPITAL LETTER AN + case 0x1C91 => 0x10D1 // 
GEORGIAN MTAVRULI CAPITAL LETTER BAN + case 0x1C92 => 0x10D2 // GEORGIAN MTAVRULI CAPITAL LETTER GAN + case 0x1C93 => 0x10D3 // GEORGIAN MTAVRULI CAPITAL LETTER DON + case 0x1C94 => 0x10D4 // GEORGIAN MTAVRULI CAPITAL LETTER EN + case 0x1C95 => 0x10D5 // GEORGIAN MTAVRULI CAPITAL LETTER VIN + case 0x1C96 => 0x10D6 // GEORGIAN MTAVRULI CAPITAL LETTER ZEN + case 0x1C97 => 0x10D7 // GEORGIAN MTAVRULI CAPITAL LETTER TAN + case 0x1C98 => 0x10D8 // GEORGIAN MTAVRULI CAPITAL LETTER IN + case 0x1C99 => 0x10D9 // GEORGIAN MTAVRULI CAPITAL LETTER KAN + case 0x1C9A => 0x10DA // GEORGIAN MTAVRULI CAPITAL LETTER LAS + case 0x1C9B => 0x10DB // GEORGIAN MTAVRULI CAPITAL LETTER MAN + case 0x1C9C => 0x10DC // GEORGIAN MTAVRULI CAPITAL LETTER NAR + case 0x1C9D => 0x10DD // GEORGIAN MTAVRULI CAPITAL LETTER ON + case 0x1C9E => 0x10DE // GEORGIAN MTAVRULI CAPITAL LETTER PAR + case 0x1C9F => 0x10DF // GEORGIAN MTAVRULI CAPITAL LETTER ZHAR + case 0x1CA0 => 0x10E0 // GEORGIAN MTAVRULI CAPITAL LETTER RAE + case 0x1CA1 => 0x10E1 // GEORGIAN MTAVRULI CAPITAL LETTER SAN + case 0x1CA2 => 0x10E2 // GEORGIAN MTAVRULI CAPITAL LETTER TAR + case 0x1CA3 => 0x10E3 // GEORGIAN MTAVRULI CAPITAL LETTER UN + case 0x1CA4 => 0x10E4 // GEORGIAN MTAVRULI CAPITAL LETTER PHAR + case 0x1CA5 => 0x10E5 // GEORGIAN MTAVRULI CAPITAL LETTER KHAR + case 0x1CA6 => 0x10E6 // GEORGIAN MTAVRULI CAPITAL LETTER GHAN + case 0x1CA7 => 0x10E7 // GEORGIAN MTAVRULI CAPITAL LETTER QAR + case 0x1CA8 => 0x10E8 // GEORGIAN MTAVRULI CAPITAL LETTER SHIN + case 0x1CA9 => 0x10E9 // GEORGIAN MTAVRULI CAPITAL LETTER CHIN + case 0x1CAA => 0x10EA // GEORGIAN MTAVRULI CAPITAL LETTER CAN + case 0x1CAB => 0x10EB // GEORGIAN MTAVRULI CAPITAL LETTER JIL + case 0x1CAC => 0x10EC // GEORGIAN MTAVRULI CAPITAL LETTER CIL + case 0x1CAD => 0x10ED // GEORGIAN MTAVRULI CAPITAL LETTER CHAR + case 0x1CAE => 0x10EE // GEORGIAN MTAVRULI CAPITAL LETTER XAN + case 0x1CAF => 0x10EF // GEORGIAN MTAVRULI CAPITAL LETTER JHAN + case 0x1CB0 => 0x10F0 // GEORGIAN 
MTAVRULI CAPITAL LETTER HAE + case 0x1CB1 => 0x10F1 // GEORGIAN MTAVRULI CAPITAL LETTER HE + case 0x1CB2 => 0x10F2 // GEORGIAN MTAVRULI CAPITAL LETTER HIE + case 0x1CB3 => 0x10F3 // GEORGIAN MTAVRULI CAPITAL LETTER WE + case 0x1CB4 => 0x10F4 // GEORGIAN MTAVRULI CAPITAL LETTER HAR + case 0x1CB5 => 0x10F5 // GEORGIAN MTAVRULI CAPITAL LETTER HOE + case 0x1CB6 => 0x10F6 // GEORGIAN MTAVRULI CAPITAL LETTER FI + case 0x1CB7 => 0x10F7 // GEORGIAN MTAVRULI CAPITAL LETTER YN + case 0x1CB8 => 0x10F8 // GEORGIAN MTAVRULI CAPITAL LETTER ELIFI + case 0x1CB9 => 0x10F9 // GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN + case 0x1CBA => 0x10FA // GEORGIAN MTAVRULI CAPITAL LETTER AIN + case 0x1CBD => 0x10FD // GEORGIAN MTAVRULI CAPITAL LETTER AEN + case 0x1CBE => 0x10FE // GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN + case 0x1CBF => 0x10FF // GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN + case 0x1E00 => 0x1E01 // LATIN CAPITAL LETTER A WITH RING BELOW + case 0x1E02 => 0x1E03 // LATIN CAPITAL LETTER B WITH DOT ABOVE + case 0x1E04 => 0x1E05 // LATIN CAPITAL LETTER B WITH DOT BELOW + case 0x1E06 => 0x1E07 // LATIN CAPITAL LETTER B WITH LINE BELOW + case 0x1E08 => 0x1E09 // LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE + case 0x1E0A => 0x1E0B // LATIN CAPITAL LETTER D WITH DOT ABOVE + case 0x1E0C => 0x1E0D // LATIN CAPITAL LETTER D WITH DOT BELOW + case 0x1E0E => 0x1E0F // LATIN CAPITAL LETTER D WITH LINE BELOW + case 0x1E10 => 0x1E11 // LATIN CAPITAL LETTER D WITH CEDILLA + case 0x1E12 => 0x1E13 // LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW + case 0x1E14 => 0x1E15 // LATIN CAPITAL LETTER E WITH MACRON AND GRAVE + case 0x1E16 => 0x1E17 // LATIN CAPITAL LETTER E WITH MACRON AND ACUTE + case 0x1E18 => 0x1E19 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW + case 0x1E1A => 0x1E1B // LATIN CAPITAL LETTER E WITH TILDE BELOW + case 0x1E1C => 0x1E1D // LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE + case 0x1E1E => 0x1E1F // LATIN CAPITAL LETTER F WITH DOT ABOVE + case 0x1E20 => 0x1E21 // LATIN 
CAPITAL LETTER G WITH MACRON + case 0x1E22 => 0x1E23 // LATIN CAPITAL LETTER H WITH DOT ABOVE + case 0x1E24 => 0x1E25 // LATIN CAPITAL LETTER H WITH DOT BELOW + case 0x1E26 => 0x1E27 // LATIN CAPITAL LETTER H WITH DIAERESIS + case 0x1E28 => 0x1E29 // LATIN CAPITAL LETTER H WITH CEDILLA + case 0x1E2A => 0x1E2B // LATIN CAPITAL LETTER H WITH BREVE BELOW + case 0x1E2C => 0x1E2D // LATIN CAPITAL LETTER I WITH TILDE BELOW + case 0x1E2E => 0x1E2F // LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE + case 0x1E30 => 0x1E31 // LATIN CAPITAL LETTER K WITH ACUTE + case 0x1E32 => 0x1E33 // LATIN CAPITAL LETTER K WITH DOT BELOW + case 0x1E34 => 0x1E35 // LATIN CAPITAL LETTER K WITH LINE BELOW + case 0x1E36 => 0x1E37 // LATIN CAPITAL LETTER L WITH DOT BELOW + case 0x1E38 => 0x1E39 // LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON + case 0x1E3A => 0x1E3B // LATIN CAPITAL LETTER L WITH LINE BELOW + case 0x1E3C => 0x1E3D // LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW + case 0x1E3E => 0x1E3F // LATIN CAPITAL LETTER M WITH ACUTE + case 0x1E40 => 0x1E41 // LATIN CAPITAL LETTER M WITH DOT ABOVE + case 0x1E42 => 0x1E43 // LATIN CAPITAL LETTER M WITH DOT BELOW + case 0x1E44 => 0x1E45 // LATIN CAPITAL LETTER N WITH DOT ABOVE + case 0x1E46 => 0x1E47 // LATIN CAPITAL LETTER N WITH DOT BELOW + case 0x1E48 => 0x1E49 // LATIN CAPITAL LETTER N WITH LINE BELOW + case 0x1E4A => 0x1E4B // LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW + case 0x1E4C => 0x1E4D // LATIN CAPITAL LETTER O WITH TILDE AND ACUTE + case 0x1E4E => 0x1E4F // LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS + case 0x1E50 => 0x1E51 // LATIN CAPITAL LETTER O WITH MACRON AND GRAVE + case 0x1E52 => 0x1E53 // LATIN CAPITAL LETTER O WITH MACRON AND ACUTE + case 0x1E54 => 0x1E55 // LATIN CAPITAL LETTER P WITH ACUTE + case 0x1E56 => 0x1E57 // LATIN CAPITAL LETTER P WITH DOT ABOVE + case 0x1E58 => 0x1E59 // LATIN CAPITAL LETTER R WITH DOT ABOVE + case 0x1E5A => 0x1E5B // LATIN CAPITAL LETTER R WITH DOT BELOW + case 0x1E5C => 0x1E5D // 
LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON + case 0x1E5E => 0x1E5F // LATIN CAPITAL LETTER R WITH LINE BELOW + case 0x1E60 => 0x1E61 // LATIN CAPITAL LETTER S WITH DOT ABOVE + case 0x1E62 => 0x1E63 // LATIN CAPITAL LETTER S WITH DOT BELOW + case 0x1E64 => 0x1E65 // LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE + case 0x1E66 => 0x1E67 // LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE + case 0x1E68 => 0x1E69 // LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE + case 0x1E6A => 0x1E6B // LATIN CAPITAL LETTER T WITH DOT ABOVE + case 0x1E6C => 0x1E6D // LATIN CAPITAL LETTER T WITH DOT BELOW + case 0x1E6E => 0x1E6F // LATIN CAPITAL LETTER T WITH LINE BELOW + case 0x1E70 => 0x1E71 // LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW + case 0x1E72 => 0x1E73 // LATIN CAPITAL LETTER U WITH DIAERESIS BELOW + case 0x1E74 => 0x1E75 // LATIN CAPITAL LETTER U WITH TILDE BELOW + case 0x1E76 => 0x1E77 // LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW + case 0x1E78 => 0x1E79 // LATIN CAPITAL LETTER U WITH TILDE AND ACUTE + case 0x1E7A => 0x1E7B // LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS + case 0x1E7C => 0x1E7D // LATIN CAPITAL LETTER V WITH TILDE + case 0x1E7E => 0x1E7F // LATIN CAPITAL LETTER V WITH DOT BELOW + case 0x1E80 => 0x1E81 // LATIN CAPITAL LETTER W WITH GRAVE + case 0x1E82 => 0x1E83 // LATIN CAPITAL LETTER W WITH ACUTE + case 0x1E84 => 0x1E85 // LATIN CAPITAL LETTER W WITH DIAERESIS + case 0x1E86 => 0x1E87 // LATIN CAPITAL LETTER W WITH DOT ABOVE + case 0x1E88 => 0x1E89 // LATIN CAPITAL LETTER W WITH DOT BELOW + case 0x1E8A => 0x1E8B // LATIN CAPITAL LETTER X WITH DOT ABOVE + case 0x1E8C => 0x1E8D // LATIN CAPITAL LETTER X WITH DIAERESIS + case 0x1E8E => 0x1E8F // LATIN CAPITAL LETTER Y WITH DOT ABOVE + case 0x1E90 => 0x1E91 // LATIN CAPITAL LETTER Z WITH CIRCUMFLEX + case 0x1E92 => 0x1E93 // LATIN CAPITAL LETTER Z WITH DOT BELOW + case 0x1E94 => 0x1E95 // LATIN CAPITAL LETTER Z WITH LINE BELOW + case 0x1E9B => 0x1E61 // LATIN SMALL LETTER LONG S WITH DOT 
ABOVE + case 0x1EA0 => 0x1EA1 // LATIN CAPITAL LETTER A WITH DOT BELOW + case 0x1EA2 => 0x1EA3 // LATIN CAPITAL LETTER A WITH HOOK ABOVE + case 0x1EA4 => 0x1EA5 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE + case 0x1EA6 => 0x1EA7 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE + case 0x1EA8 => 0x1EA9 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1EAA => 0x1EAB // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE + case 0x1EAC => 0x1EAD // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW + case 0x1EAE => 0x1EAF // LATIN CAPITAL LETTER A WITH BREVE AND ACUTE + case 0x1EB0 => 0x1EB1 // LATIN CAPITAL LETTER A WITH BREVE AND GRAVE + case 0x1EB2 => 0x1EB3 // LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE + case 0x1EB4 => 0x1EB5 // LATIN CAPITAL LETTER A WITH BREVE AND TILDE + case 0x1EB6 => 0x1EB7 // LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW + case 0x1EB8 => 0x1EB9 // LATIN CAPITAL LETTER E WITH DOT BELOW + case 0x1EBA => 0x1EBB // LATIN CAPITAL LETTER E WITH HOOK ABOVE + case 0x1EBC => 0x1EBD // LATIN CAPITAL LETTER E WITH TILDE + case 0x1EBE => 0x1EBF // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE + case 0x1EC0 => 0x1EC1 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE + case 0x1EC2 => 0x1EC3 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1EC4 => 0x1EC5 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE + case 0x1EC6 => 0x1EC7 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW + case 0x1EC8 => 0x1EC9 // LATIN CAPITAL LETTER I WITH HOOK ABOVE + case 0x1ECA => 0x1ECB // LATIN CAPITAL LETTER I WITH DOT BELOW + case 0x1ECC => 0x1ECD // LATIN CAPITAL LETTER O WITH DOT BELOW + case 0x1ECE => 0x1ECF // LATIN CAPITAL LETTER O WITH HOOK ABOVE + case 0x1ED0 => 0x1ED1 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE + case 0x1ED2 => 0x1ED3 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE + case 0x1ED4 => 0x1ED5 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1ED6 => 0x1ED7 // LATIN 
CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE + case 0x1ED8 => 0x1ED9 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW + case 0x1EDA => 0x1EDB // LATIN CAPITAL LETTER O WITH HORN AND ACUTE + case 0x1EDC => 0x1EDD // LATIN CAPITAL LETTER O WITH HORN AND GRAVE + case 0x1EDE => 0x1EDF // LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE + case 0x1EE0 => 0x1EE1 // LATIN CAPITAL LETTER O WITH HORN AND TILDE + case 0x1EE2 => 0x1EE3 // LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW + case 0x1EE4 => 0x1EE5 // LATIN CAPITAL LETTER U WITH DOT BELOW + case 0x1EE6 => 0x1EE7 // LATIN CAPITAL LETTER U WITH HOOK ABOVE + case 0x1EE8 => 0x1EE9 // LATIN CAPITAL LETTER U WITH HORN AND ACUTE + case 0x1EEA => 0x1EEB // LATIN CAPITAL LETTER U WITH HORN AND GRAVE + case 0x1EEC => 0x1EED // LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE + case 0x1EEE => 0x1EEF // LATIN CAPITAL LETTER U WITH HORN AND TILDE + case 0x1EF0 => 0x1EF1 // LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW + case 0x1EF2 => 0x1EF3 // LATIN CAPITAL LETTER Y WITH GRAVE + case 0x1EF4 => 0x1EF5 // LATIN CAPITAL LETTER Y WITH DOT BELOW + case 0x1EF6 => 0x1EF7 // LATIN CAPITAL LETTER Y WITH HOOK ABOVE + case 0x1EF8 => 0x1EF9 // LATIN CAPITAL LETTER Y WITH TILDE + case 0x1EFA => 0x1EFB // LATIN CAPITAL LETTER MIDDLE-WELSH LL + case 0x1EFC => 0x1EFD // LATIN CAPITAL LETTER MIDDLE-WELSH V + case 0x1EFE => 0x1EFF // LATIN CAPITAL LETTER Y WITH LOOP + case 0x1F08 => 0x1F00 // GREEK CAPITAL LETTER ALPHA WITH PSILI + case 0x1F09 => 0x1F01 // GREEK CAPITAL LETTER ALPHA WITH DASIA + case 0x1F0A => 0x1F02 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA + case 0x1F0B => 0x1F03 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA + case 0x1F0C => 0x1F04 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA + case 0x1F0D => 0x1F05 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA + case 0x1F0E => 0x1F06 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI + case 0x1F0F => 0x1F07 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND 
PERISPOMENI + case 0x1F18 => 0x1F10 // GREEK CAPITAL LETTER EPSILON WITH PSILI + case 0x1F19 => 0x1F11 // GREEK CAPITAL LETTER EPSILON WITH DASIA + case 0x1F1A => 0x1F12 // GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA + case 0x1F1B => 0x1F13 // GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA + case 0x1F1C => 0x1F14 // GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA + case 0x1F1D => 0x1F15 // GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA + case 0x1F28 => 0x1F20 // GREEK CAPITAL LETTER ETA WITH PSILI + case 0x1F29 => 0x1F21 // GREEK CAPITAL LETTER ETA WITH DASIA + case 0x1F2A => 0x1F22 // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA + case 0x1F2B => 0x1F23 // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA + case 0x1F2C => 0x1F24 // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA + case 0x1F2D => 0x1F25 // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA + case 0x1F2E => 0x1F26 // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI + case 0x1F2F => 0x1F27 // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI + case 0x1F38 => 0x1F30 // GREEK CAPITAL LETTER IOTA WITH PSILI + case 0x1F39 => 0x1F31 // GREEK CAPITAL LETTER IOTA WITH DASIA + case 0x1F3A => 0x1F32 // GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA + case 0x1F3B => 0x1F33 // GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA + case 0x1F3C => 0x1F34 // GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA + case 0x1F3D => 0x1F35 // GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA + case 0x1F3E => 0x1F36 // GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI + case 0x1F3F => 0x1F37 // GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI + case 0x1F48 => 0x1F40 // GREEK CAPITAL LETTER OMICRON WITH PSILI + case 0x1F49 => 0x1F41 // GREEK CAPITAL LETTER OMICRON WITH DASIA + case 0x1F4A => 0x1F42 // GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA + case 0x1F4B => 0x1F43 // GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA + case 0x1F4C => 0x1F44 // GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA + case 0x1F4D => 0x1F45 // 
GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA + case 0x1F59 => 0x1F51 // GREEK CAPITAL LETTER UPSILON WITH DASIA + case 0x1F5B => 0x1F53 // GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA + case 0x1F5D => 0x1F55 // GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA + case 0x1F5F => 0x1F57 // GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI + case 0x1F68 => 0x1F60 // GREEK CAPITAL LETTER OMEGA WITH PSILI + case 0x1F69 => 0x1F61 // GREEK CAPITAL LETTER OMEGA WITH DASIA + case 0x1F6A => 0x1F62 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA + case 0x1F6B => 0x1F63 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA + case 0x1F6C => 0x1F64 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA + case 0x1F6D => 0x1F65 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA + case 0x1F6E => 0x1F66 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI + case 0x1F6F => 0x1F67 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI + case 0x1FB8 => 0x1FB0 // GREEK CAPITAL LETTER ALPHA WITH VRACHY + case 0x1FB9 => 0x1FB1 // GREEK CAPITAL LETTER ALPHA WITH MACRON + case 0x1FBA => 0x1F70 // GREEK CAPITAL LETTER ALPHA WITH VARIA + case 0x1FBB => 0x1F71 // GREEK CAPITAL LETTER ALPHA WITH OXIA + case 0x1FBE => 0x03B9 // GREEK PROSGEGRAMMENI + case 0x1FC8 => 0x1F72 // GREEK CAPITAL LETTER EPSILON WITH VARIA + case 0x1FC9 => 0x1F73 // GREEK CAPITAL LETTER EPSILON WITH OXIA + case 0x1FCA => 0x1F74 // GREEK CAPITAL LETTER ETA WITH VARIA + case 0x1FCB => 0x1F75 // GREEK CAPITAL LETTER ETA WITH OXIA + case 0x1FD8 => 0x1FD0 // GREEK CAPITAL LETTER IOTA WITH VRACHY + case 0x1FD9 => 0x1FD1 // GREEK CAPITAL LETTER IOTA WITH MACRON + case 0x1FDA => 0x1F76 // GREEK CAPITAL LETTER IOTA WITH VARIA + case 0x1FDB => 0x1F77 // GREEK CAPITAL LETTER IOTA WITH OXIA + case 0x1FE8 => 0x1FE0 // GREEK CAPITAL LETTER UPSILON WITH VRACHY + case 0x1FE9 => 0x1FE1 // GREEK CAPITAL LETTER UPSILON WITH MACRON + case 0x1FEA => 0x1F7A // GREEK CAPITAL LETTER UPSILON WITH VARIA + case 0x1FEB => 0x1F7B // 
GREEK CAPITAL LETTER UPSILON WITH OXIA + case 0x1FEC => 0x1FE5 // GREEK CAPITAL LETTER RHO WITH DASIA + case 0x1FF8 => 0x1F78 // GREEK CAPITAL LETTER OMICRON WITH VARIA + case 0x1FF9 => 0x1F79 // GREEK CAPITAL LETTER OMICRON WITH OXIA + case 0x1FFA => 0x1F7C // GREEK CAPITAL LETTER OMEGA WITH VARIA + case 0x1FFB => 0x1F7D // GREEK CAPITAL LETTER OMEGA WITH OXIA + case 0x2126 => 0x03C9 // OHM SIGN + case 0x212A => 0x006B // KELVIN SIGN + case 0x212B => 0x00E5 // ANGSTROM SIGN + case 0x2132 => 0x214E // TURNED CAPITAL F + case 0x2160 => 0x2170 // ROMAN NUMERAL ONE + case 0x2161 => 0x2171 // ROMAN NUMERAL TWO + case 0x2162 => 0x2172 // ROMAN NUMERAL THREE + case 0x2163 => 0x2173 // ROMAN NUMERAL FOUR + case 0x2164 => 0x2174 // ROMAN NUMERAL FIVE + case 0x2165 => 0x2175 // ROMAN NUMERAL SIX + case 0x2166 => 0x2176 // ROMAN NUMERAL SEVEN + case 0x2167 => 0x2177 // ROMAN NUMERAL EIGHT + case 0x2168 => 0x2178 // ROMAN NUMERAL NINE + case 0x2169 => 0x2179 // ROMAN NUMERAL TEN + case 0x216A => 0x217A // ROMAN NUMERAL ELEVEN + case 0x216B => 0x217B // ROMAN NUMERAL TWELVE + case 0x216C => 0x217C // ROMAN NUMERAL FIFTY + case 0x216D => 0x217D // ROMAN NUMERAL ONE HUNDRED + case 0x216E => 0x217E // ROMAN NUMERAL FIVE HUNDRED + case 0x216F => 0x217F // ROMAN NUMERAL ONE THOUSAND + case 0x2183 => 0x2184 // ROMAN NUMERAL REVERSED ONE HUNDRED + case 0x24B6 => 0x24D0 // CIRCLED LATIN CAPITAL LETTER A + case 0x24B7 => 0x24D1 // CIRCLED LATIN CAPITAL LETTER B + case 0x24B8 => 0x24D2 // CIRCLED LATIN CAPITAL LETTER C + case 0x24B9 => 0x24D3 // CIRCLED LATIN CAPITAL LETTER D + case 0x24BA => 0x24D4 // CIRCLED LATIN CAPITAL LETTER E + case 0x24BB => 0x24D5 // CIRCLED LATIN CAPITAL LETTER F + case 0x24BC => 0x24D6 // CIRCLED LATIN CAPITAL LETTER G + case 0x24BD => 0x24D7 // CIRCLED LATIN CAPITAL LETTER H + case 0x24BE => 0x24D8 // CIRCLED LATIN CAPITAL LETTER I + case 0x24BF => 0x24D9 // CIRCLED LATIN CAPITAL LETTER J + case 0x24C0 => 0x24DA // CIRCLED LATIN CAPITAL LETTER K + case 
0x24C1 => 0x24DB // CIRCLED LATIN CAPITAL LETTER L + case 0x24C2 => 0x24DC // CIRCLED LATIN CAPITAL LETTER M + case 0x24C3 => 0x24DD // CIRCLED LATIN CAPITAL LETTER N + case 0x24C4 => 0x24DE // CIRCLED LATIN CAPITAL LETTER O + case 0x24C5 => 0x24DF // CIRCLED LATIN CAPITAL LETTER P + case 0x24C6 => 0x24E0 // CIRCLED LATIN CAPITAL LETTER Q + case 0x24C7 => 0x24E1 // CIRCLED LATIN CAPITAL LETTER R + case 0x24C8 => 0x24E2 // CIRCLED LATIN CAPITAL LETTER S + case 0x24C9 => 0x24E3 // CIRCLED LATIN CAPITAL LETTER T + case 0x24CA => 0x24E4 // CIRCLED LATIN CAPITAL LETTER U + case 0x24CB => 0x24E5 // CIRCLED LATIN CAPITAL LETTER V + case 0x24CC => 0x24E6 // CIRCLED LATIN CAPITAL LETTER W + case 0x24CD => 0x24E7 // CIRCLED LATIN CAPITAL LETTER X + case 0x24CE => 0x24E8 // CIRCLED LATIN CAPITAL LETTER Y + case 0x24CF => 0x24E9 // CIRCLED LATIN CAPITAL LETTER Z + case 0x2C00 => 0x2C30 // GLAGOLITIC CAPITAL LETTER AZU + case 0x2C01 => 0x2C31 // GLAGOLITIC CAPITAL LETTER BUKY + case 0x2C02 => 0x2C32 // GLAGOLITIC CAPITAL LETTER VEDE + case 0x2C03 => 0x2C33 // GLAGOLITIC CAPITAL LETTER GLAGOLI + case 0x2C04 => 0x2C34 // GLAGOLITIC CAPITAL LETTER DOBRO + case 0x2C05 => 0x2C35 // GLAGOLITIC CAPITAL LETTER YESTU + case 0x2C06 => 0x2C36 // GLAGOLITIC CAPITAL LETTER ZHIVETE + case 0x2C07 => 0x2C37 // GLAGOLITIC CAPITAL LETTER DZELO + case 0x2C08 => 0x2C38 // GLAGOLITIC CAPITAL LETTER ZEMLJA + case 0x2C09 => 0x2C39 // GLAGOLITIC CAPITAL LETTER IZHE + case 0x2C0A => 0x2C3A // GLAGOLITIC CAPITAL LETTER INITIAL IZHE + case 0x2C0B => 0x2C3B // GLAGOLITIC CAPITAL LETTER I + case 0x2C0C => 0x2C3C // GLAGOLITIC CAPITAL LETTER DJERVI + case 0x2C0D => 0x2C3D // GLAGOLITIC CAPITAL LETTER KAKO + case 0x2C0E => 0x2C3E // GLAGOLITIC CAPITAL LETTER LJUDIJE + case 0x2C0F => 0x2C3F // GLAGOLITIC CAPITAL LETTER MYSLITE + case 0x2C10 => 0x2C40 // GLAGOLITIC CAPITAL LETTER NASHI + case 0x2C11 => 0x2C41 // GLAGOLITIC CAPITAL LETTER ONU + case 0x2C12 => 0x2C42 // GLAGOLITIC CAPITAL LETTER POKOJI + case 
0x2C13 => 0x2C43 // GLAGOLITIC CAPITAL LETTER RITSI + case 0x2C14 => 0x2C44 // GLAGOLITIC CAPITAL LETTER SLOVO + case 0x2C15 => 0x2C45 // GLAGOLITIC CAPITAL LETTER TVRIDO + case 0x2C16 => 0x2C46 // GLAGOLITIC CAPITAL LETTER UKU + case 0x2C17 => 0x2C47 // GLAGOLITIC CAPITAL LETTER FRITU + case 0x2C18 => 0x2C48 // GLAGOLITIC CAPITAL LETTER HERU + case 0x2C19 => 0x2C49 // GLAGOLITIC CAPITAL LETTER OTU + case 0x2C1A => 0x2C4A // GLAGOLITIC CAPITAL LETTER PE + case 0x2C1B => 0x2C4B // GLAGOLITIC CAPITAL LETTER SHTA + case 0x2C1C => 0x2C4C // GLAGOLITIC CAPITAL LETTER TSI + case 0x2C1D => 0x2C4D // GLAGOLITIC CAPITAL LETTER CHRIVI + case 0x2C1E => 0x2C4E // GLAGOLITIC CAPITAL LETTER SHA + case 0x2C1F => 0x2C4F // GLAGOLITIC CAPITAL LETTER YERU + case 0x2C20 => 0x2C50 // GLAGOLITIC CAPITAL LETTER YERI + case 0x2C21 => 0x2C51 // GLAGOLITIC CAPITAL LETTER YATI + case 0x2C22 => 0x2C52 // GLAGOLITIC CAPITAL LETTER SPIDERY HA + case 0x2C23 => 0x2C53 // GLAGOLITIC CAPITAL LETTER YU + case 0x2C24 => 0x2C54 // GLAGOLITIC CAPITAL LETTER SMALL YUS + case 0x2C25 => 0x2C55 // GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL + case 0x2C26 => 0x2C56 // GLAGOLITIC CAPITAL LETTER YO + case 0x2C27 => 0x2C57 // GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS + case 0x2C28 => 0x2C58 // GLAGOLITIC CAPITAL LETTER BIG YUS + case 0x2C29 => 0x2C59 // GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS + case 0x2C2A => 0x2C5A // GLAGOLITIC CAPITAL LETTER FITA + case 0x2C2B => 0x2C5B // GLAGOLITIC CAPITAL LETTER IZHITSA + case 0x2C2C => 0x2C5C // GLAGOLITIC CAPITAL LETTER SHTAPIC + case 0x2C2D => 0x2C5D // GLAGOLITIC CAPITAL LETTER TROKUTASTI A + case 0x2C2E => 0x2C5E // GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE + case 0x2C2F => 0x2C5F // GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI + case 0x2C60 => 0x2C61 // LATIN CAPITAL LETTER L WITH DOUBLE BAR + case 0x2C62 => 0x026B // LATIN CAPITAL LETTER L WITH MIDDLE TILDE + case 0x2C63 => 0x1D7D // LATIN CAPITAL LETTER P WITH STROKE + case 0x2C64 => 0x027D // LATIN CAPITAL 
LETTER R WITH TAIL + case 0x2C67 => 0x2C68 // LATIN CAPITAL LETTER H WITH DESCENDER + case 0x2C69 => 0x2C6A // LATIN CAPITAL LETTER K WITH DESCENDER + case 0x2C6B => 0x2C6C // LATIN CAPITAL LETTER Z WITH DESCENDER + case 0x2C6D => 0x0251 // LATIN CAPITAL LETTER ALPHA + case 0x2C6E => 0x0271 // LATIN CAPITAL LETTER M WITH HOOK + case 0x2C6F => 0x0250 // LATIN CAPITAL LETTER TURNED A + case 0x2C70 => 0x0252 // LATIN CAPITAL LETTER TURNED ALPHA + case 0x2C72 => 0x2C73 // LATIN CAPITAL LETTER W WITH HOOK + case 0x2C75 => 0x2C76 // LATIN CAPITAL LETTER HALF H + case 0x2C7E => 0x023F // LATIN CAPITAL LETTER S WITH SWASH TAIL + case 0x2C7F => 0x0240 // LATIN CAPITAL LETTER Z WITH SWASH TAIL + case 0x2C80 => 0x2C81 // COPTIC CAPITAL LETTER ALFA + case 0x2C82 => 0x2C83 // COPTIC CAPITAL LETTER VIDA + case 0x2C84 => 0x2C85 // COPTIC CAPITAL LETTER GAMMA + case 0x2C86 => 0x2C87 // COPTIC CAPITAL LETTER DALDA + case 0x2C88 => 0x2C89 // COPTIC CAPITAL LETTER EIE + case 0x2C8A => 0x2C8B // COPTIC CAPITAL LETTER SOU + case 0x2C8C => 0x2C8D // COPTIC CAPITAL LETTER ZATA + case 0x2C8E => 0x2C8F // COPTIC CAPITAL LETTER HATE + case 0x2C90 => 0x2C91 // COPTIC CAPITAL LETTER THETHE + case 0x2C92 => 0x2C93 // COPTIC CAPITAL LETTER IAUDA + case 0x2C94 => 0x2C95 // COPTIC CAPITAL LETTER KAPA + case 0x2C96 => 0x2C97 // COPTIC CAPITAL LETTER LAULA + case 0x2C98 => 0x2C99 // COPTIC CAPITAL LETTER MI + case 0x2C9A => 0x2C9B // COPTIC CAPITAL LETTER NI + case 0x2C9C => 0x2C9D // COPTIC CAPITAL LETTER KSI + case 0x2C9E => 0x2C9F // COPTIC CAPITAL LETTER O + case 0x2CA0 => 0x2CA1 // COPTIC CAPITAL LETTER PI + case 0x2CA2 => 0x2CA3 // COPTIC CAPITAL LETTER RO + case 0x2CA4 => 0x2CA5 // COPTIC CAPITAL LETTER SIMA + case 0x2CA6 => 0x2CA7 // COPTIC CAPITAL LETTER TAU + case 0x2CA8 => 0x2CA9 // COPTIC CAPITAL LETTER UA + case 0x2CAA => 0x2CAB // COPTIC CAPITAL LETTER FI + case 0x2CAC => 0x2CAD // COPTIC CAPITAL LETTER KHI + case 0x2CAE => 0x2CAF // COPTIC CAPITAL LETTER PSI + case 0x2CB0 => 0x2CB1 
// COPTIC CAPITAL LETTER OOU + case 0x2CB2 => 0x2CB3 // COPTIC CAPITAL LETTER DIALECT-P ALEF + case 0x2CB4 => 0x2CB5 // COPTIC CAPITAL LETTER OLD COPTIC AIN + case 0x2CB6 => 0x2CB7 // COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE + case 0x2CB8 => 0x2CB9 // COPTIC CAPITAL LETTER DIALECT-P KAPA + case 0x2CBA => 0x2CBB // COPTIC CAPITAL LETTER DIALECT-P NI + case 0x2CBC => 0x2CBD // COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI + case 0x2CBE => 0x2CBF // COPTIC CAPITAL LETTER OLD COPTIC OOU + case 0x2CC0 => 0x2CC1 // COPTIC CAPITAL LETTER SAMPI + case 0x2CC2 => 0x2CC3 // COPTIC CAPITAL LETTER CROSSED SHEI + case 0x2CC4 => 0x2CC5 // COPTIC CAPITAL LETTER OLD COPTIC SHEI + case 0x2CC6 => 0x2CC7 // COPTIC CAPITAL LETTER OLD COPTIC ESH + case 0x2CC8 => 0x2CC9 // COPTIC CAPITAL LETTER AKHMIMIC KHEI + case 0x2CCA => 0x2CCB // COPTIC CAPITAL LETTER DIALECT-P HORI + case 0x2CCC => 0x2CCD // COPTIC CAPITAL LETTER OLD COPTIC HORI + case 0x2CCE => 0x2CCF // COPTIC CAPITAL LETTER OLD COPTIC HA + case 0x2CD0 => 0x2CD1 // COPTIC CAPITAL LETTER L-SHAPED HA + case 0x2CD2 => 0x2CD3 // COPTIC CAPITAL LETTER OLD COPTIC HEI + case 0x2CD4 => 0x2CD5 // COPTIC CAPITAL LETTER OLD COPTIC HAT + case 0x2CD6 => 0x2CD7 // COPTIC CAPITAL LETTER OLD COPTIC GANGIA + case 0x2CD8 => 0x2CD9 // COPTIC CAPITAL LETTER OLD COPTIC DJA + case 0x2CDA => 0x2CDB // COPTIC CAPITAL LETTER OLD COPTIC SHIMA + case 0x2CDC => 0x2CDD // COPTIC CAPITAL LETTER OLD NUBIAN SHIMA + case 0x2CDE => 0x2CDF // COPTIC CAPITAL LETTER OLD NUBIAN NGI + case 0x2CE0 => 0x2CE1 // COPTIC CAPITAL LETTER OLD NUBIAN NYI + case 0x2CE2 => 0x2CE3 // COPTIC CAPITAL LETTER OLD NUBIAN WAU + case 0x2CEB => 0x2CEC // COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI + case 0x2CED => 0x2CEE // COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA + case 0x2CF2 => 0x2CF3 // COPTIC CAPITAL LETTER BOHAIRIC KHEI + case 0xA640 => 0xA641 // CYRILLIC CAPITAL LETTER ZEMLYA + case 0xA642 => 0xA643 // CYRILLIC CAPITAL LETTER DZELO + case 0xA644 => 0xA645 // CYRILLIC CAPITAL LETTER 
REVERSED DZE + case 0xA646 => 0xA647 // CYRILLIC CAPITAL LETTER IOTA + case 0xA648 => 0xA649 // CYRILLIC CAPITAL LETTER DJERV + case 0xA64A => 0xA64B // CYRILLIC CAPITAL LETTER MONOGRAPH UK + case 0xA64C => 0xA64D // CYRILLIC CAPITAL LETTER BROAD OMEGA + case 0xA64E => 0xA64F // CYRILLIC CAPITAL LETTER NEUTRAL YER + case 0xA650 => 0xA651 // CYRILLIC CAPITAL LETTER YERU WITH BACK YER + case 0xA652 => 0xA653 // CYRILLIC CAPITAL LETTER IOTIFIED YAT + case 0xA654 => 0xA655 // CYRILLIC CAPITAL LETTER REVERSED YU + case 0xA656 => 0xA657 // CYRILLIC CAPITAL LETTER IOTIFIED A + case 0xA658 => 0xA659 // CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS + case 0xA65A => 0xA65B // CYRILLIC CAPITAL LETTER BLENDED YUS + case 0xA65C => 0xA65D // CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS + case 0xA65E => 0xA65F // CYRILLIC CAPITAL LETTER YN + case 0xA660 => 0xA661 // CYRILLIC CAPITAL LETTER REVERSED TSE + case 0xA662 => 0xA663 // CYRILLIC CAPITAL LETTER SOFT DE + case 0xA664 => 0xA665 // CYRILLIC CAPITAL LETTER SOFT EL + case 0xA666 => 0xA667 // CYRILLIC CAPITAL LETTER SOFT EM + case 0xA668 => 0xA669 // CYRILLIC CAPITAL LETTER MONOCULAR O + case 0xA66A => 0xA66B // CYRILLIC CAPITAL LETTER BINOCULAR O + case 0xA66C => 0xA66D // CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O + case 0xA680 => 0xA681 // CYRILLIC CAPITAL LETTER DWE + case 0xA682 => 0xA683 // CYRILLIC CAPITAL LETTER DZWE + case 0xA684 => 0xA685 // CYRILLIC CAPITAL LETTER ZHWE + case 0xA686 => 0xA687 // CYRILLIC CAPITAL LETTER CCHE + case 0xA688 => 0xA689 // CYRILLIC CAPITAL LETTER DZZE + case 0xA68A => 0xA68B // CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK + case 0xA68C => 0xA68D // CYRILLIC CAPITAL LETTER TWE + case 0xA68E => 0xA68F // CYRILLIC CAPITAL LETTER TSWE + case 0xA690 => 0xA691 // CYRILLIC CAPITAL LETTER TSSE + case 0xA692 => 0xA693 // CYRILLIC CAPITAL LETTER TCHE + case 0xA694 => 0xA695 // CYRILLIC CAPITAL LETTER HWE + case 0xA696 => 0xA697 // CYRILLIC CAPITAL LETTER SHWE + case 0xA698 => 0xA699 // CYRILLIC 
CAPITAL LETTER DOUBLE O + case 0xA69A => 0xA69B // CYRILLIC CAPITAL LETTER CROSSED O + case 0xA722 => 0xA723 // LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF + case 0xA724 => 0xA725 // LATIN CAPITAL LETTER EGYPTOLOGICAL AIN + case 0xA726 => 0xA727 // LATIN CAPITAL LETTER HENG + case 0xA728 => 0xA729 // LATIN CAPITAL LETTER TZ + case 0xA72A => 0xA72B // LATIN CAPITAL LETTER TRESILLO + case 0xA72C => 0xA72D // LATIN CAPITAL LETTER CUATRILLO + case 0xA72E => 0xA72F // LATIN CAPITAL LETTER CUATRILLO WITH COMMA + case 0xA732 => 0xA733 // LATIN CAPITAL LETTER AA + case 0xA734 => 0xA735 // LATIN CAPITAL LETTER AO + case 0xA736 => 0xA737 // LATIN CAPITAL LETTER AU + case 0xA738 => 0xA739 // LATIN CAPITAL LETTER AV + case 0xA73A => 0xA73B // LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR + case 0xA73C => 0xA73D // LATIN CAPITAL LETTER AY + case 0xA73E => 0xA73F // LATIN CAPITAL LETTER REVERSED C WITH DOT + case 0xA740 => 0xA741 // LATIN CAPITAL LETTER K WITH STROKE + case 0xA742 => 0xA743 // LATIN CAPITAL LETTER K WITH DIAGONAL STROKE + case 0xA744 => 0xA745 // LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE + case 0xA746 => 0xA747 // LATIN CAPITAL LETTER BROKEN L + case 0xA748 => 0xA749 // LATIN CAPITAL LETTER L WITH HIGH STROKE + case 0xA74A => 0xA74B // LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY + case 0xA74C => 0xA74D // LATIN CAPITAL LETTER O WITH LOOP + case 0xA74E => 0xA74F // LATIN CAPITAL LETTER OO + case 0xA750 => 0xA751 // LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER + case 0xA752 => 0xA753 // LATIN CAPITAL LETTER P WITH FLOURISH + case 0xA754 => 0xA755 // LATIN CAPITAL LETTER P WITH SQUIRREL TAIL + case 0xA756 => 0xA757 // LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER + case 0xA758 => 0xA759 // LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE + case 0xA75A => 0xA75B // LATIN CAPITAL LETTER R ROTUNDA + case 0xA75C => 0xA75D // LATIN CAPITAL LETTER RUM ROTUNDA + case 0xA75E => 0xA75F // LATIN CAPITAL LETTER V WITH DIAGONAL STROKE + case 0xA760 => 
0xA761 // LATIN CAPITAL LETTER VY + case 0xA762 => 0xA763 // LATIN CAPITAL LETTER VISIGOTHIC Z + case 0xA764 => 0xA765 // LATIN CAPITAL LETTER THORN WITH STROKE + case 0xA766 => 0xA767 // LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER + case 0xA768 => 0xA769 // LATIN CAPITAL LETTER VEND + case 0xA76A => 0xA76B // LATIN CAPITAL LETTER ET + case 0xA76C => 0xA76D // LATIN CAPITAL LETTER IS + case 0xA76E => 0xA76F // LATIN CAPITAL LETTER CON + case 0xA779 => 0xA77A // LATIN CAPITAL LETTER INSULAR D + case 0xA77B => 0xA77C // LATIN CAPITAL LETTER INSULAR F + case 0xA77D => 0x1D79 // LATIN CAPITAL LETTER INSULAR G + case 0xA77E => 0xA77F // LATIN CAPITAL LETTER TURNED INSULAR G + case 0xA780 => 0xA781 // LATIN CAPITAL LETTER TURNED L + case 0xA782 => 0xA783 // LATIN CAPITAL LETTER INSULAR R + case 0xA784 => 0xA785 // LATIN CAPITAL LETTER INSULAR S + case 0xA786 => 0xA787 // LATIN CAPITAL LETTER INSULAR T + case 0xA78B => 0xA78C // LATIN CAPITAL LETTER SALTILLO + case 0xA78D => 0x0265 // LATIN CAPITAL LETTER TURNED H + case 0xA790 => 0xA791 // LATIN CAPITAL LETTER N WITH DESCENDER + case 0xA792 => 0xA793 // LATIN CAPITAL LETTER C WITH BAR + case 0xA796 => 0xA797 // LATIN CAPITAL LETTER B WITH FLOURISH + case 0xA798 => 0xA799 // LATIN CAPITAL LETTER F WITH STROKE + case 0xA79A => 0xA79B // LATIN CAPITAL LETTER VOLAPUK AE + case 0xA79C => 0xA79D // LATIN CAPITAL LETTER VOLAPUK OE + case 0xA79E => 0xA79F // LATIN CAPITAL LETTER VOLAPUK UE + case 0xA7A0 => 0xA7A1 // LATIN CAPITAL LETTER G WITH OBLIQUE STROKE + case 0xA7A2 => 0xA7A3 // LATIN CAPITAL LETTER K WITH OBLIQUE STROKE + case 0xA7A4 => 0xA7A5 // LATIN CAPITAL LETTER N WITH OBLIQUE STROKE + case 0xA7A6 => 0xA7A7 // LATIN CAPITAL LETTER R WITH OBLIQUE STROKE + case 0xA7A8 => 0xA7A9 // LATIN CAPITAL LETTER S WITH OBLIQUE STROKE + case 0xA7AA => 0x0266 // LATIN CAPITAL LETTER H WITH HOOK + case 0xA7AB => 0x025C // LATIN CAPITAL LETTER REVERSED OPEN E + case 0xA7AC => 0x0261 // LATIN CAPITAL LETTER SCRIPT G + 
case 0xA7AD => 0x026C // LATIN CAPITAL LETTER L WITH BELT + case 0xA7AE => 0x026A // LATIN CAPITAL LETTER SMALL CAPITAL I + case 0xA7B0 => 0x029E // LATIN CAPITAL LETTER TURNED K + case 0xA7B1 => 0x0287 // LATIN CAPITAL LETTER TURNED T + case 0xA7B2 => 0x029D // LATIN CAPITAL LETTER J WITH CROSSED-TAIL + case 0xA7B3 => 0xAB53 // LATIN CAPITAL LETTER CHI + case 0xA7B4 => 0xA7B5 // LATIN CAPITAL LETTER BETA + case 0xA7B6 => 0xA7B7 // LATIN CAPITAL LETTER OMEGA + case 0xA7B8 => 0xA7B9 // LATIN CAPITAL LETTER U WITH STROKE + case 0xA7BA => 0xA7BB // LATIN CAPITAL LETTER GLOTTAL A + case 0xA7BC => 0xA7BD // LATIN CAPITAL LETTER GLOTTAL I + case 0xA7BE => 0xA7BF // LATIN CAPITAL LETTER GLOTTAL U + case 0xA7C0 => 0xA7C1 // LATIN CAPITAL LETTER OLD POLISH O + case 0xA7C2 => 0xA7C3 // LATIN CAPITAL LETTER ANGLICANA W + case 0xA7C4 => 0xA794 // LATIN CAPITAL LETTER C WITH PALATAL HOOK + case 0xA7C5 => 0x0282 // LATIN CAPITAL LETTER S WITH HOOK + case 0xA7C6 => 0x1D8E // LATIN CAPITAL LETTER Z WITH PALATAL HOOK + case 0xA7C7 => 0xA7C8 // LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY + case 0xA7C9 => 0xA7CA // LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY + case 0xA7D0 => 0xA7D1 // LATIN CAPITAL LETTER CLOSED INSULAR G + case 0xA7D6 => 0xA7D7 // LATIN CAPITAL LETTER MIDDLE SCOTS S + case 0xA7D8 => 0xA7D9 // LATIN CAPITAL LETTER SIGMOID S + case 0xA7F5 => 0xA7F6 // LATIN CAPITAL LETTER REVERSED HALF H + case 0xAB70 => 0x13A0 // CHEROKEE SMALL LETTER A + case 0xAB71 => 0x13A1 // CHEROKEE SMALL LETTER E + case 0xAB72 => 0x13A2 // CHEROKEE SMALL LETTER I + case 0xAB73 => 0x13A3 // CHEROKEE SMALL LETTER O + case 0xAB74 => 0x13A4 // CHEROKEE SMALL LETTER U + case 0xAB75 => 0x13A5 // CHEROKEE SMALL LETTER V + case 0xAB76 => 0x13A6 // CHEROKEE SMALL LETTER GA + case 0xAB77 => 0x13A7 // CHEROKEE SMALL LETTER KA + case 0xAB78 => 0x13A8 // CHEROKEE SMALL LETTER GE + case 0xAB79 => 0x13A9 // CHEROKEE SMALL LETTER GI + case 0xAB7A => 0x13AA // CHEROKEE SMALL LETTER GO + case 0xAB7B 
=> 0x13AB // CHEROKEE SMALL LETTER GU + case 0xAB7C => 0x13AC // CHEROKEE SMALL LETTER GV + case 0xAB7D => 0x13AD // CHEROKEE SMALL LETTER HA + case 0xAB7E => 0x13AE // CHEROKEE SMALL LETTER HE + case 0xAB7F => 0x13AF // CHEROKEE SMALL LETTER HI + case 0xAB80 => 0x13B0 // CHEROKEE SMALL LETTER HO + case 0xAB81 => 0x13B1 // CHEROKEE SMALL LETTER HU + case 0xAB82 => 0x13B2 // CHEROKEE SMALL LETTER HV + case 0xAB83 => 0x13B3 // CHEROKEE SMALL LETTER LA + case 0xAB84 => 0x13B4 // CHEROKEE SMALL LETTER LE + case 0xAB85 => 0x13B5 // CHEROKEE SMALL LETTER LI + case 0xAB86 => 0x13B6 // CHEROKEE SMALL LETTER LO + case 0xAB87 => 0x13B7 // CHEROKEE SMALL LETTER LU + case 0xAB88 => 0x13B8 // CHEROKEE SMALL LETTER LV + case 0xAB89 => 0x13B9 // CHEROKEE SMALL LETTER MA + case 0xAB8A => 0x13BA // CHEROKEE SMALL LETTER ME + case 0xAB8B => 0x13BB // CHEROKEE SMALL LETTER MI + case 0xAB8C => 0x13BC // CHEROKEE SMALL LETTER MO + case 0xAB8D => 0x13BD // CHEROKEE SMALL LETTER MU + case 0xAB8E => 0x13BE // CHEROKEE SMALL LETTER NA + case 0xAB8F => 0x13BF // CHEROKEE SMALL LETTER HNA + case 0xAB90 => 0x13C0 // CHEROKEE SMALL LETTER NAH + case 0xAB91 => 0x13C1 // CHEROKEE SMALL LETTER NE + case 0xAB92 => 0x13C2 // CHEROKEE SMALL LETTER NI + case 0xAB93 => 0x13C3 // CHEROKEE SMALL LETTER NO + case 0xAB94 => 0x13C4 // CHEROKEE SMALL LETTER NU + case 0xAB95 => 0x13C5 // CHEROKEE SMALL LETTER NV + case 0xAB96 => 0x13C6 // CHEROKEE SMALL LETTER QUA + case 0xAB97 => 0x13C7 // CHEROKEE SMALL LETTER QUE + case 0xAB98 => 0x13C8 // CHEROKEE SMALL LETTER QUI + case 0xAB99 => 0x13C9 // CHEROKEE SMALL LETTER QUO + case 0xAB9A => 0x13CA // CHEROKEE SMALL LETTER QUU + case 0xAB9B => 0x13CB // CHEROKEE SMALL LETTER QUV + case 0xAB9C => 0x13CC // CHEROKEE SMALL LETTER SA + case 0xAB9D => 0x13CD // CHEROKEE SMALL LETTER S + case 0xAB9E => 0x13CE // CHEROKEE SMALL LETTER SE + case 0xAB9F => 0x13CF // CHEROKEE SMALL LETTER SI + case 0xABA0 => 0x13D0 // CHEROKEE SMALL LETTER SO + case 0xABA1 => 0x13D1 // 
CHEROKEE SMALL LETTER SU + case 0xABA2 => 0x13D2 // CHEROKEE SMALL LETTER SV + case 0xABA3 => 0x13D3 // CHEROKEE SMALL LETTER DA + case 0xABA4 => 0x13D4 // CHEROKEE SMALL LETTER TA + case 0xABA5 => 0x13D5 // CHEROKEE SMALL LETTER DE + case 0xABA6 => 0x13D6 // CHEROKEE SMALL LETTER TE + case 0xABA7 => 0x13D7 // CHEROKEE SMALL LETTER DI + case 0xABA8 => 0x13D8 // CHEROKEE SMALL LETTER TI + case 0xABA9 => 0x13D9 // CHEROKEE SMALL LETTER DO + case 0xABAA => 0x13DA // CHEROKEE SMALL LETTER DU + case 0xABAB => 0x13DB // CHEROKEE SMALL LETTER DV + case 0xABAC => 0x13DC // CHEROKEE SMALL LETTER DLA + case 0xABAD => 0x13DD // CHEROKEE SMALL LETTER TLA + case 0xABAE => 0x13DE // CHEROKEE SMALL LETTER TLE + case 0xABAF => 0x13DF // CHEROKEE SMALL LETTER TLI + case 0xABB0 => 0x13E0 // CHEROKEE SMALL LETTER TLO + case 0xABB1 => 0x13E1 // CHEROKEE SMALL LETTER TLU + case 0xABB2 => 0x13E2 // CHEROKEE SMALL LETTER TLV + case 0xABB3 => 0x13E3 // CHEROKEE SMALL LETTER TSA + case 0xABB4 => 0x13E4 // CHEROKEE SMALL LETTER TSE + case 0xABB5 => 0x13E5 // CHEROKEE SMALL LETTER TSI + case 0xABB6 => 0x13E6 // CHEROKEE SMALL LETTER TSO + case 0xABB7 => 0x13E7 // CHEROKEE SMALL LETTER TSU + case 0xABB8 => 0x13E8 // CHEROKEE SMALL LETTER TSV + case 0xABB9 => 0x13E9 // CHEROKEE SMALL LETTER WA + case 0xABBA => 0x13EA // CHEROKEE SMALL LETTER WE + case 0xABBB => 0x13EB // CHEROKEE SMALL LETTER WI + case 0xABBC => 0x13EC // CHEROKEE SMALL LETTER WO + case 0xABBD => 0x13ED // CHEROKEE SMALL LETTER WU + case 0xABBE => 0x13EE // CHEROKEE SMALL LETTER WV + case 0xABBF => 0x13EF // CHEROKEE SMALL LETTER YA + case 0xFF21 => 0xFF41 // FULLWIDTH LATIN CAPITAL LETTER A + case 0xFF22 => 0xFF42 // FULLWIDTH LATIN CAPITAL LETTER B + case 0xFF23 => 0xFF43 // FULLWIDTH LATIN CAPITAL LETTER C + case 0xFF24 => 0xFF44 // FULLWIDTH LATIN CAPITAL LETTER D + case 0xFF25 => 0xFF45 // FULLWIDTH LATIN CAPITAL LETTER E + case 0xFF26 => 0xFF46 // FULLWIDTH LATIN CAPITAL LETTER F + case 0xFF27 => 0xFF47 // FULLWIDTH 
LATIN CAPITAL LETTER G + case 0xFF28 => 0xFF48 // FULLWIDTH LATIN CAPITAL LETTER H + case 0xFF29 => 0xFF49 // FULLWIDTH LATIN CAPITAL LETTER I + case 0xFF2A => 0xFF4A // FULLWIDTH LATIN CAPITAL LETTER J + case 0xFF2B => 0xFF4B // FULLWIDTH LATIN CAPITAL LETTER K + case 0xFF2C => 0xFF4C // FULLWIDTH LATIN CAPITAL LETTER L + case 0xFF2D => 0xFF4D // FULLWIDTH LATIN CAPITAL LETTER M + case 0xFF2E => 0xFF4E // FULLWIDTH LATIN CAPITAL LETTER N + case 0xFF2F => 0xFF4F // FULLWIDTH LATIN CAPITAL LETTER O + case 0xFF30 => 0xFF50 // FULLWIDTH LATIN CAPITAL LETTER P + case 0xFF31 => 0xFF51 // FULLWIDTH LATIN CAPITAL LETTER Q + case 0xFF32 => 0xFF52 // FULLWIDTH LATIN CAPITAL LETTER R + case 0xFF33 => 0xFF53 // FULLWIDTH LATIN CAPITAL LETTER S + case 0xFF34 => 0xFF54 // FULLWIDTH LATIN CAPITAL LETTER T + case 0xFF35 => 0xFF55 // FULLWIDTH LATIN CAPITAL LETTER U + case 0xFF36 => 0xFF56 // FULLWIDTH LATIN CAPITAL LETTER V + case 0xFF37 => 0xFF57 // FULLWIDTH LATIN CAPITAL LETTER W + case 0xFF38 => 0xFF58 // FULLWIDTH LATIN CAPITAL LETTER X + case 0xFF39 => 0xFF59 // FULLWIDTH LATIN CAPITAL LETTER Y + case 0xFF3A => 0xFF5A // FULLWIDTH LATIN CAPITAL LETTER Z + case 0x10400 => 0x10428 // DESERET CAPITAL LETTER LONG I + case 0x10401 => 0x10429 // DESERET CAPITAL LETTER LONG E + case 0x10402 => 0x1042A // DESERET CAPITAL LETTER LONG A + case 0x10403 => 0x1042B // DESERET CAPITAL LETTER LONG AH + case 0x10404 => 0x1042C // DESERET CAPITAL LETTER LONG O + case 0x10405 => 0x1042D // DESERET CAPITAL LETTER LONG OO + case 0x10406 => 0x1042E // DESERET CAPITAL LETTER SHORT I + case 0x10407 => 0x1042F // DESERET CAPITAL LETTER SHORT E + case 0x10408 => 0x10430 // DESERET CAPITAL LETTER SHORT A + case 0x10409 => 0x10431 // DESERET CAPITAL LETTER SHORT AH + case 0x1040A => 0x10432 // DESERET CAPITAL LETTER SHORT O + case 0x1040B => 0x10433 // DESERET CAPITAL LETTER SHORT OO + case 0x1040C => 0x10434 // DESERET CAPITAL LETTER AY + case 0x1040D => 0x10435 // DESERET CAPITAL LETTER OW + case 
0x1040E => 0x10436 // DESERET CAPITAL LETTER WU + case 0x1040F => 0x10437 // DESERET CAPITAL LETTER YEE + case 0x10410 => 0x10438 // DESERET CAPITAL LETTER H + case 0x10411 => 0x10439 // DESERET CAPITAL LETTER PEE + case 0x10412 => 0x1043A // DESERET CAPITAL LETTER BEE + case 0x10413 => 0x1043B // DESERET CAPITAL LETTER TEE + case 0x10414 => 0x1043C // DESERET CAPITAL LETTER DEE + case 0x10415 => 0x1043D // DESERET CAPITAL LETTER CHEE + case 0x10416 => 0x1043E // DESERET CAPITAL LETTER JEE + case 0x10417 => 0x1043F // DESERET CAPITAL LETTER KAY + case 0x10418 => 0x10440 // DESERET CAPITAL LETTER GAY + case 0x10419 => 0x10441 // DESERET CAPITAL LETTER EF + case 0x1041A => 0x10442 // DESERET CAPITAL LETTER VEE + case 0x1041B => 0x10443 // DESERET CAPITAL LETTER ETH + case 0x1041C => 0x10444 // DESERET CAPITAL LETTER THEE + case 0x1041D => 0x10445 // DESERET CAPITAL LETTER ES + case 0x1041E => 0x10446 // DESERET CAPITAL LETTER ZEE + case 0x1041F => 0x10447 // DESERET CAPITAL LETTER ESH + case 0x10420 => 0x10448 // DESERET CAPITAL LETTER ZHEE + case 0x10421 => 0x10449 // DESERET CAPITAL LETTER ER + case 0x10422 => 0x1044A // DESERET CAPITAL LETTER EL + case 0x10423 => 0x1044B // DESERET CAPITAL LETTER EM + case 0x10424 => 0x1044C // DESERET CAPITAL LETTER EN + case 0x10425 => 0x1044D // DESERET CAPITAL LETTER ENG + case 0x10426 => 0x1044E // DESERET CAPITAL LETTER OI + case 0x10427 => 0x1044F // DESERET CAPITAL LETTER EW + case 0x104B0 => 0x104D8 // OSAGE CAPITAL LETTER A + case 0x104B1 => 0x104D9 // OSAGE CAPITAL LETTER AI + case 0x104B2 => 0x104DA // OSAGE CAPITAL LETTER AIN + case 0x104B3 => 0x104DB // OSAGE CAPITAL LETTER AH + case 0x104B4 => 0x104DC // OSAGE CAPITAL LETTER BRA + case 0x104B5 => 0x104DD // OSAGE CAPITAL LETTER CHA + case 0x104B6 => 0x104DE // OSAGE CAPITAL LETTER EHCHA + case 0x104B7 => 0x104DF // OSAGE CAPITAL LETTER E + case 0x104B8 => 0x104E0 // OSAGE CAPITAL LETTER EIN + case 0x104B9 => 0x104E1 // OSAGE CAPITAL LETTER HA + case 0x104BA => 
0x104E2 // OSAGE CAPITAL LETTER HYA + case 0x104BB => 0x104E3 // OSAGE CAPITAL LETTER I + case 0x104BC => 0x104E4 // OSAGE CAPITAL LETTER KA + case 0x104BD => 0x104E5 // OSAGE CAPITAL LETTER EHKA + case 0x104BE => 0x104E6 // OSAGE CAPITAL LETTER KYA + case 0x104BF => 0x104E7 // OSAGE CAPITAL LETTER LA + case 0x104C0 => 0x104E8 // OSAGE CAPITAL LETTER MA + case 0x104C1 => 0x104E9 // OSAGE CAPITAL LETTER NA + case 0x104C2 => 0x104EA // OSAGE CAPITAL LETTER O + case 0x104C3 => 0x104EB // OSAGE CAPITAL LETTER OIN + case 0x104C4 => 0x104EC // OSAGE CAPITAL LETTER PA + case 0x104C5 => 0x104ED // OSAGE CAPITAL LETTER EHPA + case 0x104C6 => 0x104EE // OSAGE CAPITAL LETTER SA + case 0x104C7 => 0x104EF // OSAGE CAPITAL LETTER SHA + case 0x104C8 => 0x104F0 // OSAGE CAPITAL LETTER TA + case 0x104C9 => 0x104F1 // OSAGE CAPITAL LETTER EHTA + case 0x104CA => 0x104F2 // OSAGE CAPITAL LETTER TSA + case 0x104CB => 0x104F3 // OSAGE CAPITAL LETTER EHTSA + case 0x104CC => 0x104F4 // OSAGE CAPITAL LETTER TSHA + case 0x104CD => 0x104F5 // OSAGE CAPITAL LETTER DHA + case 0x104CE => 0x104F6 // OSAGE CAPITAL LETTER U + case 0x104CF => 0x104F7 // OSAGE CAPITAL LETTER WA + case 0x104D0 => 0x104F8 // OSAGE CAPITAL LETTER KHA + case 0x104D1 => 0x104F9 // OSAGE CAPITAL LETTER GHA + case 0x104D2 => 0x104FA // OSAGE CAPITAL LETTER ZA + case 0x104D3 => 0x104FB // OSAGE CAPITAL LETTER ZHA + case 0x10570 => 0x10597 // VITHKUQI CAPITAL LETTER A + case 0x10571 => 0x10598 // VITHKUQI CAPITAL LETTER BBE + case 0x10572 => 0x10599 // VITHKUQI CAPITAL LETTER BE + case 0x10573 => 0x1059A // VITHKUQI CAPITAL LETTER CE + case 0x10574 => 0x1059B // VITHKUQI CAPITAL LETTER CHE + case 0x10575 => 0x1059C // VITHKUQI CAPITAL LETTER DE + case 0x10576 => 0x1059D // VITHKUQI CAPITAL LETTER DHE + case 0x10577 => 0x1059E // VITHKUQI CAPITAL LETTER EI + case 0x10578 => 0x1059F // VITHKUQI CAPITAL LETTER E + case 0x10579 => 0x105A0 // VITHKUQI CAPITAL LETTER FE + case 0x1057A => 0x105A1 // VITHKUQI CAPITAL LETTER GA + 
case 0x1057C => 0x105A3 // VITHKUQI CAPITAL LETTER HA + case 0x1057D => 0x105A4 // VITHKUQI CAPITAL LETTER HHA + case 0x1057E => 0x105A5 // VITHKUQI CAPITAL LETTER I + case 0x1057F => 0x105A6 // VITHKUQI CAPITAL LETTER IJE + case 0x10580 => 0x105A7 // VITHKUQI CAPITAL LETTER JE + case 0x10581 => 0x105A8 // VITHKUQI CAPITAL LETTER KA + case 0x10582 => 0x105A9 // VITHKUQI CAPITAL LETTER LA + case 0x10583 => 0x105AA // VITHKUQI CAPITAL LETTER LLA + case 0x10584 => 0x105AB // VITHKUQI CAPITAL LETTER ME + case 0x10585 => 0x105AC // VITHKUQI CAPITAL LETTER NE + case 0x10586 => 0x105AD // VITHKUQI CAPITAL LETTER NJE + case 0x10587 => 0x105AE // VITHKUQI CAPITAL LETTER O + case 0x10588 => 0x105AF // VITHKUQI CAPITAL LETTER PE + case 0x10589 => 0x105B0 // VITHKUQI CAPITAL LETTER QA + case 0x1058A => 0x105B1 // VITHKUQI CAPITAL LETTER RE + case 0x1058C => 0x105B3 // VITHKUQI CAPITAL LETTER SE + case 0x1058D => 0x105B4 // VITHKUQI CAPITAL LETTER SHE + case 0x1058E => 0x105B5 // VITHKUQI CAPITAL LETTER TE + case 0x1058F => 0x105B6 // VITHKUQI CAPITAL LETTER THE + case 0x10590 => 0x105B7 // VITHKUQI CAPITAL LETTER U + case 0x10591 => 0x105B8 // VITHKUQI CAPITAL LETTER VE + case 0x10592 => 0x105B9 // VITHKUQI CAPITAL LETTER XE + case 0x10594 => 0x105BB // VITHKUQI CAPITAL LETTER Y + case 0x10595 => 0x105BC // VITHKUQI CAPITAL LETTER ZE + case 0x10C80 => 0x10CC0 // OLD HUNGARIAN CAPITAL LETTER A + case 0x10C81 => 0x10CC1 // OLD HUNGARIAN CAPITAL LETTER AA + case 0x10C82 => 0x10CC2 // OLD HUNGARIAN CAPITAL LETTER EB + case 0x10C83 => 0x10CC3 // OLD HUNGARIAN CAPITAL LETTER AMB + case 0x10C84 => 0x10CC4 // OLD HUNGARIAN CAPITAL LETTER EC + case 0x10C85 => 0x10CC5 // OLD HUNGARIAN CAPITAL LETTER ENC + case 0x10C86 => 0x10CC6 // OLD HUNGARIAN CAPITAL LETTER ECS + case 0x10C87 => 0x10CC7 // OLD HUNGARIAN CAPITAL LETTER ED + case 0x10C88 => 0x10CC8 // OLD HUNGARIAN CAPITAL LETTER AND + case 0x10C89 => 0x10CC9 // OLD HUNGARIAN CAPITAL LETTER E + case 0x10C8A => 0x10CCA // OLD HUNGARIAN 
CAPITAL LETTER CLOSE E + case 0x10C8B => 0x10CCB // OLD HUNGARIAN CAPITAL LETTER EE + case 0x10C8C => 0x10CCC // OLD HUNGARIAN CAPITAL LETTER EF + case 0x10C8D => 0x10CCD // OLD HUNGARIAN CAPITAL LETTER EG + case 0x10C8E => 0x10CCE // OLD HUNGARIAN CAPITAL LETTER EGY + case 0x10C8F => 0x10CCF // OLD HUNGARIAN CAPITAL LETTER EH + case 0x10C90 => 0x10CD0 // OLD HUNGARIAN CAPITAL LETTER I + case 0x10C91 => 0x10CD1 // OLD HUNGARIAN CAPITAL LETTER II + case 0x10C92 => 0x10CD2 // OLD HUNGARIAN CAPITAL LETTER EJ + case 0x10C93 => 0x10CD3 // OLD HUNGARIAN CAPITAL LETTER EK + case 0x10C94 => 0x10CD4 // OLD HUNGARIAN CAPITAL LETTER AK + case 0x10C95 => 0x10CD5 // OLD HUNGARIAN CAPITAL LETTER UNK + case 0x10C96 => 0x10CD6 // OLD HUNGARIAN CAPITAL LETTER EL + case 0x10C97 => 0x10CD7 // OLD HUNGARIAN CAPITAL LETTER ELY + case 0x10C98 => 0x10CD8 // OLD HUNGARIAN CAPITAL LETTER EM + case 0x10C99 => 0x10CD9 // OLD HUNGARIAN CAPITAL LETTER EN + case 0x10C9A => 0x10CDA // OLD HUNGARIAN CAPITAL LETTER ENY + case 0x10C9B => 0x10CDB // OLD HUNGARIAN CAPITAL LETTER O + case 0x10C9C => 0x10CDC // OLD HUNGARIAN CAPITAL LETTER OO + case 0x10C9D => 0x10CDD // OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE + case 0x10C9E => 0x10CDE // OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE + case 0x10C9F => 0x10CDF // OLD HUNGARIAN CAPITAL LETTER OEE + case 0x10CA0 => 0x10CE0 // OLD HUNGARIAN CAPITAL LETTER EP + case 0x10CA1 => 0x10CE1 // OLD HUNGARIAN CAPITAL LETTER EMP + case 0x10CA2 => 0x10CE2 // OLD HUNGARIAN CAPITAL LETTER ER + case 0x10CA3 => 0x10CE3 // OLD HUNGARIAN CAPITAL LETTER SHORT ER + case 0x10CA4 => 0x10CE4 // OLD HUNGARIAN CAPITAL LETTER ES + case 0x10CA5 => 0x10CE5 // OLD HUNGARIAN CAPITAL LETTER ESZ + case 0x10CA6 => 0x10CE6 // OLD HUNGARIAN CAPITAL LETTER ET + case 0x10CA7 => 0x10CE7 // OLD HUNGARIAN CAPITAL LETTER ENT + case 0x10CA8 => 0x10CE8 // OLD HUNGARIAN CAPITAL LETTER ETY + case 0x10CA9 => 0x10CE9 // OLD HUNGARIAN CAPITAL LETTER ECH + case 0x10CAA => 0x10CEA // OLD HUNGARIAN CAPITAL 
LETTER U + case 0x10CAB => 0x10CEB // OLD HUNGARIAN CAPITAL LETTER UU + case 0x10CAC => 0x10CEC // OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE + case 0x10CAD => 0x10CED // OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE + case 0x10CAE => 0x10CEE // OLD HUNGARIAN CAPITAL LETTER EV + case 0x10CAF => 0x10CEF // OLD HUNGARIAN CAPITAL LETTER EZ + case 0x10CB0 => 0x10CF0 // OLD HUNGARIAN CAPITAL LETTER EZS + case 0x10CB1 => 0x10CF1 // OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN + case 0x10CB2 => 0x10CF2 // OLD HUNGARIAN CAPITAL LETTER US + case 0x118A0 => 0x118C0 // WARANG CITI CAPITAL LETTER NGAA + case 0x118A1 => 0x118C1 // WARANG CITI CAPITAL LETTER A + case 0x118A2 => 0x118C2 // WARANG CITI CAPITAL LETTER WI + case 0x118A3 => 0x118C3 // WARANG CITI CAPITAL LETTER YU + case 0x118A4 => 0x118C4 // WARANG CITI CAPITAL LETTER YA + case 0x118A5 => 0x118C5 // WARANG CITI CAPITAL LETTER YO + case 0x118A6 => 0x118C6 // WARANG CITI CAPITAL LETTER II + case 0x118A7 => 0x118C7 // WARANG CITI CAPITAL LETTER UU + case 0x118A8 => 0x118C8 // WARANG CITI CAPITAL LETTER E + case 0x118A9 => 0x118C9 // WARANG CITI CAPITAL LETTER O + case 0x118AA => 0x118CA // WARANG CITI CAPITAL LETTER ANG + case 0x118AB => 0x118CB // WARANG CITI CAPITAL LETTER GA + case 0x118AC => 0x118CC // WARANG CITI CAPITAL LETTER KO + case 0x118AD => 0x118CD // WARANG CITI CAPITAL LETTER ENY + case 0x118AE => 0x118CE // WARANG CITI CAPITAL LETTER YUJ + case 0x118AF => 0x118CF // WARANG CITI CAPITAL LETTER UC + case 0x118B0 => 0x118D0 // WARANG CITI CAPITAL LETTER ENN + case 0x118B1 => 0x118D1 // WARANG CITI CAPITAL LETTER ODD + case 0x118B2 => 0x118D2 // WARANG CITI CAPITAL LETTER TTE + case 0x118B3 => 0x118D3 // WARANG CITI CAPITAL LETTER NUNG + case 0x118B4 => 0x118D4 // WARANG CITI CAPITAL LETTER DA + case 0x118B5 => 0x118D5 // WARANG CITI CAPITAL LETTER AT + case 0x118B6 => 0x118D6 // WARANG CITI CAPITAL LETTER AM + case 0x118B7 => 0x118D7 // WARANG CITI CAPITAL LETTER BU + case 0x118B8 => 0x118D8 // WARANG CITI 
CAPITAL LETTER PU + case 0x118B9 => 0x118D9 // WARANG CITI CAPITAL LETTER HIYO + case 0x118BA => 0x118DA // WARANG CITI CAPITAL LETTER HOLO + case 0x118BB => 0x118DB // WARANG CITI CAPITAL LETTER HORR + case 0x118BC => 0x118DC // WARANG CITI CAPITAL LETTER HAR + case 0x118BD => 0x118DD // WARANG CITI CAPITAL LETTER SSUU + case 0x118BE => 0x118DE // WARANG CITI CAPITAL LETTER SII + case 0x118BF => 0x118DF // WARANG CITI CAPITAL LETTER VIYO + case 0x16E40 => 0x16E60 // MEDEFAIDRIN CAPITAL LETTER M + case 0x16E41 => 0x16E61 // MEDEFAIDRIN CAPITAL LETTER S + case 0x16E42 => 0x16E62 // MEDEFAIDRIN CAPITAL LETTER V + case 0x16E43 => 0x16E63 // MEDEFAIDRIN CAPITAL LETTER W + case 0x16E44 => 0x16E64 // MEDEFAIDRIN CAPITAL LETTER ATIU + case 0x16E45 => 0x16E65 // MEDEFAIDRIN CAPITAL LETTER Z + case 0x16E46 => 0x16E66 // MEDEFAIDRIN CAPITAL LETTER KP + case 0x16E47 => 0x16E67 // MEDEFAIDRIN CAPITAL LETTER P + case 0x16E48 => 0x16E68 // MEDEFAIDRIN CAPITAL LETTER T + case 0x16E49 => 0x16E69 // MEDEFAIDRIN CAPITAL LETTER G + case 0x16E4A => 0x16E6A // MEDEFAIDRIN CAPITAL LETTER F + case 0x16E4B => 0x16E6B // MEDEFAIDRIN CAPITAL LETTER I + case 0x16E4C => 0x16E6C // MEDEFAIDRIN CAPITAL LETTER K + case 0x16E4D => 0x16E6D // MEDEFAIDRIN CAPITAL LETTER A + case 0x16E4E => 0x16E6E // MEDEFAIDRIN CAPITAL LETTER J + case 0x16E4F => 0x16E6F // MEDEFAIDRIN CAPITAL LETTER E + case 0x16E50 => 0x16E70 // MEDEFAIDRIN CAPITAL LETTER B + case 0x16E51 => 0x16E71 // MEDEFAIDRIN CAPITAL LETTER C + case 0x16E52 => 0x16E72 // MEDEFAIDRIN CAPITAL LETTER U + case 0x16E53 => 0x16E73 // MEDEFAIDRIN CAPITAL LETTER YU + case 0x16E54 => 0x16E74 // MEDEFAIDRIN CAPITAL LETTER L + case 0x16E55 => 0x16E75 // MEDEFAIDRIN CAPITAL LETTER Q + case 0x16E56 => 0x16E76 // MEDEFAIDRIN CAPITAL LETTER HP + case 0x16E57 => 0x16E77 // MEDEFAIDRIN CAPITAL LETTER NY + case 0x16E58 => 0x16E78 // MEDEFAIDRIN CAPITAL LETTER X + case 0x16E59 => 0x16E79 // MEDEFAIDRIN CAPITAL LETTER D + case 0x16E5A => 0x16E7A // MEDEFAIDRIN 
CAPITAL LETTER OE + case 0x16E5B => 0x16E7B // MEDEFAIDRIN CAPITAL LETTER N + case 0x16E5C => 0x16E7C // MEDEFAIDRIN CAPITAL LETTER R + case 0x16E5D => 0x16E7D // MEDEFAIDRIN CAPITAL LETTER O + case 0x16E5E => 0x16E7E // MEDEFAIDRIN CAPITAL LETTER AI + case 0x16E5F => 0x16E7F // MEDEFAIDRIN CAPITAL LETTER Y + case 0x1E900 => 0x1E922 // ADLAM CAPITAL LETTER ALIF + case 0x1E901 => 0x1E923 // ADLAM CAPITAL LETTER DAALI + case 0x1E902 => 0x1E924 // ADLAM CAPITAL LETTER LAAM + case 0x1E903 => 0x1E925 // ADLAM CAPITAL LETTER MIIM + case 0x1E904 => 0x1E926 // ADLAM CAPITAL LETTER BA + case 0x1E905 => 0x1E927 // ADLAM CAPITAL LETTER SINNYIIYHE + case 0x1E906 => 0x1E928 // ADLAM CAPITAL LETTER PE + case 0x1E907 => 0x1E929 // ADLAM CAPITAL LETTER BHE + case 0x1E908 => 0x1E92A // ADLAM CAPITAL LETTER RA + case 0x1E909 => 0x1E92B // ADLAM CAPITAL LETTER E + case 0x1E90A => 0x1E92C // ADLAM CAPITAL LETTER FA + case 0x1E90B => 0x1E92D // ADLAM CAPITAL LETTER I + case 0x1E90C => 0x1E92E // ADLAM CAPITAL LETTER O + case 0x1E90D => 0x1E92F // ADLAM CAPITAL LETTER DHA + case 0x1E90E => 0x1E930 // ADLAM CAPITAL LETTER YHE + case 0x1E90F => 0x1E931 // ADLAM CAPITAL LETTER WAW + case 0x1E910 => 0x1E932 // ADLAM CAPITAL LETTER NUN + case 0x1E911 => 0x1E933 // ADLAM CAPITAL LETTER KAF + case 0x1E912 => 0x1E934 // ADLAM CAPITAL LETTER YA + case 0x1E913 => 0x1E935 // ADLAM CAPITAL LETTER U + case 0x1E914 => 0x1E936 // ADLAM CAPITAL LETTER JIIM + case 0x1E915 => 0x1E937 // ADLAM CAPITAL LETTER CHI + case 0x1E916 => 0x1E938 // ADLAM CAPITAL LETTER HA + case 0x1E917 => 0x1E939 // ADLAM CAPITAL LETTER QAAF + case 0x1E918 => 0x1E93A // ADLAM CAPITAL LETTER GA + case 0x1E919 => 0x1E93B // ADLAM CAPITAL LETTER NYA + case 0x1E91A => 0x1E93C // ADLAM CAPITAL LETTER TU + case 0x1E91B => 0x1E93D // ADLAM CAPITAL LETTER NHA + case 0x1E91C => 0x1E93E // ADLAM CAPITAL LETTER VA + case 0x1E91D => 0x1E93F // ADLAM CAPITAL LETTER KHA + case 0x1E91E => 0x1E940 // ADLAM CAPITAL LETTER GBE + case 0x1E91F => 
0x1E941 // ADLAM CAPITAL LETTER ZAL + case 0x1E920 => 0x1E942 // ADLAM CAPITAL LETTER KPO + case 0x1E921 => 0x1E943 // ADLAM CAPITAL LETTER SHA + case _ => codePoint // All others map to themselves + } +} diff --git a/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala b/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala index c7ea175..288e9aa 100644 --- a/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala +++ b/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala @@ -18,8 +18,11 @@ package org.typelevel.ci package testing import java.util.Locale -import org.scalacheck.{Arbitrary, Cogen, Gen} import org.scalacheck.Arbitrary.arbitrary +import org.scalacheck.{Arbitrary, Cogen, Gen, Shrink} +import scala.annotation.nowarn +import scala.annotation.tailrec +import scala.collection.immutable.BitSet object arbitraries { implicit val arbitraryForOrgTypelevelCiCIString: Arbitrary[CIString] = { @@ -34,9 +37,72 @@ object arbitraries { val lowers = chars.filter(_.isLower) val uppers = chars.filter(_.isUpper) val genChar = Gen.oneOf(weirdCharFolds, weirdStringFolds, lowers, uppers, arbitrary[Char]) - Arbitrary(Gen.listOf(genChar).map(cs => CIString(cs.mkString))) + + val surrogatePairStrings: Gen[String] = + // Any Unicode code point >= 0x10000 is represented on the JVM by a + // surrogate pair of two Char values, so each generated String has length 2. 
+ Gen.choose(0x10000, 0x10ffff).map(codePoint => + new String(Array(codePoint), 0, 1) + ) + + val titleCaseStrings: Gen[String] = { + @tailrec + def loop(acc: BitSet, codePoint: Int): BitSet = + if (codePoint > 0x10ffff) { + acc + } else { + if (Character.isTitleCase(codePoint)) { + loop(acc + codePoint, codePoint + 1) + } else { + loop(acc, codePoint + 1) + } + } + + Gen.oneOf(loop(BitSet.empty, 0)).map(codePoint => new String(Array(codePoint), 0, 1)) + } + + Arbitrary( + Gen.oneOf( + Gen.listOf(genChar).map(cs => CIString(cs.mkString)), + arbitrary[String].map(CIString.apply), + surrogatePairStrings.map(CIString.apply), + titleCaseStrings.map(CIString.apply) + ) + ) + } + + implicit val shrinkForCIString: Shrink[CIString] = { + val stringShrink: Shrink[String] = implicitly[Shrink[String]] + Shrink( + x => stringShrink.shrink(x.toString).map(CIString.apply) + ) } + implicit val cogenForOrgTypelevelCiCIString: Cogen[CIString] = Cogen[String].contramap(ci => new String(ci.toString.toArray.map(_.toLower))) + + implicit val arbCaseFoldedString: Arbitrary[CaseFoldedString] = + Arbitrary( + arbitrary[String].flatMap(value => + Gen.oneOf( + CaseFoldedString(value), + CaseFoldedString(value, true) // true = Turkic folding rules; the one-argument call above uses the default rules + ) + ) + ) + + implicit val cogenForCaseFoldedString: Cogen[CaseFoldedString] = + Cogen[String].contramap(_.toString) // reuses the String Cogen on the toString form + + @nowarn("cat=deprecation") // NOTE(review): presumably needed for the Stream usage below - confirm + implicit val shrinkCaseFoldedString: Shrink[CaseFoldedString] = { + import scala.collection.immutable.Stream + val stringShrink: Shrink[String] = implicitly[Shrink[String]] + Shrink( + x => stringShrink.shrink(x.toString).flatMap(value => + Stream(CaseFoldedString(value), CaseFoldedString(value, true)) // both folding variants of every shrunk String + ) + ) + } } diff --git a/tests/jvm/src/test/scala/org/typelevel/ci/CaseFoldedStringJVMSuite.scala b/tests/jvm/src/test/scala/org/typelevel/ci/CaseFoldedStringJVMSuite.scala new file mode 100644 index 0000000..6aa2018 --- /dev/null +++ 
b/tests/jvm/src/test/scala/org/typelevel/ci/CaseFoldedStringJVMSuite.scala @@ -0,0 +1,25 @@ +package org.typelevel.ci + +import cats.implicits._ +import java.io._ +import munit.ScalaCheckSuite +import org.typelevel.ci.testing.arbitraries._ +import org.scalacheck.Prop._ + +final class CaseFoldedStringJVMSuite extends ScalaCheckSuite { + property("serialization round trips") { + def roundTrip[A](x: A): A = { // serializes x into an in-memory buffer and reads it back + val baos = new ByteArrayOutputStream + val oos = new ObjectOutputStream(baos) + oos.writeObject(x) + oos.close() // closing also flushes the serialized form into baos before it is read + val bais = new ByteArrayInputStream(baos.toByteArray) + val ois = new ObjectInputStream(bais) + try ois.readObject().asInstanceOf[A] finally ois.close() // close the reader even when deserialization throws + } + + forAll { (x: CaseFoldedString) => + x.eqv(roundTrip(x)) // the deserialized copy must be eqv to the original + } + } +} diff --git a/tests/shared/src/test/scala/org/typelevel/ci/CIStringSuite.scala b/tests/shared/src/test/scala/org/typelevel/ci/CIStringSuite.scala index a28087e..fd8cae2 100644 --- a/tests/shared/src/test/scala/org/typelevel/ci/CIStringSuite.scala +++ b/tests/shared/src/test/scala/org/typelevel/ci/CIStringSuite.scala @@ -22,20 +22,31 @@ import munit.DisciplineSuite import org.scalacheck.Prop._ import org.typelevel.ci.testing.arbitraries._ import scala.math.signum +import scala.annotation.tailrec class CIStringSuite extends DisciplineSuite { property("case insensitive equality") { forAll { (x: CIString) => - val y = CIString(new String(x.toString.toArray.map(_.toUpper))) - val z = CIString(new String(x.toString.toArray.map(_.toLower))) - assertEquals(y, z) + if (x.toString.contains('\u0131')) { + // '\u0131' is LATIN SMALL LETTER DOTLESS I. Upper-casing this + // character will yield an 'I', but the Unicode standard for case + // folding states \u0131 is only case insensitively equivalent to 'I' + // for Turkic languages and by default this mapping should not be + // used. 
+ val y = CIString(x.toString.toLowerCase(java.util.Locale.ROOT)) // Locale.ROOT: the conversion must not depend on the JVM default locale + val z = CIString(x.toString.toUpperCase(java.util.Locale.ROOT)) + assertNotEquals(y, z) + } else { + val y = CIString(x.toString.toLowerCase(java.util.Locale.ROOT)) + val z = CIString(x.toString.toUpperCase(java.util.Locale.ROOT)) + val t = CIString(CIStringSuite.toTitleCase(x.toString)) + assertEquals(y, z) + assertEquals(y, t) + assertEquals(t, z) + } } } - test("character based equality") { - assert(CIString("ß") != CIString("SS")) - } - property("reflexive equality") { forAll { (x: CIString) => assertEquals(x, x) @@ -178,6 +189,17 @@ class CIStringSuite extends DisciplineSuite { }) } + // Test name copied from java.lang.Character.getName(), I know it's long... + test("GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI should compare equal with upper and lower case invocations"){ + val codePoint: Int = 8085 // Unicode codepoint of lower case value + val lower: String = (new String(Character.toChars(codePoint))).toLowerCase(java.util.Locale.ROOT) + val upper: String = lower.toUpperCase(java.util.Locale.ROOT) + val title: String = lower.map(c => Character.toTitleCase(c)).mkString + assertEquals(CIString(lower), CIString(upper)) + assertEquals(CIString(lower), CIString(title)) + assertEquals(CIString(title), CIString(upper)) + } + checkAll("Order[CIString]", OrderTests[CIString].order) checkAll("Hash[CIString]", HashTests[CIString].hash) checkAll("LowerBounded[CIString]", LowerBoundedTests[CIString].lowerBounded) @@ -187,3 +209,27 @@ class CIStringSuite extends DisciplineSuite { "CIString instances", SerializableTests.serializable(CIString.catsInstancesForOrgTypelevelCIString)) } + +object CIStringSuite { + def mapStringByCodepoint(f: Int => Int)(s: String): String = { // applies f to each code point of s; a surrogate pair is read as one code point + // Scala's wrapper class doesn't support appendCodePoint, so we need to + // explicitly use the java.lang.StringBuilder + val builder: java.lang.StringBuilder = new java.lang.StringBuilder(s.length) + + @tailrec + def loop(index: Int): String = + if (index >= s.length) { + builder.toString + } else { + val codePoint: Int = s.codePointAt(index) + 
builder.appendCodePoint(f(codePoint)) + val inc: Int = Character.charCount(codePoint) + loop(index + inc) + } + + loop(0) + } + + def toTitleCase(s: String): String = + mapStringByCodepoint(Character.toTitleCase)(s) +} diff --git a/tests/shared/src/test/scala/org/typelevel/ci/CaseFoldedStringSuite.scala b/tests/shared/src/test/scala/org/typelevel/ci/CaseFoldedStringSuite.scala new file mode 100644 index 0000000..7d70aad --- /dev/null +++ b/tests/shared/src/test/scala/org/typelevel/ci/CaseFoldedStringSuite.scala @@ -0,0 +1,190 @@ +/* + * Copyright 2020 Typelevel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.typelevel.ci + +import cats.implicits._ +import cats.kernel.laws.discipline._ +import munit.DisciplineSuite +import org.scalacheck.Prop._ +import org.typelevel.ci.testing.arbitraries._ +import scala.math.signum +import scala.annotation.tailrec + +final class CaseFoldedStringSuite extends DisciplineSuite { + property("case insensitive equality") { + forAll { (x: CaseFoldedString) => + if (x.toString.contains('\u0131')) { + // '\u0131' is LATIN SMALL LETTER DOTLESS I. Upper-casing this + // character will yield an 'I', but the Unicode standard for case + // folding states \u0131 is only case insensitively equivalent to 'I' + // for Turkic languages and by default this mapping should not be + // used. 
+ val y = CaseFoldedString(x.toString.toLowerCase(java.util.Locale.ROOT)) // Locale.ROOT: the conversion must not depend on the JVM default locale + val z = CaseFoldedString(x.toString.toUpperCase(java.util.Locale.ROOT)) + assertNotEquals(y, z) + } else { + val y = CaseFoldedString(x.toString.toLowerCase(java.util.Locale.ROOT)) + val z = CaseFoldedString(x.toString.toUpperCase(java.util.Locale.ROOT)) + val t = CaseFoldedString(CaseFoldedStringSuite.toTitleCase(x.toString)) + assertEquals(y, z) + assertEquals(y, t) + assertEquals(t, z) + } + } + } + + test("case insensitive comparison") { + assert(CaseFoldedString("case-insensitive") < CaseFoldedString("CI")) + } + + property("reflexive comparison") { + forAll { (x: CaseFoldedString) => + assertEquals(x.compare(x), 0) + } + } + + property("equality consistent with comparison") { + forAll { (x: CaseFoldedString, y: CaseFoldedString) => + assertEquals((x == y), (x.compare(y) == 0)) + } + } + + property("hashCode consistent with equality") { + forAll { (x: CaseFoldedString, y: CaseFoldedString) => + assert((x != y) || (x.hashCode == y.hashCode)) + } + } + + test("isEmpty is true given an empty string") { + assert(CaseFoldedString("").isEmpty) + } + + test("isEmpty is false given a non-empty string") { + assert(!CaseFoldedString("non-empty string").isEmpty) + } + + property("is never equal to .nonEmpty for any given string") { + forAll { (ci: CaseFoldedString) => + assert(ci.isEmpty != ci.nonEmpty) + } + } + + test("nonEmpty is true given a non-empty string") { + assert(CaseFoldedString("non-empty string").nonEmpty) + } + + test("nonEmpty is false given an empty string") { + assert(!CaseFoldedString("").nonEmpty) + } + + test("trim removes leading whitespace") { + assert(CaseFoldedString(" text").trim == CaseFoldedString("text")) + } + + test("removes trailing whitespace") { + assert(CaseFoldedString("text ").trim == CaseFoldedString("text")) + } + + test("removes leading and trailing whitespace") { + assert(CaseFoldedString(" text ").trim == CaseFoldedString("text")) + } + + // property("ci interpolator is consistent with apply") { + // forAll { (s: String) => + // 
assertEquals(ci"$s", CaseFoldedString(s)) + // } + // } + + // property("ci interpolator handles expressions") { + // forAll { (x: Int, y: Int) => + // assertEquals(ci"${x + y}", CaseFoldedString((x + y).toString)) + // } + // } + + // property("ci interpolator handles multiple parts") { + // forAll { (a: String, b: String, c: String) => + // assertEquals(ci"$a:$b:$c", CaseFoldedString(s"$a:$b:$c")) + // } + // } + + // property("ci interpolator extractor is case-insensitive") { + // forAll { (s: String) => + // assert(CaseFoldedString(new String(s.toString.toArray.map(_.toUpper))) match { + // case ci"${t}" => t == CaseFoldedString(s) + // case _ => false + // }) + + // assert(CaseFoldedString(new String(s.toString.toArray.map(_.toLower))) match { + // case ci"${t}" => t == CaseFoldedString(s) + // case _ => false + // }) + // } + // } + + // test("ci interpolator extracts multiple parts") { + // assert(CaseFoldedString("Hello, Aretha") match { + // case ci"${greeting}, ${name}" => greeting == ci"Hello" && name == ci"Aretha" + // }) + // } + + // test("ci interpolator matches literals") { + // assert(CaseFoldedString("literally") match { + // case ci"LiTeRaLlY" => true + // case _ => false + // }) + // } + + // Test name copied from java.lang.Character.getName(), I know it's long... 
+ test("GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI should compare equal with upper and lower case invocations"){ + val codePoint: Int = 8085 // Unicode codepoint of lower case value + val lower: String = (new String(Character.toChars(codePoint))).toLowerCase(java.util.Locale.ROOT) // Locale.ROOT: the conversion must not depend on the JVM default locale + val upper: String = lower.toUpperCase(java.util.Locale.ROOT) + val title: String = lower.map(c => Character.toTitleCase(c)).mkString + assertEquals(CaseFoldedString(lower), CaseFoldedString(upper)) + assertEquals(CaseFoldedString(lower), CaseFoldedString(title)) + assertEquals(CaseFoldedString(title), CaseFoldedString(upper)) + } + + checkAll("Order[CaseFoldedString]", OrderTests[CaseFoldedString].order) + checkAll("Hash[CaseFoldedString]", HashTests[CaseFoldedString].hash) + checkAll("LowerBounded[CaseFoldedString]", LowerBoundedTests[CaseFoldedString].lowerBounded) + checkAll("Monoid[CaseFoldedString]", MonoidTests[CaseFoldedString].monoid) +} + +object CaseFoldedStringSuite { + def mapStringByCodepoint(f: Int => Int)(s: String): String = { // applies f to each code point of s and returns the rebuilt String + // Scala's wrapper class doesn't support appendCodePoint, so we need to + // explicitly use the java.lang.StringBuilder + val builder: java.lang.StringBuilder = new java.lang.StringBuilder(s.length) + + @tailrec + def loop(index: Int): String = + if (index >= s.length) { + builder.toString + } else { + val codePoint: Int = s.codePointAt(index) + builder.appendCodePoint(f(codePoint)) + val inc: Int = Character.charCount(codePoint) // 2 for a supplementary code point, so a surrogate pair is consumed as one unit + loop(index + inc) + } + + loop(0) + } + + def toTitleCase(s: String): String = // title-cases s one code point at a time + mapStringByCodepoint(Character.toTitleCase)(s) +}