421 lines
24 KiB
C#
421 lines
24 KiB
C#
using System.Collections.Generic;
|
|
|
|
namespace RTLTMPro
|
|
{
|
|
public static class GlyphFixer
|
|
{
|
|
public static Dictionary<char, char> EnglishToFarsiNumberMap = new Dictionary<char, char>()
|
|
{
|
|
[(char)EnglishNumbers.Zero] = (char)FarsiNumbers.Zero,
|
|
[(char)EnglishNumbers.One] = (char)FarsiNumbers.One,
|
|
[(char)EnglishNumbers.Two] = (char)FarsiNumbers.Two,
|
|
[(char)EnglishNumbers.Three] = (char)FarsiNumbers.Three,
|
|
[(char)EnglishNumbers.Four] = (char)FarsiNumbers.Four,
|
|
[(char)EnglishNumbers.Five] = (char)FarsiNumbers.Five,
|
|
[(char)EnglishNumbers.Six] = (char)FarsiNumbers.Six,
|
|
[(char)EnglishNumbers.Seven] = (char)FarsiNumbers.Seven,
|
|
[(char)EnglishNumbers.Eight] = (char)FarsiNumbers.Eight,
|
|
[(char)EnglishNumbers.Nine] = (char)FarsiNumbers.Nine,
|
|
};
|
|
|
|
public static Dictionary<char, char> EnglishToHinduNumberMap = new Dictionary<char, char>()
|
|
{
|
|
[(char)EnglishNumbers.Zero] = (char)HinduNumbers.Zero,
|
|
[(char)EnglishNumbers.One] = (char)HinduNumbers.One,
|
|
[(char)EnglishNumbers.Two] = (char)HinduNumbers.Two,
|
|
[(char)EnglishNumbers.Three] = (char)HinduNumbers.Three,
|
|
[(char)EnglishNumbers.Four] = (char)HinduNumbers.Four,
|
|
[(char)EnglishNumbers.Five] = (char)HinduNumbers.Five,
|
|
[(char)EnglishNumbers.Six] = (char)HinduNumbers.Six,
|
|
[(char)EnglishNumbers.Seven] = (char)HinduNumbers.Seven,
|
|
[(char)EnglishNumbers.Eight] = (char)HinduNumbers.Eight,
|
|
[(char)EnglishNumbers.Nine] = (char)HinduNumbers.Nine,
|
|
};
|
|
|
|
|
|
/// <summary>
|
|
/// Fixes the shape of letters based on their position.
|
|
/// </summary>
|
|
/// <param name="input"></param>
|
|
/// <param name="output"></param>
|
|
/// <param name="preserveNumbers"></param>
|
|
/// <param name="farsi"></param>
|
|
/// <returns></returns>
|
|
public static void Fix(FastStringBuilder input, FastStringBuilder output, bool preserveNumbers, bool farsi, bool fixTextTags)
|
|
{
|
|
FixYah(input, farsi);
|
|
|
|
output.SetValue(input);
|
|
|
|
for (int i = 0; i < input.Length; i++)
|
|
{
|
|
bool skipNext = false;
|
|
int iChar = input.Get(i);
|
|
|
|
// For special Lam Letter connections.
|
|
if (iChar == (int)ArabicGeneralLetters.Lam)
|
|
{
|
|
if (i < input.Length - 1)
|
|
{
|
|
skipNext = HandleSpecialLam(input, output, i);
|
|
if (skipNext)
|
|
iChar = output.Get(i);
|
|
}
|
|
}
|
|
|
|
// We don't want to fix tatweel or zwnj character
|
|
if (iChar == (int)ArabicGeneralLetters.Tatweel ||
|
|
iChar == (int)SpecialCharacters.ZeroWidthNoJoiner)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
if (iChar < 0xFFFF && TextUtils.IsGlyphFixedArabicCharacter((char)iChar))
|
|
{
|
|
char converted = GlyphTable.Convert((char)iChar);
|
|
|
|
if (IsMiddleLetter(input, i))
|
|
{
|
|
output.Set(i, (char)(converted + 3));
|
|
} else if (IsFinishingLetter(input, i))
|
|
{
|
|
output.Set(i, (char)(converted + 1));
|
|
} else if (IsLeadingLetter(input, i))
|
|
{
|
|
output.Set(i, (char)(converted + 2));
|
|
} else
|
|
{
|
|
output.Set(i, (char)converted);
|
|
}
|
|
}
|
|
|
|
// If this letter as Lam and special Lam-Alef connection was made, We want to skip the Alef
|
|
// (Lam-Alef occupies 1 space)
|
|
if (skipNext)
|
|
{
|
|
i++;
|
|
}
|
|
}
|
|
|
|
if (!preserveNumbers)
|
|
{
|
|
if (fixTextTags)
|
|
{
|
|
FixNumbersOutsideOfTags(output, farsi);
|
|
} else
|
|
{
|
|
FixNumbers(output, farsi);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Removes tashkeel. Converts general RTL letters to isolated form. Also fixes Farsi and Arabic ی letter.
|
|
/// </summary>
|
|
/// <param name="text">Input to prepare</param>
|
|
/// <param name="farsi"></param>
|
|
/// <returns>Prepared input in char array</returns>
|
|
public static void FixYah(FastStringBuilder text, bool farsi)
|
|
{
|
|
for (int i = 0; i < text.Length; i++)
|
|
{
|
|
if (farsi && text.Get(i) == (int)ArabicGeneralLetters.Yeh)
|
|
{
|
|
text.Set(i, (char)ArabicGeneralLetters.FarsiYeh);
|
|
} else if (farsi == false && text.Get(i) == (int)ArabicGeneralLetters.FarsiYeh)
|
|
{
|
|
text.Set(i, (char)ArabicGeneralLetters.Yeh);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Handles the special Lam-Alef connection in the text.
|
|
/// </summary>
|
|
/// <param name="input"></param>
|
|
/// <param name="output"></param>
|
|
/// <param name="i">Index of Lam letter</param>
|
|
/// <returns><see langword="true" /> if special connection has been made.</returns>
|
|
private static bool HandleSpecialLam(FastStringBuilder input, FastStringBuilder output, int i)
|
|
{
|
|
bool isFixed;
|
|
switch (input.Get(i + 1))
|
|
{
|
|
case (char)ArabicGeneralLetters.AlefHamzaBelow:
|
|
output.Set(i, (char)0xFEF7);
|
|
isFixed = true;
|
|
break;
|
|
case (char)ArabicGeneralLetters.Alef:
|
|
output.Set(i, (char)0xFEF9);
|
|
isFixed = true;
|
|
break;
|
|
case (char)ArabicGeneralLetters.AlefHamzaAbove:
|
|
output.Set(i, (char)0xFEF5);
|
|
isFixed = true;
|
|
break;
|
|
case (char)ArabicGeneralLetters.AlefMaddaAbove:
|
|
output.Set(i, (char)0xFEF3);
|
|
isFixed = true;
|
|
break;
|
|
default:
|
|
isFixed = false;
|
|
break;
|
|
}
|
|
|
|
if (isFixed)
|
|
{
|
|
output.Set(i + 1, (char)0xFFFF);
|
|
}
|
|
|
|
return isFixed;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Converts English numbers to Persian or Arabic numbers.
|
|
/// </summary>
|
|
/// <param name="text"></param>
|
|
/// <param name="farsi"></param>
|
|
/// <returns>Converted number</returns>
|
|
public static void FixNumbers(FastStringBuilder text, bool farsi)
|
|
{
|
|
text.Replace((char)EnglishNumbers.Zero, farsi ? (char)FarsiNumbers.Zero : (char)HinduNumbers.Zero);
|
|
text.Replace((char)EnglishNumbers.One, farsi ? (char)FarsiNumbers.One : (char)HinduNumbers.One);
|
|
text.Replace((char)EnglishNumbers.Two, farsi ? (char)FarsiNumbers.Two : (char)HinduNumbers.Two);
|
|
text.Replace((char)EnglishNumbers.Three, farsi ? (char)FarsiNumbers.Three : (char)HinduNumbers.Three);
|
|
text.Replace((char)EnglishNumbers.Four, farsi ? (char)FarsiNumbers.Four : (char)HinduNumbers.Four);
|
|
text.Replace((char)EnglishNumbers.Five, farsi ? (char)FarsiNumbers.Five : (char)HinduNumbers.Five);
|
|
text.Replace((char)EnglishNumbers.Six, farsi ? (char)FarsiNumbers.Six : (char)HinduNumbers.Six);
|
|
text.Replace((char)EnglishNumbers.Seven, farsi ? (char)FarsiNumbers.Seven : (char)HinduNumbers.Seven);
|
|
text.Replace((char)EnglishNumbers.Eight, farsi ? (char)FarsiNumbers.Eight : (char)HinduNumbers.Eight);
|
|
text.Replace((char)EnglishNumbers.Nine, farsi ? (char)FarsiNumbers.Nine : (char)HinduNumbers.Nine);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Converts English numbers that are outside tags to Persian or Arabic numbers.
|
|
/// </summary>
|
|
/// <param name="text"></param>
|
|
/// <param name="farsi"></param>
|
|
/// <returns>Text with converted numbers</returns>
|
|
public static void FixNumbersOutsideOfTags(FastStringBuilder text, bool farsi)
|
|
{
|
|
var englishDigits = new HashSet<char>(EnglishToFarsiNumberMap.Keys);
|
|
for (int i = 0; i < text.Length; i++)
|
|
{
|
|
var iChar = text.Get(i);
|
|
// skip valid tags
|
|
if (iChar == '<')
|
|
{
|
|
bool sawValidTag = false;
|
|
for (int j = i + 1; j < text.Length; j++)
|
|
{
|
|
int jChar = text.Get(j);
|
|
if ((j == i + 1 && jChar == ' ') || jChar == '<')
|
|
{
|
|
break;
|
|
} else if (jChar == '>')
|
|
{
|
|
i = j;
|
|
sawValidTag = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (sawValidTag) continue;
|
|
}
|
|
|
|
if (englishDigits.Contains((char)iChar))
|
|
{
|
|
text.Set(i, farsi ? EnglishToFarsiNumberMap[(char)iChar] : EnglishToHinduNumberMap[(char)iChar]);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// Is the letter at provided index a leading letter?
|
|
/// </summary>
|
|
/// <returns><see langword="true" /> if the letter is a leading letter</returns>
|
|
private static bool IsLeadingLetter(FastStringBuilder letters, int index)
|
|
{
|
|
var currentIndexLetter = letters.Get(index);
|
|
|
|
int previousIndexLetter = default;
|
|
if (index != 0)
|
|
previousIndexLetter = letters.Get(index - 1);
|
|
|
|
int nextIndexLetter = default;
|
|
if (index < letters.Length - 1)
|
|
nextIndexLetter = letters.Get(index + 1);
|
|
|
|
bool isPreviousLetterNonConnectable = index == 0 ||
|
|
(previousIndexLetter < 0xFFFF && !TextUtils.IsGlyphFixedArabicCharacter((char)previousIndexLetter)) ||
|
|
previousIndexLetter == (int)ArabicGeneralLetters.Hamza ||
|
|
previousIndexLetter == (int)ArabicGeneralLetters.AlefMaddaAbove ||
|
|
previousIndexLetter == (int)ArabicGeneralLetters.AlefHamzaAbove ||
|
|
previousIndexLetter == (int)ArabicGeneralLetters.AlefHamzaBelow ||
|
|
previousIndexLetter == (int)ArabicGeneralLetters.WawHamzaAbove ||
|
|
previousIndexLetter == (int)ArabicGeneralLetters.Alef ||
|
|
previousIndexLetter == (int)ArabicGeneralLetters.Dal ||
|
|
previousIndexLetter == (int)ArabicGeneralLetters.Thal ||
|
|
previousIndexLetter == (int)ArabicGeneralLetters.Reh ||
|
|
previousIndexLetter == (int)ArabicGeneralLetters.Zain ||
|
|
previousIndexLetter == (int)ArabicGeneralLetters.Jeh ||
|
|
previousIndexLetter == (int)ArabicGeneralLetters.Waw ||
|
|
previousIndexLetter == (int)ArabicIsolatedLetters.AlefMaddaAbove ||
|
|
previousIndexLetter == (int)ArabicIsolatedLetters.AlefHamzaAbove ||
|
|
previousIndexLetter == (int)ArabicIsolatedLetters.AlefHamzaBelow ||
|
|
previousIndexLetter == (int)ArabicIsolatedLetters.WawHamzaAbove ||
|
|
previousIndexLetter == (int)ArabicIsolatedLetters.Alef ||
|
|
previousIndexLetter == (int)ArabicIsolatedLetters.Hamza ||
|
|
previousIndexLetter == (int)ArabicIsolatedLetters.Dal ||
|
|
previousIndexLetter == (int)ArabicIsolatedLetters.Thal ||
|
|
previousIndexLetter == (int)ArabicIsolatedLetters.Reh ||
|
|
previousIndexLetter == (int)ArabicIsolatedLetters.Zain ||
|
|
previousIndexLetter == (int)ArabicIsolatedLetters.Jeh ||
|
|
previousIndexLetter == (int)ArabicIsolatedLetters.Waw ||
|
|
previousIndexLetter == (int)SpecialCharacters.ZeroWidthNoJoiner;
|
|
|
|
|
|
bool canThisLetterBeLeading = currentIndexLetter != ' ' &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Hamza &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.AlefHamzaAbove &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.AlefHamzaBelow &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.AlefMaddaAbove &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.WawHamzaAbove &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Alef &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Dal &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Thal &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Reh &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Zain &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Jeh &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Waw &&
|
|
currentIndexLetter != (int)SpecialCharacters.ZeroWidthNoJoiner;
|
|
|
|
|
|
bool isNextLetterConnectable = index < letters.Length - 1 &&
|
|
(nextIndexLetter < 0xFFFF && TextUtils.IsGlyphFixedArabicCharacter((char)nextIndexLetter)) &&
|
|
nextIndexLetter != (int)ArabicGeneralLetters.Hamza &&
|
|
nextIndexLetter != (int)SpecialCharacters.ZeroWidthNoJoiner;
|
|
|
|
return isPreviousLetterNonConnectable &&
|
|
canThisLetterBeLeading &&
|
|
isNextLetterConnectable;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Is the letter at provided index a finishing letter?
|
|
/// </summary>
|
|
/// <returns><see langword="true" /> if the letter is a finishing letter</returns>
|
|
private static bool IsFinishingLetter(FastStringBuilder letters, int index)
|
|
{
|
|
int currentIndexLetter = letters.Get(index);
|
|
|
|
int previousIndexLetter = default;
|
|
if (index != 0)
|
|
previousIndexLetter = letters.Get(index - 1);
|
|
|
|
bool isPreviousLetterConnectable = index != 0 &&
|
|
previousIndexLetter != ' ' &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Hamza &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.AlefMaddaAbove &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.AlefHamzaAbove &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.AlefHamzaBelow &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.WawHamzaAbove &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Alef &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Dal &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Thal &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Reh &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Zain &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Jeh &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Waw &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Hamza &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.AlefMaddaAbove &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.AlefHamzaAbove &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.AlefHamzaBelow &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.WawHamzaAbove &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Alef &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Dal &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Thal &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Reh &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Zain &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Jeh &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Waw &&
|
|
previousIndexLetter != (int)SpecialCharacters.ZeroWidthNoJoiner &&
|
|
(previousIndexLetter < 0xFFFF && TextUtils.IsGlyphFixedArabicCharacter((char)previousIndexLetter));
|
|
|
|
|
|
bool canThisLetterBeFinishing = currentIndexLetter != ' ' &&
|
|
currentIndexLetter != (int)SpecialCharacters.ZeroWidthNoJoiner &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Hamza;
|
|
|
|
return isPreviousLetterConnectable && canThisLetterBeFinishing;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Is the letter at provided index a middle letter?
|
|
/// </summary>
|
|
/// <returns><see langword="true" /> if the letter is a middle letter</returns>
|
|
private static bool IsMiddleLetter(FastStringBuilder letters, int index)
|
|
{
|
|
var currentIndexLetter = letters.Get(index);
|
|
|
|
int previousIndexLetter = default;
|
|
if (index != 0)
|
|
previousIndexLetter = letters.Get(index - 1);
|
|
|
|
int nextIndexLetter = default;
|
|
if (index < letters.Length - 1)
|
|
nextIndexLetter = letters.Get(index + 1);
|
|
|
|
bool middleLetterCheck = index != 0 &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Hamza &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.AlefMaddaAbove &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.AlefHamzaAbove &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.AlefHamzaBelow &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.WawHamzaAbove &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Alef &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Dal &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Thal &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Reh &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Zain &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Jeh &&
|
|
currentIndexLetter != (int)ArabicGeneralLetters.Waw &&
|
|
currentIndexLetter != (int)SpecialCharacters.ZeroWidthNoJoiner;
|
|
|
|
bool previousLetterCheck = index != 0 &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Hamza &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.AlefMaddaAbove &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.AlefHamzaAbove &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.AlefHamzaBelow &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.WawHamzaAbove &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Alef &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Dal &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Thal &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Reh &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Zain &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Jeh &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Waw &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.AlefMaddaAbove &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.AlefHamzaAbove &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.AlefHamzaBelow &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.WawHamzaAbove &&
|
|
previousIndexLetter != (int)ArabicGeneralLetters.Hamza &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Alef &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Dal &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Thal &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Reh &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Zain &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Jeh &&
|
|
previousIndexLetter != (int)ArabicIsolatedLetters.Waw &&
|
|
previousIndexLetter != (int)SpecialCharacters.ZeroWidthNoJoiner &&
|
|
(previousIndexLetter < 0xFFFF && TextUtils.IsGlyphFixedArabicCharacter((char)previousIndexLetter));
|
|
|
|
bool nextLetterCheck = index < letters.Length - 1 &&
|
|
(nextIndexLetter < 0xFFFF && TextUtils.IsGlyphFixedArabicCharacter((char)nextIndexLetter)) &&
|
|
nextIndexLetter != (int)SpecialCharacters.ZeroWidthNoJoiner &&
|
|
nextIndexLetter != (int)ArabicGeneralLetters.Hamza &&
|
|
nextIndexLetter != (int)ArabicIsolatedLetters.Hamza;
|
|
|
|
return nextLetterCheck && previousLetterCheck && middleLetterCheck;
|
|
}
|
|
}
|
|
} |