% ----------------------------------------------------------------------------
%
% fontwrap inserts fonts into your XeLaTeX text based on which unicode blocks
% characters belong to. For instance, something like:
%
%   "This is English with 日本語 interspersed"
%
% will be turned into:
%
%   "\fontspec{\unicodeblockLatinFont}This is English with \fontspec{\unicodeblockJapaneseFont}日本語\fontspec{\unicodeblockLatinFont} interspersed"
%
% This is something that most word processors can do already, and it was sort
% of silly that LaTeX didn't have a general scheme for this. After all, LaTeX
% should be better than word processors! ^_^
%
% That said, you can bind different fonts to different unicode blocks (the best
% font for Japanese, for instance, is definitely not the best font for Chinese,
% both of which are probably pretty poor English fonts... for instance).
% Any unicode block that you didn't bind yourself will use the defaults:
%
%   \fontwrapdefaultfont     for anything not CJK
%   \fontwrapdefaultCJKfont  for anything CJK (including the separate japanese and korean blocks)
%
% June 2008, Mike Kamermans,
% nihongoresources.com (C)
% ----------------------------------------------------------------------------
\ProvidesPackage{fontwrap}

% Dependencies are loaded with \RequirePackage, the package-level counterpart
% of \usepackage.

% enables use of system fonts
\RequirePackage{fontspec}

% enables full support for unicode text
\RequirePackage{xunicode}

% fixes some older command behaviour to current - see http://www.ctan.org/tex-archive/help/Catalogue/entries/xltxtra.html
\RequirePackage{xltxtra}

% also get some external Perl logic going
\RequirePackage{perltex}

% ----------------------------------------------------------------------------
%
% The actually interesting method. This parses the input and inserts \fontspec
% commands where necessary.
%
% ----------------------------------------------------------------------------

% Fallback font for every non-CJK block - a fairly complete unicode font.
\newcommand{\fontwrapdefaultfont}{Bitstream Cyberbit}

% Fallback font for the CJK blocks - another pretty complete unicode font.
\newcommand{\fontwrapdefaultCJKfont}{Code2000}

% ---------------------------------------------------------
% Font bindings. Font bindings for 158 unicode blocks...
% Each \unicodeblock<Name>Font macro holds the font bound
% to that block; all of them start out at one of the two
% fallback fonts above.
% ---------------------------------------------------------
\newcommand{\unicodeblockBasicLatinFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockHiraganaFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockCJKUnifiedIdeographsFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockKatakanaFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockAegeanNumbersFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockAlphabeticPresentationFormsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockAncientGreekMusicalNotationFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockAncientGreekNumbersFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockArabicFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockArabicPresentationFormsAFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockArabicPresentationFormsBFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockArabicSupplementFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockArmenianFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockArrowsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockBalineseFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockBengaliFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockBlockElementsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockBopomofoFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockBopomofoExtendedFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockBoxDrawingFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockBraillePatternsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockBugineseFont}{\fontwrapdefaultfont}
% Default font bindings, Buhid through Cyrillic Supplement.
\newcommand{\unicodeblockBuhidFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockByzantineMusicalSymbolsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCherokeeFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCJKCompatibilityFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockCJKCompatibilityFormsFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockCJKCompatibilityIdeographsFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockCJKCompatibilityIdeographsSupplementFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockCJKRadicalsSupplementFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockCJKStrokesFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockCJKSymbolsandPunctuationFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockCJKUnifiedIdeographsExtensionAFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockCJKUnifiedIdeographsExtensionBFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockCombiningDiacriticalMarksFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCombiningDiacriticalMarksforSymbolsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCombiningDiacriticalMarksSupplementFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCombiningHalfMarksFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockControlPicturesFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCopticFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCountingRodNumeralsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCuneiformFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCuneiformNumbersandPunctuationFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCurrencySymbolsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCypriotSyllabaryFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCyrillicFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCyrillicExtendedAFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCyrillicExtendedBFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockCyrillicSupplementFont}{\fontwrapdefaultfont}
% Default font bindings, Deseret through IPA Extensions.
\newcommand{\unicodeblockDeseretFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockDevanagariFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockDingbatsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockDominoTilesFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockEnclosedAlphanumericsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockEnclosedCJKLettersandMonthsFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockEthiopicFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockEthiopicExtendedFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockEthiopicSupplementFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockGeneralPunctuationFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockGeometricShapesFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockGeorgianFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockGeorgianSupplementFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockGlagoliticFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockGothicFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockGreekandCopticFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockGreekExtendedFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockGujaratiFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockGurmukhiFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockHalfwidthandFullwidthFormsFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockHangulCompatibilityJamoFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockHangulJamoFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockHangulSyllablesFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockHanunooFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockHebrewFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockHighPrivateUseSurrogatesFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockHighSurrogatesFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockIdeographicDescriptionCharactersFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockIPAExtensionsFont}{\fontwrapdefaultfont}
% Default font bindings, Kanbun through Miscellaneous Symbols.
\newcommand{\unicodeblockKanbunFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockKangxiRadicalsFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockKannadaFont}{\fontwrapdefaultfont}
% Katakana Phonetic Extensions is tagged CJK in its block data and is rebound
% by both the CJK and the Japanese group setters, so it starts out on the CJK
% default like the other kana blocks.
\newcommand{\unicodeblockKatakanaPhoneticExtensionsFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockKharoshthiFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockKhmerFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockKhmerSymbolsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockLaoFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockLatinExtendedAdditionalFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockLatinExtendedAFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockLatinExtendedBFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockLatinExtendedCFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockLatinExtendedDFont}{\fontwrapdefaultfont}
% in keeping with uniformity, technically this should be unicodeblockLatin1SupplementFont, but numbers are not allowed in macro names...
\newcommand{\unicodeblockLatinSupplementFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockLetterlikeSymbolsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockLimbuFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockLinearBIdeogramsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockLinearBSyllabaryFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockLowSurrogatesFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockMahjongTilesFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockMalayalamFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockMathematicalAlphanumericSymbolsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockMathematicalOperatorsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockMiscellaneousMathematicalSymbolsAFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockMiscellaneousMathematicalSymbolsBFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockMiscellaneousSymbolsFont}{\fontwrapdefaultfont}
% Default font bindings, Miscellaneous Symbols and Arrows through
% Supplemental Arrows-A.
\newcommand{\unicodeblockMiscellaneousSymbolsandArrowsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockMiscellaneousTechnicalFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockModifierToneLettersFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockMongolianFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockMusicalSymbolsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockMyanmarFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockNewTaiLueFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockNKoFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockNumberFormsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockOghamFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockOldItalicFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockOldPersianFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockOpticalCharacterRecognitionFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockOriyaFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockOsmanyaFont}{\fontwrapdefaultfont}
% because hyphens are also not allowed in macros, "Phags-pa" has been turned into the less correct PhagsPa
\newcommand{\unicodeblockPhagsPaFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockPhoenicianFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockPhoneticExtensionsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockPhoneticExtensionsSupplementFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockPrivateUseAreaFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockRunicFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockShavianFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockSinhalaFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockSmallFormVariantsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockSpacingModifierLettersFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockSpecialsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockSuperscriptsandSubscriptsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockSupplementalArrowsAFont}{\fontwrapdefaultfont}
% Default font bindings, Supplemental Arrows-B through Yijing Hexagram Symbols.
\newcommand{\unicodeblockSupplementalArrowsBFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockSupplementalMathematicalOperatorsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockSupplementalPunctuationFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockSupplementaryPrivateUseAreaAFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockSupplementaryPrivateUseAreaBFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockSylotiNagriFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockSyriacFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockTagalogFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockTagbanwaFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockTagsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockTaiLeFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockTaiXuanJingSymbolsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockTamilFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockTeluguFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockThaanaFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockThaiFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockTibetanFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockTifinaghFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockUgariticFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockUnifiedCanadianAboriginalSyllabicsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockVariationSelectorsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockVariationSelectorsSupplementFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockVerticalFormsFont}{\fontwrapdefaultfont}
\newcommand{\unicodeblockYiRadicalsFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockYiSyllablesFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodeblockYijingHexagramSymbolsFont}{\fontwrapdefaultCJKfont}

%
% I don't want to write a huge ifthen construction - I prefer
% to rely on parse passes instead.
%
% \setunicodeblockfont{<block>}{<font>}
%   <block> is the block name exactly as it appears in the macro names
%           (e.g. Hiragana, CJKUnifiedIdeographs),
%   <font>  is the font to bind to that block.
% The Perl code returns the LaTeX source
%   \renewcommand{\unicodeblock<block>Font}{<font>}
% so <block> must name one of the \unicodeblock...Font macros defined in this
% package, otherwise \renewcommand will complain about an undefined command.
\perlnewcommand{\setunicodeblockfont}[2]{
  my ($block, $font) = @_;
  return "\\renewcommand{\\unicodeblock" . $block . "Font}{" . $font . "}";
}

% ------------------------------------------------------------------
%
% in addition to the unicode blocks, also mark the informal groups
%
% ------------------------------------------------------------------
\newcommand{\unicodegroupArabicFont}{\fontwrapdefaultfont}
\newcommand{\unicodegroupChineseFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodegroupCJKFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodegroupCyrillicFont}{\fontwrapdefaultfont}
\newcommand{\unicodegroupDiacriticsFont}{\fontwrapdefaultfont}
\newcommand{\unicodegroupGreekFont}{\fontwrapdefaultfont}
\newcommand{\unicodegroupKoreanFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodegroupJapaneseFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodegroupLatinFont}{\fontwrapdefaultfont}
\newcommand{\unicodegroupMathematicsFont}{\fontwrapdefaultfont}
\newcommand{\unicodegroupPhoneticsFont}{\fontwrapdefaultfont}
\newcommand{\unicodegroupPunctuationFont}{\fontwrapdefaultfont}
\newcommand{\unicodegroupSymbolsFont}{\fontwrapdefaultfont}
% The two Yi blocks (\unicodeblockYiRadicalsFont, \unicodeblockYiSyllablesFont)
% default to the CJK font, so the group default has to agree with them.
\newcommand{\unicodegroupYiFont}{\fontwrapdefaultCJKfont}
\newcommand{\unicodegroupOtherFont}{\fontwrapdefaultfont}

% -------------------------------------------------------------------------------
%
% Then, informal group font binding, to make life just a little easier.
% The following informal groups are supported:
%
%   - Arabic
%   - Chinese (including bopomofo)
%   - CJK (Chinese/Japanese/Korean)
%   - Cyrillic
%   - Diacritics
%   - Greek
%   - Japanese
%   - Korean (=Hangul)
%   - Latin
%   - Mathematics
%   - Phonetics
%   - Punctuation
%   - Symbols
%   - Yi
%   - Other (I am not a fan of lump groups.
%            I hope to un-lump most of it)
%
% -------------------------------------------------------------------------------

% Every \setunicodegroup<Group>Font{<font>} rebinds all block font macros that
% belong to the informal group and records <font> in \unicodegroup<Group>Font.
% Each line inside the definitions ends in % so that the line ends do not put
% space tokens into the macro bodies; without that, calling a setter inside a
% paragraph would typeset stray spaces.

\newcommand{\setunicodegroupArabicFont}[1]{%
  \renewcommand{\unicodeblockArabicFont}{#1}%
  \renewcommand{\unicodeblockArabicPresentationFormsAFont}{#1}%
  \renewcommand{\unicodeblockArabicPresentationFormsBFont}{#1}%
  \renewcommand{\unicodeblockArabicSupplementFont}{#1}%
  \renewcommand{\unicodegroupArabicFont}{#1}%
}

\newcommand{\setunicodegroupChineseFont}[1]{%
  \renewcommand{\unicodeblockBopomofoFont}{#1}%
  \renewcommand{\unicodeblockBopomofoExtendedFont}{#1}%
  \renewcommand{\unicodeblockCJKCompatibilityFont}{#1}%
  \renewcommand{\unicodeblockCJKCompatibilityFormsFont}{#1}%
  \renewcommand{\unicodeblockCJKCompatibilityIdeographsFont}{#1}%
  \renewcommand{\unicodeblockCJKCompatibilityIdeographsSupplementFont}{#1}%
  \renewcommand{\unicodeblockCJKRadicalsSupplementFont}{#1}%
  \renewcommand{\unicodeblockCJKStrokesFont}{#1}%
  \renewcommand{\unicodeblockCJKSymbolsandPunctuationFont}{#1}%
  \renewcommand{\unicodeblockCJKUnifiedIdeographsFont}{#1}%
  \renewcommand{\unicodeblockCJKUnifiedIdeographsExtensionAFont}{#1}%
  \renewcommand{\unicodeblockCJKUnifiedIdeographsExtensionBFont}{#1}%
  \renewcommand{\unicodeblockEnclosedCJKLettersandMonthsFont}{#1}%
  \renewcommand{\unicodeblockIdeographicDescriptionCharactersFont}{#1}%
  \renewcommand{\unicodeblockKangxiRadicalsFont}{#1}%
  \renewcommand{\unicodegroupChineseFont}{#1}%
}

\newcommand{\setunicodegroupCJKFont}[1]{%
  \renewcommand{\unicodeblockBopomofoFont}{#1}%
  \renewcommand{\unicodeblockBopomofoExtendedFont}{#1}%
  \renewcommand{\unicodeblockCJKCompatibilityFont}{#1}%
  \renewcommand{\unicodeblockCJKCompatibilityFormsFont}{#1}%
  \renewcommand{\unicodeblockCJKCompatibilityIdeographsFont}{#1}%
  \renewcommand{\unicodeblockCJKCompatibilityIdeographsSupplementFont}{#1}%
  \renewcommand{\unicodeblockCJKRadicalsSupplementFont}{#1}%
  \renewcommand{\unicodeblockCJKStrokesFont}{#1}%
  \renewcommand{\unicodeblockCJKSymbolsandPunctuationFont}{#1}%
  \renewcommand{\unicodeblockCJKUnifiedIdeographsFont}{#1}%
  \renewcommand{\unicodeblockCJKUnifiedIdeographsExtensionAFont}{#1}%
  \renewcommand{\unicodeblockCJKUnifiedIdeographsExtensionBFont}{#1}%
  \renewcommand{\unicodeblockEnclosedCJKLettersandMonthsFont}{#1}%
  \renewcommand{\unicodeblockHalfwidthandFullwidthFormsFont}{#1}%
  \renewcommand{\unicodeblockHangulCompatibilityJamoFont}{#1}%
  \renewcommand{\unicodeblockHangulJamoFont}{#1}%
  \renewcommand{\unicodeblockHangulSyllablesFont}{#1}%
  \renewcommand{\unicodeblockHiraganaFont}{#1}%
  \renewcommand{\unicodeblockIdeographicDescriptionCharactersFont}{#1}%
  \renewcommand{\unicodeblockKanbunFont}{#1}%
  \renewcommand{\unicodeblockKangxiRadicalsFont}{#1}%
  \renewcommand{\unicodeblockKatakanaFont}{#1}%
  \renewcommand{\unicodeblockKatakanaPhoneticExtensionsFont}{#1}%
  \renewcommand{\unicodegroupCJKFont}{#1}%
}

\newcommand{\setunicodegroupCyrillicFont}[1]{%
  \renewcommand{\unicodeblockCyrillicFont}{#1}%
  \renewcommand{\unicodeblockCyrillicExtendedAFont}{#1}%
  \renewcommand{\unicodeblockCyrillicExtendedBFont}{#1}%
  \renewcommand{\unicodeblockCyrillicSupplementFont}{#1}%
  \renewcommand{\unicodegroupCyrillicFont}{#1}%
}

\newcommand{\setunicodegroupDiacriticsFont}[1]{%
  \renewcommand{\unicodeblockCombiningDiacriticalMarksFont}{#1}%
  \renewcommand{\unicodeblockCombiningDiacriticalMarksforSymbolsFont}{#1}%
  \renewcommand{\unicodeblockCombiningDiacriticalMarksSupplementFont}{#1}%
  \renewcommand{\unicodeblockCombiningHalfMarksFont}{#1}%
  \renewcommand{\unicodegroupDiacriticsFont}{#1}%
}

\newcommand{\setunicodegroupGreekFont}[1]{%
  \renewcommand{\unicodeblockGreekandCopticFont}{#1}%
  \renewcommand{\unicodeblockGreekExtendedFont}{#1}%
  \renewcommand{\unicodegroupGreekFont}{#1}%
}

\newcommand{\setunicodegroupKoreanFont}[1]{%
  \renewcommand{\unicodeblockHangulCompatibilityJamoFont}{#1}%
  \renewcommand{\unicodeblockHangulJamoFont}{#1}%
  \renewcommand{\unicodeblockHangulSyllablesFont}{#1}%
  \renewcommand{\unicodegroupKoreanFont}{#1}%
}

\newcommand{\setunicodegroupJapaneseFont}[1]{%
  \renewcommand{\unicodeblockHalfwidthandFullwidthFormsFont}{#1}%
  \renewcommand{\unicodeblockHiraganaFont}{#1}%
  \renewcommand{\unicodeblockKanbunFont}{#1}%
  \renewcommand{\unicodeblockKatakanaFont}{#1}%
  \renewcommand{\unicodeblockKatakanaPhoneticExtensionsFont}{#1}%
  \renewcommand{\unicodeblockKangxiRadicalsFont}{#1}%
  \renewcommand{\unicodegroupJapaneseFont}{#1}%
}

\newcommand{\setunicodegroupLatinFont}[1]{%
  \renewcommand{\unicodeblockBasicLatinFont}{#1}%
  \renewcommand{\unicodeblockLatinExtendedAdditionalFont}{#1}%
  \renewcommand{\unicodeblockLatinExtendedAFont}{#1}%
  \renewcommand{\unicodeblockLatinExtendedBFont}{#1}%
  \renewcommand{\unicodeblockLatinExtendedCFont}{#1}%
  \renewcommand{\unicodeblockLatinExtendedDFont}{#1}%
  \renewcommand{\unicodeblockLatinSupplementFont}{#1}%
  \renewcommand{\unicodeblockAlphabeticPresentationFormsFont}{#1}%
  \renewcommand{\unicodegroupLatinFont}{#1}%
}

\newcommand{\setunicodegroupMathematicsFont}[1]{%
  \renewcommand{\unicodeblockMathematicalAlphanumericSymbolsFont}{#1}%
  \renewcommand{\unicodeblockMathematicalOperatorsFont}{#1}%
  \renewcommand{\unicodeblockMiscellaneousMathematicalSymbolsAFont}{#1}%
  \renewcommand{\unicodeblockMiscellaneousMathematicalSymbolsBFont}{#1}%
  \renewcommand{\unicodeblockSupplementalMathematicalOperatorsFont}{#1}%
  \renewcommand{\unicodegroupMathematicsFont}{#1}%
}

\newcommand{\setunicodegroupPhoneticsFont}[1]{%
  \renewcommand{\unicodeblockIPAExtensionsFont}{#1}%
  \renewcommand{\unicodeblockPhoneticExtensionsFont}{#1}%
  \renewcommand{\unicodeblockPhoneticExtensionsSupplementFont}{#1}%
  \renewcommand{\unicodegroupPhoneticsFont}{#1}%
}

\newcommand{\setunicodegroupPunctuationFont}[1]{%
  \renewcommand{\unicodeblockGeneralPunctuationFont}{#1}%
  \renewcommand{\unicodeblockSupplementalPunctuationFont}{#1}%
  \renewcommand{\unicodegroupPunctuationFont}{#1}%
}

\newcommand{\setunicodegroupSymbolsFont}[1]{%
  \renewcommand{\unicodeblockArrowsFont}{#1}%
  \renewcommand{\unicodeblockBoxDrawingFont}{#1}%
  \renewcommand{\unicodeblockByzantineMusicalSymbolsFont}{#1}%
  \renewcommand{\unicodeblockControlPicturesFont}{#1}%
  \renewcommand{\unicodeblockCurrencySymbolsFont}{#1}%
  \renewcommand{\unicodeblockGeometricShapesFont}{#1}%
  \renewcommand{\unicodeblockLetterlikeSymbolsFont}{#1}%
  \renewcommand{\unicodeblockMiscellaneousSymbolsFont}{#1}%
  \renewcommand{\unicodeblockMiscellaneousSymbolsandArrowsFont}{#1}%
  \renewcommand{\unicodeblockMiscellaneousTechnicalFont}{#1}%
  \renewcommand{\unicodeblockSupplementalArrowsAFont}{#1}%
  \renewcommand{\unicodeblockSupplementalArrowsBFont}{#1}%
  \renewcommand{\unicodeblockVariationSelectorsFont}{#1}%
  \renewcommand{\unicodeblockVariationSelectorsSupplementFont}{#1}%
  \renewcommand{\unicodegroupSymbolsFont}{#1}%
}

\newcommand{\setunicodegroupYiFont}[1]{%
  \renewcommand{\unicodeblockYiRadicalsFont}{#1}%
  \renewcommand{\unicodeblockYiSyllablesFont}{#1}%
  \renewcommand{\unicodegroupYiFont}{#1}%
}

\newcommand{\setunicodegroupOtherFont}[1]{%
  \renewcommand{\unicodeblockAegeanNumbersFont}{#1}%
  \renewcommand{\unicodeblockAncientGreekMusicalNotationFont}{#1}%
  \renewcommand{\unicodeblockAncientGreekNumbersFont}{#1}%
  \renewcommand{\unicodeblockArmenianFont}{#1}%
  \renewcommand{\unicodeblockBalineseFont}{#1}%
  \renewcommand{\unicodeblockBengaliFont}{#1}%
  \renewcommand{\unicodeblockBlockElementsFont}{#1}%
  \renewcommand{\unicodeblockBraillePatternsFont}{#1}%
  \renewcommand{\unicodeblockBugineseFont}{#1}%
  \renewcommand{\unicodeblockBuhidFont}{#1}%
  \renewcommand{\unicodeblockCherokeeFont}{#1}%
  \renewcommand{\unicodeblockCopticFont}{#1}%
  \renewcommand{\unicodeblockCountingRodNumeralsFont}{#1}%
  \renewcommand{\unicodeblockCuneiformFont}{#1}%
  \renewcommand{\unicodeblockCuneiformNumbersandPunctuationFont}{#1}%
  \renewcommand{\unicodeblockCypriotSyllabaryFont}{#1}%
  \renewcommand{\unicodeblockDeseretFont}{#1}%
  \renewcommand{\unicodeblockDevanagariFont}{#1}%
  \renewcommand{\unicodeblockDingbatsFont}{#1}%
  \renewcommand{\unicodeblockDominoTilesFont}{#1}%
  \renewcommand{\unicodeblockEnclosedAlphanumericsFont}{#1}%
  \renewcommand{\unicodeblockEthiopicFont}{#1}%
  \renewcommand{\unicodeblockEthiopicExtendedFont}{#1}%
  \renewcommand{\unicodeblockEthiopicSupplementFont}{#1}%
  \renewcommand{\unicodeblockGeorgianFont}{#1}%
  \renewcommand{\unicodeblockGeorgianSupplementFont}{#1}%
  \renewcommand{\unicodeblockGlagoliticFont}{#1}%
  \renewcommand{\unicodeblockGothicFont}{#1}%
  \renewcommand{\unicodeblockGujaratiFont}{#1}%
  \renewcommand{\unicodeblockGurmukhiFont}{#1}%
  \renewcommand{\unicodeblockHanunooFont}{#1}%
  \renewcommand{\unicodeblockHebrewFont}{#1}%
  \renewcommand{\unicodeblockHighPrivateUseSurrogatesFont}{#1}%
  \renewcommand{\unicodeblockHighSurrogatesFont}{#1}%
  \renewcommand{\unicodeblockKannadaFont}{#1}%
  \renewcommand{\unicodeblockKharoshthiFont}{#1}%
  \renewcommand{\unicodeblockKhmerFont}{#1}%
  \renewcommand{\unicodeblockKhmerSymbolsFont}{#1}%
  \renewcommand{\unicodeblockLaoFont}{#1}%
  \renewcommand{\unicodeblockLimbuFont}{#1}%
  \renewcommand{\unicodeblockLinearBIdeogramsFont}{#1}%
  \renewcommand{\unicodeblockLinearBSyllabaryFont}{#1}%
  \renewcommand{\unicodeblockLowSurrogatesFont}{#1}%
  \renewcommand{\unicodeblockMahjongTilesFont}{#1}%
  \renewcommand{\unicodeblockMalayalamFont}{#1}%
  \renewcommand{\unicodeblockModifierToneLettersFont}{#1}%
  \renewcommand{\unicodeblockMongolianFont}{#1}%
  \renewcommand{\unicodeblockMusicalSymbolsFont}{#1}%
  \renewcommand{\unicodeblockMyanmarFont}{#1}%
  \renewcommand{\unicodeblockNewTaiLueFont}{#1}%
  \renewcommand{\unicodeblockNKoFont}{#1}%
  \renewcommand{\unicodeblockNumberFormsFont}{#1}%
  \renewcommand{\unicodeblockOghamFont}{#1}%
  \renewcommand{\unicodeblockOldItalicFont}{#1}%
  \renewcommand{\unicodeblockOldPersianFont}{#1}%
  \renewcommand{\unicodeblockOpticalCharacterRecognitionFont}{#1}%
  \renewcommand{\unicodeblockOriyaFont}{#1}%
  \renewcommand{\unicodeblockOsmanyaFont}{#1}%
  \renewcommand{\unicodeblockPhagsPaFont}{#1}%
  \renewcommand{\unicodeblockPhoenicianFont}{#1}%
  \renewcommand{\unicodeblockPrivateUseAreaFont}{#1}%
  \renewcommand{\unicodeblockRunicFont}{#1}%
  \renewcommand{\unicodeblockShavianFont}{#1}%
  \renewcommand{\unicodeblockSinhalaFont}{#1}%
  \renewcommand{\unicodeblockSmallFormVariantsFont}{#1}%
  \renewcommand{\unicodeblockSpacingModifierLettersFont}{#1}%
  \renewcommand{\unicodeblockSpecialsFont}{#1}%
  \renewcommand{\unicodeblockSuperscriptsandSubscriptsFont}{#1}%
  \renewcommand{\unicodeblockSupplementaryPrivateUseAreaAFont}{#1}%
  \renewcommand{\unicodeblockSupplementaryPrivateUseAreaBFont}{#1}%
  \renewcommand{\unicodeblockSylotiNagriFont}{#1}%
  \renewcommand{\unicodeblockSyriacFont}{#1}%
  \renewcommand{\unicodeblockTagalogFont}{#1}%
  \renewcommand{\unicodeblockTagbanwaFont}{#1}%
  \renewcommand{\unicodeblockTagsFont}{#1}%
  \renewcommand{\unicodeblockTaiLeFont}{#1}%
  \renewcommand{\unicodeblockTaiXuanJingSymbolsFont}{#1}%
  \renewcommand{\unicodeblockTamilFont}{#1}%
  \renewcommand{\unicodeblockTeluguFont}{#1}%
  \renewcommand{\unicodeblockThaanaFont}{#1}%
  \renewcommand{\unicodeblockThaiFont}{#1}%
  \renewcommand{\unicodeblockTibetanFont}{#1}%
  \renewcommand{\unicodeblockTifinaghFont}{#1}%
  \renewcommand{\unicodeblockUgariticFont}{#1}%
  \renewcommand{\unicodeblockUnifiedCanadianAboriginalSyllabicsFont}{#1}%
  \renewcommand{\unicodeblockVerticalFormsFont}{#1}%
  \renewcommand{\unicodeblockYijingHexagramSymbolsFont}{#1}%
  \renewcommand{\unicodegroupOtherFont}{#1}%
}

% \setfontwrapdefaultfont{<font>} binds <font> to every informal group.
% The CJK setter already rebinds every block of the Chinese, Japanese and
% Korean groups; those three setters are called as well so that their
% \unicodegroup...Font macros do not keep an older font.
\newcommand{\setfontwrapdefaultfont}[1]{%
  \setunicodegroupArabicFont{#1}%
  \setunicodegroupCJKFont{#1}%
  \setunicodegroupChineseFont{#1}%
  \setunicodegroupJapaneseFont{#1}%
  \setunicodegroupKoreanFont{#1}%
  \setunicodegroupCyrillicFont{#1}%
  \setunicodegroupDiacriticsFont{#1}%
  \setunicodegroupGreekFont{#1}%
  \setunicodegroupLatinFont{#1}%
  \setunicodegroupMathematicsFont{#1}%
  \setunicodegroupPhoneticsFont{#1}%
  \setunicodegroupPunctuationFont{#1}%
  \setunicodegroupSymbolsFont{#1}%
  \setunicodegroupYiFont{#1}%
  \setunicodegroupOtherFont{#1}%
}

%
% I don't want to write a huge ifthen construction here either
%
% \setunicodegroupfont{<group>}{<font>}
%   <group> is one of the informal group names used in the setter macro names
%           above (Arabic, Chinese, CJK, ...),
%   <font>  is the font to bind to that group.
% The Perl code returns the LaTeX source \setunicodegroup<group>Font{<font>}.
\perlnewcommand{\setunicodegroupfont}[2]{
  my ($group, $font) = @_;
  return "\\setunicodegroup" . $group . "Font{" . $font . "}";
}

% ------------------------------------------------------------------------------------
%
% then, the actual block data definitions.
% These consist of a comma delimited list of:
%
%   - unicode block name
%   - informal group the block is in
%   - codepoint of the first glyph in the block, in hexadecimal notation
%   - codepoint of the last glyph in the block, in hexadecimal notation
%   - number of glyphs in the block
%   - font macro, which is passed as macro argument
%
% NOTE(review): some group fields below (Ligatures, Bopomofo) are not among the
% group names used by the \setunicodegroup...Font macros - confirm against the
% code that consumes these records.
%
% ------------------------------------------------------------------------------------
\newcommand{\unicodeblockAegeanNumbers}[1]{Aegean Numbers,Other,10100,1013F,57,#1}
\newcommand{\unicodeblockAlphabeticPresentationForms}[1]{Alphabetic Presentation Forms,Ligatures,FB00,FB4F,58,#1}
\newcommand{\unicodeblockAncientGreekMusicalNotation}[1]{Ancient Greek Musical Notation,Other,1D200,1D24F,70,#1}
\newcommand{\unicodeblockAncientGreekNumbers}[1]{Ancient Greek Numbers,Other,10140,1018F,75,#1}
\newcommand{\unicodeblockArabic}[1]{Arabic,Arabic,0600,06FF,235,#1}
\newcommand{\unicodeblockArabicPresentationFormsA}[1]{Arabic Presentation Forms-A,Arabic,FB50,FDFF,595,#1}
\newcommand{\unicodeblockArabicPresentationFormsB}[1]{Arabic Presentation Forms-B,Arabic,FE70,FEFF,141,#1}
\newcommand{\unicodeblockArabicSupplement}[1]{Arabic Supplement,Arabic,0750,077F,30,#1}
\newcommand{\unicodeblockArmenian}[1]{Armenian,Other,0530,058F,86,#1}
\newcommand{\unicodeblockArrows}[1]{Arrows,Symbols,2190,21FF,112,#1}
\newcommand{\unicodeblockBalinese}[1]{Balinese,Other,1B00,1B7F,121,#1}
\newcommand{\unicodeblockBasicLatin}[1]{Basic Latin,Latin,0000,007F,128,#1}
\newcommand{\unicodeblockBengali}[1]{Bengali,Other,0980,09FF,91,#1}
\newcommand{\unicodeblockBlockElements}[1]{Block Elements,Other,2580,259F,32,#1}
\newcommand{\unicodeblockBopomofo}[1]{Bopomofo,Bopomofo,3100,312F,40,#1}
\newcommand{\unicodeblockBopomofoExtended}[1]{Bopomofo Extended,Bopomofo,31A0,31BF,24,#1}
\newcommand{\unicodeblockBoxDrawing}[1]{Box Drawing,Symbols,2500,257F,128,#1}
\newcommand{\unicodeblockBraillePatterns}[1]{Braille Patterns,Other,2800,28FF,256,#1}
% Block data records, Buginese through Coptic. Fields: block name, informal
% group, first codepoint (hex), last codepoint (hex), glyph count, font (#1).
\newcommand{\unicodeblockBuginese}[1]{Buginese,Other,1A00,1A1F,30,#1}
\newcommand{\unicodeblockBuhid}[1]{Buhid,Other,1740,175F,20,#1}
\newcommand{\unicodeblockByzantineMusicalSymbols}[1]{Byzantine Musical Symbols,Symbols,1D000,1D0FF,246,#1}
\newcommand{\unicodeblockCherokee}[1]{Cherokee,Other,13A0,13FF,85,#1}
\newcommand{\unicodeblockCJKCompatibility}[1]{CJK Compatibility,CJK,3300,33FF,256,#1}
\newcommand{\unicodeblockCJKCompatibilityForms}[1]{CJK Compatibility Forms,CJK,FE30,FE4F,32,#1}
\newcommand{\unicodeblockCJKCompatibilityIdeographs}[1]{CJK Compatibility Ideographs,CJK,F900,FAFF,467,#1}
\newcommand{\unicodeblockCJKCompatibilityIdeographsSupplement}[1]{CJK Compatibility Ideographs Supplement,CJK,2F800,2FA1F,542,#1}
\newcommand{\unicodeblockCJKRadicalsSupplement}[1]{CJK Radicals Supplement,CJK,2E80,2EFF,115,#1}
\newcommand{\unicodeblockCJKStrokes}[1]{CJK Strokes,CJK,31C0,31EF,16,#1}
\newcommand{\unicodeblockCJKSymbolsandPunctuation}[1]{CJK Symbols and Punctuation,CJK,3000,303F,64,#1}
\newcommand{\unicodeblockCJKUnifiedIdeographs}[1]{CJK Unified Ideographs,CJK,4E00,9FFF,20924,#1}
\newcommand{\unicodeblockCJKUnifiedIdeographsExtensionA}[1]{CJK Unified Ideographs Extension A,CJK,3400,4DBF,6582,#1}
\newcommand{\unicodeblockCJKUnifiedIdeographsExtensionB}[1]{CJK Unified Ideographs Extension B,CJK,20000,2A6DF,42711,#1}
\newcommand{\unicodeblockCombiningDiacriticalMarks}[1]{Combining Diacritical Marks,Diacritics,0300,036F,112,#1}
\newcommand{\unicodeblockCombiningDiacriticalMarksforSymbols}[1]{Combining Diacritical Marks for Symbols,Diacritics,20D0,20FF,32,#1}
\newcommand{\unicodeblockCombiningDiacriticalMarksSupplement}[1]{Combining Diacritical Marks Supplement,Diacritics,1DC0,1DFF,13,#1}
\newcommand{\unicodeblockCombiningHalfMarks}[1]{Combining Half Marks,Diacritics,FE20,FE2F,4,#1}
\newcommand{\unicodeblockControlPictures}[1]{Control Pictures,Symbols,2400,243F,39,#1}
\newcommand{\unicodeblockCoptic}[1]{Coptic,Other,2C80,2CFF,114,#1}
% Block data records, Counting Rod Numerals through Glagolitic. Fields: block
% name, informal group, first codepoint (hex), last codepoint (hex), glyph
% count, font (#1).
\newcommand{\unicodeblockCountingRodNumerals}[1]{Counting Rod Numerals,Other,1D360,1D37F,18,#1}
\newcommand{\unicodeblockCuneiform}[1]{Cuneiform,Other,12000,123FF,879,#1}
\newcommand{\unicodeblockCuneiformNumbersandPunctuation}[1]{Cuneiform Numbers and Punctuation,Other,12400,1247F,103,#1}
\newcommand{\unicodeblockCurrencySymbols}[1]{Currency Symbols,Symbols,20A0,20CF,22,#1}
\newcommand{\unicodeblockCypriotSyllabary}[1]{Cypriot Syllabary,Other,10800,1083F,55,#1}
\newcommand{\unicodeblockCyrillic}[1]{Cyrillic,Cyrillic,0400,04FF,255,#1}
\newcommand{\unicodeblockCyrillicExtendedA}[1]{Cyrillic Extended-A,Cyrillic,2DE0,2DFF,32,#1}
% end codepoint written in upper case like every other codepoint in this table
\newcommand{\unicodeblockCyrillicExtendedB}[1]{Cyrillic Extended-B,Cyrillic,A640,A69F,78,#1}
\newcommand{\unicodeblockCyrillicSupplement}[1]{Cyrillic Supplement,Cyrillic,0500,052F,20,#1}
\newcommand{\unicodeblockDeseret}[1]{Deseret,Other,10400,1044F,80,#1}
\newcommand{\unicodeblockDevanagari}[1]{Devanagari,Other,0900,097F,110,#1}
\newcommand{\unicodeblockDingbats}[1]{Dingbats,Other,2700,27BF,174,#1}
\newcommand{\unicodeblockDominoTiles}[1]{Domino Tiles,Other,1F030,1F09F,100,#1}
\newcommand{\unicodeblockEnclosedAlphanumerics}[1]{Enclosed Alphanumerics,Other,2460,24FF,160,#1}
\newcommand{\unicodeblockEnclosedCJKLettersandMonths}[1]{Enclosed CJK Letters and Months,CJK,3200,32FF,242,#1}
\newcommand{\unicodeblockEthiopic}[1]{Ethiopic,Other,1200,137F,356,#1}
\newcommand{\unicodeblockEthiopicExtended}[1]{Ethiopic Extended,Other,2D80,2DDF,79,#1}
\newcommand{\unicodeblockEthiopicSupplement}[1]{Ethiopic Supplement,Other,1380,139F,26,#1}
\newcommand{\unicodeblockGeneralPunctuation}[1]{General Punctuation,Punctuation,2000,206F,106,#1}
\newcommand{\unicodeblockGeometricShapes}[1]{Geometric Shapes,Symbols,25A0,25FF,96,#1}
\newcommand{\unicodeblockGeorgian}[1]{Georgian,Other,10A0,10FF,83,#1}
\newcommand{\unicodeblockGeorgianSupplement}[1]{Georgian Supplement,Other,2D00,2D2F,38,#1}
\newcommand{\unicodeblockGlagolitic}[1]{Glagolitic,Other,2C00,2C5F,94,#1}
\newcommand{\unicodeblockGothic}[1]{Gothic,Other,10330,1034F,27,#1} \newcommand{\unicodeblockGreekandCoptic}[1]{Greek and Coptic,Greek,0370,03FF,127,#1} \newcommand{\unicodeblockGreekExtended}[1]{Greek Extended,Greek,1F00,1FFF,233,#1} \newcommand{\unicodeblockGujarati}[1]{Gujarati,Other,0A80,0AFF,83,#1} \newcommand{\unicodeblockGurmukhi}[1]{Gurmukhi,Other,0A00,0A7F,77,#1} \newcommand{\unicodeblockHalfwidthandFullwidthForms}[1]{Halfwidth and Fullwidth Forms,CJK,FF00,FFEF,225,#1} \newcommand{\unicodeblockHangulCompatibilityJamo}[1]{Hangul Compatibility Jamo,Hangul,3130,318F,94,#1} \newcommand{\unicodeblockHangulJamo}[1]{Hangul Jamo,Hangul,1100,11FF,240,#1} \newcommand{\unicodeblockHangulSyllables}[1]{Hangul Syllables,Hangul,AC00,D7AF,2,#1} \newcommand{\unicodeblockHanunoo}[1]{Hanunoo,Other,1720,173F,23,#1} \newcommand{\unicodeblockHebrew}[1]{Hebrew,Other,0590,05FF,87,#1} \newcommand{\unicodeblockHighPrivateUseSurrogates}[1]{High Private Use Surrogates,Other,DB80,DBFF,2,#1} \newcommand{\unicodeblockHighSurrogates}[1]{High Surrogates,Other,D800,DB7F,2,#1} \newcommand{\unicodeblockHiragana}[1]{Hiragana,CJK,3040,309F,93,#1} \newcommand{\unicodeblockIdeographicDescriptionCharacters}[1]{Ideographic Description Characters,CJK,2FF0,2FFF,12,#1} \newcommand{\unicodeblockIPAExtensions}[1]{IPA Extensions,Phonetics,0250,02AF,96,#1} \newcommand{\unicodeblockKanbun}[1]{Kanbun,CJK,3190,319F,16,#1} \newcommand{\unicodeblockKangxiRadicals}[1]{Kangxi Radicals,CJK,2F00,2FDF,214,#1} \newcommand{\unicodeblockKannada}[1]{Kannada,Other,0C80,0CFF,86,#1} \newcommand{\unicodeblockKatakana}[1]{Katakana,CJK,30A0,30FF,96,#1} \newcommand{\unicodeblockKatakanaPhoneticExtensions}[1]{Katakana Phonetic Extensions,CJK,31F0,31FF,16,#1} \newcommand{\unicodeblockKharoshthi}[1]{Kharoshthi,Other,10A00,10A5F,65,#1} \newcommand{\unicodeblockKhmer}[1]{Khmer,Other,1780,17FF,114,#1} \newcommand{\unicodeblockKhmerSymbols}[1]{Khmer Symbols,Other,19E0,19FF,32,#1} 
\newcommand{\unicodeblockLao}[1]{Lao,Other,0E80,0EFF,65,#1} \newcommand{\unicodeblockLatinExtendedAdditional}[1]{Latin Extended Additional,Latin,1E00,1EFF,246,#1} \newcommand{\unicodeblockLatinExtendedA}[1]{Latin Extended-A,Latin,0100,017F,128,#1} \newcommand{\unicodeblockLatinExtendedB}[1]{Latin Extended-B,Latin,0180,024F,208,#1} \newcommand{\unicodeblockLatinExtendedC}[1]{Latin Extended-C,Latin,2C60,2C7F,17,#1} \newcommand{\unicodeblockLatinExtendedD}[1]{Latin Extended-D,Latin,A720,A7FF,2,#1} \newcommand{\unicodeblockLatinSupplement}[1]{Latin-1 Supplement,Latin,0080,00FF,128,#1} \newcommand{\unicodeblockLetterlikeSymbols}[1]{Letterlike Symbols,Symbols,2100,214F,79,#1} \newcommand{\unicodeblockLimbu}[1]{Limbu,Other,1900,194F,66,#1} \newcommand{\unicodeblockLinearBIdeograms}[1]{Linear B Ideograms,Other,10080,100FF,123,#1} \newcommand{\unicodeblockLinearBSyllabary}[1]{Linear B Syllabary,Other,10000,1007F,88,#1} \newcommand{\unicodeblockLowSurrogates}[1]{Low Surrogates,Other,DC00,DFFF,2,#1} \newcommand{\unicodeblockMahjongTiles}[1]{Mahjong Tiles,Other,1F000,1F02F,44,#1} \newcommand{\unicodeblockMalayalam}[1]{Malayalam,Other,0D00,0D7F,78,#1} \newcommand{\unicodeblockMathematicalAlphanumericSymbols}[1]{Mathematical Alphanumeric Symbols,Mathematics,1D400,1D7FF,996,#1} \newcommand{\unicodeblockMathematicalOperators}[1]{Mathematical Operators,Mathematics,2200,22FF,256,#1} \newcommand{\unicodeblockMiscellaneousMathematicalSymbolsA}[1]{Miscellaneous Mathematical Symbols-A,Mathematics,27C0,27EF,39,#1} \newcommand{\unicodeblockMiscellaneousMathematicalSymbolsB}[1]{Miscellaneous Mathematical Symbols-B,Mathematics,2980,29FF,128,#1} \newcommand{\unicodeblockMiscellaneousSymbols}[1]{Miscellaneous Symbols,Symbols,2600,26FF,176,#1} \newcommand{\unicodeblockMiscellaneousSymbolsandArrows}[1]{Miscellaneous Symbols and Arrows,Symbols,2B00,2BFF,31,#1} \newcommand{\unicodeblockMiscellaneousTechnical}[1]{Miscellaneous Technical,Symbols,2300,23FF,232,#1} 
\newcommand{\unicodeblockModifierToneLetters}[1]{Modifier Tone Letters,Other,A700,A71F,27,#1} \newcommand{\unicodeblockMongolian}[1]{Mongolian,Other,1800,18AF,155,#1} \newcommand{\unicodeblockMusicalSymbols}[1]{Musical Symbols,Other,1D100,1D1FF,219,#1} \newcommand{\unicodeblockMyanmar}[1]{Myanmar,Other,1000,109F,78,#1} \newcommand{\unicodeblockNewTaiLue}[1]{New Tai Lue,Other,1980,19DF,80,#1} \newcommand{\unicodeblockNKo}[1]{NKo,Other,07C0,07FF,59,#1} \newcommand{\unicodeblockNumberForms}[1]{Number Forms,Other,2150,218F,50,#1} \newcommand{\unicodeblockOgham}[1]{Ogham,Other,1680,169F,29,#1} \newcommand{\unicodeblockOldItalic}[1]{Old Italic,Other,10300,1032F,35,#1} \newcommand{\unicodeblockOldPersian}[1]{Old Persian,Other,103A0,103DF,50,#1} \newcommand{\unicodeblockOpticalCharacterRecognition}[1]{Optical Character Recognition,Other,2440,245F,11,#1} \newcommand{\unicodeblockOriya}[1]{Oriya,Other,0B00,0B7F,81,#1} \newcommand{\unicodeblockOsmanya}[1]{Osmanya,Other,10480,104AF,40,#1} \newcommand{\unicodeblockPhagsPa}[1]{PhagsPa,Other,A840,A87F,56,#1} \newcommand{\unicodeblockPhoenician}[1]{Phoenician,Other,10900,1091F,27,#1} \newcommand{\unicodeblockPhoneticExtensions}[1]{Phonetic Extensions,Phonetics,1D00,1D7F,128,#1} \newcommand{\unicodeblockPhoneticExtensionsSupplement}[1]{Phonetic Extensions Supplement,Phonetics,1D80,1DBF,64,#1} \newcommand{\unicodeblockPrivateUseArea}[1]{Private Use Area,Other,E000,F8FF,2,#1} \newcommand{\unicodeblockRunic}[1]{Runic,Other,16A0,16FF,81,#1} \newcommand{\unicodeblockShavian}[1]{Shavian,Other,10450,1047F,48,#1} \newcommand{\unicodeblockSinhala}[1]{Sinhala,Other,0D80,0DFF,80,#1} \newcommand{\unicodeblockSmallFormVariants}[1]{Small Form Variants,Other,FE50,FE6F,26,#1} \newcommand{\unicodeblockSpacingModifierLetters}[1]{Spacing Modifier Letters,Other,02B0,02FF,80,#1} \newcommand{\unicodeblockSpecials}[1]{Specials,Other,FFF0,FFFF,5,#1} \newcommand{\unicodeblockSuperscriptsandSubscripts}[1]{Superscripts and Subscripts,Other,2070,209F,34,#1} 
\newcommand{\unicodeblockSupplementalArrowsA}[1]{Supplemental Arrows-A,Symbols,27F0,27FF,16,#1} \newcommand{\unicodeblockSupplementalArrowsB}[1]{Supplemental Arrows-B,Symbols,2900,297F,128,#1} \newcommand{\unicodeblockSupplementalMathematicalOperators}[1]{Supplemental Mathematical Operators,Mathematics,2A00,2AFF,256,#1} \newcommand{\unicodeblockSupplementalPunctuation}[1]{Supplemental Punctuation,Punctuation,2E00,2E7F,26,#1} \newcommand{\unicodeblockSupplementaryPrivateUseAreaA}[1]{Supplementary Private Use Area-A,Other,F0000,FFFFF,2,#1} \newcommand{\unicodeblockSupplementaryPrivateUseAreaB}[1]{Supplementary Private Use Area-B,Other,100000,10FFFF,2,#1} \newcommand{\unicodeblockSylotiNagri}[1]{Syloti Nagri,Other,A800,A82F,44,#1} \newcommand{\unicodeblockSyriac}[1]{Syriac,Other,0700,074F,77,#1} \newcommand{\unicodeblockTagalog}[1]{Tagalog,Other,1700,171F,20,#1} \newcommand{\unicodeblockTagbanwa}[1]{Tagbanwa,Other,1760,177F,18,#1} \newcommand{\unicodeblockTags}[1]{Tags,Other,E0000,E007F,97,#1} \newcommand{\unicodeblockTaiLe}[1]{Tai Le,Other,1950,197F,35,#1} \newcommand{\unicodeblockTaiXuanJingSymbols}[1]{Tai Xuan Jing Symbols,Other,1D300,1D35F,87,#1} \newcommand{\unicodeblockTamil}[1]{Tamil,Other,0B80,0BFF,71,#1} \newcommand{\unicodeblockTelugu}[1]{Telugu,Other,0C00,0C7F,80,#1} \newcommand{\unicodeblockThaana}[1]{Thaana,Other,0780,07BF,50,#1} \newcommand{\unicodeblockThai}[1]{Thai,Other,0E00,0E7F,87,#1} \newcommand{\unicodeblockTibetan}[1]{Tibetan,Other,0F00,0FFF,195,#1} \newcommand{\unicodeblockTifinagh}[1]{Tifinagh,Other,2D30,2D7F,55,#1} \newcommand{\unicodeblockUgaritic}[1]{Ugaritic,Other,10380,1039F,31,#1} \newcommand{\unicodeblockUnifiedCanadianAboriginalSyllabics}[1]{Unified Canadian Aboriginal Syllabics,Other,1400,167F,630,#1} \newcommand{\unicodeblockVariationSelectors}[1]{Variation Selectors,Symbols,FE00,FE0F,16,#1} \newcommand{\unicodeblockVariationSelectorsSupplement}[1]{Variation Selectors Supplement,Symbols,E0100,E01EF,240,#1} 
\newcommand{\unicodeblockVerticalForms}[1]{Vertical Forms,Other,FE10,FE1F,10,#1}
\newcommand{\unicodeblockYiRadicals}[1]{Yi Radicals,Yi,A490,A4CF,55,#1}
\newcommand{\unicodeblockYiSyllables}[1]{Yi Syllables,Yi,A000,A48F,1165,#1}
\newcommand{\unicodeblockYijingHexagramSymbols}[1]{Yijing Hexagram Symbols,Other,4DC0,4DFF,64,#1}

% -------------------------------------------------------
%
% And finally, the macro that generates the whole lot:
% it expands to one data record per unicode block, each
% record carrying the font currently bound to that block.
%
% The Perl code splits the expanded text on \par, so the
% entries have to stay separated by blank lines.
%
% -------------------------------------------------------
\newcommand{\autfontunicodedata}{
% these first four blocks have been ordered for fast resolution by the perl script,
% because the list of blocks is checked in order.
\unicodeblockBasicLatin{ \unicodeblockBasicLatinFont }

\unicodeblockHiragana{ \unicodeblockHiraganaFont }

\unicodeblockCJKUnifiedIdeographs{ \unicodeblockCJKUnifiedIdeographsFont }

\unicodeblockKatakana{ \unicodeblockKatakanaFont }

\unicodeblockAegeanNumbers{ \unicodeblockAegeanNumbersFont }

\unicodeblockAlphabeticPresentationForms{ \unicodeblockAlphabeticPresentationFormsFont }

\unicodeblockAncientGreekMusicalNotation{ \unicodeblockAncientGreekMusicalNotationFont }

\unicodeblockAncientGreekNumbers{ \unicodeblockAncientGreekNumbersFont }

\unicodeblockArabic{ \unicodeblockArabicFont }

\unicodeblockArabicPresentationFormsA{ \unicodeblockArabicPresentationFormsAFont }

\unicodeblockArabicPresentationFormsB{ \unicodeblockArabicPresentationFormsBFont }

\unicodeblockArabicSupplement{ \unicodeblockArabicSupplementFont }

\unicodeblockArmenian{ \unicodeblockArmenianFont }

\unicodeblockArrows{ \unicodeblockArrowsFont }

\unicodeblockBalinese{ \unicodeblockBalineseFont }

\unicodeblockBengali{ \unicodeblockBengaliFont }

\unicodeblockBlockElements{ \unicodeblockBlockElementsFont }

\unicodeblockBopomofo{ \unicodeblockBopomofoFont }

\unicodeblockBopomofoExtended{ \unicodeblockBopomofoExtendedFont }

\unicodeblockBoxDrawing{ \unicodeblockBoxDrawingFont }

\unicodeblockBraillePatterns{ \unicodeblockBraillePatternsFont }

\unicodeblockBuginese{ \unicodeblockBugineseFont }

\unicodeblockBuhid{ \unicodeblockBuhidFont }

\unicodeblockByzantineMusicalSymbols{ \unicodeblockByzantineMusicalSymbolsFont }

\unicodeblockCherokee{ \unicodeblockCherokeeFont }

\unicodeblockCJKCompatibility{ \unicodeblockCJKCompatibilityFont }

\unicodeblockCJKCompatibilityForms{ \unicodeblockCJKCompatibilityFormsFont }

\unicodeblockCJKCompatibilityIdeographs{ \unicodeblockCJKCompatibilityIdeographsFont }

\unicodeblockCJKCompatibilityIdeographsSupplement{ \unicodeblockCJKCompatibilityIdeographsSupplementFont }

\unicodeblockCJKRadicalsSupplement{ \unicodeblockCJKRadicalsSupplementFont }

\unicodeblockCJKStrokes{ \unicodeblockCJKStrokesFont }

\unicodeblockCJKSymbolsandPunctuation{ \unicodeblockCJKSymbolsandPunctuationFont }

\unicodeblockCJKUnifiedIdeographsExtensionA{ \unicodeblockCJKUnifiedIdeographsExtensionAFont }

\unicodeblockCJKUnifiedIdeographsExtensionB{ \unicodeblockCJKUnifiedIdeographsExtensionBFont }

\unicodeblockCombiningDiacriticalMarks{ \unicodeblockCombiningDiacriticalMarksFont }

\unicodeblockCombiningDiacriticalMarksforSymbols{ \unicodeblockCombiningDiacriticalMarksforSymbolsFont }

\unicodeblockCombiningDiacriticalMarksSupplement{ \unicodeblockCombiningDiacriticalMarksSupplementFont }

\unicodeblockCombiningHalfMarks{ \unicodeblockCombiningHalfMarksFont }

\unicodeblockControlPictures{ \unicodeblockControlPicturesFont }

\unicodeblockCoptic{ \unicodeblockCopticFont }

\unicodeblockCountingRodNumerals{ \unicodeblockCountingRodNumeralsFont }

\unicodeblockCuneiform{ \unicodeblockCuneiformFont }

\unicodeblockCuneiformNumbersandPunctuation{ \unicodeblockCuneiformNumbersandPunctuationFont }

\unicodeblockCurrencySymbols{ \unicodeblockCurrencySymbolsFont }

\unicodeblockCypriotSyllabary{ \unicodeblockCypriotSyllabaryFont }

\unicodeblockCyrillic{ \unicodeblockCyrillicFont }

\unicodeblockCyrillicExtendedA{ \unicodeblockCyrillicExtendedAFont }

\unicodeblockCyrillicExtendedB{ \unicodeblockCyrillicExtendedBFont }

\unicodeblockCyrillicSupplement{ \unicodeblockCyrillicSupplementFont }

\unicodeblockDeseret{ \unicodeblockDeseretFont }

\unicodeblockDevanagari{ \unicodeblockDevanagariFont }

\unicodeblockDingbats{ \unicodeblockDingbatsFont }

\unicodeblockDominoTiles{ \unicodeblockDominoTilesFont }

\unicodeblockEnclosedAlphanumerics{ \unicodeblockEnclosedAlphanumericsFont }

\unicodeblockEnclosedCJKLettersandMonths{ \unicodeblockEnclosedCJKLettersandMonthsFont }

\unicodeblockEthiopic{ \unicodeblockEthiopicFont }

\unicodeblockEthiopicExtended{ \unicodeblockEthiopicExtendedFont }

\unicodeblockEthiopicSupplement{ \unicodeblockEthiopicSupplementFont }

\unicodeblockGeneralPunctuation{ \unicodeblockGeneralPunctuationFont }

\unicodeblockGeometricShapes{ \unicodeblockGeometricShapesFont }

\unicodeblockGeorgian{ \unicodeblockGeorgianFont }

\unicodeblockGeorgianSupplement{ \unicodeblockGeorgianSupplementFont }

\unicodeblockGlagolitic{ \unicodeblockGlagoliticFont }

\unicodeblockGothic{ \unicodeblockGothicFont }

\unicodeblockGreekandCoptic{ \unicodeblockGreekandCopticFont }

\unicodeblockGreekExtended{ \unicodeblockGreekExtendedFont }

\unicodeblockGujarati{ \unicodeblockGujaratiFont }

\unicodeblockGurmukhi{ \unicodeblockGurmukhiFont }

\unicodeblockHalfwidthandFullwidthForms{ \unicodeblockHalfwidthandFullwidthFormsFont }

\unicodeblockHangulCompatibilityJamo{ \unicodeblockHangulCompatibilityJamoFont }

\unicodeblockHangulJamo{ \unicodeblockHangulJamoFont }

\unicodeblockHangulSyllables{ \unicodeblockHangulSyllablesFont }

\unicodeblockHanunoo{ \unicodeblockHanunooFont }

\unicodeblockHebrew{ \unicodeblockHebrewFont }

\unicodeblockHighPrivateUseSurrogates{ \unicodeblockHighPrivateUseSurrogatesFont }

\unicodeblockHighSurrogates{ \unicodeblockHighSurrogatesFont }

\unicodeblockIdeographicDescriptionCharacters{ \unicodeblockIdeographicDescriptionCharactersFont }

\unicodeblockIPAExtensions{ \unicodeblockIPAExtensionsFont }

\unicodeblockKanbun{ \unicodeblockKanbunFont }

\unicodeblockKangxiRadicals{ \unicodeblockKangxiRadicalsFont }

\unicodeblockKannada{ \unicodeblockKannadaFont }

\unicodeblockKatakanaPhoneticExtensions{ \unicodeblockKatakanaPhoneticExtensionsFont }

\unicodeblockKharoshthi{ \unicodeblockKharoshthiFont }

\unicodeblockKhmer{ \unicodeblockKhmerFont }

\unicodeblockKhmerSymbols{ \unicodeblockKhmerSymbolsFont }

\unicodeblockLao{ \unicodeblockLaoFont }

\unicodeblockLatinExtendedAdditional{ \unicodeblockLatinExtendedAdditionalFont }

\unicodeblockLatinExtendedA{ \unicodeblockLatinExtendedAFont }

\unicodeblockLatinExtendedB{ \unicodeblockLatinExtendedBFont }

\unicodeblockLatinExtendedC{ \unicodeblockLatinExtendedCFont }

\unicodeblockLatinExtendedD{ \unicodeblockLatinExtendedDFont }

\unicodeblockLatinSupplement{ \unicodeblockLatinSupplementFont }

\unicodeblockLetterlikeSymbols{ \unicodeblockLetterlikeSymbolsFont }

\unicodeblockLimbu{ \unicodeblockLimbuFont }

\unicodeblockLinearBIdeograms{ \unicodeblockLinearBIdeogramsFont }

\unicodeblockLinearBSyllabary{ \unicodeblockLinearBSyllabaryFont }

\unicodeblockLowSurrogates{ \unicodeblockLowSurrogatesFont }

\unicodeblockMahjongTiles{ \unicodeblockMahjongTilesFont }

\unicodeblockMalayalam{ \unicodeblockMalayalamFont }

\unicodeblockMathematicalAlphanumericSymbols{ \unicodeblockMathematicalAlphanumericSymbolsFont }

\unicodeblockMathematicalOperators{ \unicodeblockMathematicalOperatorsFont }

\unicodeblockMiscellaneousMathematicalSymbolsA{ \unicodeblockMiscellaneousMathematicalSymbolsAFont }

\unicodeblockMiscellaneousMathematicalSymbolsB{ \unicodeblockMiscellaneousMathematicalSymbolsBFont }

\unicodeblockMiscellaneousSymbols{ \unicodeblockMiscellaneousSymbolsFont }

\unicodeblockMiscellaneousSymbolsandArrows{ \unicodeblockMiscellaneousSymbolsandArrowsFont }

\unicodeblockMiscellaneousTechnical{ \unicodeblockMiscellaneousTechnicalFont }

\unicodeblockModifierToneLetters{ \unicodeblockModifierToneLettersFont }

\unicodeblockMongolian{ \unicodeblockMongolianFont }

\unicodeblockMusicalSymbols{ \unicodeblockMusicalSymbolsFont }

\unicodeblockMyanmar{ \unicodeblockMyanmarFont }

\unicodeblockNewTaiLue{ \unicodeblockNewTaiLueFont }

\unicodeblockNKo{ \unicodeblockNKoFont }

\unicodeblockNumberForms{ \unicodeblockNumberFormsFont }

\unicodeblockOgham{ \unicodeblockOghamFont }

\unicodeblockOldItalic{ \unicodeblockOldItalicFont }

\unicodeblockOldPersian{ \unicodeblockOldPersianFont }

\unicodeblockOpticalCharacterRecognition{ \unicodeblockOpticalCharacterRecognitionFont }

\unicodeblockOriya{ \unicodeblockOriyaFont }

\unicodeblockOsmanya{ \unicodeblockOsmanyaFont }

\unicodeblockPhagsPa{ \unicodeblockPhagsPaFont }

\unicodeblockPhoenician{ \unicodeblockPhoenicianFont }

\unicodeblockPhoneticExtensions{ \unicodeblockPhoneticExtensionsFont }

\unicodeblockPhoneticExtensionsSupplement{ \unicodeblockPhoneticExtensionsSupplementFont }

\unicodeblockPrivateUseArea{ \unicodeblockPrivateUseAreaFont }

\unicodeblockRunic{ \unicodeblockRunicFont }

\unicodeblockShavian{ \unicodeblockShavianFont }

\unicodeblockSinhala{ \unicodeblockSinhalaFont }

\unicodeblockSmallFormVariants{ \unicodeblockSmallFormVariantsFont }

\unicodeblockSpacingModifierLetters{ \unicodeblockSpacingModifierLettersFont }

\unicodeblockSpecials{ \unicodeblockSpecialsFont }

\unicodeblockSuperscriptsandSubscripts{ \unicodeblockSuperscriptsandSubscriptsFont }

\unicodeblockSupplementalArrowsA{ \unicodeblockSupplementalArrowsAFont }

\unicodeblockSupplementalArrowsB{ \unicodeblockSupplementalArrowsBFont }

\unicodeblockSupplementalMathematicalOperators{ \unicodeblockSupplementalMathematicalOperatorsFont }

\unicodeblockSupplementalPunctuation{ \unicodeblockSupplementalPunctuationFont }

\unicodeblockSupplementaryPrivateUseAreaA{ \unicodeblockSupplementaryPrivateUseAreaAFont }

\unicodeblockSupplementaryPrivateUseAreaB{ \unicodeblockSupplementaryPrivateUseAreaBFont }

\unicodeblockSylotiNagri{ \unicodeblockSylotiNagriFont }

\unicodeblockSyriac{ \unicodeblockSyriacFont }

\unicodeblockTagalog{ \unicodeblockTagalogFont }

\unicodeblockTagbanwa{ \unicodeblockTagbanwaFont }

\unicodeblockTags{ \unicodeblockTagsFont }

\unicodeblockTaiLe{ \unicodeblockTaiLeFont }

\unicodeblockTaiXuanJingSymbols{ \unicodeblockTaiXuanJingSymbolsFont }

\unicodeblockTamil{ \unicodeblockTamilFont }

\unicodeblockTelugu{ \unicodeblockTeluguFont }

\unicodeblockThaana{ \unicodeblockThaanaFont }

\unicodeblockThai{ \unicodeblockThaiFont }

\unicodeblockTibetan{ \unicodeblockTibetanFont }

\unicodeblockTifinagh{ \unicodeblockTifinaghFont }

\unicodeblockUgaritic{ \unicodeblockUgariticFont }

\unicodeblockUnifiedCanadianAboriginalSyllabics{ \unicodeblockUnifiedCanadianAboriginalSyllabicsFont }

\unicodeblockVariationSelectors{ \unicodeblockVariationSelectorsFont }

\unicodeblockVariationSelectorsSupplement{ \unicodeblockVariationSelectorsSupplementFont }

\unicodeblockVerticalForms{ \unicodeblockVerticalFormsFont }

\unicodeblockYiRadicals{ \unicodeblockYiRadicalsFont }

\unicodeblockYiSyllables{ \unicodeblockYiSyllablesFont }

\unicodeblockYijingHexagramSymbols{ \unicodeblockYijingHexagramSymbolsFont }
}

% ----------------------------------------------------------
%
% After all this unicode font and data stuff, the perl
% program for placing fontspec codes appropriately is
% defined in the \perlfontwrap 'macro', which is wrapped
% by the \fontwrap macro, which passes it the text it
% needs to add font macros into, and the list of unicode
% block data entries, with appropriate font macros loaded.
%
% ----------------------------------------------------------

%
% The list of macros fontwrap is allowed to go inside of, to add font
% commands to their arguments. It is empty by default: set it to a
% comma-separated list of macro names to actually get things to happen =)
%
\newcommand{\fontwrapallowedmacros}{}
\newcommand{\setfontwrapallowedmacros}[1]{\renewcommand{\fontwrapallowedmacros}{#1}}

%
% The list of environments fontwrap is allowed to go inside of, to add font
% commands to their content. It is empty by default: set it to a
% comma-separated list of environment names to actually get things to happen =)
%
\newcommand{\fontwrapallowedenvironments}{}
\newcommand{\setfontwrapallowedenvironments}[1]{\renewcommand{\fontwrapallowedenvironments}{#1}}

%
% Normally it doesn't matter that (Xe)(La)TeX strips all the whitespace when
% running through a command, but in settings where it is of the utmost
% importance that they are preserved, you should wrap the text in the
% fontwrap equivalent of the verbatim environment: "verbatimfontwrap".
%
\newenvironment{verbatimfontwrap}{\begingroup \obeylines \obeyspaces}{\endgroup}

%
% a wrapper to the perl command. Feeds it both the text to wrap,
% and the unicode block data, with font bindings.
%
\newcommand{\fontwrap}[1]{
\perlfontwrap{#1}{\autfontunicodedata}{\fontwrapallowedmacros}{\fontwrapallowedenvironments}
}

%
% the language detection/font insertion command
%
\perlnewcommand{\perlfontwrap}[4]{
  # Arguments:
  #   $_[0] - the text to add font commands to
  #   $_[1] - the unicode block data records, separated by \par
  #   $_[2] - comma-separated names of the macros we may work inside of
  #   $_[3] - comma-separated names of the environments we may work inside of
  #
  # Returns the text, with a \fontspec command inserted wherever the font
  # bound to a glyph's unicode block differs from the font currently active.
  #
  # NOTE: braces have to stay balanced throughout this body (comments
  # included), which is why single braces and backslashes are written
  # as chr() calls rather than as literals.

  # the input text
  $text = $_[0];

  # all the unicode data
  $blockdata = &trim($_[1]);

  # the list of macros we're allowed to process the content of
  @args = split(/,/,&trim($_[2]));
  foreach $am (@args) { $allowedmacros{&trim($am)}='1'; }

  # the list of environments we're allowed to process the content of
  @args = split(/,/,&trim($_[3]));
  foreach $am (@args) { $allowedenvironments{&trim($am)}='1'; }

  # clear
  @args=();

  # Also, I know these two look silly. Without them, checks on $swapped break...
  $true=1;
  $false=0;

  # ------------------------------------------------
  # Create the unicode block data array-of-arrays.
  # This is essentially an array with on each position
  # a list representing a unicode block. The data is
  # organised as:
  #
  #   [0] - block name
  #   [1] - informal group name
  #   [2] - first codepoint in the block (in hex)
  #   [3] - last codepoint in the block (in hex)
  #   [4] - number of glyphs in the block
  #   [5] - font bound to this block
  #   [6] - first codepoint in the block (as a number)
  #   [7] - last codepoint in the block (as a number)
  #
  # mostly for my personal ease of code-use, these
  # indices are never used numerically, instead I
  # will use indicatively named variables.
  # -----------------------------------------------
  $idx_blockname=0;
  $idx_groupname=1;
  $idx_start=2;
  $idx_end=3;
  $idx_glyphs=4;
  $idx_font=5;
  $idx_startnum=6;
  $idx_endnum=7;

  $unicodeblockcount=0;
  @lines = split(/\\par /,$blockdata);
  foreach my $entry (@lines) {
    my $line = &trim($entry);
    # skip any comments and empty records
    if(substr($line,0,1) eq chr(35) || $line eq '') { next; }
    my ($blockname,$groupname,$start,$end,$glyphs,$font) = split(/,/,$line);
    $font = &trim($font);
    # The numeric bounds are computed once here, so that the range check in
    # attemptfontchange can compare numbers instead of hex strings.
    $unicodeblocks[$unicodeblockcount++] = [$blockname,$groupname,$start,$end,$glyphs,
                                            ($font ne '')?$font:'fontwrapdefaultfont',
                                            hex(&trim($start)),hex(&trim($end))];
  }

  # ----------------------------------------------------------------------
  #
  #   Miscellaneous subroutines
  #
  # ----------------------------------------------------------------------

  #
  # very simple trim function: strips leading and trailing whitespace
  #
  sub trim {
    my $string = shift;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string
  }

  #
  # checks if the passed byte indicates the start of a new unicode glyph
  #
  sub isnewglyph {
    my $value = ord(shift);
    # in utf8 context, anything that isn't 10xxxxxx starts a new glyph
    return ($value<128 || $value>191);
  }

  #
  # determines the hexadecimal location of the passed glyph (a string of
  # utf8 bytes) in the unicode space. Returns uppercase hex, padded with
  # a leading zero to an even number of digits.
  #
  sub codepoint {
    # value of the marker bits in the first byte, indexed by sequence length
    my @utf_lead_bytes = (0,0,192,224,240,248,252);
    my @bytes=split(//,shift);
    my $blength = @bytes;
    my $ordinal;

    # if we're dealing with a single byte, its ordinal value can be obtained by calling ord() directly
    if($blength==1) { $ordinal = ord($bytes[0]); }

    # ----------------------------------------------------------------------------
    #
    # UTF8 byte layout:
    #
    #   U-00000000 ... U-0000007F = 0xxxxxxx
    #   U-00000080 ... U-000007FF = 110xxxxx 10xxxxxx
    #   U-00000800 ... U-0000FFFF = 1110xxxx 10xxxxxx 10xxxxxx
    #   U-00010000 ... U-001FFFFF = 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    #   U-00200000 ... U-03FFFFFF = 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    #   U-04000000 ... U-7FFFFFFF = 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    #
    # ----------------------------------------------------------------------------

    # if there are more bytes, we need to play by the UTF8 rules
    else {
      # handle first byte: remove its marker bits and shift it into place.
      # Every following byte contributes 6 bits.
      $ordinal = (ord($bytes[0]) - $utf_lead_bytes[$blength]) * 2**(6*($blength-1));
      # handle further bytes: remove the 10xxxxxx marker and shift into place
      for(my $bpos=1;$bpos<$blength;$bpos++) {
        $ordinal += (ord($bytes[$bpos]) - 128) * 2**(6*($blength-($bpos+1)));
      }
    }

    # return uppercase hex representation
    my $hexval = sprintf("%x",$ordinal);
    if(length($hexval)%2!=0) { $hexval = "0".$hexval; }   # add a leading zero (purely cosmetic)
    return uc($hexval);                                   # return in uppercase (again, purely cosmetic)
  }

  #
  # appends the output string with the passed string.
  #
  sub appendoutput {
    my $string=shift;
    $rewritten .= $string;
  }

  # ----------------------------------------------------------------------
  #
  #   Font changing subroutine
  #
  # ----------------------------------------------------------------------

  #
  # Looks up the unicode block of the current $glyph and, if the font bound
  # to it differs from the current font, writes a \fontspec command to the
  # output. A glyph that falls in none of the known blocks leaves block and
  # font as they were.
  #
  sub attemptfontchange {
    $cp = &codepoint($glyph);
    # Compare as numbers: the hex strings differ in length (and codepoint
    # pads to an even number of digits), so comparing them as strings puts
    # codepoints beyond FFFF in the wrong block, or in none at all.
    my $cpvalue = hex($cp);

    # check which unicode block this character is in
    for(my $pos=0;$pos<$unicodeblockcount;$pos++) {
      # if within range, we found the right unicode block
      if($cpvalue >= $unicodeblocks[$pos][$idx_startnum] && $cpvalue <= $unicodeblocks[$pos][$idx_endnum]) {
        $glyphblock = $unicodeblocks[$pos][$idx_blockname];
        $glyphfont = $unicodeblocks[$pos][$idx_font];
        last;
      }
    }

    # if this differs from what we were already in, add a fontchange code if we need to, and set currentblock to the new block
    if($glyphblock ne $currentblock) {
      if($currentfont ne $glyphfont) {
        &appendoutput("\\fontspec{".$glyphfont."}");
        $currentfont=$glyphfont;
        $swapped=$true;
      }
      $currentblock=$glyphblock;
    }

    # if we swapped fonts, and the next glyph was a space, escape it for good measure.
    if($swapped==1 && $glyph eq ' ') { &appendoutput("\\"); }
    $swapped=$false;
  }

  # ----------------------------------------------------------------------
  #
  #   Macro parsing support subroutines
  #
  # ----------------------------------------------------------------------

  #
  # runs through the text array, extracting the macro's command.
  # The name runs up to the first opening brace or whitespace. On return,
  # the text pointer $i sits on the glyph just before the macro's opening
  # brace if it has an argument list (whitespace in between is skipped),
  # and on the last glyph of the macro name otherwise.
  #
  sub getmacrocommand {
    for($i=$i+1;$i<@textarray;$i++) {
      $glyph = $textarray[$i];
      if($glyph ne chr(123) && &trim($glyph) ne '') {
        $textarray[$i]='';
        $macro .= $glyph;
        next;
      }
      # if glyph was whitespace, check if there is an argument list to come, or not.
      if($glyph ne chr(123)) {
        for(my $j=1;$i+$j<@textarray;$j++) {
          # non-whitespace: we need to stop. but do we need to discard the crossed whitespace?
          if(&trim($textarray[$i+$j]) ne '') {
            # if we found an argument list, we do. otherwise, we don't have to.
            if($textarray[$i+$j] eq chr(123)) { $i=$i+$j; }
            last;
          }
        }
      }
      $i--;
      return $macro;
    }
    # the macro name ran up to the very end of the text
    return $macro;
  }

  #
  # simply copies from the textarray to the rewritten string until we run out of macrobody.
  #
  sub copymacrotooutput {
    $depth=0;
    for(;$i<@textarray;$i++) {
      $glyph = $textarray[$i];
      $textarray[$i]='';
      # whitespace outside of any argument is dropped
      if($depth==0 && &trim($glyph) eq '') { next; }
      # an opening brace increments $depth, a closing brace decrements it.
      if($glyph eq chr(123)) { $depth++; }
      elsif($glyph eq chr(125)) { $depth--; }
      $macrobody .= $glyph;
      if($depth==0 && $glyph ne chr(123)) {
        &appendoutput($macrobody);
        $macrobody = '';
        # recurse, for as many arguments as there are...
        if(&hasarguments) { &copymacrotooutput(); }
        $i--;
        last;
      }
    }
  }

  #
  # check whether a macro has arguments or not: true if the first
  # non-whitespace glyph from $i onwards is an opening brace
  #
  sub hasarguments {
    for($j=$i;$j<@textarray;$j++) {
      $check = $textarray[$j];
      if(&trim($check) ne '') { return ($check eq chr(123)); }
    }
    return 0;
  }

  # ----------------------------------------------------------------------
  #
  #   Environment parsing support subroutines
  #
  # ----------------------------------------------------------------------

  #
  # runs through the text array, extracting the environment's name.
  # when this is done, $i will point to the glyph *after* the closing
  # brace of the environment name.
  #
  sub getenvironment {
    for($i=$i+1;$i<@textarray;$i++) {
      $glyph = $textarray[$i];
      # ignore opening brace and possible whitespace
      if($glyph eq chr(123) || &trim($glyph) eq '') { next; }
      # the closing brace ends the environment name
      if($glyph eq chr(125)) {
        $i=$i+1;
        return $environment;
      }
      # anything else is part of the environment name
      $textarray[$i]='';
      $environment .= $glyph;
    }
    # the text ended before the name was closed
    return $environment;
  }

  #
  # copies from the textarray to the rewritten string until we hit the
  # "end" macro for the passed environment. On return, $i points to the
  # closing brace of that "end" macro.
  #
  sub copyenvironmenttooutput {
    $environment=shift;
    $environmentbody='';
    my $namelength = length($environment);
    for(;$i<@textarray;$i++) {
      $glyph = $textarray[$i];
      $textarray[$i]='';
      if($glyph eq chr(92)) {
        my $check='';
        for my $j (1..3) { $check .= $textarray[$i+$j]; }
        if($check eq 'end') {
          # make sure this is the end tag for the right environment!
          # $i+1..3 was 'end', so the next read spot is at $i+4
          my $spaces=0;
          while($i+4+$spaces<@textarray && &trim($textarray[$i+4+$spaces]) eq '') { $spaces++; }
          my $open = $i+4+$spaces;            # where the opening brace should be
          my $close = $open+$namelength+1;    # where the closing brace should be
          my $checkenv='';
          for my $j ($open+1 .. $close-1) { $checkenv .= $textarray[$j]; }
          # both braces are checked too, so that a longer environment name
          # which merely starts with ours is not mistaken for it
          if($checkenv eq $environment && $textarray[$open] eq chr(123) && $textarray[$close] eq chr(125)) {
            &appendoutput($environmentbody . "\\end{".$environment."}");
            # stop on the closing brace: the main loop steps past it
            $i = $close;
            $environmentbody='';
            return;
          }
        }
      }
      # regular glyph, or a macro that isn't our "end": append to the body
      $environmentbody .= $glyph;
    }
    # the environment was never closed: write out what we collected rather than losing it
    &appendoutput($environmentbody);
    $environmentbody='';
  }

  # ----------------------------------------------------------------------
  #
  #   Macro parsing subroutine
  #
  # ----------------------------------------------------------------------

  #
  # processes a LaTeX macro
  #
  sub processmacro {
    $macro = &getmacrocommand();

    # rather special macro
    if($macro eq 'begin') {
      $macro='';
      # find out which environment is being entered, whether we may work inside it
      $environment = &getenvironment();
      &appendoutput("\\begin{" . $environment . "}");
      if($allowedenvironments{$environment} ne '') {
        # allowed to process, which means we don't need to pay any special
        # attention to the "end" commands, as they'll be processed like
        # any other macro (though you should not put them in the allowedmacro
        # list, obviously :)
        $environment='';
        # getenvironment left $i just past the closing brace; step back onto
        # it, so that the main loop does not skip the first glyph of the content
        $i--;
        return;
      }
      # not allowed to process environment. copy until we hit its "end" macro
      &copyenvironmenttooutput($environment);
      $environment='';
      return;
    }

    &appendoutput("\\" . $macro);
    if($allowedmacros{$macro} ne '') {
      # if we're allowed to work inside it, return so that the
      # processing of glyphs continues as normal
      $macro='';
      return;
    }
    $macro='';
    # not allowed to do anything with this content - write out any arguments if it has
    # them and stop processing, returning to the toplevel processglyph subroutine
    if(&hasarguments) { &copymacrotooutput(); }
  }

  # ----------------------------------------------------------------------
  #
  #   Glyph parsing subroutines
  #
  # ----------------------------------------------------------------------

  #
  # processes one glyph
  #
  sub processglyph {
    # get the current glyph
    $glyph = $textarray[$i];
    $textarray[$i]='';                  # maintenance. progressively clears up a bit of mem
    if($glyph eq '') { return; }        # shortcut for empty (already consumed) positions in the array

    # if we find a backslash, we process the text as macro.
    if($glyph eq chr(92)) {
      &processmacro();
      # after we process a macro, it will have already been inserted into the $rewritten string, so we just return.
      return;
    }

    # check if the font needs changing
    &attemptfontchange();
    # finally, add the glyph to the $rewritten string
    &appendoutput($glyph);
  }

  # ----------------------------------------------------------------------
  #   start running through the passed text.
  # ----------------------------------------------------------------------

  #
  # first off, convert the passed text into an array of unicode glyphs, so
  # we can iterate over it letter by letter.
  #
  @bytes = split(//,$text);
  $text = "";   # maintenance. Saves a bit of memory.
  $buffer="";
  @textarray=();
  foreach $byte (@bytes) {
    if(&isnewglyph($byte)) {
      push @textarray, $buffer;
      $buffer=$byte;
    }
    else { $buffer.=$byte; }
  }
  push @textarray, $buffer;
  @bytes=();

  #
  # At this point the text has been pulled apart as unicode glyphs, rather than just bytes,
  # which means we can start walking through the array.
  #
  $rewritten='';
  $currentblock='not set yet';
  $currentfont='not set yet';

  # set the processing in motion (because all variables are persistent,
  # this script runs through the text like a finite state machine)
  $i=0;
  for(;$i<@textarray; $i++) { &processglyph(); }

  #
  # perltex uses persistent variables, but in some cases frees the wrong threadpool's memory.
  # as such, emptying the variables before exit is not a bad idea, although normally
  # you wouldn't bother with it this close to the return statement.
  #
  undef $text;
  undef @textarray;
  undef $blockdata;
  undef @args;
  undef %allowedmacros;
  undef %allowedenvironments;
  undef $glyph;
  undef $currentblock;
  undef $currentfont;
  undef $glyphblock;
  undef $glyphfont;
  undef $macro;
  undef $macrobody;
  undef $cp;

  # return the now font-tagged text
  return $rewritten;
}

% ----------------------------------------------------------------------------
\endinput
%
% End of file `fontwrap.sty'.