respect-validation/library/Rules/LanguageCode.php
Henrique Moody e4f2c8154a
Use PHP attributes to define templates
Creating a specific exception for each rule adds a painful overhead. If
you want to make a custom message for your rule, you will need to create
an exception and then register that exception namespace to be able to
use it—all that is just for customizing the message of your rule.

Having different namespaces also implies that you need to fetch the
exception of the rule from another directory to change it. As Uncle Bob
said, "Classes that change together belong together. Classes that are
not reused together should not be grouped."

This commit will drastically change this library, moving all the
templates from the exceptions to the rules. Consequently, the Factory
becomes much simpler, and the library gets a bit smaller, too.

Signed-off-by: Henrique Moody <henriquemoody@gmail.com>
2024-01-29 23:43:57 +01:00

545 lines
19 KiB
PHP

<?php
/*
* Copyright (c) Alexandre Gomes Gaigalas <alganet@gmail.com>
* SPDX-License-Identifier: MIT
*/
declare(strict_types=1);
namespace Respect\Validation\Rules;
use Respect\Validation\Attributes\Template;
use Respect\Validation\Exceptions\ComponentException;
use function array_column;
use function array_filter;
use function array_search;
use function sprintf;
#[Template(
'{{name}} must be a valid ISO 639 {{set}} language code',
'{{name}} must not be a valid ISO 639 {{set}} language code',
)]
final class LanguageCode extends AbstractEnvelope
{
/** Identifier of the two-letter code set; also the default value of the constructor's $set argument. */
public const ALPHA2 = 'alpha-2';
/** Identifier of the three-letter code set. */
public const ALPHA3 = 'alpha-3';
/**
 * Sets accepted by the constructor.
 *
 * Order matters: the position of a set in this list is used as the column index into each LANGUAGE_CODES entry.
 */
public const AVAILABLE_SETS = [self::ALPHA2, self::ALPHA3];
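/**
 * ISO 639 language codes, one `[alpha-2, alpha-3]` pair per entry.
 *
 * The alpha-2 slot is an empty string when the entry has no two-letter code.
 *
 * NOTE(review): the `qaaqtz` entry ("Reserved for local use") looks like the reserved range `qaa-qtz` with the
 * hyphen dropped; as written only that literal six-letter string is present in the alpha-3 column. Confirm intent.
 *
 * @see https://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
 */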
public const LANGUAGE_CODES = [
// phpcs:disable Squiz.PHP.CommentedOutCode.Found
['aa', 'aar'], // Afar
['ab', 'abk'], // Abkhazian
['', 'ace'], // Achinese
['', 'ach'], // Acoli
['', 'ada'], // Adangme
['', 'ady'], // Adyghe; Adygei
['', 'afa'], // Afro-Asiatic languages
['', 'afh'], // Afrihili
['af', 'afr'], // Afrikaans
['', 'ain'], // Ainu
['ak', 'aka'], // Akan
['', 'akk'], // Akkadian
['sq', 'alb'], // Albanian
['', 'ale'], // Aleut
['', 'alg'], // Algonquian languages
['', 'alt'], // Southern Altai
['am', 'amh'], // Amharic
['', 'ang'], // English, Old (ca.450-1100)
['', 'anp'], // Angika
['', 'apa'], // Apache languages
['ar', 'ara'], // Arabic
['', 'arc'], // Official Aramaic (700-300 BCE); Imperial Aramaic (700-300 BCE)
['an', 'arg'], // Aragonese
['hy', 'arm'], // Armenian
['', 'arn'], // Mapudungun; Mapuche
['', 'arp'], // Arapaho
['', 'art'], // Artificial languages
['', 'arw'], // Arawak
['as', 'asm'], // Assamese
['', 'ast'], // Asturian; Bable; Leonese; Asturleonese
['', 'ath'], // Athapascan languages
['', 'aus'], // Australian languages
['av', 'ava'], // Avaric
['ae', 'ave'], // Avestan
['', 'awa'], // Awadhi
['ay', 'aym'], // Aymara
['az', 'aze'], // Azerbaijani
['', 'bad'], // Banda languages
['', 'bai'], // Bamileke languages
['ba', 'bak'], // Bashkir
['', 'bal'], // Baluchi
['bm', 'bam'], // Bambara
['', 'ban'], // Balinese
['eu', 'baq'], // Basque
['', 'bas'], // Basa
['', 'bat'], // Baltic languages
['', 'bej'], // Beja; Bedawiyet
['be', 'bel'], // Belarusian
['', 'bem'], // Bemba
['bn', 'ben'], // Bengali
['', 'ber'], // Berber languages
['', 'bho'], // Bhojpuri
['bh', 'bih'], // Bihari languages
['', 'bik'], // Bikol
['', 'bin'], // Bini; Edo
['bi', 'bis'], // Bislama
['', 'bla'], // Siksika
['', 'bnt'], // Bantu languages
['bs', 'bos'], // Bosnian
['', 'bra'], // Braj
['br', 'bre'], // Breton
['', 'btk'], // Batak languages
['', 'bua'], // Buriat
['', 'bug'], // Buginese
['bg', 'bul'], // Bulgarian
['my', 'bur'], // Burmese
['', 'byn'], // Blin; Bilin
['', 'cad'], // Caddo
['', 'cai'], // Central American Indian languages
['', 'car'], // Galibi Carib
['ca', 'cat'], // Catalan; Valencian
['', 'cau'], // Caucasian languages
['', 'ceb'], // Cebuano
['', 'cel'], // Celtic languages
['ch', 'cha'], // Chamorro
['', 'chb'], // Chibcha
['ce', 'che'], // Chechen
['', 'chg'], // Chagatai
['zh', 'chi'], // Chinese
['', 'chk'], // Chuukese
['', 'chm'], // Mari
['', 'chn'], // Chinook jargon
['', 'cho'], // Choctaw
['', 'chp'], // Chipewyan; Dene Suline
['', 'chr'], // Cherokee
['cu', 'chu'], // Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic
['cv', 'chv'], // Chuvash
['', 'chy'], // Cheyenne
['', 'cmc'], // Chamic languages
['', 'cnr'], // Montenegrin
['', 'cop'], // Coptic
['kw', 'cor'], // Cornish
['co', 'cos'], // Corsican
['', 'cpe'], // Creoles and pidgins, English based
['', 'cpf'], // Creoles and pidgins, French-based
['', 'cpp'], // Creoles and pidgins, Portuguese-based
['cr', 'cre'], // Cree
['', 'crh'], // Crimean Tatar; Crimean Turkish
['', 'crp'], // Creoles and pidgins
['', 'csb'], // Kashubian
['', 'cus'], // Cushitic languages
['cs', 'cze'], // Czech
['', 'dak'], // Dakota
['da', 'dan'], // Danish
['', 'dar'], // Dargwa
['', 'day'], // Land Dayak languages
['', 'del'], // Delaware
['', 'den'], // Slave (Athapascan)
['', 'dgr'], // Dogrib
['', 'din'], // Dinka
['dv', 'div'], // Divehi; Dhivehi; Maldivian
['', 'doi'], // Dogri
['', 'dra'], // Dravidian languages
['', 'dsb'], // Lower Sorbian
['', 'dua'], // Duala
['', 'dum'], // Dutch, Middle (ca.1050-1350)
['nl', 'dut'], // Dutch; Flemish
['', 'dyu'], // Dyula
['dz', 'dzo'], // Dzongkha
['', 'efi'], // Efik
['', 'egy'], // Egyptian (Ancient)
['', 'eka'], // Ekajuk
['', 'elx'], // Elamite
['en', 'eng'], // English
['', 'enm'], // English, Middle (1100-1500)
['eo', 'epo'], // Esperanto
['et', 'est'], // Estonian
['ee', 'ewe'], // Ewe
['', 'ewo'], // Ewondo
['', 'fan'], // Fang
['fo', 'fao'], // Faroese
['', 'fat'], // Fanti
['fj', 'fij'], // Fijian
['', 'fil'], // Filipino; Pilipino
['fi', 'fin'], // Finnish
['', 'fiu'], // Finno-Ugrian languages
['', 'fon'], // Fon
['fr', 'fre'], // French
['', 'frm'], // French, Middle (ca.1400-1600)
['', 'fro'], // French, Old (842-ca.1400)
['', 'frr'], // Northern Frisian
['', 'frs'], // Eastern Frisian
['fy', 'fry'], // Western Frisian
['ff', 'ful'], // Fulah
['', 'fur'], // Friulian
['', 'gaa'], // Ga
['', 'gay'], // Gayo
['', 'gba'], // Gbaya
['', 'gem'], // Germanic languages
['ka', 'geo'], // Georgian
['de', 'ger'], // German
['', 'gez'], // Geez
['', 'gil'], // Gilbertese
['gd', 'gla'], // Gaelic; Scottish Gaelic
['ga', 'gle'], // Irish
['gl', 'glg'], // Galician
['gv', 'glv'], // Manx
['', 'gmh'], // German, Middle High (ca.1050-1500)
['', 'goh'], // German, Old High (ca.750-1050)
['', 'gon'], // Gondi
['', 'gor'], // Gorontalo
['', 'got'], // Gothic
['', 'grb'], // Grebo
['', 'grc'], // Greek, Ancient (to 1453)
['el', 'gre'], // Greek, Modern (1453-)
['gn', 'grn'], // Guarani
['', 'gsw'], // Swiss German; Alemannic; Alsatian
['gu', 'guj'], // Gujarati
['', 'gwi'], // Gwich'in
['', 'hai'], // Haida
['ht', 'hat'], // Haitian; Haitian Creole
['ha', 'hau'], // Hausa
['', 'haw'], // Hawaiian
['he', 'heb'], // Hebrew
['hz', 'her'], // Herero
['', 'hil'], // Hiligaynon
['', 'him'], // Himachali languages; Western Pahari languages
['hi', 'hin'], // Hindi
['', 'hit'], // Hittite
['', 'hmn'], // Hmong; Mong
['ho', 'hmo'], // Hiri Motu
['hr', 'hrv'], // Croatian
['', 'hsb'], // Upper Sorbian
['hu', 'hun'], // Hungarian
['', 'hup'], // Hupa
['', 'iba'], // Iban
['ig', 'ibo'], // Igbo
['is', 'ice'], // Icelandic
['io', 'ido'], // Ido
['ii', 'iii'], // Sichuan Yi; Nuosu
['', 'ijo'], // Ijo languages
['iu', 'iku'], // Inuktitut
['ie', 'ile'], // Interlingue; Occidental
['', 'ilo'], // Iloko
['ia', 'ina'], // Interlingua (International Auxiliary Language Association)
['', 'inc'], // Indic languages
['id', 'ind'], // Indonesian
['', 'ine'], // Indo-European languages
['', 'inh'], // Ingush
['ik', 'ipk'], // Inupiaq
['', 'ira'], // Iranian languages
['', 'iro'], // Iroquoian languages
['it', 'ita'], // Italian
['jv', 'jav'], // Javanese
['', 'jbo'], // Lojban
['ja', 'jpn'], // Japanese
['', 'jpr'], // Judeo-Persian
['', 'jrb'], // Judeo-Arabic
['', 'kaa'], // Kara-Kalpak
['', 'kab'], // Kabyle
['', 'kac'], // Kachin; Jingpho
['kl', 'kal'], // Kalaallisut; Greenlandic
['', 'kam'], // Kamba
['kn', 'kan'], // Kannada
['', 'kar'], // Karen languages
['ks', 'kas'], // Kashmiri
['kr', 'kau'], // Kanuri
['', 'kaw'], // Kawi
['kk', 'kaz'], // Kazakh
['', 'kbd'], // Kabardian
['', 'kha'], // Khasi
['', 'khi'], // Khoisan languages
['km', 'khm'], // Central Khmer
['', 'kho'], // Khotanese; Sakan
['ki', 'kik'], // Kikuyu; Gikuyu
['rw', 'kin'], // Kinyarwanda
['ky', 'kir'], // Kirghiz; Kyrgyz
['', 'kmb'], // Kimbundu
['', 'kok'], // Konkani
['kv', 'kom'], // Komi
['kg', 'kon'], // Kongo
['ko', 'kor'], // Korean
['', 'kos'], // Kosraean
['', 'kpe'], // Kpelle
['', 'krc'], // Karachay-Balkar
['', 'krl'], // Karelian
['', 'kro'], // Kru languages
['', 'kru'], // Kurukh
['kj', 'kua'], // Kuanyama; Kwanyama
['', 'kum'], // Kumyk
['ku', 'kur'], // Kurdish
['', 'kut'], // Kutenai
['', 'lad'], // Ladino
['', 'lah'], // Lahnda
['', 'lam'], // Lamba
['lo', 'lao'], // Lao
['la', 'lat'], // Latin
['lv', 'lav'], // Latvian
['', 'lez'], // Lezghian
['li', 'lim'], // Limburgan; Limburger; Limburgish
['ln', 'lin'], // Lingala
['lt', 'lit'], // Lithuanian
['', 'lol'], // Mongo
['', 'loz'], // Lozi
['lb', 'ltz'], // Luxembourgish; Letzeburgesch
['', 'lua'], // Luba-Lulua
['lu', 'lub'], // Luba-Katanga
['lg', 'lug'], // Ganda
['', 'lui'], // Luiseno
['', 'lun'], // Lunda
['', 'luo'], // Luo (Kenya and Tanzania)
['', 'lus'], // Lushai
['mk', 'mac'], // Macedonian
['', 'mad'], // Madurese
['', 'mag'], // Magahi
['mh', 'mah'], // Marshallese
['', 'mai'], // Maithili
['', 'mak'], // Makasar
['ml', 'mal'], // Malayalam
['', 'man'], // Mandingo
['mi', 'mao'], // Maori
['', 'map'], // Austronesian languages
['mr', 'mar'], // Marathi
['', 'mas'], // Masai
['ms', 'may'], // Malay
['', 'mdf'], // Moksha
['', 'mdr'], // Mandar
['', 'men'], // Mende
['', 'mga'], // Irish, Middle (900-1200)
['', 'mic'], // Mi'kmaq; Micmac
['', 'min'], // Minangkabau
['', 'mis'], // Uncoded languages
['', 'mkh'], // Mon-Khmer languages
['mg', 'mlg'], // Malagasy
['mt', 'mlt'], // Maltese
['', 'mnc'], // Manchu
['', 'mni'], // Manipuri
['', 'mno'], // Manobo languages
['', 'moh'], // Mohawk
['mn', 'mon'], // Mongolian
['', 'mos'], // Mossi
['', 'mul'], // Multiple languages
['', 'mun'], // Munda languages
['', 'mus'], // Creek
['', 'mwl'], // Mirandese
['', 'mwr'], // Marwari
['', 'myn'], // Mayan languages
['', 'myv'], // Erzya
['', 'nah'], // Nahuatl languages
['', 'nai'], // North American Indian languages
['', 'nap'], // Neapolitan
['na', 'nau'], // Nauru
['nv', 'nav'], // Navajo; Navaho
['nr', 'nbl'], // Ndebele, South; South Ndebele
['nd', 'nde'], // Ndebele, North; North Ndebele
['ng', 'ndo'], // Ndonga
['', 'nds'], // Low German; Low Saxon; German, Low; Saxon, Low
['ne', 'nep'], // Nepali
['', 'new'], // Nepal Bhasa; Newari
['', 'nia'], // Nias
['', 'nic'], // Niger-Kordofanian languages
['', 'niu'], // Niuean
['nn', 'nno'], // Norwegian Nynorsk; Nynorsk, Norwegian
['nb', 'nob'], // Bokmål, Norwegian; Norwegian Bokmål
['', 'nog'], // Nogai
['', 'non'], // Norse, Old
['no', 'nor'], // Norwegian
['', 'nqo'], // N'Ko
['', 'nso'], // Pedi; Sepedi; Northern Sotho
['', 'nub'], // Nubian languages
['', 'nwc'], // Classical Newari; Old Newari; Classical Nepal Bhasa
['ny', 'nya'], // Chichewa; Chewa; Nyanja
['', 'nym'], // Nyamwezi
['', 'nyn'], // Nyankole
['', 'nyo'], // Nyoro
['', 'nzi'], // Nzima
['oc', 'oci'], // Occitan (post 1500)
['oj', 'oji'], // Ojibwa
['or', 'ori'], // Oriya
['om', 'orm'], // Oromo
['', 'osa'], // Osage
['os', 'oss'], // Ossetian; Ossetic
['', 'ota'], // Turkish, Ottoman (1500-1928)
['', 'oto'], // Otomian languages
['', 'paa'], // Papuan languages
['', 'pag'], // Pangasinan
['', 'pal'], // Pahlavi
['', 'pam'], // Pampanga; Kapampangan
['pa', 'pan'], // Panjabi; Punjabi
['', 'pap'], // Papiamento
['', 'pau'], // Palauan
['', 'peo'], // Persian, Old (ca.600-400 B.C.)
['fa', 'per'], // Persian
['', 'phi'], // Philippine languages
['', 'phn'], // Phoenician
['pi', 'pli'], // Pali
['pl', 'pol'], // Polish
['', 'pon'], // Pohnpeian
['pt', 'por'], // Portuguese
['', 'pra'], // Prakrit languages
['', 'pro'], // Provençal, Old (to 1500); Occitan, Old (to 1500)
['ps', 'pus'], // Pushto; Pashto
['', 'qaaqtz'], // Reserved for local use
['qu', 'que'], // Quechua
['', 'raj'], // Rajasthani
['', 'rap'], // Rapanui
['', 'rar'], // Rarotongan; Cook Islands Maori
['', 'roa'], // Romance languages
['rm', 'roh'], // Romansh
['', 'rom'], // Romany
['ro', 'rum'], // Romanian; Moldavian; Moldovan
['rn', 'run'], // Rundi
['', 'rup'], // Aromanian; Arumanian; Macedo-Romanian
['ru', 'rus'], // Russian
['', 'sad'], // Sandawe
['sg', 'sag'], // Sango
['', 'sah'], // Yakut
['', 'sai'], // South American Indian languages
['', 'sal'], // Salishan languages
['', 'sam'], // Samaritan Aramaic
['sa', 'san'], // Sanskrit
['', 'sas'], // Sasak
['', 'sat'], // Santali
['', 'scn'], // Sicilian
['', 'sco'], // Scots
['', 'sel'], // Selkup
['', 'sem'], // Semitic languages
['', 'sga'], // Irish, Old (to 900)
['', 'sgn'], // Sign Languages
['', 'shn'], // Shan
['', 'sid'], // Sidamo
['si', 'sin'], // Sinhala; Sinhalese
['', 'sio'], // Siouan languages
['', 'sit'], // Sino-Tibetan languages
['', 'sla'], // Slavic languages
['sk', 'slo'], // Slovak
['sl', 'slv'], // Slovenian
['', 'sma'], // Southern Sami
['se', 'sme'], // Northern Sami
['', 'smi'], // Sami languages
['', 'smj'], // Lule Sami
['', 'smn'], // Inari Sami
['sm', 'smo'], // Samoan
['', 'sms'], // Skolt Sami
['sn', 'sna'], // Shona
['sd', 'snd'], // Sindhi
['', 'snk'], // Soninke
['', 'sog'], // Sogdian
['so', 'som'], // Somali
['', 'son'], // Songhai languages
['st', 'sot'], // Sotho, Southern
['es', 'spa'], // Spanish; Castilian
['sc', 'srd'], // Sardinian
['', 'srn'], // Sranan Tongo
['sr', 'srp'], // Serbian
['', 'srr'], // Serer
['', 'ssa'], // Nilo-Saharan languages
['ss', 'ssw'], // Swati
['', 'suk'], // Sukuma
['su', 'sun'], // Sundanese
['', 'sus'], // Susu
['', 'sux'], // Sumerian
['sw', 'swa'], // Swahili
['sv', 'swe'], // Swedish
['', 'syc'], // Classical Syriac
['', 'syr'], // Syriac
['ty', 'tah'], // Tahitian
['', 'tai'], // Tai languages
['ta', 'tam'], // Tamil
['tt', 'tat'], // Tatar
['te', 'tel'], // Telugu
['', 'tem'], // Timne
['', 'ter'], // Tereno
['', 'tet'], // Tetum
['tg', 'tgk'], // Tajik
['tl', 'tgl'], // Tagalog
['th', 'tha'], // Thai
['bo', 'tib'], // Tibetan
['', 'tig'], // Tigre
['ti', 'tir'], // Tigrinya
['', 'tiv'], // Tiv
['', 'tkl'], // Tokelau
['', 'tlh'], // Klingon; tlhIngan-Hol
['', 'tli'], // Tlingit
['', 'tmh'], // Tamashek
['', 'tog'], // Tonga (Nyasa)
['to', 'ton'], // Tonga (Tonga Islands)
['', 'tpi'], // Tok Pisin
['', 'tsi'], // Tsimshian
['tn', 'tsn'], // Tswana
['ts', 'tso'], // Tsonga
['tk', 'tuk'], // Turkmen
['', 'tum'], // Tumbuka
['', 'tup'], // Tupi languages
['tr', 'tur'], // Turkish
['', 'tut'], // Altaic languages
['', 'tvl'], // Tuvalu
['tw', 'twi'], // Twi
['', 'tyv'], // Tuvinian
['', 'udm'], // Udmurt
['', 'uga'], // Ugaritic
['ug', 'uig'], // Uighur; Uyghur
['uk', 'ukr'], // Ukrainian
['', 'umb'], // Umbundu
['', 'und'], // Undetermined
['ur', 'urd'], // Urdu
['uz', 'uzb'], // Uzbek
['', 'vai'], // Vai
['ve', 'ven'], // Venda
['vi', 'vie'], // Vietnamese
['vo', 'vol'], // Volapük
['', 'vot'], // Votic
['', 'wak'], // Wakashan languages
['', 'wal'], // Wolaitta; Wolaytta
['', 'war'], // Waray
['', 'was'], // Washo
['cy', 'wel'], // Welsh
['', 'wen'], // Sorbian languages
['wa', 'wln'], // Walloon
['wo', 'wol'], // Wolof
['', 'xal'], // Kalmyk; Oirat
['xh', 'xho'], // Xhosa
['', 'yao'], // Yao
['', 'yap'], // Yapese
['yi', 'yid'], // Yiddish
['yo', 'yor'], // Yoruba
['', 'ypk'], // Yupik languages
['', 'zap'], // Zapotec
['', 'zbl'], // Blissymbols; Blissymbolics; Bliss
['', 'zen'], // Zenaga
['', 'zgh'], // Standard Moroccan Tamazight
['za', 'zha'], // Zhuang; Chuang
['', 'znd'], // Zande languages
['zu', 'zul'], // Zulu
['', 'zun'], // Zuni
['', 'zxx'], // No linguistic content; Not applicable
// phpcs:enable Squiz.PHP.CommentedOutCode.Found
];
/**
 * Configures the rule to validate against one of the ISO 639 code sets.
 *
 * @param string $set One of the values in self::AVAILABLE_SETS; defaults to self::ALPHA2.
 *
 * @throws ComponentException When $set is not listed in self::AVAILABLE_SETS.
 */
public function __construct(string $set = self::ALPHA2)
{
    // The position of the set inside AVAILABLE_SETS is also the column to read from LANGUAGE_CODES.
    $column = array_search($set, self::AVAILABLE_SETS, true);

    if ($column === false) {
        $message = sprintf('"%s" is not a valid language set for ISO 639', $set);

        throw new ComponentException($message);
    }

    $validator = new In($this->getHaystack($column), true);

    // The chosen set is handed to the parent under the 'set' key, the placeholder name used in the class templates.
    parent::__construct($validator, ['set' => $set]);
}
/**
 * Collects every code available in one column of self::LANGUAGE_CODES.
 *
 * Empty-string placeholders are dropped. Because array_filter() preserves keys, the returned array keeps the
 * positions from the extracted column and may therefore have gaps.
 *
 * @param int $index Column to extract from each LANGUAGE_CODES entry.
 *
 * @return string[]
 */
private function getHaystack(int $index): array
{
    $column = array_column(self::LANGUAGE_CODES, $index);

    // Without a callback, array_filter() removes falsy values, i.e. the '' placeholders.
    $codes = array_filter($column);

    return $codes;
}
}