use strict;
use warnings;
+use Encode;
use GetOptions;
sub convertlang($);
sub extractlist($$$); # my ($l, $islang, $txt, $rres) = @_;
-sub getIndex($$);
+sub getIndexes($$);
sub getVal($$$); # my ($l, $txtval, $txtlang) = @_;
sub getproperties($$$$);
sub ismathfont($$);
sub correctstyle($);
+sub decimalUnicode($);
+sub contains($$);
+sub sprintIntervalls($);
# Following fields for a parameter can be defined:
# fieldname: Name of entry in %options
# type: Argument specifier for options that take a value (e.g. "=s");
#   presumably Getopt::Long syntax -- TODO confirm in the GetOptions module
# alias: reference to a list of aliases e.g. ["alias1", "alias2", ... ]
# listsep: Separator for multiple data
# comment: Parameter description
# comment2: Optional additional description line (used below for usage examples)
-my %optionsDef = (
+my @optionsDef = ( # list of [option-name, {field => value, ...}] pairs
# help + verbose already handled in 'GetOptions'
- "l" => {fieldname => "Lang",
- type => "=s", alias=>["lang"],
- comment => "Comma separated list of desired languages"},
- "math" => {fieldname => "Math",
- comment => "Select fonts probably containing math glyphs"},
- "n" => {fieldname => "FontName", listsep => ',',
- type => "=s", alias => ["name"],
- comment => "Select font-names matching these (comma separated) regexes"},
- "nn" => {fieldname => "NFontName",
- type => "=s", listsep => ',',
- comment => "Select font-names NOT matching these (comma separated) regexes"},
- "pl" => {fieldname => "PrintLangs", alias => ["printlangs"],
- comment => "Print supported languages"},
- "pf" => {fieldname => "PrintFiles", alias => ["printfiles"],
- comment => "Print font file names"},
- "p" => {fieldname => "Property",
- type => "=s", listsep => ',',
- comment => "Select fonts with properties matching these (comma separated) regexes"},
- "np" => {fieldname => "NProperty",
- type => "=s", listsep => ',',
- comment => "Select fonts with properties NOT matching these (comma separated) regexes"},
- "pp" => {fieldname => "PrintProperties", alias => ["printproperties"],
- comment => "Print properties from weight, slant and width"},
- "s" => {fieldname => "Scripts",
- type => "=s", listsep => ',',
- comment => "Select fonts with scripts matching these (comma separated) regexes"},
- "ns" => {fieldname => "NScripts",
- type => "=s", listsep => ',',
- comment => "Select fonts with scripts NOT matching these (comma separated) regexes"},
- "ps" => {fieldname => "PrintScripts", alias => ["printscripts"],
- comment => "Print supported scripts"},
- "pw" => {fieldname => "PrintWarnings",
- comment => "Print warnings about discarded/overwritten fonts, conflicting styles"},
+ ["n",
+ {fieldname => "FontName", listsep => ',',
+ type => "=s", alias => ["name"],
+ comment => "Select font-names matching these (comma separated) regexes"},],
+ ["nn",
+ {fieldname => "NFontName",
+ type => "=s", listsep => ',',
+ comment => "Select font-names NOT matching these (comma separated) regexes"},],
+ ["p",
+ {fieldname => "Property",
+ type => "=s", listsep => ',',
+ comment => "Select fonts with properties matching these (comma separated) regexes"},],
+ ["np",
+ {fieldname => "NProperty",
+ type => "=s", listsep => ',',
+ comment => "Select fonts with properties NOT matching these (comma separated) regexes"},],
+ ["s",
+ {fieldname => "Scripts",
+ type => "=s", listsep => ',',
+ comment => "Select fonts with scripts matching these (comma separated) regexes"},],
+ ["ns",
+ {fieldname => "NScripts",
+ type => "=s", listsep => ',',
+ comment => "Select fonts with scripts NOT matching these (comma separated) regexes"},],
+ ["math",
+ {fieldname => "Math",
+ comment => "Select fonts probably containing math glyphs"},],
+ ["c",
+ {fieldname => "Contains", alias => ["contains"],
+ type => "=s", listsep => ',',
+ comment => "Select fonts containing all these (possibly comma separated) glyphs",
+ comment2 => "____example: -c=\"0-9,u+32-u+x7f\"",}],
+ ["nc",
+ {fieldname => "NContains",
+ type => "=s", listsep => ',',
+ comment => "Select fonts NOT containing any of these (possibly comma separated) glyphs",
+ comment2 => "____example: --nc=\"0-9,u+32-u+x7f\"",}],
+ ["l",
+ {fieldname => "Lang",
+ type => "=s", alias=>["lang"],
+ comment => "Comma separated list of desired languages"},],
+ ["pc",
+ {fieldname => "PrintCharset", alias => ["printcharset"],
+ comment => "Print intervals of supported unicode character values"},],
+ ["pl",
+ {fieldname => "PrintLangs", alias => ["printlangs"],
+ comment => "Print supported languages"},],
+ ["pp",
+ {fieldname => "PrintProperties", alias => ["printproperties"],
+ comment => "Print properties from weight, slant and width"},],
+ ["ps",
+ {fieldname => "PrintScripts", alias => ["printscripts"],
+ comment => "Print supported scripts"},],
+ ["pf",
+ {fieldname => "PrintFiles", alias => ["printfiles"],
+ comment => "Print font file names"},],
+ ["pw",
+ {fieldname => "PrintWarnings",
+ comment => "Print warnings about discarded/overwritten fonts, conflicting styles"},],
);
-my %options = %{&handleOptions(\%optionsDef)};
+my %options = %{&handleOptions(\@optionsDef)}; # handleOptions() is not defined in the visible part of this file; its result is dereferenced as a hash
$options{Lang} = "" if (! defined($options{Lang}));
$lg = &convertlang($lg);
}
+# Turn the glyph selectors given with 'Contains'/'NContains' into sorted
+# lists of [first, last] unicode-point intervals.
+for my $charFld ("Contains", "NContains") {
+  next if (! defined($options{$charFld}));
+
+  # Collect the code points as hash keys, so duplicates collapse
+  my %seen = ();
+  for my $spec (@{$options{$charFld}}) {
+    for my $point (decimalUnicode($spec)) {
+      $seen{$point} = 1;
+    }
+  }
+
+  # Merge runs of consecutive code points into intervals
+  my @ranges = ();
+  for my $point (sort {$a <=> $b} keys %seen) {
+    if (@ranges && ($point == $ranges[-1]->[1] + 1)) {
+      $ranges[-1]->[1] = $point;
+    }
+    else {
+      push(@ranges, [$point, $point]);
+    }
+  }
+
+  # The raw selector strings are no longer needed, so the option
+  # now holds the unicode-point intervals instead
+  $options{$charFld} = \@ranges;
+
+  next if (! exists($options{verbose}));
+  my $label = "Ignore if matching unicode-points: ";
+  $label = "Checking for unicode-points: " if ($charFld eq "Contains");
+  print($label . &sprintIntervalls($options{$charFld}) . "\n");
+}
+
+# Build the fc-list command line. The format string determines which
+# properties fc-list prints for each font; optional properties are only
+# requested when an option needs them.
my $cmd = "fc-list";
if (defined($langs[0])) {
 $cmd .= " :lang=" . join(',', @langs);
}
-my $format = "foundry=\"%{foundry}\" postscriptname=\"%{postscriptname}\" fn=\"%{fullname}\" fnl=\"%{fullnamelang}\" family=\"%{family}\" flang=\"%{familylang}\" style=\"%{style}\" stylelang=\"%{stylelang}\"";
+# Fields are separated by exactly one blank, as in the former one-line
+# format (no trailing blank after flang=...)
+my $format = "foundry=\"%{foundry}\"" .
+ " postscriptname=\"%{postscriptname}\"" .
+ " fn=\"%{fullname}\" fnl=\"%{fullnamelang}\"" .
+ " family=\"%{family}\" flang=\"%{familylang}\"" .
+ " style=\"%{style}\" stylelang=\"%{stylelang}\"";
-if (exists($options{PrintScripts}) || defined($options{Scripts}) || defined($options{NSpripts}) || exists($options{Math})) {
+if (exists($options{PrintScripts}) || defined($options{Scripts}) || defined($options{NScripts}) || exists($options{Math})) {
 $format .= " script=\"%{capability}\"";
}
if (exists($options{PrintLangs}) || defined($langs[0])) {
 $format .= " lang=\"%{lang}\"";
}
-if (exists($options{PrintProperties}) || defined($options{Property})) {
+if (exists($options{PrintProperties}) || defined($options{Property}) || defined($options{NProperty})) {
 $format .= " weight=%{weight} slant=%{slant} width=%{width} spacing=%{spacing}";
}
+# The charset is needed for glyph selection as well as for printing it
+if (defined($options{Contains}) || defined($options{NContains}) || exists($options{PrintCharset})) {
+ $format .= " charset=\"%{charset}\"";
+}
$format .= " file=\"%{file}\" abcd\\n";
$cmd .= " -f '$format'";
#print "$cmd\n";
# Dummy internal map
0 => "Serif",
100 => "Sans",
+ 110 => "Script",
+ 120 => "Fraktur",
+ 130 => "Fancy",
+ 140 => "Initials",
+ 200 => "Symbol",
"default" => "Serif",
);
200 => "Bold",
205 => "Extrabold",
210 => "Black",
+ 215 => "ExtraBlack",
);
my %slants = (
# Regexes for known sans serif fonts, keyed by the lowercase first letter
# of the font name
my %sansFonts = (
- "a" => qr/^(arial|andika|angostura|anonymous|arab|aroania|arimo|asap)/i,
- "b" => qr/^(baekmuk|bebas|berenika|beteckna|beuron|blue)/i,
- "c" => qr/^(cabin|caliban|cantarell|carbon|carlito|chandas|chivo|cmu bright|comfortaa|comic|cortoba|cousine|cuprum|cwtex(hei|yen)|cyklop|cypro)/i,
+ "value" => 100, # Sans serif: key into the font-type map, used when one of the regexes below matches
+ "a" => qr/^(aharoni|arial|andika|angostura|anonymous|arab|aroania|arimo|asap)/i,
+ "b" => qr/^b(aekmuk|ebas|erenika|eteckna|euron|lue)/i,
+ "c" => qr/^c(abin|aliban|antarell|arbon|arlito|handas|hivo|mu bright|omfortaa|omi[cx]|oolvetica|ortoba|ousine|uprum|wtex(hei|yen)|yklop|ypro)/i,
 "d" => qr/^(d2coding|dimnah|dosis|dyuthi)/i,
 "e" => qr/^(electron|engebrechtre)/i,
 "f" => qr/^(fandolhei|fetamont|fira|font awesome 5|forgotten)/i,
 "h" => qr/^(hack|hani|haramain|harano|harmattan|hor\b)/i,
 "i" => qr/^(ibm plex|ikarius|inconsolata|induni.?h|iwona)/i,
 "j" => qr/^(jara|jura)/i,
- "k" => qr/^(kalimati|kanji|karla|kayrawan|kenyan|keraleeyam|khalid|khmer [or]|kiloji|klaudia|komatu|kurier)/i,
- "l" => qr/^(laksaman|larabie|lato|league|lexend|lexigulim|libel|liberation|libre franklin|libris|linux biolinum|lobster|logix|lohit|loma)/i,
- "m" => qr/^(m\+ |manchu|manjari|marcellus|mashq|meera|metal|migmix|migu|mikachan|mintspirit|mona|monlam|mono(fonto|id|isome|noki)|montserrat|motoyal|mukti|musica)/i,
+ "k" => qr/^(kalimati|kanji|karla|karma|kayrawan|kenyan|keraleeyam|khalid|khmer [or]|kiloji|klaudia|ko[mn]atu|kurier|kustom)/i,
+ "l" => qr/^l(aksaman|arabie|ato|eague|exend|exigulim|ibel|iberation|ibre franklin|ibris|inux biolinum|obster|ogix|ohit|oma)/i,
+ "m" => qr/^m(\+ |anchu|anjari|arcellus|ashq|eera|etal|igmix|igu|ikachan|intspirit|iriam ?clm|ona|onlam|ono(fonto|id|isome|noki)|ontserrat|otoyal|ukti|usica)/i,
 "n" => qr/^(nachlieli|nada|nafees|nagham|nanum(barunpen|square)|nice)/i,
 "o" => qr/^(ocr|okolaks|opendyslexic|ostorah|ouhud|over|oxygen)/i,
- "p" => qr/^(padauk|padmaa|pagul|paktype|pakenham|palladio|petra|phetsarath|play\b|poiret|port\b|primer\b|prociono|pt\b|purisa)/i,
- "q" => qr/^(qt(ancient|helvet|avanti|eratype|eurotype|floraline|florencia|frank|fritz|future|greece|howard|letter|optimum|pandora)|quercus)/i,
+ "p" => qr/^(padauk|pagul|paktype|pakenham|palladio|petra|phetsarath|play\b|poiret|port\b|primer\b|prociono|pt\b|purisa)/i,
+ "q" => qr/^(qt(ancient|helvet|avanti|doghaus|eratype|eurotype|floraline|frank|fritz|future|greece|howard|letter|optimum)|quercus)/i,
 "r" => qr/^(rachana|radio\b|raleway|ricty|roboto|rosario)/i,
 "s" => qr/^(salem|samanata|sawasdee|shado|sharja|simple|sophia|soul|source|switzera)/i,
 "t" => qr/^(tarablus|teen|texgyre(adventor|heros)|tiresias|trebuchet|tscu|tuffy)/i,
 "y" => qr/^(yanone)/i,
 "z" => qr/^(zekton|zero)/i,
);
+my %scriptFonts = ( # regexes for known script fonts, keyed by the lowercase first letter of the font name
+ "value" => 110, # Script: key into the font-type map, used when one of the regexes below matches
+ "c" => qr/^(chancery)/i,
+ "d" => qr/^(dancing)/i,
+ "e" => qr/^(elegante)/i,
+ "j" => qr/^jsmath.?(rsfs)/i,
+ "k" => qr/^(kaushan|karumbi|kristi)/i,
+ "m" => qr/^(mathjax_script|miama)/i,
+ "n" => qr/^(nanum (brush|pen) script)/i,
+ "q" => qr/^qt(arabian|boulevard|brushstroke|chancery|coronation|florencia|handwriting|linostroke|merry|pandora|slogan)/i,
+ "r" => qr/^((romande.*|ruf)script|rsfs)/i,
+ "u" => qr/^(un ?pilgi|urw ?chancery)/i,
+);
+
+my %fraktFonts = ( # regexes for known fraktur fonts, keyed by the lowercase first letter of the font name
+ "value" => 120, # Fraktur: key into the font-type map, used when one of the regexes below matches
+ "e" => qr/^eufm/i,
+ "j" => qr/^(jsmath.?euf)/i,
+ "m" => qr/^(missaali)/i,
+ "o" => qr/^(oldania)/i,
+ "q" => qr/^qt(blackforest|cloisteredmonk|dublinirish|fraktur|heidelbergtype|(lino|london)scroll)/i,
+);
+
+# Regexes for known decorative ("Fancy") fonts, keyed by the lowercase
+# first letter of the font name. "value" is the key into the font-type
+# map that is used when one of the regexes matches.
+my %fancyFonts = (
+  "value" => 130, # Fancy
+  "c" => qr/^(cretino)/i,
+  "d" => qr/^dseg/i,
+  "f" => qr/^frederika/i,
+  "g" => qr/^(gfs.?theo)/i,
+  # The group is required: without it '^' anchors 'keter' only and the
+  # remaining alternatives would match anywhere inside the font name
+  "k" => qr/^(keter|kicking|kredit|kouzan|kerkis calligraphic)/i,
+);
+
+my %initialFonts = ( # regexes for known initials fonts, keyed by the lowercase first letter of the font name
+ "value" => 140, # Initials: key into the font-type map, used when one of the regexes below matches
+ "e" => qr/^(eb.?garamond.?init)/i,
+ "l" => qr/^(libertinus|linux).*initials/i,
+ "y" => qr/^(yinit)/i,
+);
+
+my %symbolFonts = ( # regexes for known symbol fonts, keyed by the lowercase first letter of the font name
+ "value" => 200, # Symbol: key into the font-type map, used when one of the regexes below matches
+ "a" => qr/^(academicons)/i,
+ "c" => qr/^(caladings|ccicons|chess|cmsy|cmex)/i,
+ "d" => qr/^(dingbats|drmsym|d05)/i,
+ "e" => qr/^(elusiveicons|emoji|esint|euterpe)/i,
+ "f" => qr/^(fandol.?brail|fdsymbol|fourierorns|font(awesome|ello|.?mfizz))/i,
+ "h" => qr/^(hots)/i,
+ "j" => qr/^jsmath.?(msam|cmsy|masm|msbm|wasy|cmex|stmary)/i, # NOTE(review): "masm" looks like a transposition of "msam" (already listed) -- confirm
+ "m" => qr/^(marvosym|material|msam|msbm)/i,
+ "n" => qr/^(noto.*emoji)/i,
+ "o" => qr/^(octicons)/i,
+ "q" => qr/^(qtdingbits)/i,
+ "s" => qr/^stmary/i,
+ "t" => qr/^(typicons|twemoji)/i,
+ "w" => qr/^(webdings|wasy)/i,
+);
+
if (open(FI, "$cmd |")) {
NXTLINE: while (my $l = <FI>) {
chomp($l);
$nexttype++;
}
}
- my $nfound = 0;
my %usedlangs = ();
if ($l =~ / lang=\"([^\"]+)\"/) {
my @ll = split(/\|/, $1);
for my $lang (@langs) {
next NXTLINE if (! defined($usedlangs{$lang}));
}
- next if ($nfound);
my $style = &getVal($l, "style", "stylelang");
$style =~ s/^\\040//;
my $fullname = &getVal($l, "fn", "fnl");
next NXTLINE if ($fontname !~ /$fn/i);
}
}
+ my @charlist = ();
+ if (defined($options{Contains}) || defined($options{NContains}) || exists($options{PrintCharset})) {
+ if ($l =~ / charset=\"([^\"]+)\"/) {
+ my @list = split(/\s+/, $1);
+ for my $e (@list) {
+ my ($l, $h) = split('-', $e);
+ $h = $l if (! defined($h));
+ push(@charlist, [hex($l), hex($h)]);
+ }
+ }
+ if (defined($options{Contains})) {
+ for my $g (@{$options{Contains}}) {
+ next NXTLINE if (! contains($g, \@charlist));
+ }
+ }
+ if (defined($options{NContains})) {
+ for my $g (@{$options{NContains}}) {
+ # Ignore if ANY char exist in @charlist
+ for (my $i = $g->[0]; $i <= $g->[1]; $i++) {
+ next NXTLINE if (contains([$i,$i], \@charlist));
+ }
+ }
+ }
+ }
my $props = "";
my @errors = ();
if (exists($options{PrintProperties}) || defined($options{Property}) || defined($options{NProperty})) {
if (exists($options{PrintLangs})) {
$props .= '(' . join(',', sort keys %usedlangs) . ')';
}
+ if (exists($options{PrintCharset})) {
+ $props .= '(' . &sprintIntervalls(\@charlist) . ')';
+ }
if (exists($options{PrintScripts}) || defined($options{Scripts}) || defined($options{NScripts}) || exists($options{Math})) {
my @scripts = ();
my $scripts = "";
return($ftypes{100}); # Sans Serif
}
}
+ elsif ($style eq "PatchSans" && $fontname eq "font") {
+ return($ftypes{200}); # Symbol
+ }
elsif ($fontname =~ /serif|times|mincho|batang/i) {
if ($fontname =~ /good times/i) {
return($ftypes{100}); # Sans Serif
}
- else {
+ elsif ($fontname !~ /initials/i) {
return($ftypes{0}); # Serif
}
}
# Now check for fonts without a hint in font name
- if ($fontname =~ /([a-z])/i) {
+ if ($fontname =~ /^([a-z])/i) {
my $key = lc($1);
- if (defined($sansFonts{$key})) {
- if ($fontname =~ $sansFonts{$key}) {
- return($ftypes{100}); # Sans Serif
+ for my $rFonts (\%sansFonts, \%scriptFonts, \%fraktFonts, \%fancyFonts, \%initialFonts, \%symbolFonts) {
+ if (defined($rFonts->{$key})) {
+ if ($fontname =~ $rFonts->{$key}) {
+ return($ftypes{$rFonts->{"value"}});
+ }
}
}
}
{
my ($fontname, $style) = @_;
my $result = undef;
- for my $key (keys %weights) {
- next if ($key !~ /^\d+$/);
- my $val = $weights{$key};
- for my $info ($style, $fontname) {
+ for my $info ($style, $fontname) {
+ for my $key (keys %weights) {
+ next if ($key !~ /^\d+$/);
+ my $val = $weights{$key};
if ($info =~ /\b$val\b/i) {
- if ($val eq "Regular") {
- $result = $val; # It may refer to width
- }
- else {
- return($val);
- }
+ return($val);
}
}
}
return($spacings{$key});
}
}
- if ("$fontname $style" =~ /(mono|typewriter|cursor|fixed)\b/i) {
+ if ("$fontname $style" =~ /(\bmono\b|luximono|typewriter|cursor|fixed)\b/i) {
return($spacings{100}); # Mono
}
else {
my $val1 = $rget->($newfam, $newstyle);
my $val;
if (defined($val2) && defined($val1) && ($val2 ne $val1)) {
- push(@{$rerrors}, "Fontname($fontname),Style($style): Values for $txt ($val1 != $val2) differ, selecting internal $txt($val2)");
- $val = $val2;
+ if (($txt =~/^(weight|slant)$/) && ($newstyle =~ /$val1/)){
+ # style overrides weight and slant
+ push(@{$rerrors}, "Fontname($fontname),Style($style): Values for $txt ($val1 != $val2) differ, pick $val1 from style");
+ $val = $val1;
+ }
+ elsif ($newfam =~ /$val1/) {
+ push(@{$rerrors}, "Fontname($fontname),Style($style): Values for $txt ($val1 != $val2) differ, pick $val1 from fontname");
+ $val = $val1;
+ }
+ else {
+ push(@{$rerrors}, "Fontname($fontname),Style($style): Values for $txt ($val1 != $val2) differ, pick $val2 from $txt-property");
+ $val = $val2;
+ }
}
elsif (! defined($val2)) {
$val = $val1;
$style =~ s/\b(SC|Small(caps(alt)?)?)\b/SmallCaps/i;
$style =~ s/w3 mono/Dual/i;
$style =~ s/Regul[ea]r/Regular/i;
+ $style =~ s/Megablack/ExtraBlack/i;
$style =~ s/ +/ /g;
return($style);
}
+
+# Return the list of unicode code points (as decimal numbers) selected
+# by the input string.
+#
+# The input (decoded as UTF-8) may contain
+#   - plain characters,
+#   - code points written as 'u+<decimal>', 'u+x<hex>' or 'u+0x<hex>'
+#     (case insensitive), e.g. 'u+32' or 'U+X7F',
+#   - intervals 'A-B' between two of the above, e.g. 'a-z' or
+#     'u+32-u+x7f'; a descending interval ('z-a') is listed downwards.
+# A code point given in 'u+' notation is always taken literally, so
+# 'u+45' selects the character '-' instead of acting as interval operator.
+# A '-' lacking one of its end points is ignored.
+# The returned list may contain duplicates.
+sub decimalUnicode($)
+{
+  my ($input) = @_;
+  my @res = ();
+  my $text = decode('utf-8', $input);
+
+  # Tokenize in a single left-to-right pass: a defined token is a code
+  # point, undef stands for the interval operator '-'. Tokenizing before
+  # any conversion prevents already converted characters from being
+  # re-scanned as part of a new 'u+' sequence.
+  my @tokens = ();
+  while ($text =~ /\G(?:u\+(0?x[\da-f]+|\d+)|(.))/gcsi) {
+    my ($num, $chr) = ($1, $2);
+    if (defined($num)) {
+      # /i is needed here too, otherwise 'U+X7F' is not seen as hex
+      if ($num =~ /^0?x(.+)$/i) {
+        push(@tokens, hex($1));
+      }
+      else {
+        push(@tokens, int($num));
+      }
+    }
+    elsif ($chr eq '-') {
+      push(@tokens, undef);
+    }
+    else {
+      push(@tokens, ord($chr));
+    }
+  }
+
+  my $interval = 0;   # true if the previous token was '-'
+  my $start = undef;  # last single code point, a possible interval start
+  for my $cp (@tokens) {
+    if (! defined($cp)) { # Interval
+      $interval = 1;
+      next;
+    }
+    if ($interval && defined($start)) {
+      # $start itself has already been stored
+      if ($cp < $start) {
+        push(@res, reverse($cp .. $start - 1));
+      }
+      else {
+        push(@res, $start + 1 .. $cp);
+      }
+      $start = undef;
+    }
+    else {
+      $start = $cp;
+      push(@res, $start);
+    }
+    $interval = 0;
+  }
+  return(@res);
+}
+
+
+# Check whether every glyph value of the interval @{$ri} (= [start, end])
+# is covered by the intervals in @{$rList}.
+#
+# @{$rList} must be sorted ascending; each entry is a [first, last] pair.
+# The interval may be covered by a single entry
+#   ...re0..........re1...
+#   ......start..end......
+# or by a chain of adjacent/overlapping entries without a gap.
+# Returns 1 if covered, 0 otherwise (also for an empty list).
+sub contains($$)
+{
+  my ($ri, $rList) = @_;
+  my $end = $ri->[1];
+  my $need = $ri->[0];  # lowest value not yet known to be covered
+
+  for my $re (@{$rList}) {
+    next if ($re->[1] < $need);       # entry lies completely below
+    return 0 if ($re->[0] > $need);   # gap: $need is not covered
+    return 1 if ($re->[1] >= $end);   # covered up to the end
+    $need = $re->[1] + 1;             # continue behind this entry
+  }
+  return 0;
+}
+
+# Format a list of [first, last] pairs as comma separated text:
+# 'first-last' for a real interval, just the value if both ends are equal.
+sub sprintIntervalls($)
+{
+  my ($rList) = @_;
+  my @out = map {
+    my $lo = $_->[0];
+    my $hi = $_->[1];
+    ($lo != $hi) ? $lo . '-' . $hi : $lo;
+  } @{$rList};
+  return join(',', @out);
+}