package Font::TTF::Cmap;
=head1 NAME
Font::TTF::Cmap - Character map table
Looks after the character map. For ease of use, the actual cmap is held in
a hash against codepoint. Thus for a given table:
$gid = $font->{'cmap'}{'Tables'}[0]{'val'}{$code};
Note that C<$code> must be a numeric value (e.g. 0x1234) rather than a string representation such as C<'0x1234'>.
The instance variables listed here are not preceded by a space due to their
emulating structural information in the font.
=over 4
=item Num
Number of subtables in this table
=item Tables
An array of subtables ([0..Num-1])
Each subtable also has its own instance variables which are, again, not
preceded by a space.
=over 4
=item Platform
The platform number for this subtable
=item Encoding
The encoding number for this subtable
=item Format
Gives the stored format of this subtable
=item Ver
Gives the version (or language) information for this subtable
=item val
A hash keyed by the codepoint value (not a string) storing the glyph id
The following cmap options are controlled by instance variables that start with a space:
=over 4
=item allowholes
By default, when generating format 4 cmap subtables, character codes that map to glyph zero
(normally called .notdef) are not included in the subtable. In some cases, including some of these
character codes can result in a smaller format 4 subtable. To enable this behavior, set allowholes
to a non-zero value.
=head1 METHODS
use strict;
use vars qw(@ISA);
use Font::TTF::Table;
use Font::TTF::Utils;
@ISA = qw(Font::TTF::Table);
=head2 $t->read
Reads the cmap into memory. Format 4 subtables read the whole subtable and
fill in the segmented array accordingly.
# Parse the cmap table from the input file into $self->{'Tables'}.
#
# Parameters:
#   $keepzeros - when true, format 4 mappings to glyph 0 are stored as well;
#                by default they are dropped. Only the format 4 branch
#                consults this flag.
#
# Every subtable hash receives Platform, Encoding, LOC (absolute file
# offset), Format, Ver and a 'val' hash keyed by numeric character code.
# Returns $self immediately if the superclass read reports nothing to do.
sub read
my ($self, $keepzeros) = @_;
$self->SUPER::read or return $self;
my ($dat, $i, $j, $k, $id, @ids, $s);
my ($start, $end, $range, $delta, $form, $len, $num, $ver, $sg);
my ($fh) = $self->{' INFILE'};
# Table header: skip the first 16-bit field, then the 16-bit subtable count.
$fh->read($dat, 4);
$self->{'Num'} = unpack("x2n", $dat);
$self->{'Tables'} = [];
# Pass 1: read the directory of 8-byte records (platform, encoding, offset).
for ($i = 0; $i < $self->{'Num'}; $i++)
$s = {};
$fh->read($dat, 8);
($s->{'Platform'}, $s->{'Encoding'}, $s->{'LOC'}) = (unpack("nnN", $dat));
# The stored offset is relative to the table start; make it absolute.
$s->{'LOC'} += $self->{' OFFSET'};
push(@{$self->{'Tables'}}, $s);
# Pass 2: visit each subtable and decode it according to its format word.
for ($i = 0; $i < $self->{'Num'}; $i++)
$s = $self->{'Tables'}[$i];
$fh->seek($s->{'LOC'}, 0);
$fh->read($dat, 2);
$form = unpack("n", $dat);
$s->{'Format'} = $form;
# Format 0: length, Ver, then 256 one-byte glyph ids indexed by code.
if ($form == 0)
my $j = 0;
$fh->read($dat, 4);
($len, $s->{'Ver'}) = unpack('n2', $dat);
$fh->read($dat, 256);
# Only non-zero glyph ids are kept; $j tracks the code of the current byte.
$s->{'val'} = {map {$j++; ($_ ? ($j - 1, $_) : ())} unpack("C*", $dat)};
# Format 6: length, Ver, first code, entry count, then 16-bit glyph ids.
} elsif ($form == 6)
my ($start, $ecount);
$fh->read($dat, 8);
($len, $s->{'Ver'}, $start, $ecount) = unpack('n4', $dat);
$fh->read($dat, $ecount << 1);
# Only non-zero glyph ids are kept; $start advances with each entry.
$s->{'val'} = {map {$start++; ($_ ? ($start - 1, $_) : ())} unpack("n*", $dat)};
# Format 2: 256 subheader keys, the subheaders, then a glyph index array.
} elsif ($form == 2) # Contributed by Huw Rogers
$fh->read($dat, 4);
($len, $s->{'Ver'}) = unpack('n2', $dat);
$fh->read($dat, 512);
my ($j, $k, $l, $m, $n, @subHeaderKeys, @subHeaders, $subHeader);
$n = 1;
# Each key is stored as subheader index * 8; $n ends up as highest index + 1.
for ($j = 0; $j < 256; $j++) {
my $k = unpack('@'.($j<<1).'n', $dat)>>3;
$n = $k + 1 if $k >= $n;
$subHeaders[$subHeaderKeys[$j] = $k] ||= [ ];
$fh->read($dat, $n<<3); # read subHeaders[]
for ($k = 0; $k < $n; $k++) {
$subHeader = $subHeaders[$k];
$l = $k<<3;
# Four 16-bit fields per subheader: [0] first code, [1] entry count,
# [2] idDelta, [3] idRangeOffset.
@$subHeader = unpack('@'.$l.'n4', $dat);
# Reinterpret idDelta as a signed 16-bit value.
$subHeader->[2] = unpack('s', pack('S', $subHeader->[2]))
if $subHeader->[2] & 0x8000; # idDelta
# Rebase the byte offset (measured from the idRangeOffset field itself) to a
# 16-bit word index into the glyph index data read below.
$subHeader->[3] =
($subHeader->[3] - (($n - $k)<<3) + 6)>>1; # idRangeOffset
# 518 = 2 (format) + 4 (length, Ver) + 512 (subheader keys) already consumed.
$fh->read($dat, $len - ($n<<3) - 518); # glyphIndexArray[]
for ($j = 0; $j < 256; $j++) {
$k = $subHeaderKeys[$j];
$subHeader = $subHeaders[$k];
# Subheader 0 maps $j as a single-byte code; any other subheader treats $j
# as the high byte of a two-byte code.
unless ($k) {
$l = $j - $subHeader->[0];
if ($l >= 0 && $l < $subHeader->[1]) {
$m = unpack('@'.(($l + $subHeader->[3])<<1).'n', $dat);
# idDelta is applied only to non-zero glyph indices.
$m += $subHeader->[2] if $m;
$s->{'val'}{$j} = $m;
} else {
for ($l = 0; $l < $subHeader->[1]; $l++) {
$m = unpack('@'.(($l + $subHeader->[3])<<1).'n', $dat);
$m += $subHeader->[2] if $m;
$s->{'val'}{($j<<8) + $l + $subHeader->[0]} = $m;
# Format 4: segment arrays (end, start, delta, range offset) + glyph ids.
} elsif ($form == 4)
# 12 bytes are read but only the first three words are used: length, Ver
# and a count that is halved to give the number of segments.
$fh->read($dat, 12);
($len, $s->{'Ver'}, $num) = unpack('n3', $dat);
$num >>= 1;
# 14 bytes (format word + the 12 above) are consumed; read the remainder.
$fh->read($dat, $len - 14);
for ($j = 0; $j < $num; $j++)
# Arrays are laid out back to back; the "+ 2" skips the 16-bit pad word
# that follows the end-code array.
$end = unpack("n", substr($dat, $j << 1, 2));
$start = unpack("n", substr($dat, ($j << 1) + ($num << 1) + 2, 2));
$delta = unpack("n", substr($dat, ($j << 1) + ($num << 2) + 2, 2));
# Treat the delta as a signed 16-bit quantity.
$delta -= 65536 if $delta > 32767;
$range = unpack("n", substr($dat, ($j << 1) + $num * 6 + 2, 2));
for ($k = $start; $k <= $end; $k++)
if ($range == 0 || $range == 65535) # support the buggy FOG with its range=65535 for final segment
# No range offset: the glyph id is simply code + delta.
{ $id = $k + $delta; }
# Otherwise the range offset is relative to this segment's own slot in the
# range array and selects an entry in the trailing glyph id data.
{ $id = unpack("n", substr($dat, ($j << 1) + $num * 6 +
2 + ($k - $start) * 2 + $range, 2)) + $delta; }
# Wrap values of 65536 and above back into 16 bits.
$id -= 65536 if $id >= 65536;
$s->{'val'}{$k} = $id if ($id || $keepzeros);
# Formats 8, 12 and 13: 32-bit groups of (start code, end code, glyph id).
} elsif ($form == 8 || $form == 12 || $form == 13)
# Skip a 16-bit field, then 32-bit length and Ver.
$fh->read($dat, 10);
($len, $s->{'Ver'}) = unpack('x2N2', $dat);
if ($form == 8)
# Format 8 carries an 8192-byte map before the 32-bit group count.
$fh->read($dat, 8196);
$num = unpack("N", substr($dat, 8192, 4)); # don't need the map
} else
$fh->read($dat, 4);
$num = unpack("N", $dat);
$fh->read($dat, 12 * $num);
for ($j = 0; $j < $num; $j++)
($start, $end, $sg) = unpack("N3", substr($dat, $j * 12, 12));
for ($k = $start; $k <= $end; $k++)
# Format 13 maps the whole group to one glyph; the others count upwards.
{ $s->{'val'}{$k} = $form == 13 ? $sg : $sg++; }
# Format 10: 32-bit header followed by a dense array of 16-bit glyph ids.
} elsif ($form == 10)
# Skip a 16-bit field, then 32-bit length, Ver, first code and entry count.
$fh->read($dat, 18);
($len, $s->{'Ver'}, $start, $num) = unpack('x2N4', $dat);
$fh->read($dat, $num << 1);
for ($j = 0; $j < $num; $j++)
{ $s->{'val'}{$start + $j} = unpack("n", substr($dat, $j << 1, 2)); }
=head2 $t->ms_lookup($uni)
Finds a Unicode table, giving preference to the MS one, and looks up the given
Unicode codepoint in it to find the glyph id.
# Look up a Unicode codepoint in the preferred Unicode subtable.
#
# Parameters:
#   $uni - numeric codepoint to look up
#
# Returns the glyph id stored for $uni, or undef when either no suitable
# subtable can be found or the codepoint has no entry.
sub ms_lookup
{
    my ($self, $uni) = @_;

    # Locate the subtable on first use; find_ms stores it in ' mstable'.
    if (!defined $self->{' mstable'})
    {
        my $located = $self->find_ms;
        return undef unless $located;
    }

    return $self->{' mstable'}{'val'}{$uni};
}
=head2 $t->find_ms
Finds a Unicode table, giving preference to the Microsoft one, and sets the C<mstable> instance variable
to it if found. Returns the table it finds.
# Find the subtable to use for Unicode lookups and cache it in ' mstable'.
#
# Selection rules:
#   * a cached ' mstable' is returned as is;
#   * a platform 3 subtable with encoding 10 wins immediately;
#   * otherwise the last platform 3 subtable seen is used, unless none of
#     them has encoding 1 and a platform 0 (or platform 2 / encoding 1)
#     subtable exists, in which case that alternative is used instead.
#
# The subtable list itself is walked rather than counting up to 'Num', so
# the result stays correct when 'Num' is out of step with 'Tables' (for
# example after subtables have been added or removed in memory).
#
# Returns the chosen subtable hash, or undef if nothing suitable exists.
sub find_ms
{
    my ($self) = @_;
    my ($alt, $found);

    return $self->{' mstable'} if defined $self->{' mstable'};

    foreach my $s (@{ $self->{'Tables'} || [] })
    {
        next unless defined $s;         # tolerate holes in the list
        if ($s->{'Platform'} == 3)
        {
            $self->{' mstable'} = $s;
            return $s if ($s->{'Encoding'} == 10);
            $found = 1 if ($s->{'Encoding'} == 1);
        }
        elsif ($s->{'Platform'} == 0 || ($s->{'Platform'} == 2 && $s->{'Encoding'} == 1))
        { $alt = $s; }
    }

    # Prefer the non-Microsoft alternative only when no platform 3 /
    # encoding 1 subtable was seen.
    $self->{' mstable'} = $alt if ($alt && !$found);
    return $self->{' mstable'};
}
=head2 $t->ms_enc
Returns the encoding of the microsoft table (0 => symbol, etc.). Returns undef if there is
no Microsoft cmap.
# Report the encoding id of the Microsoft (platform 3) subtable.
#
# The cached ' mstable' is used when it is itself a platform 3 subtable;
# otherwise the first platform 3 entry in 'Tables' supplies the answer.
# Returns undef when there is no platform 3 subtable at all.
sub ms_enc
{
    my ($self) = @_;
    my $cached = $self->{' mstable'};

    if (defined $cached && $cached->{'Platform'} == 3)
    { return $cached->{'Encoding'}; }

    for my $table (@{$self->{'Tables'}})
    {
        next unless $table->{'Platform'} == 3;
        return $table->{'Encoding'};
    }

    return undef;
}
=head2 $t->out($fh)
Writes out a cmap table to a filehandle. If it has not been read, then
just copies from input file to output
# Serialise the cmap table to the filehandle $fh.
#
# Parameters:
#   $fh - output handle; it must support print, tell and seek because
#         lengths and offsets are back-patched after each subtable.
#
# If the table was never read (' read' is false) the work is delegated to
# the superclass. Otherwise subtables are sorted by platform, encoding and
# Ver, 'Num' is recomputed, and each subtable is written in the format
# named by its 'Format' field.
sub out
my ($self, $fh) = @_;
my ($loc, $s, $i, $base_loc, $j, @keys);
return $self->SUPER::out($fh) unless $self->{' read'};
$self->{'Tables'} = [sort {$a->{'Platform'} <=> $b->{'Platform'}
|| $a->{'Encoding'} <=> $b->{'Encoding'}
|| $a->{'Ver'} <=> $b->{'Ver'}} @{$self->{'Tables'}}];
$self->{'Num'} = scalar @{$self->{'Tables'}};
# Remember where the table starts so that offsets can be made relative.
$base_loc = $fh->tell();
# Table header: a zero word followed by the subtable count.
$fh->print(pack("n2", 0, $self->{'Num'}));
# Directory records with a placeholder offset of 0, patched further down.
for ($i = 0; $i < $self->{'Num'}; $i++)
{ $fh->print(pack("nnN", $self->{'Tables'}[$i]{'Platform'}, $self->{'Tables'}[$i]{'Encoding'}, 0)); }
for ($i = 0; $i < $self->{'Num'}; $i++)
$s = $self->{'Tables'}[$i];
# Character codes in ascending numeric order. Formats below 8 can only
# hold 16-bit codes, so larger keys are filtered out for them.
if ($s->{'Format'} < 8)
{ @keys = sort {$a <=> $b} grep { $_ <= 0xFFFF} keys %{$s->{'val'}}; }
{ @keys = sort {$a <=> $b} keys %{$s->{'val'}}; }
$s->{' outloc'} = $fh->tell();
# Subtable header with a zero length that is patched once the size is
# known: 16-bit fields for formats below 8, 32-bit length and Ver otherwise.
if ($s->{'Format'} < 8)
{ $fh->print(pack("n3", $s->{'Format'}, 0, $s->{'Ver'})); } # come back for length
{ $fh->print(pack("n2N2", $s->{'Format'}, 0, 0, $s->{'Ver'})); }
# Format 0: 256 one-byte glyph ids, 0 where a code has no mapping.
if ($s->{'Format'} == 0)
$fh->print(pack("C256", map {defined $_ ? $_ : 0} @{$s->{'val'}}{0 .. 255}));
# Format 6: first code, entry count, then a dense run of 16-bit glyph ids.
} elsif ($s->{'Format'} == 6)
$fh->print(pack("n2", $keys[0], $keys[-1] - $keys[0] + 1));
$fh->print(pack("n*", map {defined $_ ? $_ : 0} @{$s->{'val'}}{$keys[0] .. $keys[-1]}));
# Format 2: one subheader per high byte plus a shared glyph index array.
} elsif ($s->{'Format'} == 2) # Contributed by Huw Rogers
my ($g, $k, $h, $l, $m, $n);
my (@subHeaderKeys, @subHeaders, $subHeader, @glyphIndexArray);
$n = 0;
# -1 marks a high byte that has no subheader yet.
@subHeaderKeys = (-1) x 256;
for $j (@keys) {
next unless defined($g = $s->{'val'}{$j});
# Split the code into high byte ($h) and low byte ($l).
$h = int($j>>8);
$l = ($j & 0xff);
if (($k = $subHeaderKeys[$h]) < 0) {
# New subheader: [first low byte, count, delta, offset, [glyph ids]].
$subHeader = [ $l, 1, 0, 0, [ $g ] ];
$subHeaders[$k = $n++] = $subHeader;
$subHeaderKeys[$h] = $k;
} else {
# Extend the existing subheader, padding any skipped low bytes with 0.
$subHeader = $subHeaders[$k];
$m = ($l - $subHeader->[0] + 1) - $subHeader->[1];
$subHeader->[1] += $m;
push @{$subHeader->[4]}, (0) x ($m - 1), $g - $subHeader->[2];
# High bytes that never appeared fall back to subheader 0.
@subHeaderKeys = map { $_ < 0 ? 0 : $_ } @subHeaderKeys;
# Subheader 0's glyph ids are placed first in the glyph index array.
$subHeader = $subHeaders[0];
$subHeader->[3] = 0;
push @glyphIndexArray, @{$subHeader->[4]};
splice(@$subHeader, 4);
# Remaining subheaders, ordered by the length of their glyph id lists.
my @subHeaders_ = sort {@{$a->[4]} <=> @{$b->[4]}} @subHeaders[1..$#subHeaders];
my ($f, $d, $r, $subHeader_);
# Slot [5] receives a textual signature made of the successive differences
# between glyph ids, each rendered as a sign plus four hex digits.
for ($k = 0; $k < @subHeaders_; $k++) {
$subHeader = $subHeaders_[$k];
$f = $r = shift @{$subHeader->[4]};
$subHeader->[5] = join(':',
map {
$d = $_ - $r;
$r = $_;
$d < 0 ?
sprintf('-%04x', -$d) :
sprintf('+%04x', $d)
} @{$subHeader->[4]});
unshift @{$subHeader->[4]}, $f;
# Append each list to the glyph index array. A later subheader whose
# signature occurs inside this one's is pointed at the same storage, with
# its delta adjusted, instead of being written again.
for ($k = 0; $k < @subHeaders_; $k++) {
$subHeader = $subHeaders_[$k];
next unless $subHeader->[4];
$subHeader->[3] = @glyphIndexArray;
push @glyphIndexArray, @{$subHeader->[4]};
for ($l = $k + 1; $l < @subHeaders_; $l++) {
$subHeader_ = $subHeaders_[$l];
next unless $subHeader_->[4];
$d = $subHeader_->[5];
if ($subHeader->[5] =~ /\Q$d\E/) {
# Signature entries are joined into 6-character units, so the length of
# the text before the match is converted into an entry index.
my $o = length($`)/6; #`
$subHeader_->[2] +=
$subHeader_->[4]->[$o] - $subHeader->[4]->[0];
$subHeader_->[3] = $subHeader->[3] + $o;
splice(@$subHeader_, 4);
splice(@$subHeader, 4);
# Subheader keys are stored as subheader index * 8.
$fh->print(pack('n*', map { $_<<3 } @subHeaderKeys));
for ($j = 0; $j < 256; $j++) {
$k = $subHeaderKeys[$j];
$subHeader = $subHeaders[$k];
for ($k = 0; $k < $n; $k++) {
$subHeader = $subHeaders[$k];
# NOTE(review): the three lines below form a bare conditional expression
# whose value is not passed to any call in this listing, so the statement
# that packs and prints each subheader looks incomplete here - confirm
# against the upstream source before relying on format 2 output.
$subHeader->[2] < 0 ?
unpack('S', pack('s', $subHeader->[2])) :
($subHeader->[3]<<1) + (($n - $k)<<3) - 6
$fh->print(pack('n*', @glyphIndexArray));
# Format 4: segment mapping to delta values.
} elsif ($s->{'Format'} == 4)
my (@starts, @ends, @deltas, @range);
# There appears to be a bug in Windows that requires the final 0xFFFF (sentry)
# to be in a segment by itself -- otherwise Windows 7 and 8 (at least) won't install
# or preview the font, complaining that it doesn't appear to be a valid font.
# Therefore we can't just add 0XFFFF to the USV list as we used to do:
# push(@keys, 0xFFFF) unless ($keys[-1] == 0xFFFF);
# Instead, for now *remove* 0xFFFF from the USV list, and add a segment
# for it after all the other segments are computed.
pop @keys if $keys[-1] == 0xFFFF;
# Step 1: divide into maximal length idDelta runs
my ($prevUSV, $prevgid);
for ($j = 0; $j <= $#keys; $j++)
my $u = $keys[$j];
my $g = $s->{'val'}{$u};
# A new run starts whenever the code or the glyph id stops being consecutive.
if ($j == 0 || $u != $prevUSV+1 || $g != $prevgid+1)
push @ends, $prevUSV unless $j == 0;
push @starts, $u;
push @range, 0;
$prevUSV = $u;
$prevgid = $g;
push @ends, $prevUSV;
# Step 2: find each macro-range
my ($start, $end); # Start and end of macro-range
for ($start = 0; $start < $#starts; $start++)
next if $ends[$start] - $starts[$start] > 7; # if count > 8, we always treat this as a run unto itself
for ($end = $start+1; $end <= $#starts; $end++)
last if $starts[$end] - $ends[$end-1] > ($self->{' allowholes'} ? 5 : 1)
|| $ends[$end] - $starts[$end] > 7; # gap > 4 or count > 8 so $end is beyond end of macro-range
$end--; #Ending index of this macro-range
# Step 3: optimize this macro-range (from $start through $end)
L1: for ($j = $start; $j < $end; )
my $size1 = ($range[$j] ? 8 + 2 * ($ends[$j] - $starts[$j] + 1) : 8); # size of first range (which may now be idRange type)
for (my $k = $j+1; $k <= $end; $k++)
# Compare the size of one merged idRange segment (8 bytes plus 2 per code)
# with the size of keeping segments $j..$k separate.
if (8 + 2 * ($ends[$k] - $starts[$j] + 1) <= $size1 + 8 * ($k - $j))
# Need to coalesce $j..$k into $j:
$ends[$j] = $ends[$k];
$range[$j] = 1; # for now use boolean to indicate this is an idRange segment
splice @starts, $j+1, $k-$j;
splice @ends, $j+1, $k-$j;
splice @range, $j+1, $k-$j;
$end -= ($k-$j);
next L1; # Note that $j isn't incremented so this is a redo
# Nothing coalesced
# Finished with this macro-range
$start = $end;
# Ok, add the final segment containing the sentry value
push(@keys, 0xFFFF);
push @starts, 0xFFFF;
push @ends, 0xFFFF;
push @range, 0;
# What is left is a collection of segments that will represent the cmap in minimum-sized format 4 subtable
my ($num, $count, $sRange, $eSel, $eShift);
$num = scalar(@starts);
$count = 0;
for ($j = 0; $j < $num; $j++)
if ($range[$j])
# Replace the boolean marker with the real byte offset from this slot of
# the range array to the segment's first glyph id; $count is the number of
# glyph ids already assigned to earlier idRange segments.
$range[$j] = ($count + $num - $j) << 1;
$count += $ends[$j] - $starts[$j] + 1;
push @deltas, 0;
# Plain delta segment: glyph id of the first code minus that code.
push @deltas, ($s->{'val'}{$starts[$j]} || 0) - $starts[$j];
# NOTE(review): presumably yields the binary-search header values for $num
# two-byte entries; $num is reassigned from its first return value and is
# reused as the loop bound below - verify it comes back unchanged.
($num, $sRange, $eSel, $eShift) = Font::TTF::Utils::TTF_bininfo($num, 2);
$fh->print(pack("n4", $num * 2, $sRange, $eSel, $eShift));
$fh->print(pack("n*", @ends));
# 16-bit pad word between the end-code and start-code arrays.
$fh->print(pack("n", 0));
$fh->print(pack("n*", @starts));
$fh->print(pack("n*", @deltas));
$fh->print(pack("n*", @range));
# Glyph id data for idRange segments only; unmapped codes become 0.
for ($j = 0; $j < $num; $j++)
next if ($range[$j] == 0);
$fh->print(pack("n*", map {$_ || 0} @{$s->{'val'}}{$starts[$j] .. $ends[$j]}));
# Formats 8, 12 and 13: groups of (start code, end code, glyph id).
} elsif ($s->{'Format'} == 8 || $s->{'Format'} == 12 || $s->{'Format'} == 13)
my (@jobs, $start, $current, $curr_glyf, $map);
$current = 0; $curr_glyf = 0;
# 8192-byte bitmap that is only written out for format 8.
$map = "\000" x 8192;
foreach $j (@keys)
if ($j > 0xFFFF && $s->{'Format'} == 8)
# A code above 0xFFFF whose upper 16 bits are themselves a mapped code
# switches the subtable to format 12; the upper-half bit is then marked.
if (defined $s->{'val'}{$j >> 16})
{ $s->{'Format'} = 12; }
vec($map, $j >> 16, 1) = 1;
# Close the current group when the code is not consecutive or the glyph id
# breaks the expected pattern (constant for format 13, +1 otherwise).
if ($j != $current + 1 || $s->{'val'}{$j} != ($s->{'Format'} == 13 ? $curr_glyf : $curr_glyf + 1))
push (@jobs, [$start, $current, $s->{'Format'} == 13 ? $curr_glyf : $curr_glyf - ($current - $start)]) if (defined $start);
$start = $j; $current = $j; $curr_glyf = $s->{'val'}{$j};
$current = $j;
$curr_glyf = $s->{'val'}{$j};
# Flush the final open group.
push (@jobs, [$start, $current, $s->{'Format'} == 13 ? $curr_glyf : $curr_glyf - ($current - $start)]) if (defined $start);
$fh->print($map) if ($s->{'Format'} == 8);
$fh->print(pack('N', $#jobs + 1));
foreach $j (@jobs)
{ $fh->print(pack('N3', @{$j})); }
# Format 10: first code, entry count, then 16-bit glyph ids.
} elsif ($s->{'Format'} == 10)
$fh->print(pack('N2', $keys[0], $keys[-1] - $keys[0] + 1));
# NOTE(review): this is a scalar hash subscript whose key is a range
# expression, not the hash slice used by the format 6 branch above
# (@{$s->{'val'}}{...}); it looks as if only one value is packed rather
# than the whole run - confirm and compare with the format 6 code.
$fh->print(pack('n*', $s->{'val'}{$keys[0] .. $keys[-1]}));
# Back-patch the subtable length: a 16-bit field at offset 2 for formats
# below 8, a 32-bit field at offset 4 for the others.
$loc = $fh->tell();
if ($s->{'Format'} < 8)
$fh->seek($s->{' outloc'} + 2, 0);
$fh->print(pack("n", $loc - $s->{' outloc'}));
} else
$fh->seek($s->{' outloc'} + 4, 0);
$fh->print(pack("N", $loc - $s->{' outloc'}));
# Back-patch this subtable's offset in the directory: 4 header bytes plus
# 8 bytes per record, with the offset field 4 bytes into the record.
$fh->seek($base_loc + 8 + ($i << 3), 0);
$fh->print(pack("N", $s->{' outloc'} - $base_loc));
# Return to the end of the data so the next subtable is appended there.
$fh->seek($loc, 0);
=head2 $t->XML_element($context, $depth, $name, $val)
Outputs the elements of the cmap in XML. We only need to process val here
# Emit one element of a cmap subtable as XML.
#
# Parameters:
#   $context - hash providing the output handle ('fh') and the per-level
#              indent string ('indent')
#   $depth   - indentation string for the current nesting level
#   $k       - name of the element being written
#   $val     - its value; for 'val' this is the code => glyph hash
#
# 'LOC' is skipped entirely, 'val' is written as one <map> line per
# character code in ascending numeric order, and everything else is
# handed to the superclass.
#
# Returns $self on the paths handled here (the superclass result
# otherwise), so every path yields a defined value instead of whatever
# the final loop happened to leave behind.
sub XML_element
{
    my ($self, $context, $depth, $k, $val) = @_;
    my ($fh) = $context->{'fh'};

    return $self if ($k eq 'LOC');
    return $self->SUPER::XML_element($context, $depth, $k, $val) unless ($k eq 'val');

    my $indent = $depth . $context->{'indent'};
    foreach my $code (sort {$a <=> $b} keys %{$val})
    { $fh->printf("%s<map code='%04X' glyph='%s'/>\n", $indent, $code, $val->{$code}); }

    return $self;
}
=head2 $t->minsize()
Returns the minimum size this table can be in bytes. If it is smaller than this, then the table
must be bad and should be deleted or whatever.
# Smallest size, in bytes, that a cmap table may have.
#
# Four bytes is exactly what read() consumes for the table header before
# it reaches the subtable directory.
sub minsize
{
    my $header_bytes = 4;
    return $header_bytes;
}
=head2 $t->update
Tidies the cmap table.
Removes MS Fmt12 cmap if it is no longer needed.
Removes from all cmaps any codepoint that map to GID=0. Note that such entries will
be re-introduced as necessary depending on the cmap format.
# Tidy the cmap in memory.
#
#   * Every mapping to glyph 0 is removed from every subtable.
#   * A platform 3 / encoding 10 / format 12 subtable is dropped when its
#     highest remaining code fits in 16 bits, since it is then redundant.
#   * 'Num' is reset to the number of surviving subtables so that it keeps
#     describing 'Tables'.
#   * The cached ' mstable' is discarded so it is rediscovered on demand.
#
# Returns undef when the superclass update declines to run, and $self
# otherwise, so the result can be tested for truth by callers.
sub update
{
    my ($self) = @_;
    my @keep;

    return undef unless ($self->SUPER::update);

    foreach my $s (@{$self->{'Tables'}})
    {
        my $max = 0;
        # keys() takes a snapshot, so deleting while walking it is safe.
        foreach my $code (keys %{$s->{'val'}})
        {
            if ($s->{'val'}{$code})
            {
                # remember max USV
                $max = $code if $max < $code;
            }
            else
            {
                # Remove unneeded key
                delete $s->{'val'}{$code};
            }
        }
        push @keep, $s
            unless $s->{'Platform'} == 3 && $s->{'Encoding'} == 10
                && $s->{'Format'} == 12 && $max <= 0xFFFF;
    }

    $self->{'Tables'} = [ @keep ];
    $self->{'Num'}    = scalar @keep;   # keep the count in step with the list

    delete $self->{' mstable'};         # Force rediscovery of this.

    return $self;
}
=head2 @map = $t->reverse(%opt)
Returns a reverse map of the Unicode cmap. I.e. given a glyph gives the Unicode value for it. Options are:
=over 4
=item tnum
Table number to use rather than the default Unicode table
=item array
Returns each element of reverse as an array since a glyph may be mapped by more
than one Unicode value. The arrays are unsorted. Otherwise store any one unicode value for a glyph.
# Build a glyph id => character code map from one subtable.
#
# Options (passed as a flat key/value list):
#   tnum  - index into 'Tables' of the subtable to use; when omitted the
#           subtable chosen by find_ms is used
#   array - when true, each element is an array ref holding every code
#           that maps to that glyph (in no particular order); otherwise a
#           single code is stored, preferring the lowest positive one
#
# Returns the list indexed by glyph id; glyphs with no mapping are left
# undefined. An empty list is returned when no subtable is available.
sub reverse
{
    my ($self, %opt) = @_;
    my ($table) = defined $opt{'tnum'} ? $self->{'Tables'}[$opt{'tnum'}] : $self->find_ms;
    my @res;

    # Nothing to invert if the subtable or its mapping is missing.
    my $val = ($table && $table->{'val'}) || {};

    foreach my $code (keys %{$val})
    {
        my $gid = $val->{$code};
        if ($opt{'array'})
        { push (@{$res[$gid]}, $code); }
        else
        {
            # Keep what is already stored only if it is a positive code
            # lower than the new candidate.
            $res[$gid] = $code
                unless (defined $res[$gid] && $res[$gid] > 0 && $res[$gid] < $code);
        }
    }

    return @res;
}
=head2 is_unicode($index)
Returns whether the table of a given index is known to be a unicode table
(as specified in the specifications)
# Tell whether the subtable at position $index in 'Tables' is one of the
# platform/encoding combinations treated as Unicode: platform 3,
# platform 0, or platform 2 with encoding 1.
#
# Returns a true value for those combinations and a false value otherwise.
sub is_unicode
{
    my ($self, $index) = @_;
    my $platform = $self->{'Tables'}[$index]{'Platform'};
    my $encoding = $self->{'Tables'}[$index]{'Encoding'};

    return 1 if $platform == 3 || $platform == 0;
    return ($platform == 2 && $encoding == 1);
}
=head1 BUGS
=over 4
=item *
Format 14 (Unicode Variation Sequences) cmaps are not supported.
=head1 AUTHOR
Martin Hosken.
Copyright (c) 1998-2016, SIL International.
This module is released under the terms of the Artistic License 2.0.
For details, see the full text of the license in the file LICENSE.