ILIAS  Release_4_2_x_branch Revision 61807
 All Data Structures Namespaces Files Functions Variables Groups Pages
FontFamily.php
Go to the documentation of this file.
1 <?php
2 
7 {
8 
9  protected $mask = null;
10 
11  public function __construct() {
12  $this->mask = '- ';
13  for ($c = 'a'; $c <= 'z'; $c++) $this->mask .= $c;
14  for ($c = 'A'; $c <= 'Z'; $c++) $this->mask .= $c;
15  for ($c = '0'; $c <= '9'; $c++) $this->mask .= $c; // cast-y, but should be fine
16  // special bytes used by UTF-8
17  for ($i = 0x80; $i <= 0xFF; $i++) {
18  // We don't bother excluding invalid bytes in this range,
19  // because the our restriction of well-formed UTF-8 will
20  // prevent these from ever occurring.
21  $this->mask .= chr($i);
22  }
23 
24  /*
25  PHP's internal strcspn implementation is
26  O(length of string * length of mask), making it inefficient
27  for large masks. However, it's still faster than
28  preg_match 8)
29  for (p = s1;;) {
30  spanp = s2;
31  do {
32  if (*spanp == c || p == s1_end) {
33  return p - s1;
34  }
35  } while (spanp++ < (s2_end - 1));
36  c = *++p;
37  }
38  */
39  // possible optimization: invert the mask.
40  }
41 
42  public function validate($string, $config, $context) {
43  static $generic_names = array(
44  'serif' => true,
45  'sans-serif' => true,
46  'monospace' => true,
47  'fantasy' => true,
48  'cursive' => true
49  );
50  $allowed_fonts = $config->get('CSS.AllowedFonts');
51 
52  // assume that no font names contain commas in them
53  $fonts = explode(',', $string);
54  $final = '';
55  foreach($fonts as $font) {
56  $font = trim($font);
57  if ($font === '') continue;
58  // match a generic name
59  if (isset($generic_names[$font])) {
60  if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
61  $final .= $font . ', ';
62  }
63  continue;
64  }
65  // match a quoted name
66  if ($font[0] === '"' || $font[0] === "'") {
67  $length = strlen($font);
68  if ($length <= 2) continue;
69  $quote = $font[0];
70  if ($font[$length - 1] !== $quote) continue;
71  $font = substr($font, 1, $length - 2);
72  }
73 
74  $font = $this->expandCSSEscape($font);
75 
76  // $font is a pure representation of the font name
77 
78  if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
79  continue;
80  }
81 
82  if (ctype_alnum($font) && $font !== '') {
83  // very simple font, allow it in unharmed
84  $final .= $font . ', ';
85  continue;
86  }
87 
88  // bugger out on whitespace. form feed (0C) really
89  // shouldn't show up regardless
90  $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
91 
92  // Here, there are various classes of characters which need
93  // to be treated differently:
94  // - Alphanumeric characters are essentially safe. We
95  // handled these above.
96  // - Spaces require quoting, though most parsers will do
97  // the right thing if there aren't any characters that
98  // can be misinterpreted
99  // - Dashes rarely occur, but they fairly unproblematic
100  // for parsing/rendering purposes.
101  // The above characters cover the majority of Western font
102  // names.
103  // - Arbitrary Unicode characters not in ASCII. Because
104  // most parsers give little thought to Unicode, treatment
105  // of these codepoints is basically uniform, even for
106  // punctuation-like codepoints. These characters can
107  // show up in non-Western pages and are supported by most
108  // major browsers, for example: "MS 明朝" is a
109  // legitimate font-name
110  // <http://ja.wikipedia.org/wiki/MS_明朝>. See
111  // the CSS3 spec for more examples:
112  // <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
113  // You can see live samples of these on the Internet:
114  // <http://www.google.co.jp/search?q=font-family+MS+明朝|ゴシック>
115  // However, most of these fonts have ASCII equivalents:
116  // for example, 'MS Mincho', and it's considered
117  // professional to use ASCII font names instead of
118  // Unicode font names. Thanks Takeshi Terada for
119  // providing this information.
120  // The following characters, to my knowledge, have not been
121  // used to name font names.
122  // - Single quote. While theoretically you might find a
123  // font name that has a single quote in its name (serving
124  // as an apostrophe, e.g. Dave's Scribble), I haven't
125  // been able to find any actual examples of this.
126  // Internet Explorer's cssText translation (which I
127  // believe is invoked by innerHTML) normalizes any
128  // quoting to single quotes, and fails to escape single
129  // quotes. (Note that this is not IE's behavior for all
130  // CSS properties, just some sort of special casing for
131  // font-family). So a single quote *cannot* be used
132  // safely in the font-family context if there will be an
133  // innerHTML/cssText translation. Note that Firefox 3.x
134  // does this too.
135  // - Double quote. In IE, these get normalized to
136  // single-quotes, no matter what the encoding. (Fun
137  // fact, in IE8, the 'content' CSS property gained
138  // support, where they special cased to preserve encoded
139  // double quotes, but still translate unadorned double
140  // quotes into single quotes.) So, because their
141  // fixpoint behavior is identical to single quotes, they
142  // cannot be allowed either. Firefox 3.x displays
143  // single-quote style behavior.
144  // - Backslashes are reduced by one (so \\ -> \) every
145  // iteration, so they cannot be used safely. This shows
146  // up in IE7, IE8 and FF3
147  // - Semicolons, commas and backticks are handled properly.
148  // - The rest of the ASCII punctuation is handled properly.
149  // We haven't checked what browsers do to unadorned
150  // versions, but this is not important as long as the
151  // browser doesn't /remove/ surrounding quotes (as IE does
152  // for HTML).
153  //
154  // With these results in hand, we conclude that there are
155  // various levels of safety:
156  // - Paranoid: alphanumeric, spaces and dashes(?)
157  // - International: Paranoid + non-ASCII Unicode
158  // - Edgy: Everything except quotes, backslashes
159  // - NoJS: Standards compliance, e.g. sod IE. Note that
160  // with some judicious character escaping (since certain
161  // types of escaping doesn't work) this is theoretically
162  // OK as long as innerHTML/cssText is not called.
163  // We believe that international is a reasonable default
164  // (that we will implement now), and once we do more
165  // extensive research, we may feel comfortable with dropping
166  // it down to edgy.
167 
168  // Edgy: alphanumeric, spaces, dashes and Unicode. Use of
169  // str(c)spn assumes that the string was already well formed
170  // Unicode (which of course it is).
171  if (strspn($font, $this->mask) !== strlen($font)) {
172  continue;
173  }
174 
175  // Historical:
176  // In the absence of innerHTML/cssText, these ugly
177  // transforms don't pose a security risk (as \\ and \"
178  // might--these escapes are not supported by most browsers).
179  // We could try to be clever and use single-quote wrapping
180  // when there is a double quote present, but I have choosen
181  // not to implement that. (NOTE: you can reduce the amount
182  // of escapes by one depending on what quoting style you use)
183  // $font = str_replace('\\', '\\5C ', $font);
184  // $font = str_replace('"', '\\22 ', $font);
185  // $font = str_replace("'", '\\27 ', $font);
186 
187  // font possibly with spaces, requires quoting
188  $final .= "'$font', ";
189  }
190  $final = rtrim($final, ', ');
191  if ($final === '') return false;
192  return $final;
193  }
194 
195 }
196 
197 // vim: et sw=4 sts=4