11 # For the full license information, view the LICENSE file that was distributed
12 # with this source code.
# Version string exposed as a class constant.
20 const version = '1.8.0';
26 $Elements = $this->textElements($text);
29 $markup = $this->elements($Elements);
32 $markup = trim($markup, "\n");
# Turns a raw text document into block elements: clears stored definitions,
# normalises line endings to "\n", trims surrounding newlines, splits the text
# into lines and hands them to linesElements().
37 protected function textElements($text)
39 # make sure no definitions are set
40 $this->DefinitionData = array();
42 # standardize line breaks
43 $text = str_replace(array("\r\n", "\r"), "\n", $text);
45 # remove surrounding line breaks
46 $text = trim($text, "\n");
48 # split text into lines
49 $lines = explode("\n", $text);
51 # iterate through lines to identify blocks
52 return $this->linesElements($lines);
# Stores the $breaksEnabled flag as given (no cast). inlineText() reads it to
# choose the line-break pattern: when truthy any newline yields a <br>
# element, otherwise only a newline preceded by a backslash or 2+ spaces.
59 function setBreaksEnabled($breaksEnabled)
61 $this->breaksEnabled = $breaksEnabled;
66 protected $breaksEnabled;
# Stores the $markupEscaped flag as given (no cast). blockComment(),
# blockMarkup() and inlineMarkup() test this flag (together with safeMode)
# before treating input as raw HTML.
68 function setMarkupEscaped($markupEscaped)
70 $this->markupEscaped = $markupEscaped;
75 protected $markupEscaped;
# Stores the $urlsLinked flag as given (no cast); it defaults to true.
# inlineUrl() tests it with a strict `!== true`, so a merely truthy value such
# as 1 does not count as enabled there.
77 function setUrlsLinked($urlsLinked)
79 $this->urlsLinked = $urlsLinked;
84 protected $urlsLinked = true;
# Stores the safe-mode flag, cast to bool. When set, element() runs
# sanitiseElement() on every element and escapes 'rawHtml' content unless the
# element carries a truthy 'allowRawHtmlInSafeMode'.
86 function setSafeMode($safeMode)
88 $this->safeMode = (bool) $safeMode;
# Stores the strict-mode flag, cast to bool. blockHeader() consults it when the
# text after the leading '#' run does not start with a space.
95 function setStrictMode($strictMode)
97 $this->strictMode = (bool) $strictMode;
102 protected $strictMode;
# URL prefixes that filterUnsafeUrlInAttribute() compares attribute values
# against, case-insensitively via striAtStart(). Only the data-URI entries are
# visible in this excerpt; earlier entries are elided.
104 protected $safeLinksWhitelist = array(
111 'data:image/png;base64,',
112 'data:image/gif;base64,',
113 'data:image/jpeg;base64,',
# Marker character => ordered list of block types to try. linesElements() looks
# the list up by $marker (presumably the first character of the line's text;
# the assignment is elided here), appends it to $unmarkedBlockTypes and calls
# block<Type>() for each entry in order.
126 protected $BlockTypes = array(
127 '#' => array('Header'),
128 '*' => array('Rule', 'List'),
129 '+' => array('List'),
130 '-' => array('SetextHeader', 'Table', 'Rule', 'List'),
131 '0' => array('List'),
132 '1' => array('List'),
133 '2' => array('List'),
134 '3' => array('List'),
135 '4' => array('List'),
136 '5' => array('List'),
137 '6' => array('List'),
138 '7' => array('List'),
139 '8' => array('List'),
140 '9' => array('List'),
141 ':' => array('Table'),
142 '<' => array('Comment', 'Markup'),
143 '=' => array('SetextHeader'),
144 '>' => array('Quote'),
145 '[' => array('Reference'),
146 '_' => array('Rule'),
147 '`' => array('FencedCode'),
148 '|' => array('Table'),
149 '~' => array('FencedCode'),
# Block types tried regardless of marker: linesElements() seeds its candidate
# list with this array before adding marker-specific types. Entries are elided
# in this excerpt.
154 protected $unmarkedBlockTypes = array(
# Renders an array of lines to markup: linesElements() builds the block
# elements and elements() serialises them.
162 protected function lines(array $lines)
164 return $this->elements($this->linesElements($lines));
# Groups lines into block elements. Visible flow per line:
#  - a blank line bumps the current block's 'interrupted' counter;
#  - tabs are expanded to 4-column stops and the indent is measured;
#  - a continuable current block gets first refusal via block<Type>Continue();
#  - otherwise the candidate block types are tried in order;
#  - failing that the line continues or starts a paragraph.
# Finished blocks are appended to $Elements through extractElement().
167 protected function linesElements(array $lines)
170 $CurrentBlock = null;
172 foreach ($lines as $line)
174 if (chop($line) === '')
176 if (isset($CurrentBlock))
# count blank lines seen while this block is current
178 $CurrentBlock['interrupted'] = (isset($CurrentBlock['interrupted'])
179 ? $CurrentBlock['interrupted'] + 1 : 1
# replace each tab with spaces up to the next multiple-of-4 column; the column
# is measured in UTF-8 characters, the splice itself is byte-based
186 while (($beforeTab = strstr($line, "\t", true)) !== false)
188 $shortage = 4 - mb_strlen($beforeTab, 'utf-8') % 4;
191 . str_repeat(' ', $shortage)
192 . substr($line, strlen($beforeTab) + 1)
196 $indent = strspn($line, ' ');
198 $text = $indent > 0 ? substr($line, $indent) : $line;
# 'body' is the whole line, 'text' the line without its leading spaces
202 $Line = array('body' => $line, 'indent' => $indent, 'text' => $text);
# let a continuable block try to absorb the line
206 if (isset($CurrentBlock['continuable']))
208 $methodName = 'block' . $CurrentBlock['type'] . 'Continue';
209 $Block = $this->$methodName($Line, $CurrentBlock);
213 $CurrentBlock = $Block;
# run the optional block<Type>Complete() hook on the current block
219 if ($this->isBlockCompletable($CurrentBlock['type']))
221 $methodName = 'block' . $CurrentBlock['type'] . 'Complete';
222 $CurrentBlock = $this->$methodName($CurrentBlock);
# candidates: unmarked types first, then the types registered for $marker
233 $blockTypes = $this->unmarkedBlockTypes;
235 if (isset($this->BlockTypes[$marker]))
237 foreach ($this->BlockTypes[$marker] as $blockType)
239 $blockTypes []= $blockType;
246 foreach ($blockTypes as $blockType)
248 $Block = $this->{"block$blockType"}($Line, $CurrentBlock);
252 $Block['type'] = $blockType;
# a handler that did not set 'identified' produced a fresh block, so the
# previous block is emitted first
254 if ( ! isset($Block['identified']))
256 if (isset($CurrentBlock))
258 $Elements[] = $this->extractElement($CurrentBlock);
261 $Block['identified'] = true;
264 if ($this->isBlockContinuable($blockType))
266 $Block['continuable'] = true;
269 $CurrentBlock = $Block;
# paragraph handling: extend the current paragraph or start a new one
277 if (isset($CurrentBlock) and $CurrentBlock['type'] === 'Paragraph')
279 $Block = $this->paragraphContinue($Line, $CurrentBlock);
284 $CurrentBlock = $Block;
288 if (isset($CurrentBlock))
290 $Elements[] = $this->extractElement($CurrentBlock);
293 $CurrentBlock = $this->paragraph($Line);
295 $CurrentBlock['identified'] = true;
# complete (when the type has a Complete hook) and emit the block still open
301 if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type']))
303 $methodName = 'block' . $CurrentBlock['type'] . 'Complete';
304 $CurrentBlock = $this->$methodName($CurrentBlock);
309 if (isset($CurrentBlock))
311 $Elements[] = $this->extractElement($CurrentBlock);
# Returns $Component['element']. When it is missing, one is synthesised first:
# a rawHtml wrapper around $Component['markup'], or an empty array for a
# component flagged 'hidden'.
319 protected function extractElement(array $Component)
321 if ( ! isset($Component['element']))
323 if (isset($Component['markup']))
325 $Component['element'] = array('rawHtml' => $Component['markup']);
327 elseif (isset($Component['hidden']))
329 $Component['element'] = array();
333 return $Component['element'];
# True when this object has a block<Type>Continue() method.
336 protected function isBlockContinuable($Type)
338 return method_exists($this, 'block' . $Type . 'Continue');
# True when this object has a block<Type>Complete() method.
341 protected function isBlockCompletable($Type)
343 return method_exists($this, 'block' . $Type . 'Complete');
# Opener for indented code blocks. First tests for an uninterrupted preceding
# paragraph (the outcome of that branch is elided here), then requires an
# indent of at least 4 and takes the line body minus its first 4 characters.
349 protected function blockCode($Line, $Block = null)
351 if (isset($Block) and $Block['type'] === 'Paragraph' and ! isset($Block['interrupted']))
356 if ($Line['indent'] >= 4)
358 $text = substr($Line['body'], 4);
# Continues an indented code block with a line indented by 4 or more. Blank
# lines skipped since the last content line are re-inserted as newlines, then
# the line (minus its first 4 characters) is appended after a newline.
374 protected function blockCodeContinue($Line, $Block)
376 if ($Line['indent'] >= 4)
378 if (isset($Block['interrupted']))
# one "\n" per blank line counted in 'interrupted'
380 $Block['element']['element']['text'] .= str_repeat("\n", $Block['interrupted']);
382 unset($Block['interrupted']);
385 $Block['element']['element']['text'] .= "\n";
387 $text = substr($Line['body'], 4);
389 $Block['element']['element']['text'] .= $text;
# Complete hook for indented code blocks, invoked by linesElements() through
# the block<Type>Complete naming convention. Body not shown in this excerpt.
395 protected function blockCodeComplete($Block)
# Opener for HTML comment blocks. Tests the markupEscaped / safeMode flags
# first, then requires the text to begin with '<!--'. The raw line body becomes
# the element's rawHtml, and the block is marked 'closed' when '-->' appears on
# the same line.
403 protected function blockComment($Line)
405 if ($this->markupEscaped or $this->safeMode)
410 if (strpos($Line['text'], '<!--') === 0)
414 'rawHtml' => $Line['body'],
419 if (strpos($Line['text'], '-->') !== false)
421 $Block['closed'] = true;
# Continues an HTML comment block: tests whether it is already 'closed',
# otherwise appends the raw line body after a newline and marks the block
# 'closed' once the line contains '-->'.
428 protected function blockCommentContinue($Line, array $Block)
430 if (isset($Block['closed']))
435 $Block['element']['rawHtml'] .= "\n" . $Line['body'];
437 if (strpos($Line['text'], '-->') !== false)
439 $Block['closed'] = true;
# Opener for fenced code blocks. The fence is the run of the line's first
# character at the start of the text and is tested against a minimum length of
# 3. The rest of the line, trimmed of tabs and spaces, is the info string; its
# first whitespace-delimited token becomes a "language-<token>" class.
448 protected function blockFencedCode($Line)
450 $marker = $Line['text'][0];
452 $openerLength = strspn($Line['text'], $marker);
454 if ($openerLength < 3)
459 $infostring = trim(substr($Line['text'], $openerLength), "\t ");
# an info string containing a backtick is tested for here (outcome elided)
461 if (strpos($infostring, '`') !== false)
471 if ($infostring !== '')
474 * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes
475 * Every HTML element may have a class attribute specified.
476 * The attribute, if specified, must have a value that is a set
477 * of space-separated tokens representing the various classes
478 * that the element belongs to.
480 * The space characters, for the purposes of this specification,
481 * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab),
482 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and
483 * U+000D CARRIAGE RETURN (CR).
# cut the info string at the first HTML space character
485 $language = substr($infostring, 0, strcspn($infostring, " \t\n\f\r"));
487 $Element['attributes'] = array('class' => "language-$language");
# remembered so blockFencedCodeContinue() can recognise the closing fence
492 'openerLength' => $openerLength,
495 'element' => $Element,
# Continues a fenced code block. Skipped blank lines are restored as newlines.
# A line is the closing fence when it starts with at least 'openerLength' fence
# characters followed only by spaces; any other line is appended to the text.
502 protected function blockFencedCodeContinue($Line, $Block)
504 if (isset($Block['complete']))
509 if (isset($Block['interrupted']))
511 $Block['element']['element']['text'] .= str_repeat("\n", $Block['interrupted']);
513 unset($Block['interrupted']);
516 if (($len = strspn($Line['text'], $Block['char'])) >= $Block['openerLength']
517 and chop(substr($Line['text'], $len), ' ') === ''
# every appended line is prefixed with "\n", so drop the first character
519 $Block['element']['element']['text'] = substr($Block['element']['element']['text'], 1);
521 $Block['complete'] = true;
526 $Block['element']['element']['text'] .= "\n" . $Line['body'];
# Complete hook for fenced code blocks, invoked by linesElements() through the
# block<Type>Complete naming convention. Body not shown in this excerpt.
531 protected function blockFencedCodeComplete($Block)
# ATX header opener. The level is the length of the leading '#' run and names
# the element 'h<level>'. Header text is the line with '#' trimmed from both
# ends and then spaces trimmed; it is parsed inline via lineElements. In strict
# mode, text not starting with a space is tested for (outcome elided here).
539 protected function blockHeader($Line)
541 $level = strspn($Line['text'], '#');
548 $text = trim($Line['text'], '#');
550 if ($this->strictMode and isset($text[0]) and $text[0] !== ' ')
555 $text = trim($text, ' ');
559 'name' => 'h' . $level,
561 'function' => 'lineElements',
563 'destination' => 'elements',
# List opener. Chooses unordered or ordered by the first character, matches
# marker + spaces + content, and records the marker details in the block so
# blockListContinue() can recognise sibling items. The first item is stored in
# $Block['li'] and linked by reference into the list's elements.
574 protected function blockList($Line, ?array $CurrentBlock = null)
# '*', '+' and '-' all compare <= '-' in byte order; digits compare greater
576 list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]{1,9}+[.\)]');
578 if (preg_match('/^('.$pattern.'([ ]++|$))(.*+)/', $Line['text'], $matches))
# group 2 is the run of spaces after the marker
580 $contentIndent = strlen($matches[2]);
582 if ($contentIndent >= 5)
# 5+ spaces: move the space run out of the marker and back onto the content
585 $matches[1] = substr($matches[1], 0, -$contentIndent);
586 $matches[3] = str_repeat(' ', $contentIndent) . $matches[3];
588 elseif ($contentIndent === 0)
593 $markerWithoutWhitespace = strstr($matches[1], ' ', true);
596 'indent' => $Line['indent'],
597 'pattern' => $pattern,
600 'marker' => $matches[1],
# ul: the bullet character itself; ol: the delimiter after the digits
601 'markerType' => ($name === 'ul' ? $markerWithoutWhitespace : substr($markerWithoutWhitespace, -1)),
605 'elements' => array(),
608 $Block['data']['markerTypeRegex'] = preg_quote($Block['data']['markerType'], '/');
# digits before the delimiter with leading zeros stripped; all zeros gives '0'
612 $listStart = ltrim(strstr($matches[1], $Block['data']['markerType'], true), '0') ?: '0';
614 if ($listStart !== '1')
618 and $CurrentBlock['type'] === 'Paragraph'
619 and ! isset($CurrentBlock['interrupted'])
624 $Block['element']['attributes'] = array('start' => $listStart);
628 $Block['li'] = array(
632 'argument' => !empty($matches[3]) ? array($matches[3]) : array(),
633 'destination' => 'elements'
# by reference, so later edits to $Block['li'] show up in the element tree
637 $Block['element']['elements'] []= & $Block['li'];
# Continuation handler for lists. $requiredIndent is the list's indent plus
# the stored marker length. Visible branches:
#  - a less-indented line matching the list's own marker style starts a new
#    item in $Block['li'];
#  - a line indented at least $requiredIndent is appended to the current
#    item's argument lines;
#  - an uninterrupted line is appended after stripping up to $requiredIndent
#    leading spaces.
# A blank line before appended content marks the list 'loose'.
643 protected function blockListContinue($Line, array $Block)
645 if (isset($Block['interrupted']) and empty($Block['li']['handler']['argument']))
650 $requiredIndent = ($Block['indent'] + strlen($Block['data']['marker']));
652 if ($Line['indent'] < $requiredIndent
655 $Block['data']['type'] === 'ol'
656 and preg_match('/^[0-9]++'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches)
658 $Block['data']['type'] === 'ul'
659 and preg_match('/^'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches)
663 if (isset($Block['interrupted']))
# close the previous item with an empty line and mark the list loose
665 $Block['li']['handler']['argument'] []= '';
667 $Block['loose'] = true;
669 unset($Block['interrupted']);
674 $text = isset($matches[1]) ? $matches[1] : '';
676 $Block['indent'] = $Line['indent'];
678 $Block['li'] = array(
682 'argument' => array($text),
683 'destination' => 'elements'
687 $Block['element']['elements'] []= & $Block['li'];
# a less-indented line that opens a list of its own is tested for here
691 elseif ($Line['indent'] < $requiredIndent and $this->blockList($Line))
# note: blockReference() stores the definition as a side effect when it matches
696 if ($Line['text'][0] === '[' and $this->blockReference($Line))
701 if ($Line['indent'] >= $requiredIndent)
703 if (isset($Block['interrupted']))
705 $Block['li']['handler']['argument'] []= '';
707 $Block['loose'] = true;
709 unset($Block['interrupted']);
712 $text = substr($Line['body'], $requiredIndent);
714 $Block['li']['handler']['argument'] []= $text;
719 if ( ! isset($Block['interrupted']))
721 $text = preg_replace('/^[ ]{0,'.$requiredIndent.'}+/', '', $Line['body']);
723 $Block['li']['handler']['argument'] []= $text;
# Complete hook for lists: in a 'loose' list, every item whose argument lines
# do not already end with an empty string gets one appended.
729 protected function blockListComplete(array $Block)
731 if (isset($Block['loose']))
733 foreach ($Block['element']['elements'] as &$li)
735 if (end($li['handler']['argument']) !== '')
737 $li['handler']['argument'] []= '';
# Blockquote opener: matches '>' plus at most one space and captures the rest.
# The captured text seeds the line array that linesElements will parse into
# the blockquote's child elements.
748 protected function blockQuote($Line)
750 if (preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches))
754 'name' => 'blockquote',
756 'function' => 'linesElements',
757 'argument' => (array) $matches[1],
758 'destination' => 'elements',
# Continues a blockquote. An interrupted block is tested for first (outcome
# elided). A line starting with '>' contributes its text after the marker;
# otherwise an uninterrupted line is appended as-is (lazy continuation).
767 protected function blockQuoteContinue($Line, array $Block)
769 if (isset($Block['interrupted']))
774 if ($Line['text'][0] === '>' and preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches))
776 $Block['element']['handler']['argument'] []= $matches[1];
781 if ( ! isset($Block['interrupted']))
783 $Block['element']['handler']['argument'] []= $Line['text'];
# Horizontal rule test: the line's first character must occur at least 3
# times, and stripping trailing spaces and marker characters must leave
# nothing, i.e. the line consists only of the marker and spaces.
792 protected function blockRule($Line)
794 $marker = $Line['text'][0];
796 if (substr_count($Line['text'], $marker) >= 3 and chop($Line['text'], " $marker") === '')
# Setext header underline. Tests that the previous block is an uninterrupted
# paragraph, and that this line (indent below 4) consists solely of its first
# character, ignoring trailing spaces. The paragraph element is then renamed
# to h1 for '=' or h2 otherwise.
811 protected function blockSetextHeader($Line, ?array $Block = null)
813 if ( ! isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted']))
818 if ($Line['indent'] < 4 and chop(chop($Line['text'], ' '), $Line['text'][0]) === '')
820 $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2';
# Opener for raw HTML blocks. Tests the markupEscaped / safeMode flags first,
# then matches an opening or closing tag with optional attributes at the start
# of the line. The lower-cased tag name is checked against $textLevelElements
# (outcome elided); otherwise the line text becomes the block's rawHtml.
829 protected function blockMarkup($Line)
831 if ($this->markupEscaped or $this->safeMode)
836 if (preg_match('/^<[\/]?+(\w*)(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+(\/)?>/', $Line['text'], $matches))
838 $element = strtolower($matches[1]);
840 if (in_array($element, $this->textLevelElements))
846 'name' => $matches[1],
848 'rawHtml' => $Line['text'],
# Continues a raw HTML block: tests for a closed or interrupted block (outcome
# elided), otherwise appends the raw line body after a newline.
857 protected function blockMarkupContinue($Line, array $Block)
859 if (isset($Block['closed']) or isset($Block['interrupted']))
864 $Block['element']['rawHtml'] .= "\n" . $Line['body'];
# Link reference definition of the form [id]: url "title". The url may be
# wrapped in <>, and the title in double quotes, single quotes or parentheses.
# Side effect: stores url/title under the lower-cased id in
# DefinitionData['Reference']. The resulting block carries an empty element.
872 protected function blockReference($Line)
# cheap strpos pre-check before running the regex
874 if (strpos($Line['text'], ']') !== false
875 and preg_match('/^\[(.+?)\]:[ ]*+<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*+$/', $Line['text'], $matches)
877 $id = strtolower($matches[1]);
880 'url' => $matches[2],
881 'title' => isset($matches[3]) ? $matches[3] : null,
884 $this->DefinitionData['Reference'][$id] = $Data;
887 'element' => array(),
# Table opener, triggered on the divider row. The visible guards test that the
# previous block is an uninterrupted paragraph, that a '|' or ':' is present,
# that the paragraph text holds no newline, and that the divider line consists
# only of spaces, '-', ':' and '|'. The divider yields per-column alignments;
# the paragraph text supplies the header cells, whose count is compared with
# the alignment count.
897 protected function blockTable($Line, ?array $Block = null)
899 if ( ! isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted']))
905 strpos($Block['element']['handler']['argument'], '|') === false
906 and strpos($Line['text'], '|') === false
907 and strpos($Line['text'], ':') === false
908 or strpos($Block['element']['handler']['argument'], "\n") !== false
913 if (chop($Line['text'], ' -:|') !== '')
918 $alignments = array();
920 $divider = $Line['text'];
922 $divider = trim($divider);
923 $divider = trim($divider, '|');
925 $dividerCells = explode('|', $divider);
927 foreach ($dividerCells as $dividerCell)
929 $dividerCell = trim($dividerCell);
931 if ($dividerCell === '')
938 if ($dividerCell[0] === ':')
943 if (substr($dividerCell, - 1) === ':')
# trailing colon: 'center' when a leading colon already set 'left', else 'right'
945 $alignment = $alignment === 'left' ? 'center' : 'right';
948 $alignments []= $alignment;
953 $HeaderElements = array();
# the preceding paragraph's text is the header row
955 $header = $Block['element']['handler']['argument'];
957 $header = trim($header);
958 $header = trim($header, '|');
960 $headerCells = explode('|', $header);
962 if (count($headerCells) !== count($alignments))
967 foreach ($headerCells as $index => $headerCell)
969 $headerCell = trim($headerCell);
971 $HeaderElement = array(
974 'function' => 'lineElements',
975 'argument' => $headerCell,
976 'destination' => 'elements',
980 if (isset($alignments[$index]))
982 $alignment = $alignments[$index];
984 $HeaderElement['attributes'] = array(
985 'style' => "text-align: $alignment;",
989 $HeaderElements []= $HeaderElement;
# kept on the block so blockTableContinue() can size and align body rows
995 'alignments' => $alignments,
996 'identified' => true,
999 'elements' => array(),
# two child elements are appended: index 0 receives the header row below,
# index 1 receives body rows in blockTableContinue()
1003 $Block['element']['elements'] []= array(
1007 $Block['element']['elements'] []= array(
1009 'elements' => array(),
1012 $Block['element']['elements'][0]['elements'] []= array(
1014 'elements' => $HeaderElements,
# Adds a body row to a table. A line qualifies when the table has a single
# column, or the line starts with or contains '|'. Cells are split on '|'
# while keeping escaped pipes and backtick code spans intact, truncated to the
# column count, and given the column's text-align style.
1020 protected function blockTableContinue($Line, array $Block)
1022 if (isset($Block['interrupted']))
# strpos() is used for truthiness; a '|' at offset 0 is covered by the explicit
# first-character test before it
1027 if (count($Block['alignments']) === 1 or $Line['text'][0] === '|' or strpos($Line['text'], '|'))
1029 $Elements = array();
1031 $row = $Line['text'];
1034 $row = trim($row, '|');
1036 preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]++`|`)++/', $row, $matches);
# drop any cells beyond the number of declared columns
1038 $cells = array_slice($matches[0], 0, count($Block['alignments']));
1040 foreach ($cells as $index => $cell)
1042 $cell = trim($cell);
1047 'function' => 'lineElements',
1048 'argument' => $cell,
1049 'destination' => 'elements',
1053 if (isset($Block['alignments'][$index]))
1055 $Element['attributes'] = array(
1056 'style' => 'text-align: ' . $Block['alignments'][$index] . ';',
1060 $Elements []= $Element;
1065 'elements' => $Elements,
# index 1 is the second child that blockTable() appended to the table element
1068 $Block['element']['elements'][1]['elements'] []= $Element;
# Builds a 'Paragraph' block whose handler parses the line's text inline via
# lineElements into the element's child elements.
1078 protected function paragraph($Line)
1081 'type' => 'Paragraph',
1085 'function' => 'lineElements',
1086 'argument' => $Line['text'],
1087 'destination' => 'elements',
# Extends a paragraph: tests for an interrupted block (outcome elided),
# otherwise appends the line's text to the pending inline text after a newline.
1093 protected function paragraphContinue($Line, array $Block)
1095 if (isset($Block['interrupted']))
1100 $Block['element']['handler']['argument'] .= "\n".$Line['text'];
# Marker character => ordered inline types to try; lineElements() calls
# inline<Type>() for each entry of the marker it found.
1109 protected $InlineTypes = array(
1110 '!' => array('Image'),
1111 '&' => array('SpecialCharacter'),
1112 '*' => array('Emphasis'),
1113 ':' => array('Url'),
1114 '<' => array('UrlTag', 'EmailTag', 'Markup'),
1115 '[' => array('Link'),
1116 '_' => array('Emphasis'),
1117 '`' => array('Code'),
1118 '~' => array('Strikethrough'),
1119 '\\' => array('EscapeSequence'),
# Character list handed to strpbrk() in lineElements(); it holds the same ten
# markers as the keys of $InlineTypes.
1124 protected $inlineMarkerList = '!*_&[:<`~\\';
# Renders inline text to markup: lineElements() builds the inline elements
# and elements() serialises them.
1130 public function line($text, $nonNestables = array())
1132 return $this->elements($this->lineElements($text, $nonNestables));
# Parses inline text into elements. Repeatedly finds the next marker character
# and tries the inline types registered for it. On success it emits the text
# before the inline plus the inline itself and continues after it; on failure
# it emits the text up to and including the marker as plain text.
# $nonNestables lists inline types that must not be produced in this context.
1135 protected function lineElements($text, $nonNestables = array())
1137 # standardize line breaks
1138 $text = str_replace(array("\r\n", "\r"), "\n", $text);
1140 $Elements = array();
# keyed by type name so the check below is an isset() lookup
1142 $nonNestables = (empty($nonNestables)
1144 : array_combine($nonNestables, $nonNestables)
1147 # $excerpt is based on the first occurrence of a marker
1149 while ($excerpt = strpbrk($text, $this->inlineMarkerList))
1151 $marker = $excerpt[0];
# byte offset of the marker within $text
1153 $markerPosition = strlen($text) - strlen($excerpt);
1155 $Excerpt = array('text' => $excerpt, 'context' => $text);
1157 foreach ($this->InlineTypes[$marker] as $inlineType)
1159 # check to see if the current inline type is nestable in the current context
1161 if (isset($nonNestables[$inlineType]))
1166 $Inline = $this->{"inline$inlineType"}($Excerpt);
1168 if ( ! isset($Inline))
1173 # makes sure that the inline belongs to "our" marker
1175 if (isset($Inline['position']) and $Inline['position'] > $markerPosition)
1180 # sets a default inline position
1182 if ( ! isset($Inline['position']))
1184 $Inline['position'] = $markerPosition;
1187 # cause the new element to 'inherit' our non nestables
1190 $Inline['element']['nonNestables'] = isset($Inline['element']['nonNestables'])
1191 ? array_merge($Inline['element']['nonNestables'], $nonNestables)
1195 # the text that comes before the inline
1196 $unmarkedText = substr($text, 0, $Inline['position']);
1198 # compile the unmarked text
1199 $InlineText = $this->inlineText($unmarkedText);
1200 $Elements[] = $InlineText['element'];
1202 # compile the inline
1203 $Elements[] = $this->extractElement($Inline);
1205 # remove the examined text
1206 $text = substr($text, $Inline['position'] + $Inline['extent']);
1211 # the marker does not belong to an inline
1213 $unmarkedText = substr($text, 0, $markerPosition + 1);
1215 $InlineText = $this->inlineText($unmarkedText);
1216 $Elements[] = $InlineText['element'];
1218 $text = substr($text, $markerPosition + 1);
# whatever remains after the last marker is plain text
1221 $InlineText = $this->inlineText($text);
1222 $Elements[] = $InlineText['element'];
# elements without an explicit 'autobreak' get false, so elements() inserts no
# newlines around them
1224 foreach ($Elements as &$Element)
1226 if ( ! isset($Element['autobreak']))
1228 $Element['autobreak'] = false;
# Wraps plain text as an inline spanning the whole string. Line breaks are
# converted by pregReplaceElements() into a <br> element followed by a "\n"
# text element. With breaksEnabled every newline (plus preceding spaces)
# matches; otherwise only a newline preceded by a backslash or by two or more
# spaces does.
1239 protected function inlineText($text)
1242 'extent' => strlen($text),
1243 'element' => array(),
1246 $Inline['element']['elements'] = self::pregReplaceElements(
1247 $this->breaksEnabled ? '/[ ]*+\n/' : '/(?:[ ]*+\\\\|[ ]{2,}+)\n/',
1249 array('name' => 'br'),
1250 array('text' => "\n"),
# Inline code span: an opening run of the marker character, the content, and a
# closing run of exactly the same length (the look-arounds reject longer runs).
# Spaces just inside the delimiters are not part of the captured content, and
# each newline with its preceding spaces is collapsed to a single space.
1258 protected function inlineCode($Excerpt)
1260 $marker = $Excerpt['text'][0];
1262 if (preg_match('/^(['.$marker.']++)[ ]*+(.+?)[ ]*+(?<!['.$marker.'])\1(?!'.$marker.')/s', $Excerpt['text'], $matches))
1264 $text = $matches[2];
1265 $text = preg_replace('/[ ]*+\n/', ' ', $text);
1268 'extent' => strlen($matches[0]),
# Autolink for <user@host> and <mailto:user@host>. The address pattern is
# built from a hostname-label sub-pattern (1-63 alphanumerics/hyphens, not
# starting or ending with a hyphen). When the 'mailto:' group did not match,
# the prefix is added to the URL; the link text is the tag content as written.
1277 protected function inlineEmailTag($Excerpt)
1279 $hostnameLabel = '[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?';
1281 $commonMarkEmail = '[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]++@'
1282 . $hostnameLabel . '(?:\.' . $hostnameLabel . ')*';
# cheap strpos pre-check before running the regex
1284 if (strpos($Excerpt['text'], '>') !== false
1285 and preg_match("/^<((mailto:)?$commonMarkEmail)>/i", $Excerpt['text'], $matches)
1289 if ( ! isset($matches[2]))
1291 $url = "mailto:$url";
1295 'extent' => strlen($matches[0]),
1298 'text' => $matches[1],
1299 'attributes' => array(
# Emphasis with '*' or '_'. Needs at least two characters. A doubled marker is
# tried against $StrongRegex first; otherwise $EmRegex is tried. The captured
# inner text is parsed inline again via lineElements.
1307 protected function inlineEmphasis($Excerpt)
1309 if ( ! isset($Excerpt['text'][1]))
1314 $marker = $Excerpt['text'][0];
1316 if ($Excerpt['text'][1] === $marker and preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches))
1318 $emphasis = 'strong';
1320 elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches))
1330 'extent' => strlen($matches[0]),
1332 'name' => $emphasis,
1334 'function' => 'lineElements',
1335 'argument' => $matches[1],
1336 'destination' => 'elements',
# Backslash escape: when the character after the backslash is listed in
# $specialCharacters, that character alone is emitted as the element's rawHtml.
1342 protected function inlineEscapeSequence($Excerpt)
1344 if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters))
1347 'element' => array('rawHtml' => $Excerpt['text'][1]),
# Image syntax: '!' immediately followed by a link. Parsing is delegated to
# inlineLink() on the text after the '!'. The link's href becomes 'src', its
# label becomes 'alt', and the remaining link attributes are carried over.
1353 protected function inlineImage($Excerpt)
1355 if ( ! isset($Excerpt['text'][1]) or $Excerpt['text'][1] !== '[')
1360 $Excerpt['text']= substr($Excerpt['text'], 1);
1362 $Link = $this->inlineLink($Excerpt);
# +1 accounts for the leading '!'
1370 'extent' => $Link['extent'] + 1,
1373 'attributes' => array(
1374 'src' => $Link['element']['attributes']['href'],
1375 'alt' => $Link['element']['handler']['argument'],
1377 'autobreak' => true,
# array union keeps existing keys, so src/alt win over same-named link attributes
1381 $Inline['element']['attributes'] += $Link['element']['attributes'];
1383 unset($Inline['element']['attributes']['href']);
# Link syntax. Matches a bracketed label (nested brackets allowed via the
# recursive pattern), then either an inline destination
# `(url "title")` or a reference: `[id]` after the label, an empty `[]`
# meaning the label itself, or the bare label. Reference ids are lower-cased
# and looked up in DefinitionData['Reference']. Links and auto-linked URLs are
# declared non-nestable inside the label.
1388 protected function inlineLink($Excerpt)
1393 'function' => 'lineElements',
1395 'destination' => 'elements',
1397 'nonNestables' => array('Url', 'Link'),
1398 'attributes' => array(
1406 $remainder = $Excerpt['text'];
# NOTE(review): this pattern has no '^' anchor - confirm callers always pass
# text that starts at the '['
1408 if (preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches))
1410 $Element['handler']['argument'] = $matches[1];
1412 $extent += strlen($matches[0]);
1414 $remainder = substr($remainder, $extent);
# inline form: url without spaces (one level of parentheses allowed), optional
# quoted title
1421 if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*+"|\'[^\']*+\'))?\s*+[)]/', $remainder, $matches))
1423 $Element['attributes']['href'] = $matches[1];
1425 if (isset($matches[2]))
# strip the surrounding quote characters
1427 $Element['attributes']['title'] = substr($matches[2], 1, - 1);
1430 $extent += strlen($matches[0]);
1434 if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches))
# an empty [] falls back to the label text as the reference id
1436 $definition = strlen($matches[1]) ? $matches[1] : $Element['handler']['argument'];
1437 $definition = strtolower($definition);
1439 $extent += strlen($matches[0]);
1443 $definition = strtolower($Element['handler']['argument']);
1446 if ( ! isset($this->DefinitionData['Reference'][$definition]))
1451 $Definition = $this->DefinitionData['Reference'][$definition];
1453 $Element['attributes']['href'] = $Definition['url'];
1454 $Element['attributes']['title'] = $Definition['title'];
1458 'extent' => $extent,
1459 'element' => $Element,
# Inline raw HTML. Tests the markupEscaped / safeMode flags and the presence
# of a '>' first, then recognises three forms at the start of the excerpt: a
# closing tag, an HTML comment, or an opening/self-closing tag with optional
# attributes. The matched text is passed through as rawHtml.
1463 protected function inlineMarkup($Excerpt)
1465 if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false)
1470 if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*+[ ]*+>/s', $Excerpt['text'], $matches))
1473 'element' => array('rawHtml' => $matches[0]),
1474 'extent' => strlen($matches[0]),
1478 if ($Excerpt['text'][1] === '!' and preg_match('/^<!---?[^>-](?:-?+[^-])*-->/s', $Excerpt['text'], $matches))
1481 'element' => array('rawHtml' => $matches[0]),
1482 'extent' => strlen($matches[0]),
1486 if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*+(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+\/?>/s', $Excerpt['text'], $matches))
1489 'element' => array('rawHtml' => $matches[0]),
1490 'extent' => strlen($matches[0]),
# Passes an existing entity reference (named or numeric, `&...;`) through as
# rawHtml. The two cheap pre-checks (no space after '&', a ';' present) run
# before the regex.
1495 protected function inlineSpecialCharacter($Excerpt)
1497 if (substr($Excerpt['text'], 1, 1) !== ' ' and strpos($Excerpt['text'], ';') !== false
1498 and preg_match('/^&(#?+[0-9a-zA-Z]++);/', $Excerpt['text'], $matches)
1501 'element' => array('rawHtml' => '&' . $matches[1] . ';'),
1502 'extent' => strlen($matches[0]),
# Strikethrough: `~~text~~` where the text neither starts nor ends with
# whitespace. The inner text is parsed inline again via lineElements.
1507 protected function inlineStrikethrough($Excerpt)
1509 if ( ! isset($Excerpt['text'][1]))
1514 if ($Excerpt['text'][1] === '~' and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $Excerpt['text'], $matches))
1517 'extent' => strlen($matches[0]),
1521 'function' => 'lineElements',
1522 'argument' => $matches[1],
1523 'destination' => 'elements',
# Auto-links bare http(s) URLs. It is triggered on the ':' marker, so the
# regex runs against the whole context and the match may begin before the
# marker; the match's byte offset is reported as 'position' for lineElements()
# to honour. The guard tests that urlsLinked is strictly true and that the
# character two places after the marker is '/'.
1530 protected function inlineUrl($Excerpt)
1532 if ($this->urlsLinked !== true or ! isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/')
1537 if (strpos($Excerpt['context'], 'http') !== false
1538 and preg_match('/\bhttps?+:[\/]{2}[^\s<]+\b\/*+/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)
# with PREG_OFFSET_CAPTURE each match is a [text, byte offset] pair
1540 $url = $matches[0][0];
1543 'extent' => strlen($matches[0][0]),
1544 'position' => $matches[0][1],
1548 'attributes' => array(
# Autolink of the form <scheme://...>: a word-character scheme, '://', then
# any run of characters other than space and '>'.
1558 protected function inlineUrlTag($Excerpt)
1560 if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w++:\/{2}[^ >]++)>/i', $Excerpt['text'], $matches))
1565 'extent' => strlen($matches[0]),
1569 'attributes' => array(
# Renders plain text to markup: inlineText() handles line breaks and
# element() serialises the result.
1579 protected function unmarkedText($text)
1581 $Inline = $this->inlineText($text);
1582 return $this->element($Inline['element']);
# Resolves an element's deferred 'handler'. A string handler names a method
# that is applied to the element's 'text', with the result stored as
# 'rawHtml'. An array handler supplies 'function', 'argument' and
# 'destination'. The method is called with the argument and the element's
# nonNestables, its result is stored under the destination key, and the
# handler entry is removed.
1589 protected function handle(array $Element)
1591 if (isset($Element['handler']))
1593 if (!isset($Element['nonNestables']))
1595 $Element['nonNestables'] = array();
1598 if (is_string($Element['handler']))
1600 $function = $Element['handler'];
1601 $argument = $Element['text'];
1602 unset($Element['text']);
1603 $destination = 'rawHtml';
1607 $function = $Element['handler']['function'];
1608 $argument = $Element['handler']['argument'];
1609 $destination = $Element['handler']['destination'];
1612 $Element[$destination] = $this->{$function}($argument, $Element['nonNestables']);
# a handler may itself produce another handler; resolve that one too
1614 if ($destination === 'handler')
1616 $Element = $this->handle($Element);
1619 unset($Element['handler']);
# Runs handle() over an element and all of its descendants, parent first.
1625 protected function handleElementRecursive(array $Element)
1627 return $this->elementApplyRecursive(array($this, 'handle'), $Element);
# Runs handle() over every element in a list and all of their descendants.
1630 protected function handleElementsRecursive(array $Elements)
1632 return $this->elementsApplyRecursive(array($this, 'handle'), $Elements);
# Applies $closure to an element first and then to its children: the
# 'elements' list when present, otherwise the single nested 'element'.
# Children are read from the closure's result, so the closure may create them.
1635 protected function elementApplyRecursive($closure, array $Element)
1637 $Element = call_user_func($closure, $Element);
1639 if (isset($Element['elements']))
1641 $Element['elements'] = $this->elementsApplyRecursive($closure, $Element['elements']);
1643 elseif (isset($Element['element']))
1645 $Element['element'] = $this->elementApplyRecursive($closure, $Element['element']);
/**
 * Apply $closure to an element tree bottom-up: children are transformed
 * first, then the element itself.
 *
 * Children are either a list under 'elements' or a single nested element
 * under 'element'. The single-child case recurses with this (singular)
 * method; handing one element to the plural variant would iterate over its
 * fields as if each were an element and pass scalars to an array-typed
 * parameter.
 *
 * @param callable $closure receives an element array, returns the replacement
 * @param array    $Element the element to transform
 *
 * @return array the transformed element
 */
protected function elementApplyRecursiveDepthFirst($closure, array $Element)
{
    if (isset($Element['elements']))
    {
        $Element['elements'] = $this->elementsApplyRecursiveDepthFirst($closure, $Element['elements']);
    }
    elseif (isset($Element['element']))
    {
        # one nested element, not a list: recurse with the singular method
        $Element['element'] = $this->elementApplyRecursiveDepthFirst($closure, $Element['element']);
    }

    $Element = call_user_func($closure, $Element);

    return $Element;
}
# Applies elementApplyRecursive() to each element of a list, replacing the
# entries in place through the by-reference loop variable.
1667 protected function elementsApplyRecursive($closure, array $Elements)
1669 foreach ($Elements as &$Element)
1671 $Element = $this->elementApplyRecursive($closure, $Element);
# Applies elementApplyRecursiveDepthFirst() to each element of a list,
# replacing the entries in place through the by-reference loop variable.
1677 protected function elementsApplyRecursiveDepthFirst($closure, array $Elements)
1679 foreach ($Elements as &$Element)
1681 $Element = $this->elementApplyRecursiveDepthFirst($closure, $Element);
# Serialises one element to markup. In safe mode the element is sanitised
# first. Its handler is then resolved. A named element gets an opening tag
# with escaped attribute values and, when it has content, a closing tag.
# Content is the child 'elements', else the nested 'element', else text:
# 'text' is always escaped, while 'rawHtml' is emitted verbatim unless safe
# mode forbids it.
1687 protected function element(array $Element)
1689 if ($this->safeMode)
1691 $Element = $this->sanitiseElement($Element);
1694 # identity map if element has no handler
1695 $Element = $this->handle($Element);
1697 $hasName = isset($Element['name']);
1703 $markup .= '<' . $Element['name'];
1705 if (isset($Element['attributes']))
1707 foreach ($Element['attributes'] as $name => $value)
# null-valued attributes are tested for here (outcome elided)
1709 if ($value === null)
1714 $markup .= " $name=\"".self::escape($value).'"';
1719 $permitRawHtml = false;
1721 if (isset($Element['text']))
1723 $text = $Element['text'];
1725 // very strongly consider an alternative if you're writing an
1727 elseif (isset($Element['rawHtml']))
1729 $text = $Element['rawHtml'];
# raw HTML is allowed outside safe mode, or when the element opts in explicitly
1731 $allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode'];
1732 $permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode;
1735 $hasContent = isset($text) || isset($Element['element']) || isset($Element['elements']);
1739 $markup .= $hasName ? '>' : '';
1741 if (isset($Element['elements']))
1743 $markup .= $this->elements($Element['elements']);
1745 elseif (isset($Element['element']))
1747 $markup .= $this->element($Element['element']);
1751 if (!$permitRawHtml)
# second argument true: quotes are left unescaped in element content
1753 $markup .= self::escape($text, true);
1761 $markup .= $hasName ? '</' . $Element['name'] . '>' : '';
# Serialises a list of elements, inserting "\n" between neighbours only when
# both sides allow it. An element's 'autobreak' defaults to whether it has a
# 'name'. Empty elements are tested for at the top of the loop (outcome
# elided).
1771 protected function elements(array $Elements)
1777 foreach ($Elements as $Element)
1779 if (empty($Element))
1784 $autoBreakNext = (isset($Element['autobreak'])
1785 ? $Element['autobreak'] : isset($Element['name'])
1787 // (autobreak === false) covers both sides of an element
1788 $autoBreak = !$autoBreak ? $autoBreak : $autoBreakNext;
1790 $markup .= ($autoBreak ? "\n" : '') . $this->element($Element);
1791 $autoBreak = $autoBreakNext;
# trailing newline only if the last emitted element allows a break after it
1794 $markup .= $autoBreak ? "\n" : '';
# Parses the lines of one list item. When none of the lines is an empty
# string (a tight item) and the first element is a paragraph, its 'name' is
# removed so that element() emits the content without <p> tags.
1801 protected function li($lines)
1803 $Elements = $this->linesElements($lines);
1805 if ( ! in_array('', $lines)
1806 and isset($Elements[0]) and isset($Elements[0]['name'])
1807 and $Elements[0]['name'] === 'p'
1809 unset($Elements[0]['name']);
1820 * Replace occurrences $regexp with $Elements in $text. Return an array of
1821 * elements representing the replacement.
# For each match of $regexp, emits the text before the match as a text
# element followed by a copy of every entry in $Elements. The text left after
# the last match is appended as a final text element.
1823 protected static function pregReplaceElements($regexp, $Elements, $text)
1825 $newElements = array();
1827 while (preg_match($regexp, $text, $matches, PREG_OFFSET_CAPTURE))
# with PREG_OFFSET_CAPTURE, $matches[0] is a [matched text, byte offset] pair
1829 $offset = $matches[0][1];
1830 $before = substr($text, 0, $offset);
1831 $after = substr($text, $offset + strlen($matches[0][0]));
1833 $newElements[] = array('text' => $before);
1835 foreach ($Elements as $Element)
1837 $newElements[] = $Element;
1843 $newElements[] = array('text' => $text);
1845 return $newElements;
1849 # Deprecated Methods
1853 * @deprecated use text() instead
# Legacy entry point that forwards to text().
1855 function parse($text)
1857 $markup = $this->text($text);
# Safe-mode sanitiser. Unnamed elements lose all attributes. Elements listed
# in $safeUrlNameToAtt (entries elided here) have the mapped URL attribute
# filtered. Attributes with malformed names or names starting with 'on' are
# removed.
1862 protected function sanitiseElement(array $Element)
1864 static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/';
1865 static $safeUrlNameToAtt = array(
1870 if ( ! isset($Element['name']))
1872 unset($Element['attributes']);
1876 if (isset($safeUrlNameToAtt[$Element['name']]))
1878 $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]);
1881 if ( ! empty($Element['attributes']))
1883 foreach ($Element['attributes'] as $att => $val)
1885 # filter out badly parsed attribute
1886 if ( ! preg_match($goodAttribute, $att))
1888 unset($Element['attributes'][$att]);
1890 # dump onevent attribute
1891 elseif (self::striAtStart($att, 'on'))
1893 unset($Element['attributes'][$att]);
# Compares the attribute value against each prefix in $safeLinksWhitelist,
# case-insensitively (the outcome of a hit is elided here). The visible
# fallback replaces every ':' in the value with '%3A'.
# NOTE(review): assumes $Element['attributes'][$attribute] is set - confirm
# against callers.
1901 protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
1903 foreach ($this->safeLinksWhitelist as $scheme)
1905 if (self::striAtStart($Element['attributes'][$attribute], $scheme))
1911 $Element['attributes'][$attribute] = str_replace(':', '%3A', $Element['attributes'][$attribute]);
# HTML-escapes text as UTF-8. By default both quote styles are escaped
# (ENT_QUOTES); with $allowQuotes they are left untouched (ENT_NOQUOTES).
1920 protected static function escape($text, $allowQuotes = false)
1922 return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8');
# Case-insensitive "starts with": compares the lower-cased first
# strlen($needle) bytes of $string with the lower-cased needle. A needle
# longer than the string is tested for first (outcome elided here).
1925 protected static function striAtStart($string, $needle)
1927 $len = strlen($needle);
1929 if ($len > strlen($string))
1935 return strtolower(substr($string, 0, $len)) === strtolower($needle);
# Named-instance registry: returns the cached instance for $name when one
# exists, otherwise creates one with `new static()` (so subclasses get their
# own class) and caches it under $name.
1939 static function instance($name = 'default')
1941 if (isset(self::$instances[$name]))
1943 return self::$instances[$name];
1946 $instance = new static();
1948 self::$instances[$name] = $instance;
# private and accessed via self::, so one registry is shared with subclasses
1953 private static $instances = array();
# Per-document definition store. textElements() resets it, blockReference()
# writes ['Reference'][<id>] and inlineLink() reads it.
1959 protected $DefinitionData;
# Characters that inlineEscapeSequence() accepts after a backslash.
1964 protected $specialCharacters = array(
1965 '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|', '~'
# Per-marker patterns for strong emphasis, used by inlineEmphasis(). Group 1
# is the inner text; the trailing look-ahead rejects a closing run followed
# by another marker character.
1968 protected $StrongRegex = array(
1969 '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*+[*])+?)[*]{2}(?![*])/s',
1970 '_' => '/^__((?:\\\\_|[^_]|_[^_]*+_)+?)__(?!_)/us',
# Per-marker patterns for regular emphasis, used by inlineEmphasis(). Group 1
# is the inner text. The '_' variant additionally requires a word boundary
# after the closing marker.
1973 protected $EmRegex = array(
1974 '*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
1975 '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us',
# Regex fragment for one HTML attribute: a name, optionally followed by '='
# and an unquoted, double-quoted or single-quoted value. It is spliced into
# the tag patterns of blockMarkup() and inlineMarkup().
1978 protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*+(?:\s*+=\s*+(?:[^"\'=<>`\s]+|"[^"]*+"|\'[^\']*+\'))?+';
# List of HTML element names; presumably the void (content-less) elements,
# going by the property name. No reader of this property is visible in this
# excerpt.
1980 protected $voidElements = array(
1981 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source',
1984 protected $textLevelElements = array(
1985 'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont',
1986 'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing',
1987 'i', 'rp', 'del', 'code', 'strike', 'marquee',
1988 'q', 'rt', 'ins', 'font', 'strong',
1989 's', 'tt', 'kbd', 'mark',
1990 'u', 'xm', 'sub', 'nobr',