]> git.bts.cx Git - cx.git/blob - cx/third_party/parsedown/Parsedown.php
added youtube embeds
[cx.git] / cx / third_party / parsedown / Parsedown.php
1 <?php
2
3 #
4 #
5 # Parsedown
6 # http://parsedown.org
7 #
8 # (c) Emanuil Rusev
9 # http://erusev.com
10 #
11 # For the full license information, view the LICENSE file that was distributed
12 # with this source code.
13 #
14 #
15
/**
 * Markdown-to-HTML converter.
 *
 * Conversion happens in two passes: text() splits the input into lines and
 * lines() groups them into blocks (headers, lists, code, ...); each block's
 * element is then rendered by element(), which dispatches the block's text to
 * a handler (line(), lines(), elements(), li(), element()).
 *
 * Block recognisers are methods named block<Type>, optionally accompanied by
 * block<Type>Continue and block<Type>Complete. Inline recognisers are methods
 * named inline<Type>. Both are looked up dynamically from the $BlockTypes and
 * $InlineTypes tables, keyed by the first character of the line / excerpt.
 */
class Parsedown
{
    # ~

    const version = '1.7.4';

    # ~

    /**
     * Converts a Markdown document to HTML.
     *
     * @param string $text Markdown source.
     *
     * @return string HTML markup with surrounding line breaks trimmed.
     */
    public function text($text)
    {
        # make sure no definitions are set
        $this->DefinitionData = array();

        # standardize line breaks
        $text = str_replace(array("\r\n", "\r"), "\n", $text);

        # remove surrounding line breaks
        $text = trim($text, "\n");

        # split text into lines
        $lines = explode("\n", $text);

        # iterate through lines to identify blocks
        $markup = $this->lines($lines);

        # trim line breaks
        $markup = trim($markup, "\n");

        return $markup;
    }

    #
    # Setters
    #

    /**
     * When enabled, every line break in inline text becomes a <br />
     * (see unmarkedText()).
     *
     * @param bool $breaksEnabled
     *
     * @return $this
     */
    public function setBreaksEnabled($breaksEnabled)
    {
        $this->breaksEnabled = $breaksEnabled;

        return $this;
    }

    /** @var bool|null Read by unmarkedText(). */
    protected $breaksEnabled;

    /**
     * When enabled, block and inline HTML recognisers are skipped, so raw
     * markup falls through to the escaping paths.
     *
     * @param bool $markupEscaped
     *
     * @return $this
     */
    public function setMarkupEscaped($markupEscaped)
    {
        $this->markupEscaped = $markupEscaped;

        return $this;
    }

    /** @var bool|null Read by blockComment(), blockMarkup() and inlineMarkup(). */
    protected $markupEscaped;

    /**
     * Controls whether bare http(s) URLs are turned into links (see inlineUrl()).
     *
     * @param bool $urlsLinked
     *
     * @return $this
     */
    public function setUrlsLinked($urlsLinked)
    {
        $this->urlsLinked = $urlsLinked;

        return $this;
    }

    /** @var bool inlineUrl() only links URLs when this is exactly true. */
    protected $urlsLinked = true;

    /**
     * When enabled, raw HTML recognisers are skipped and every rendered
     * element is passed through sanitiseElement().
     *
     * @param mixed $safeMode Cast to bool.
     *
     * @return $this
     */
    public function setSafeMode($safeMode)
    {
        $this->safeMode = (bool) $safeMode;

        return $this;
    }

    /** @var bool|null */
    protected $safeMode;

    /**
     * URL prefixes that filterUnsafeUrlInAttribute() accepts unchanged
     * (compared case-insensitively).
     *
     * @var string[]
     */
    protected $safeLinksWhitelist = array(
        'http://',
        'https://',
        'ftp://',
        'ftps://',
        'mailto:',
        'data:image/png;base64,',
        'data:image/gif;base64,',
        'data:image/jpeg;base64,',
        'irc:',
        'ircs:',
        'git:',
        'ssh:',
        'news:',
        'steam:',
    );

    #
    # Lines
    #

    /**
     * Maps the first non-space character of a line to the block types that
     * are tried for it, in order.
     *
     * @var array<string, string[]>
     */
    protected $BlockTypes = array(
        '#' => array('Header'),
        '*' => array('Rule', 'List'),
        '+' => array('List'),
        '-' => array('SetextHeader', 'Table', 'Rule', 'List'),
        '0' => array('List'),
        '1' => array('List'),
        '2' => array('List'),
        '3' => array('List'),
        '4' => array('List'),
        '5' => array('List'),
        '6' => array('List'),
        '7' => array('List'),
        '8' => array('List'),
        '9' => array('List'),
        ':' => array('Table'),
        '<' => array('Comment', 'Markup'),
        '=' => array('SetextHeader'),
        '>' => array('Quote'),
        '[' => array('Reference'),
        '_' => array('Rule'),
        '`' => array('FencedCode'),
        '|' => array('Table'),
        '~' => array('FencedCode'),
    );

    # ~

    /**
     * Block types tried for every line, before the marker-specific ones.
     *
     * @var string[]
     */
    protected $unmarkedBlockTypes = array(
        'Code',
    );

    #
    # Blocks
    #

    /**
     * Groups lines into blocks and renders them.
     *
     * For each non-blank line: tabs are expanded to 4-column stops, the
     * leading-space indent is measured, the current block is offered the line
     * through its Continue method, then the block types registered for the
     * line's first character are tried, and finally the line is appended to
     * (or starts) a paragraph. Blank lines only flag the current block as
     * 'interrupted'.
     *
     * @param string[] $lines
     *
     * @return string Markup, each block preceded by "\n", ending with "\n".
     */
    protected function lines(array $lines)
    {
        $CurrentBlock = null;

        # the first entry pushed is always the initial null block; it is
        # dropped again before rendering
        $Blocks = array();

        foreach ($lines as $line)
        {
            if (chop($line) === '')
            {
                if (isset($CurrentBlock))
                {
                    $CurrentBlock['interrupted'] = true;
                }

                continue;
            }

            if (strpos($line, "\t") !== false)
            {
                $parts = explode("\t", $line);

                $line = $parts[0];

                unset($parts[0]);

                foreach ($parts as $part)
                {
                    # pad up to the next multiple of 4 characters
                    $shortage = 4 - mb_strlen($line, 'utf-8') % 4;

                    $line .= str_repeat(' ', $shortage);
                    $line .= $part;
                }
            }

            $indent = 0;

            while (isset($line[$indent]) and $line[$indent] === ' ')
            {
                $indent ++;
            }

            $text = $indent > 0 ? substr($line, $indent) : $line;

            # ~

            $Line = array('body' => $line, 'indent' => $indent, 'text' => $text);

            # ~

            if (isset($CurrentBlock['continuable']))
            {
                $Block = $this->{'block'.$CurrentBlock['type'].'Continue'}($Line, $CurrentBlock);

                if (isset($Block))
                {
                    $CurrentBlock = $Block;

                    continue;
                }
                else
                {
                    if ($this->isBlockCompletable($CurrentBlock['type']))
                    {
                        $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock);
                    }
                }
            }

            # ~

            $marker = $text[0];

            # ~

            $blockTypes = $this->unmarkedBlockTypes;

            if (isset($this->BlockTypes[$marker]))
            {
                foreach ($this->BlockTypes[$marker] as $blockType)
                {
                    $blockTypes []= $blockType;
                }
            }

            #
            # ~

            foreach ($blockTypes as $blockType)
            {
                $Block = $this->{'block'.$blockType}($Line, $CurrentBlock);

                if (isset($Block))
                {
                    $Block['type'] = $blockType;

                    # a block that arrives already 'identified' replaces the
                    # current block instead of following it
                    if ( ! isset($Block['identified']))
                    {
                        $Blocks []= $CurrentBlock;

                        $Block['identified'] = true;
                    }

                    if ($this->isBlockContinuable($blockType))
                    {
                        $Block['continuable'] = true;
                    }

                    $CurrentBlock = $Block;

                    continue 2;
                }
            }

            # ~

            # paragraphs are the only blocks without a 'type'
            if (isset($CurrentBlock) and ! isset($CurrentBlock['type']) and ! isset($CurrentBlock['interrupted']))
            {
                $CurrentBlock['element']['text'] .= "\n".$text;
            }
            else
            {
                $Blocks []= $CurrentBlock;

                $CurrentBlock = $this->paragraph($Line);

                $CurrentBlock['identified'] = true;
            }
        }

        # ~

        if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type']))
        {
            $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock);
        }

        # ~

        $Blocks []= $CurrentBlock;

        unset($Blocks[0]);

        # ~

        $markup = '';

        foreach ($Blocks as $Block)
        {
            if (isset($Block['hidden']))
            {
                continue;
            }

            $markup .= "\n";
            $markup .= isset($Block['markup']) ? $Block['markup'] : $this->element($Block['element']);
        }

        $markup .= "\n";

        # ~

        return $markup;
    }

    /**
     * @param string $Type Block type name.
     *
     * @return bool Whether a block<Type>Continue method exists.
     */
    protected function isBlockContinuable($Type)
    {
        return method_exists($this, 'block'.$Type.'Continue');
    }

    /**
     * @param string $Type Block type name.
     *
     * @return bool Whether a block<Type>Complete method exists.
     */
    protected function isBlockCompletable($Type)
    {
        return method_exists($this, 'block'.$Type.'Complete');
    }

    #
    # Code

    /**
     * Starts an indented code block when the line is indented by 4+ spaces.
     *
     * Declines when the current block is an uninterrupted paragraph, so that
     * an indented line right after paragraph text stays part of it.
     *
     * @param array      $Line  Keys 'body', 'indent', 'text'.
     * @param array|null $Block Current block, if any.
     *
     * @return array|null
     */
    protected function blockCode($Line, $Block = null)
    {
        if (isset($Block) and ! isset($Block['type']) and ! isset($Block['interrupted']))
        {
            return;
        }

        if ($Line['indent'] >= 4)
        {
            $text = substr($Line['body'], 4);

            $Block = array(
                'element' => array(
                    'name' => 'pre',
                    'handler' => 'element',
                    'text' => array(
                        'name' => 'code',
                        'text' => $text,
                    ),
                ),
            );

            return $Block;
        }
    }

    /**
     * Appends a further 4+-space-indented line to an indented code block.
     *
     * @param array $Line
     * @param array $Block
     *
     * @return array|null Null ends the block.
     */
    protected function blockCodeContinue($Line, $Block)
    {
        if ($Line['indent'] >= 4)
        {
            if (isset($Block['interrupted']))
            {
                # re-insert one blank line for the gap
                $Block['element']['text']['text'] .= "\n";

                unset($Block['interrupted']);
            }

            $Block['element']['text']['text'] .= "\n";

            $text = substr($Line['body'], 4);

            $Block['element']['text']['text'] .= $text;

            return $Block;
        }
    }

    /**
     * Finalises an indented code block. Nothing needs changing; the method
     * exists so that the block type counts as completable.
     *
     * @param array $Block
     *
     * @return array
     */
    protected function blockCodeComplete($Block)
    {
        return $Block;
    }

    #
    # Comment

    /**
     * Starts a raw HTML comment block on a line beginning with "<!--".
     * Disabled when markup is escaped or safe mode is on.
     *
     * @param array $Line
     *
     * @return array|null
     */
    protected function blockComment($Line)
    {
        if ($this->markupEscaped or $this->safeMode)
        {
            return;
        }

        if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!')
        {
            $Block = array(
                'markup' => $Line['body'],
            );

            if (preg_match('/-->$/', $Line['text']))
            {
                $Block['closed'] = true;
            }

            return $Block;
        }
    }

    /**
     * Appends lines to an open comment block until a line ends with "-->".
     *
     * @param array $Line
     * @param array $Block
     *
     * @return array|null Null once the comment is already closed.
     */
    protected function blockCommentContinue($Line, array $Block)
    {
        if (isset($Block['closed']))
        {
            return;
        }

        $Block['markup'] .= "\n" . $Line['body'];

        if (preg_match('/-->$/', $Line['text']))
        {
            $Block['closed'] = true;
        }

        return $Block;
    }

    #
    # Fenced Code

    /**
     * Starts a fenced code block on a line of 3+ fence characters, optionally
     * followed by an info string whose first word becomes a "language-*" class.
     *
     * @param array $Line
     *
     * @return array|null
     */
    protected function blockFencedCode($Line)
    {
        if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches))
        {
            $Element = array(
                'name' => 'code',
                'text' => '',
            );

            if (isset($matches[1]))
            {
                /**
                 * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes
                 * Every HTML element may have a class attribute specified.
                 * The attribute, if specified, must have a value that is a set
                 * of space-separated tokens representing the various classes
                 * that the element belongs to.
                 * [...]
                 * The space characters, for the purposes of this specification,
                 * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab),
                 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and
                 * U+000D CARRIAGE RETURN (CR).
                 */
                $language = substr($matches[1], 0, strcspn($matches[1], " \t\n\f\r"));

                $class = 'language-'.$language;

                $Element['attributes'] = array(
                    'class' => $class,
                );
            }

            $Block = array(
                'char' => $Line['text'][0],
                'element' => array(
                    'name' => 'pre',
                    'handler' => 'element',
                    'text' => $Element,
                ),
            );

            return $Block;
        }
    }

    /**
     * Collects the body of a fenced code block until a closing fence of 3+
     * of the opening character is seen.
     *
     * @param array $Line
     * @param array $Block
     *
     * @return array|null Null once the block is complete.
     */
    protected function blockFencedCodeContinue($Line, $Block)
    {
        if (isset($Block['complete']))
        {
            return;
        }

        if (isset($Block['interrupted']))
        {
            $Block['element']['text']['text'] .= "\n";

            unset($Block['interrupted']);
        }

        if (preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text']))
        {
            # every body line was appended with a leading "\n"; drop the first
            $Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1);

            $Block['complete'] = true;

            return $Block;
        }

        $Block['element']['text']['text'] .= "\n".$Line['body'];

        return $Block;
    }

    /**
     * Finalises a fenced code block. Nothing needs changing; the method
     * exists so that the block type counts as completable.
     *
     * @param array $Block
     *
     * @return array
     */
    protected function blockFencedCodeComplete($Block)
    {
        return $Block;
    }

    #
    # Header

    /**
     * Builds an h1-h6 from a line starting with 1-6 '#' characters.
     * Surrounding '#' and spaces are trimmed from the header text.
     *
     * @param array $Line
     *
     * @return array|null Null for a lone '#' or more than six '#'.
     */
    protected function blockHeader($Line)
    {
        if (isset($Line['text'][1]))
        {
            $level = 1;

            while (isset($Line['text'][$level]) and $Line['text'][$level] === '#')
            {
                $level ++;
            }

            if ($level > 6)
            {
                return;
            }

            $text = trim($Line['text'], '# ');

            $Block = array(
                'element' => array(
                    'name' => 'h' . $level,
                    'text' => $text,
                    'handler' => 'line',
                ),
            );

            return $Block;
        }
    }

    #
    # List

    /**
     * Starts a <ul> (markers * + -) or an <ol> (digits followed by '.').
     * An ordered list not starting at "1" gets a start attribute.
     *
     * @param array $Line
     *
     * @return array|null
     */
    protected function blockList($Line)
    {
        # '*', '+' and '-' all compare <= '-'; digits compare greater
        list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]');

        if (preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches))
        {
            $Block = array(
                'indent' => $Line['indent'],
                'pattern' => $pattern,
                'element' => array(
                    'name' => $name,
                    'handler' => 'elements',
                ),
            );

            if($name === 'ol')
            {
                $listStart = stristr($matches[0], '.', true);

                if($listStart !== '1')
                {
                    $Block['element']['attributes'] = array('start' => $listStart);
                }
            }

            $Block['li'] = array(
                'name' => 'li',
                'handler' => 'li',
                'text' => array(
                    $matches[2],
                ),
            );

            # stored by reference so that later appends to $Block['li'] are
            # visible inside the element tree
            $Block['element']['text'] []= & $Block['li'];

            return $Block;
        }
    }

    /**
     * Extends a list: a same-indent marker starts a new item, a reference
     * definition is absorbed, and other lines are added to the current item
     * (after a blank line only if indented).
     *
     * @param array $Line
     * @param array $Block
     *
     * @return array|null Null ends the list.
     */
    protected function blockListContinue($Line, array $Block)
    {
        if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches))
        {
            if (isset($Block['interrupted']))
            {
                $Block['li']['text'] []= '';

                $Block['loose'] = true;

                unset($Block['interrupted']);
            }

            # break the reference to the previous item before creating a new one
            unset($Block['li']);

            $text = isset($matches[1]) ? $matches[1] : '';

            $Block['li'] = array(
                'name' => 'li',
                'handler' => 'li',
                'text' => array(
                    $text,
                ),
            );

            $Block['element']['text'] []= & $Block['li'];

            return $Block;
        }

        if ($Line['text'][0] === '[' and $this->blockReference($Line))
        {
            return $Block;
        }

        if ( ! isset($Block['interrupted']))
        {
            $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);

            $Block['li']['text'] []= $text;

            return $Block;
        }

        if ($Line['indent'] > 0)
        {
            $Block['li']['text'] []= '';

            $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);

            $Block['li']['text'] []= $text;

            unset($Block['interrupted']);

            return $Block;
        }
    }

    /**
     * For a loose list, makes sure every item's lines end with an empty
     * string (li() keeps the <p> wrapper for items containing one).
     *
     * @param array $Block
     *
     * @return array
     */
    protected function blockListComplete(array $Block)
    {
        if (isset($Block['loose']))
        {
            foreach ($Block['element']['text'] as &$li)
            {
                if (end($li['text']) !== '')
                {
                    $li['text'] []= '';
                }
            }

            # do not leave a dangling reference to the last item
            unset($li);
        }

        return $Block;
    }

    #
    # Quote

    /**
     * Starts a blockquote on a line beginning with '>'.
     *
     * @param array $Line
     *
     * @return array|null
     */
    protected function blockQuote($Line)
    {
        if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
        {
            $Block = array(
                'element' => array(
                    'name' => 'blockquote',
                    'handler' => 'lines',
                    'text' => (array) $matches[1],
                ),
            );

            return $Block;
        }
    }

    /**
     * Extends a blockquote with further '>' lines, or with plain lines as
     * long as no blank line intervened.
     *
     * @param array $Line
     * @param array $Block
     *
     * @return array|null Null ends the quote.
     */
    protected function blockQuoteContinue($Line, array $Block)
    {
        if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
        {
            if (isset($Block['interrupted']))
            {
                $Block['element']['text'] []= '';

                unset($Block['interrupted']);
            }

            $Block['element']['text'] []= $matches[1];

            return $Block;
        }

        if ( ! isset($Block['interrupted']))
        {
            $Block['element']['text'] []= $Line['text'];

            return $Block;
        }
    }

    #
    # Rule

    /**
     * Builds an <hr> from a line of 3+ identical marker characters,
     * optionally separated by spaces.
     *
     * @param array $Line
     *
     * @return array|null
     */
    protected function blockRule($Line)
    {
        if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text']))
        {
            $Block = array(
                'element' => array(
                    'name' => 'hr'
                ),
            );

            return $Block;
        }
    }

    #
    # Setext

    /**
     * Turns the preceding uninterrupted paragraph into an h1 ('=' underline)
     * or h2 (any other marker routed here, i.e. '-').
     *
     * @param array      $Line
     * @param array|null $Block Current block; must be a paragraph.
     *
     * @return array|null
     */
    protected function blockSetextHeader($Line, array $Block = null)
    {
        if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
        {
            return;
        }

        # the line must consist solely of its first character
        if (chop($Line['text'], $Line['text'][0]) === '')
        {
            $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2';

            return $Block;
        }
    }

    #
    # Markup

    /**
     * Starts a raw HTML block on a line opening with a non-text-level tag.
     * Disabled when markup is escaped or safe mode is on.
     *
     * The block is marked closed right away for a self-closing or void
     * element alone on its line, or when the line also ends with the
     * matching closing tag.
     *
     * @param array $Line
     *
     * @return array|null
     */
    protected function blockMarkup($Line)
    {
        if ($this->markupEscaped or $this->safeMode)
        {
            return;
        }

        if (preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
        {
            $element = strtolower($matches[1]);

            if (in_array($element, $this->textLevelElements, true))
            {
                return;
            }

            # compare the lower-cased name: $voidElements is all lower case,
            # and <HR> / <BR> must be recognised as void just like <hr> / <br>
            $isVoid = (isset($matches[2]) or in_array($element, $this->voidElements, true));

            $Block = array(
                'name' => $matches[1],
                'depth' => 0,
                'markup' => $Line['text'],
            );

            $length = strlen($matches[0]);

            $remainder = substr($Line['text'], $length);

            if (trim($remainder) === '')
            {
                if ($isVoid)
                {
                    $Block['closed'] = true;

                    $Block['void'] = true;
                }
            }
            else
            {
                if ($isVoid)
                {
                    return;
                }

                if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder))
                {
                    $Block['closed'] = true;
                }
            }

            return $Block;
        }
    }

    /**
     * Appends lines to an open raw HTML block, tracking nested opening tags
     * of the same name so that only the outermost closing tag closes it.
     *
     * @param array $Line
     * @param array $Block
     *
     * @return array|null Null once the block is already closed.
     */
    protected function blockMarkupContinue($Line, array $Block)
    {
        if (isset($Block['closed']))
        {
            return;
        }

        if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open
        {
            $Block['depth'] ++;
        }

        if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close
        {
            if ($Block['depth'] > 0)
            {
                $Block['depth'] --;
            }
            else
            {
                $Block['closed'] = true;
            }
        }

        if (isset($Block['interrupted']))
        {
            $Block['markup'] .= "\n";

            unset($Block['interrupted']);
        }

        $Block['markup'] .= "\n".$Line['body'];

        return $Block;
    }

    #
    # Reference

    /**
     * Records a reference definition ("[id]: url "title"") in
     * $DefinitionData['Reference'] under the lower-cased id and yields a
     * hidden block.
     *
     * @param array $Line
     *
     * @return array|null
     */
    protected function blockReference($Line)
    {
        if (preg_match('/^\[(.+?)\]:[ ]*<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches))
        {
            $id = strtolower($matches[1]);

            $Data = array(
                'url' => $matches[2],
                'title' => null,
            );

            if (isset($matches[3]))
            {
                $Data['title'] = $matches[3];
            }

            $this->DefinitionData['Reference'][$id] = $Data;

            $Block = array(
                'hidden' => true,
            );

            return $Block;
        }
    }

    #
    # Table

    /**
     * Turns the preceding uninterrupted paragraph into a table header when
     * it contains '|' and the current line is a divider row made only of
     * spaces, '-', ':' and '|'. Colons in divider cells set the alignment.
     *
     * @param array      $Line
     * @param array|null $Block Current block; must be a paragraph.
     *
     * @return array|null
     */
    protected function blockTable($Line, array $Block = null)
    {
        if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
        {
            return;
        }

        if (strpos($Block['element']['text'], '|') !== false and chop($Line['text'], ' -:|') === '')
        {
            $alignments = array();

            $divider = $Line['text'];

            $divider = trim($divider);
            $divider = trim($divider, '|');

            $dividerCells = explode('|', $divider);

            foreach ($dividerCells as $dividerCell)
            {
                $dividerCell = trim($dividerCell);

                if ($dividerCell === '')
                {
                    continue;
                }

                $alignment = null;

                if ($dividerCell[0] === ':')
                {
                    $alignment = 'left';
                }

                if (substr($dividerCell, - 1) === ':')
                {
                    $alignment = $alignment === 'left' ? 'center' : 'right';
                }

                $alignments []= $alignment;
            }

            # ~

            $HeaderElements = array();

            $header = $Block['element']['text'];

            $header = trim($header);
            $header = trim($header, '|');

            $headerCells = explode('|', $header);

            foreach ($headerCells as $index => $headerCell)
            {
                $headerCell = trim($headerCell);

                $HeaderElement = array(
                    'name' => 'th',
                    'text' => $headerCell,
                    'handler' => 'line',
                );

                if (isset($alignments[$index]))
                {
                    $alignment = $alignments[$index];

                    $HeaderElement['attributes'] = array(
                        'style' => 'text-align: '.$alignment.';',
                    );
                }

                $HeaderElements []= $HeaderElement;
            }

            # ~

            # 'identified' makes lines() replace the paragraph with this block
            $Block = array(
                'alignments' => $alignments,
                'identified' => true,
                'element' => array(
                    'name' => 'table',
                    'handler' => 'elements',
                ),
            );

            $Block['element']['text'] []= array(
                'name' => 'thead',
                'handler' => 'elements',
            );

            $Block['element']['text'] []= array(
                'name' => 'tbody',
                'handler' => 'elements',
                'text' => array(),
            );

            $Block['element']['text'][0]['text'] []= array(
                'name' => 'tr',
                'handler' => 'elements',
                'text' => $HeaderElements,
            );

            return $Block;
        }
    }

    /**
     * Adds a body row to a table for every following line containing '|'.
     * Cells are split on '|' except inside backtick spans or when escaped.
     *
     * @param array $Line
     * @param array $Block
     *
     * @return array|null Null ends the table.
     */
    protected function blockTableContinue($Line, array $Block)
    {
        if (isset($Block['interrupted']))
        {
            return;
        }

        # strpos() yields 0 (falsy) for a leading '|', hence the first check
        if ($Line['text'][0] === '|' or strpos($Line['text'], '|'))
        {
            $Elements = array();

            $row = $Line['text'];

            $row = trim($row);
            $row = trim($row, '|');

            preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches);

            foreach ($matches[0] as $index => $cell)
            {
                $cell = trim($cell);

                $Element = array(
                    'name' => 'td',
                    'handler' => 'line',
                    'text' => $cell,
                );

                if (isset($Block['alignments'][$index]))
                {
                    $Element['attributes'] = array(
                        'style' => 'text-align: '.$Block['alignments'][$index].';',
                    );
                }

                $Elements []= $Element;
            }

            $Element = array(
                'name' => 'tr',
                'handler' => 'elements',
                'text' => $Elements,
            );

            # index 1 is the <tbody> created by blockTable()
            $Block['element']['text'][1]['text'] []= $Element;

            return $Block;
        }
    }

    #
    # ~
    #

    /**
     * Builds a paragraph block from a line.
     *
     * @param array $Line
     *
     * @return array
     */
    protected function paragraph($Line)
    {
        $Block = array(
            'element' => array(
                'name' => 'p',
                'text' => $Line['text'],
                'handler' => 'line',
            ),
        );

        return $Block;
    }

    #
    # Inline Elements
    #

    /**
     * Maps a marker character to the inline types tried for it, in order.
     *
     * @var array<string, string[]>
     */
    protected $InlineTypes = array(
        '"' => array('SpecialCharacter'),
        '!' => array('Image'),
        '&' => array('SpecialCharacter'),
        '*' => array('Emphasis'),
        ':' => array('Url'),
        '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'),
        '>' => array('SpecialCharacter'),
        '[' => array('Link'),
        '_' => array('Emphasis'),
        '`' => array('Code'),
        '~' => array('Strikethrough'),
        '\\' => array('EscapeSequence'),
    );

    # ~

    /** @var string All marker characters, as a strpbrk() character list. */
    protected $inlineMarkerList = '!"*_&[:<>`~\\';

    #
    # ~
    #

    /**
     * Renders inline Markdown.
     *
     * Repeatedly finds the next marker character, offers the text from that
     * marker onwards to each inline type registered for it, and emits the
     * first result; a marker nobody claims is emitted as plain text.
     *
     * @param string   $text
     * @param string[] $nonNestables Inline types that must not be recognised
     *                               in this context; passed on to child elements.
     *
     * @return string
     */
    public function line($text, $nonNestables=array())
    {
        $markup = '';

        # $excerpt is based on the first occurrence of a marker

        while ($excerpt = strpbrk($text, $this->inlineMarkerList))
        {
            $marker = $excerpt[0];

            $markerPosition = strpos($text, $marker);

            $Excerpt = array('text' => $excerpt, 'context' => $text);

            foreach ($this->InlineTypes[$marker] as $inlineType)
            {
                # check to see if the current inline type is nestable in the current context

                if ( ! empty($nonNestables) and in_array($inlineType, $nonNestables, true))
                {
                    continue;
                }

                $Inline = $this->{'inline'.$inlineType}($Excerpt);

                if ( ! isset($Inline))
                {
                    continue;
                }

                # makes sure that the inline belongs to "our" marker

                if (isset($Inline['position']) and $Inline['position'] > $markerPosition)
                {
                    continue;
                }

                # sets a default inline position

                if ( ! isset($Inline['position']))
                {
                    $Inline['position'] = $markerPosition;
                }

                # cause the new element to 'inherit' our non nestables

                foreach ($nonNestables as $non_nestable)
                {
                    $Inline['element']['nonNestables'][] = $non_nestable;
                }

                # the text that comes before the inline
                $unmarkedText = substr($text, 0, $Inline['position']);

                # compile the unmarked text
                $markup .= $this->unmarkedText($unmarkedText);

                # compile the inline
                $markup .= isset($Inline['markup']) ? $Inline['markup'] : $this->element($Inline['element']);

                # remove the examined text
                $text = substr($text, $Inline['position'] + $Inline['extent']);

                continue 2;
            }

            # the marker does not belong to an inline

            $unmarkedText = substr($text, 0, $markerPosition + 1);

            $markup .= $this->unmarkedText($unmarkedText);

            $text = substr($text, $markerPosition + 1);
        }

        $markup .= $this->unmarkedText($text);

        return $markup;
    }

    #
    # ~
    #

    /**
     * Recognises a code span delimited by equal-length runs of the marker.
     * Line breaks inside the span are collapsed to single spaces.
     *
     * @param array $Excerpt Keys 'text' (from the marker on) and 'context'.
     *
     * @return array|null
     */
    protected function inlineCode($Excerpt)
    {
        $marker = $Excerpt['text'][0];

        if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches))
        {
            $text = $matches[2];
            $text = preg_replace("/[ ]*\n/", ' ', $text);

            return array(
                'extent' => strlen($matches[0]),
                'element' => array(
                    'name' => 'code',
                    'text' => $text,
                ),
            );
        }
    }

    /**
     * Recognises "<user@host>" / "<mailto:user@host>" and links it,
     * prefixing "mailto:" to the href when absent.
     *
     * @param array $Excerpt
     *
     * @return array|null
     */
    protected function inlineEmailTag($Excerpt)
    {
        if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches))
        {
            $url = $matches[1];

            if ( ! isset($matches[2]))
            {
                $url = 'mailto:' . $url;
            }

            return array(
                'extent' => strlen($matches[0]),
                'element' => array(
                    'name' => 'a',
                    'text' => $matches[1],
                    'attributes' => array(
                        'href' => $url,
                    ),
                ),
            );
        }
    }

    /**
     * Recognises <strong> (doubled marker) and <em> (single marker) using
     * $StrongRegex / $EmRegex for the '*' or '_' marker.
     *
     * @param array $Excerpt
     *
     * @return array|null
     */
    protected function inlineEmphasis($Excerpt)
    {
        if ( ! isset($Excerpt['text'][1]))
        {
            return;
        }

        $marker = $Excerpt['text'][0];

        if ($Excerpt['text'][1] === $marker and preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches))
        {
            $emphasis = 'strong';
        }
        elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches))
        {
            $emphasis = 'em';
        }
        else
        {
            return;
        }

        return array(
            'extent' => strlen($matches[0]),
            'element' => array(
                'name' => $emphasis,
                'handler' => 'line',
                'text' => $matches[1],
            ),
        );
    }

    /**
     * Emits the character following a backslash literally when it is one of
     * $specialCharacters.
     *
     * @param array $Excerpt
     *
     * @return array|null
     */
    protected function inlineEscapeSequence($Excerpt)
    {
        if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters, true))
        {
            return array(
                'markup' => $Excerpt['text'][1],
                'extent' => 2,
            );
        }
    }

    /**
     * Recognises "![alt](src)" by parsing the part after '!' as a link and
     * converting the result into an <img>.
     *
     * @param array $Excerpt
     *
     * @return array|null
     */
    protected function inlineImage($Excerpt)
    {
        if ( ! isset($Excerpt['text'][1]) or $Excerpt['text'][1] !== '[')
        {
            return;
        }

        $Excerpt['text']= substr($Excerpt['text'], 1);

        $Link = $this->inlineLink($Excerpt);

        if ($Link === null)
        {
            return;
        }

        $Inline = array(
            'extent' => $Link['extent'] + 1, # + 1 for the '!'
            'element' => array(
                'name' => 'img',
                'attributes' => array(
                    'src' => $Link['element']['attributes']['href'],
                    'alt' => $Link['element']['text'],
                ),
            ),
        );

        # carry over the remaining link attributes (e.g. title)
        $Inline['element']['attributes'] += $Link['element']['attributes'];

        unset($Inline['element']['attributes']['href']);

        return $Inline;
    }

    /**
     * Recognises inline links "[text](url "title")" and reference links
     * "[text][id]", "[text][]" and "[text]".
     *
     * The bracketed text must start at the beginning of the excerpt (i.e. at
     * the '[' marker being examined); a later bracket pair is left for a
     * later marker so that no preceding text is swallowed.
     *
     * @param array $Excerpt
     *
     * @return array|null Null when there is no link at the marker or the
     *                    referenced definition is unknown.
     */
    protected function inlineLink($Excerpt)
    {
        $Element = array(
            'name' => 'a',
            'handler' => 'line',
            'nonNestables' => array('Url', 'Link'),
            'text' => null,
            'attributes' => array(
                'href' => null,
                'title' => null,
            ),
        );

        $extent = 0;

        $remainder = $Excerpt['text'];

        # group 1 is the whole balanced "[...]" (recursed into via (?1) for
        # nested brackets), group 2 is the text between the outer brackets
        if (preg_match('/^(\[((?:[^][]++|(?1))*+)\])/', $remainder, $matches))
        {
            $Element['text'] = $matches[2];

            $extent += strlen($matches[0]);

            $remainder = substr($remainder, $extent);
        }
        else
        {
            return;
        }

        if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*"|\'[^\']*\'))?\s*[)]/', $remainder, $matches))
        {
            $Element['attributes']['href'] = $matches[1];

            if (isset($matches[2]))
            {
                # strip the surrounding quotes
                $Element['attributes']['title'] = substr($matches[2], 1, - 1);
            }

            $extent += strlen($matches[0]);
        }
        else
        {
            if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches))
            {
                $definition = strlen($matches[1]) ? $matches[1] : $Element['text'];
                $definition = strtolower($definition);

                $extent += strlen($matches[0]);
            }
            else
            {
                $definition = strtolower($Element['text']);
            }

            if ( ! isset($this->DefinitionData['Reference'][$definition]))
            {
                return;
            }

            $Definition = $this->DefinitionData['Reference'][$definition];

            $Element['attributes']['href'] = $Definition['url'];
            $Element['attributes']['title'] = $Definition['title'];
        }

        return array(
            'extent' => $extent,
            'element' => $Element,
        );
    }

    /**
     * Passes through inline raw HTML: closing tags, comments and opening /
     * self-closing tags. Disabled when markup is escaped or safe mode is on.
     *
     * @param array $Excerpt
     *
     * @return array|null
     */
    protected function inlineMarkup($Excerpt)
    {
        # a '>' somewhere after the '<' also guarantees that index 1 exists
        if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false)
        {
            return;
        }

        if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*[ ]*>/s', $Excerpt['text'], $matches))
        {
            return array(
                'markup' => $matches[0],
                'extent' => strlen($matches[0]),
            );
        }

        if ($Excerpt['text'][1] === '!' and preg_match('/^<!---?[^>-](?:-?[^-])*-->/s', $Excerpt['text'], $matches))
        {
            return array(
                'markup' => $matches[0],
                'extent' => strlen($matches[0]),
            );
        }

        if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches))
        {
            return array(
                'markup' => $matches[0],
                'extent' => strlen($matches[0]),
            );
        }
    }

    /**
     * Escapes '<', '>' and '"' as entities, and '&' unless it already starts
     * an entity.
     *
     * @param array $Excerpt
     *
     * @return array|null
     */
    protected function inlineSpecialCharacter($Excerpt)
    {
        if ($Excerpt['text'][0] === '&' and ! preg_match('/^&#?\w+;/', $Excerpt['text']))
        {
            return array(
                'markup' => '&amp;',
                'extent' => 1,
            );
        }

        $SpecialCharacter = array('>' => 'gt', '<' => 'lt', '"' => 'quot');

        if (isset($SpecialCharacter[$Excerpt['text'][0]]))
        {
            return array(
                'markup' => '&'.$SpecialCharacter[$Excerpt['text'][0]].';',
                'extent' => 1,
            );
        }
    }

    /**
     * Recognises "~~text~~" as <del>.
     *
     * @param array $Excerpt
     *
     * @return array|null
     */
    protected function inlineStrikethrough($Excerpt)
    {
        if ( ! isset($Excerpt['text'][1]))
        {
            return;
        }

        if ($Excerpt['text'][1] === '~' and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $Excerpt['text'], $matches))
        {
            return array(
                'extent' => strlen($matches[0]),
                'element' => array(
                    'name' => 'del',
                    'text' => $matches[1],
                    'handler' => 'line',
                ),
            );
        }
    }

    /**
     * Auto-links a bare http(s) URL. Triggered by the ':' marker, so the URL
     * is searched for in the whole context and its start is reported through
     * 'position'.
     *
     * @param array $Excerpt
     *
     * @return array|null
     */
    protected function inlineUrl($Excerpt)
    {
        # cheap pre-check: the excerpt must look like ":/..."
        if ($this->urlsLinked !== true or ! isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/')
        {
            return;
        }

        if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE))
        {
            $url = $matches[0][0];

            $Inline = array(
                'extent' => strlen($matches[0][0]),
                'position' => $matches[0][1],
                'element' => array(
                    'name' => 'a',
                    'text' => $url,
                    'attributes' => array(
                        'href' => $url,
                    ),
                ),
            );

            return $Inline;
        }
    }

    /**
     * Recognises "<scheme://...>" and links it.
     *
     * @param array $Excerpt
     *
     * @return array|null
     */
    protected function inlineUrlTag($Excerpt)
    {
        if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches))
        {
            $url = $matches[1];

            return array(
                'extent' => strlen($matches[0]),
                'element' => array(
                    'name' => 'a',
                    'text' => $url,
                    'attributes' => array(
                        'href' => $url,
                    ),
                ),
            );
        }
    }

    # ~

    /**
     * Converts line breaks in plain inline text: every break when breaks are
     * enabled, otherwise only breaks preceded by 2+ spaces or a backslash.
     *
     * @param string $text
     *
     * @return string
     */
    protected function unmarkedText($text)
    {
        if ($this->breaksEnabled)
        {
            $text = preg_replace('/[ ]*\n/', "<br />\n", $text);
        }
        else
        {
            $text = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "<br />\n", $text);
            $text = str_replace(" \n", "\n", $text);
        }

        return $text;
    }

    #
    # Handlers
    #

    /**
     * Renders one element array to HTML.
     *
     * Recognised keys: 'name', 'attributes' (null values are skipped,
     * others escaped), 'text' or 'rawHtml', 'handler' (method that renders
     * the text), 'nonNestables', 'allowRawHtmlInSafeMode'. Without a
     * handler, text is escaped unless it came from 'rawHtml' and raw HTML is
     * permitted. An element without text is rendered self-closing.
     *
     * @param array $Element
     *
     * @return string
     */
    protected function element(array $Element)
    {
        if ($this->safeMode)
        {
            $Element = $this->sanitiseElement($Element);
        }

        $markup = '<'.$Element['name'];

        if (isset($Element['attributes']))
        {
            foreach ($Element['attributes'] as $name => $value)
            {
                if ($value === null)
                {
                    continue;
                }

                $markup .= ' '.$name.'="'.self::escape($value).'"';
            }
        }

        $permitRawHtml = false;

        if (isset($Element['text']))
        {
            $text = $Element['text'];
        }
        // very strongly consider an alternative if you're writing an
        // extension
        elseif (isset($Element['rawHtml']))
        {
            $text = $Element['rawHtml'];
            $allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode'];
            $permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode;
        }

        if (isset($text))
        {
            $markup .= '>';

            if (!isset($Element['nonNestables']))
            {
                $Element['nonNestables'] = array();
            }

            if (isset($Element['handler']))
            {
                $markup .= $this->{$Element['handler']}($text, $Element['nonNestables']);
            }
            elseif (!$permitRawHtml)
            {
                $markup .= self::escape($text, true);
            }
            else
            {
                $markup .= $text;
            }

            $markup .= '</'.$Element['name'].'>';
        }
        else
        {
            $markup .= ' />';
        }

        return $markup;
    }

    /**
     * Renders a list of elements, each preceded by "\n", followed by "\n".
     *
     * @param array[] $Elements
     *
     * @return string
     */
    protected function elements(array $Elements)
    {
        $markup = '';

        foreach ($Elements as $Element)
        {
            $markup .= "\n" . $this->element($Element);
        }

        $markup .= "\n";

        return $markup;
    }

    # ~

    /**
     * Renders the lines of a list item. For an item without empty lines
     * whose markup starts with a paragraph, the first <p>...</p> wrapper is
     * removed.
     *
     * @param string[] $lines
     *
     * @return string
     */
    protected function li($lines)
    {
        $markup = $this->lines($lines);

        $trimmedMarkup = trim($markup);

        if ( ! in_array('', $lines, true) and substr($trimmedMarkup, 0, 3) === '<p>')
        {
            $markup = $trimmedMarkup;
            $markup = substr($markup, 3);

            $position = strpos($markup, "</p>");

            $markup = substr_replace($markup, '', $position, 4);
        }

        return $markup;
    }

    #
    # Deprecated Methods
    #

    /**
     * Alias of text().
     *
     * @deprecated Use text() instead.
     *
     * @param string $text
     *
     * @return string
     */
    public function parse($text)
    {
        $markup = $this->text($text);

        return $markup;
    }

    #
    # Sanitisation (safe mode)
    #

    /**
     * Makes an element safe to render: filters the URL attribute of <a> and
     * <img>, and drops attributes whose name is malformed or starts with "on".
     *
     * @param array $Element
     *
     * @return array
     */
    protected function sanitiseElement(array $Element)
    {
        static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/';
        static $safeUrlNameToAtt = array(
            'a' => 'href',
            'img' => 'src',
        );

        if (isset($safeUrlNameToAtt[$Element['name']]))
        {
            $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]);
        }

        if ( ! empty($Element['attributes']))
        {
            foreach ($Element['attributes'] as $att => $val)
            {
                # filter out badly parsed attribute
                if ( ! preg_match($goodAttribute, $att))
                {
                    unset($Element['attributes'][$att]);
                }
                # dump onevent attribute
                elseif (self::striAtStart($att, 'on'))
                {
                    unset($Element['attributes'][$att]);
                }
            }
        }

        return $Element;
    }

    /**
     * Leaves a URL attribute untouched when it starts with a whitelisted
     * prefix; otherwise replaces every ':' with '%3A'.
     *
     * @param array  $Element
     * @param string $attribute Attribute name holding the URL.
     *
     * @return array
     */
    protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
    {
        foreach ($this->safeLinksWhitelist as $scheme)
        {
            if (self::striAtStart($Element['attributes'][$attribute], $scheme))
            {
                return $Element;
            }
        }

        $Element['attributes'][$attribute] = str_replace(':', '%3A', $Element['attributes'][$attribute]);

        return $Element;
    }

    #
    # Static Methods
    #

    /**
     * HTML-escapes text as UTF-8.
     *
     * @param string $text
     * @param bool   $allowQuotes When true, quotes are left unescaped.
     *
     * @return string
     */
    protected static function escape($text, $allowQuotes = false)
    {
        return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8');
    }

    /**
     * Case-insensitive "starts with".
     *
     * @param string $string
     * @param string $needle
     *
     * @return bool
     */
    protected static function striAtStart($string, $needle)
    {
        $len = strlen($needle);

        if ($len > strlen($string))
        {
            return false;
        }
        else
        {
            return strtolower(substr($string, 0, $len)) === strtolower($needle);
        }
    }

    /**
     * Returns the instance registered under $name, creating it with
     * `new static()` on first use.
     *
     * @param string $name
     *
     * @return static
     */
    public static function instance($name = 'default')
    {
        if (isset(self::$instances[$name]))
        {
            return self::$instances[$name];
        }

        $instance = new static();

        self::$instances[$name] = $instance;

        return $instance;
    }

    /** @var array<string, Parsedown> Instances created by instance(), by name. */
    private static $instances = array();

    #
    # Fields
    #

    /** @var array|null Definitions collected while parsing; reset by text(). */
    protected $DefinitionData;

    #
    # Read-Only

    /** @var string[] Characters that may be backslash-escaped. */
    protected $specialCharacters = array(
        '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|',
    );

    /** @var array<string, string> Strong-emphasis pattern per marker. */
    protected $StrongRegex = array(
        '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s',
        '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us',
    );

    /** @var array<string, string> Emphasis pattern per marker. */
    protected $EmRegex = array(
        '*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
        '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us',
    );

    /** @var string Pattern fragment matching one HTML attribute. */
    protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?';

    /** @var string[] Lower-case names of elements without a closing tag. */
    protected $voidElements = array(
        'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source',
    );

    /** @var string[] Lower-case names of elements that never start a raw HTML block. */
    protected $textLevelElements = array(
        'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont',
        'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing',
        'i', 'rp', 'del', 'code', 'strike', 'marquee',
        'q', 'rt', 'ins', 'font', 'strong',
        's', 'tt', 'kbd', 'mark',
        'u', 'xm', 'sub', 'nobr',
        'sup', 'ruby',
        'var', 'span',
        'wbr', 'time',
    );
}