class.WikiParser.php

Go to the documentation of this file.
00001 <?php
00002   /*
00003    This  file is part  of WikiConverter.   WikiConverter is  a program
00004    that  converts   text/wiki  into   other  formats  (like   html  or
00005    xml/docbook).
00006 
00007    Copyright (c) 2005 Dashamir Hoxha, dhoxha@inima.al
00008 
00009    WikiConverter  is free  software;  you can  redistribute it  and/or
00010    modify  it under the  terms of  the GNU  General Public  License as
00011    published by the Free Software  Foundation; either version 2 of the
00012    License, or (at your option) any later version.
00013 
00014    WikiConverter is  distributed in the  hope that it will  be useful,
00015    but  WITHOUT ANY  WARRANTY; without  even the  implied  warranty of
00016    MERCHANTABILITY or  FITNESS FOR A PARTICULAR PURPOSE.   See the GNU
00017    General Public License for more details.
00018 
00019    You should have  received a copy of the  GNU General Public License
00020    along  with  WikiConverter; if  not,  write  to  the Free  Software
00021    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00022    USA
00023   */
00024 
/**
 * Parses text/wiki markup, line by line, into a tree of templates.
 *
 * Every recognised construct (text, paragraph, block, list, list item)
 * becomes a template object (Text, Paragraph, Block, InfoBlock, Listing,
 * ListItem -- all defined outside this file).  A template's `contents`
 * array receives the raw lines that belong to it plus one placeholder line
 * of the form "&&<id>;;" for each nested template.  Nested templates are
 * also registered in the global array $tpl_collection under their id, and
 * the root template is registered there under the key 'Text'.
 *
 * Markup recognised by this class:
 *  - a block starts with a line "--<type> <title>" (see block_start() for
 *    the accepted types) and ends with a line "----";
 *  - a list item starts with optional spaces followed by one of the
 *    bullets "*", "1.", "a.", "i.", "A.", "I.";
 *  - a line containing only "/" closes the current list;
 *  - an empty line closes the current paragraph.
 */
class WikiParser
{
  /** Lines of the document being parsed. */
  public $arr_lines;

  /** Index in $arr_lines of the line currently being examined. */
  public $idx;

  /**
   * Creates an idle parser and resets the global template collection.
   */
  public function __construct()
  {
    $this->arr_lines = array();
    $this->idx = -1;

    global $tpl_collection;
    $tpl_collection = array();
  }

  /**
   * Legacy PHP 4 style constructor name, kept so that explicit calls such
   * as parent::WikiParser() keep working.  PHP 8 no longer treats it as a
   * constructor, which is why __construct() now does the real work.
   *
   * @deprecated use __construct()
   */
  public function WikiParser()
  {
    $this->__construct();
  }

  /**
   * Parses the content of a file.
   *
   * @param string $filename path of the wiki file
   * @return mixed the root Text template; when the file is missing or
   *               cannot be read a message is printed and the UNDEFINED
   *               sentinel is returned instead
   */
  public function parse_file($filename)
  {
    if (!file_exists($filename))
    {
      print "'$filename' not found\n";
      return $this->_undefined();
    }

    $lines = file($filename);
    if ($lines === false)
    {
      //the file exists but could not be read
      print "'$filename' could not be read\n";
      return $this->_undefined();
    }

    return $this->parse_lines($lines);
  }

  /**
   * Parses wiki markup held in a string.
   *
   * The string is split on "\n" and the newline is put back at the end of
   * every piece, so the lines look like the ones returned by file().
   *
   * @param string $str wiki markup
   * @return object the root Text template
   */
  public function parse_string($str)
  {
    $lines = explode("\n", $str);
    foreach ($lines as $i => $line)
    {
      $lines[$i] = $line . "\n";
    }

    return $this->parse_lines($lines);
  }

  /**
   * Parses an array of lines, starting from the first one.
   *
   * @param array $arr_lines lines of wiki markup
   * @return object the root Text template, which is also stored in
   *                $tpl_collection['Text']
   */
  public function parse_lines($arr_lines)
  {
    $this->arr_lines = $arr_lines;
    $this->idx = 0;
    $tpl = $this->parse_text();

    global $tpl_collection;
    $tpl_collection['Text'] = $tpl;

    return $tpl;
  }

  /**
   * Parses all the remaining lines as a sequence of blocks, lists and
   * paragraphs; empty lines between them are skipped.
   *
   * @return object a Text template
   */
  public function parse_text()
  {
    $text = new Text;
    while (!$this->end_of_lines())
    {
      if ($this->block_start())
      {
        $this->link_template($text, $this->parse_block());
      }
      else if ($this->list_start())
      {
        $this->link_template($text, $this->parse_list());
      }
      else if ($this->nonempty_line())
      {
        $this->link_template($text, $this->parse_paragraph());
      }
      else
      {
        //the line is empty, skip it
        $this->next_line();
      }
    }
    return $text;
  }

  /**
   * Parses a paragraph: everything up to (not including) the next empty
   * line.  Blocks and lists met on the way are nested in the paragraph.
   *
   * @return object a Paragraph template
   */
  public function parse_paragraph()
  {
    $paragraph = new Paragraph;
    while (!$this->end_of_lines())
    {
      if ($this->block_start())
      {
        $this->link_template($paragraph, $this->parse_block());
      }
      else if ($this->list_start())
      {
        $this->link_template($paragraph, $this->parse_list());
      }
      else if ($this->nonempty_line())
      {
        $this->append_line($paragraph);
        $this->next_line();
      }
      else
      {
        //an empty line ends the paragraph; it is left for the caller
        break;
      }
    }
    return $paragraph;
  }

  /**
   * Parses a block whose start line is the current line.
   *
   * @return object a Block template built from the type and title found
   *                on the start line
   */
  public function parse_block()
  {
    list($type, $title) = $this->_read_block_header();
    return $this->_fill_block(new Block($type, $title));
  }

  /**
   * Same as parse_block(), except that a block of type 'info' is
   * represented by an InfoBlock instead of a Block.
   *
   * @return object an InfoBlock or a Block template
   */
  public function parse_info()
  {
    list($type, $title) = $this->_read_block_header();
    $block = ($type === 'info') ? new InfoBlock() : new Block($type, $title);
    return $this->_fill_block($block);
  }

  /**
   * Extracts the type and the title from the current (block start) line.
   *
   * @return array array($type, $title); $type is null and $title is ''
   *               when the line does not look like "--<type> <title>"
   */
  private function _read_block_header()
  {
    $line = trim($this->get_current_line());
    //'s' and 'D' make '.' and '$' behave as they did in the POSIX
    //pattern that ereg() used to evaluate
    if (!preg_match('/^--([^ ]+)(.*)$/sD', $line, $regs))
    {
      return array(null, '');
    }
    return array($regs[1], trim($regs[2]));
  }

  /**
   * Skips the block start line, then collects lines into $block until the
   * block end line (which is consumed) or the end of the input.  Nested
   * blocks and lists are parsed recursively and linked to $block.
   *
   * @param object $block the template to fill
   * @return object the same template
   */
  private function _fill_block($block)
  {
    $this->next_line();  //skip the block start line
    while (!$this->end_of_lines())
    {
      if ($this->block_end())
      {
        $this->next_line();  //skip the block end line
        break;
      }

      if ($this->block_start())
      {
        $this->link_template($block, $this->parse_block());
      }
      else if ($this->list_start())
      {
        $this->link_template($block, $this->parse_list());
      }
      else
      {
        $this->append_line($block);
        $this->next_line();
      }
    }
    return $block;
  }

  /**
   * Parses a list whose first item is on the current line.
   *
   * @return object a Listing template created with the indentation and
   *                the bullet of the first item
   */
  public function parse_list()
  {
    $mark = $this->get_listitem_mark();
    $list = new Listing($this->get_indentation($mark),
                        $this->get_bullet($mark));
    while (!$this->end_of_lines())
    {
      if ($this->list_end($mark))
      {
        break;
      }

      if ($this->listitem_start($mark))
      {
        $this->link_template($list, $this->parse_listitem($mark));
      }
      else
      {
        //Neither branch would consume the line, so looping again would
        //never terminate; leave the line to the caller instead.
        break;
      }
    }
    return $list;
  }

  /**
   * Parses one list item: the rest of the current line after the mark,
   * plus the following lines until listitem_end() says the item is over.
   * Empty lines inside the item are dropped; nested blocks and more
   * indented lists are linked to the item.
   *
   * @param string $mark mark (indentation + bullet) of the enclosing list
   * @return object a ListItem template
   */
  public function parse_listitem($mark)
  {
    $listitem = new ListItem;

    //Store the text that follows the mark.  It is stored directly rather
    //than through append_line(), so that an item whose text happens to be
    //'UNDEFINED' is not mistaken for append_line()'s default value.
    $text = substr($this->get_current_line(), strlen($mark));
    $listitem->contents[] = (string) $text;
    $this->next_line();

    while (!$this->end_of_lines())
    {
      if ($this->listitem_end($mark))
      {
        break;
      }

      if ($this->block_start())
      {
        $this->link_template($listitem, $this->parse_block());
      }
      else if ($this->list_start($mark))
      {
        $this->link_template($listitem, $this->parse_list());
      }
      else if ($this->empty_line())
      {
        $this->next_line();  //skip empty lines
      }
      else
      {
        $this->append_line($listitem);
        $this->next_line();
      }
    }
    return $listitem;
  }

  /**
   * Returns the line at the current index, or '' when the index is
   * outside the array of lines.
   *
   * @return string
   */
  public function get_current_line()
  {
    if (!isset($this->arr_lines[$this->idx]))
    {
      return '';
    }
    return $this->arr_lines[$this->idx];
  }

  /**
   * Tells whether all the lines have been consumed.
   *
   * @return bool
   */
  public function end_of_lines()
  {
    return $this->idx >= count($this->arr_lines);
  }

  /** Moves to the next line. */
  public function next_line()
  {
    $this->idx++;
  }

  /**
   * Appends a line to the contents of a template.
   *
   * @param object $tpl  the template to append to
   * @param string $line the line to append; when omitted (or equal to the
   *                     string 'UNDEFINED') the current line is appended
   */
  public function append_line(&$tpl, $line ='UNDEFINED')
  {
    if ($line === 'UNDEFINED')
    {
      $line = $this->get_current_line();
    }
    $tpl->contents[] = $line;
  }

  /**
   * Nests $child in $parent: a placeholder line "&&<child id>;;" is
   * appended to the parent's contents and the child itself is registered
   * in the global template collection under its id.
   *
   * @param object $parent the enclosing template
   * @param object $child  the nested template
   */
  public function link_template(&$parent, $child)
  {
    $parent->contents[] = '&&'.$child->id.";;\n";

    global $tpl_collection;
    $tpl_collection[$child->id] = $child;
  }

  /**
   * True when the current line contains nothing but whitespace.
   *
   * @return bool
   */
  public function empty_line()
  {
    return trim($this->get_current_line()) === '';
  }

  /**
   * True when the current line contains something besides whitespace.
   *
   * @return bool
   */
  public function nonempty_line()
  {
    return trim($this->get_current_line()) !== '';
  }

  /**
   * True when the current line (trimmed) begins with "--" immediately
   * followed by one of the known block types.
   *
   * @return bool
   */
  public function block_start()
  {
    $line = trim($this->get_current_line());
    $pattern = '/^--(info|code|scr|screen|ll|fig|figure|xmp|example'
      . '|n|note|w|warning|c|caution|tip|imp|important)/';
    return preg_match($pattern, $line) === 1;
  }

  /**
   * True when the current line (trimmed) is the block end marker "----".
   *
   * @return bool
   */
  public function block_end()
  {
    return trim($this->get_current_line()) === '----';
  }

  /**
   * Returns the list item mark at the beginning of the current line:
   * the leading spaces together with the bullet that follows them.
   *
   * @return string the mark, or '' when the line does not start with one
   */
  public function get_listitem_mark()
  {
    $line = $this->get_current_line();
    if (!preg_match('/^( *(\*|1\.|a\.|i\.|A\.|I\.))/', $line, $regs))
    {
      return '';
    }
    return $regs[1];
  }

  /**
   * Returns the leading spaces of a list item mark.
   *
   * @param string $mark
   * @return string
   */
  public function get_indentation($mark)
  {
    $mark = (string) $mark;
    return (string) substr($mark, 0, strspn($mark, ' '));
  }

  /**
   * Returns the bullet of a list item mark, i.e. the run of non-space
   * characters at its end.
   *
   * @param string $mark
   * @return string
   */
  public function get_bullet($mark)
  {
    //always matches: at worst the empty string at the very end
    preg_match('/([^ ]*)$/D', (string) $mark, $regs);
    return $regs[1];
  }

  /**
   * Tells whether a new list starts at the current line.
   *
   * Marks are compared with ===.  A loose == would treat "1." and "  1."
   * as equal, because both are numeric strings, and a nested numbered
   * list would then be taken for another item of the enclosing list.
   *
   * @param string $mark mark of the enclosing list, '' when there is none
   * @return bool true when the line carries a mark and either there is no
   *              enclosing list or the mark is more indented than $mark
   */
  public function list_start($mark ='')
  {
    $new_mark = $this->get_listitem_mark();
    if ($new_mark === '')
    {
      //current line does not contain a listitem mark
      return false;
    }

    $mark = (string) $mark;
    if ($mark === '')
    {
      return true;
    }

    if ($new_mark === $mark)
    {
      //it is another item in the same list, not a new list
      return false;
    }

    //a more indented mark opens a new (nested) list
    return $this->_indent_width($new_mark) > $this->_indent_width($mark);
  }

  /**
   * Tells whether the list identified by $mark ends at the current line.
   *
   * An end-of-list marker "/" is consumed here (the current line is
   * advanced); a block end marker "----" is left in place.
   *
   * @param string $mark mark of the list being parsed
   * @return bool
   */
  public function list_end($mark)
  {
    $line = trim($this->get_current_line());
    if ($line === '/')
    {
      //end of list marker
      $this->next_line();
      return true;
    }

    if ($line === '----')
    {
      //end of block marker
      return true;
    }

    $new_mark = $this->get_listitem_mark();
    if ($new_mark === '' || $new_mark === (string) $mark)
    {
      //no mark at all, or another item of the same list
      return false;
    }

    //A different mark that is not more indented belongs to another
    //list, which denotes the end of the current one.
    return $this->_indent_width($new_mark) <= $this->_indent_width($mark);
  }

  /**
   * Tells whether the current line starts with a list item mark.
   *
   * @param string $mark mark of the list being parsed (not used)
   * @return bool
   */
  public function listitem_start($mark)
  {
    return $this->get_listitem_mark() !== '';
  }

  /**
   * Tells whether the list item being parsed ends at the current line.
   * Nothing is consumed.
   *
   * @param string $mark mark of the list the item belongs to
   * @return bool
   */
  public function listitem_end($mark)
  {
    $line = trim($this->get_current_line());
    if ($line === '/' || $line === '----')
    {
      //the end of the list or of the block also ends the item
      return true;
    }

    $new_mark = $this->get_listitem_mark();
    if ($new_mark === '')
    {
      //current line does not contain a listitem mark
      return false;
    }

    if ($new_mark === (string) $mark)
    {
      //it is a new item in the same list
      return true;
    }

    //A more indented mark is the first item of a nested list, so the
    //item goes on; any other mark belongs to another list.
    return $this->_indent_width($new_mark) <= $this->_indent_width($mark);
  }

  /**
   * Number of leading spaces of a list item mark.
   *
   * @param string $mark
   * @return int
   */
  private function _indent_width($mark)
  {
    return strlen($this->get_indentation($mark));
  }

  /**
   * Value returned by parse_file() on failure: the constant UNDEFINED
   * when something else has defined it, the string 'UNDEFINED' otherwise
   * (which is what the bare, undefined constant evaluated to before
   * PHP 8).
   *
   * @return mixed
   */
  private function _undefined()
  {
    return defined('UNDEFINED') ? constant('UNDEFINED') : 'UNDEFINED';
  }


  /*========================= DEBUG ==================================*/

  /**
   * Builds an HTML fragment with an index of all the templates in the
   * global collection, followed by the table rendering of each of them
   * (as produced by the template's render_to_html_table()).
   *
   * @return string HTML
   */
  public function template_list()
  {
    global $tpl_collection;

    $tpl_index = "<strong>List of Parsed Templates:</strong>\n";
    $tpl_index .= "<ul>\n";
    $tpl_list = '';
    foreach ($tpl_collection as $tpl)
    {
      $tpl_index .= "\t<li><a href='#$tpl->id'>$tpl->id</a></li>\n";
      $tpl_list .= $tpl->render_to_html_table();
    }
    $tpl_index .= "</ul>\n";

    $html = "<a name='top' id='top'></a>\n";
    $html .= "<div class='converter'>\n";
    $html .= $tpl_index;
    $html .= $tpl_list;
    $html .= "</div>\n";
    $html .= "<hr />\n";

    return $html;
  }

  /**
   * Builds an HTML fragment that shows, as an ASCII tree, how the
   * templates are nested, starting from $tpl_collection['Text'].
   * Meant for debugging the parser.
   *
   * @return string HTML
   */
  public function tpl_to_tree()
  {
    global $tpl_collection;
    $root = $tpl_collection['Text'];

    $tree = "<hr />\n";
    $tree .= "<a name='top' id='top'></a>\n";
    $tree .= "<pre class='converter'>\n";
    $tree .= "<strong>The tree structure of the templates:</strong>\n\n";
    $tree .= $this->to_tree($root, '');
    $tree .= "</pre>\n";
    return $tree;
  }

  /**
   * Renders a template and, recursively, its subtemplates as lines of an
   * ASCII tree.
   *
   * @param object $tpl    the template to render; its get_subtemplates()
   *                       is expected to return a list of template ids
   * @param string $indent prefix put in front of every line of this level
   * @return string
   */
  public function to_tree($tpl, $indent)
  {
    global $tpl_collection;

    $tree = $indent."|\n";
    $tree .= $indent."+--<a href='#".$tpl->id."'>".$tpl->id."</a>"
      . " (".$tpl->type.")\n";

    $arr_tpl_id = $tpl->get_subtemplates();
    for ($i = 0; $i < count($arr_tpl_id); $i++)
    {
      $child = $tpl_collection[$arr_tpl_id[$i]];
      $tree .= $this->to_tree($child, $indent."|  ");
    }
    return $tree;
  }
}
00555 ?>

Generated on Wed Jan 9 08:27:32 2008 for DokBookWiki by  doxygen 1.5.2