403Webshell
Server IP : 216.106.184.20  /  Your IP : 216.73.216.234
Web Server : LiteSpeed
System : Linux asmodeus.in-hell.com 5.14.0-570.58.1.el9_6.x86_64 #1 SMP PREEMPT_DYNAMIC Wed Oct 29 06:24:11 EDT 2025 x86_64
User : sekoaid1 ( 1891)
PHP Version : 7.3.33
Disable Function : NONE
MySQL : OFF  |  cURL : ON  |  WGET : ON  |  Perl : ON  |  Python : OFF  |  Sudo : OFF  |  Pkexec : OFF
Directory :  /home/sekoaid1/public_html/rdmts/application/third_party/dompdf/I18N/Arabic/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Command :


[ Back ]     

Current File : /home/sekoaid1/public_html/rdmts/application/third_party/dompdf/I18N/Arabic/AutoSummarize.php
<?php
/**
 * ----------------------------------------------------------------------
 *  
 * Copyright (c) 2006-2016 Khaled Al-Sham'aa.
 *  
 * http://www.ar-php.org
 *  
 * PHP Version 5 
 *  
 * ----------------------------------------------------------------------
 *  
 * LICENSE
 *
 * This program is open source product; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License (LGPL)
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version.
 *  
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *  
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/lgpl.txt>.
 *  
 * ----------------------------------------------------------------------
 *  
 * Class Name: Arabic Auto Summarize Class
 *  
 * Filename: AutoSummarize.php
 *  
 * Original Author(s): Khaled Al-Sham'aa <khaled@ar-php.org>
 *  
 * Purpose: Automatic keyphrase extraction to provide a quick mini-summary 
 *          for a long Arabic document.
 *           
 * ----------------------------------------------------------------------
 *  
 * Arabic Auto Summarize
 *
 * This class identifies the key points in an Arabic document for you to share with 
 * others or quickly scan. The class determines key points by analyzing an Arabic 
 * document and assigning a score to each sentence. Sentences that contain words 
 * used frequently in the document are given a higher score. You can then choose a 
 * percentage of the highest-scoring sentences to display in the summary. 
 * "ArAutoSummarize" class works best on well-structured documents such as reports, 
 * articles, and scientific papers.
 * 
 * "ArAutoSummarize" class cuts wordy copy to the bone by counting words and ranking 
 * sentences. First, "ArAutoSummarize" class identifies the most common words in the 
 * document and assigns a "score" to each word--the more frequently a word is used, 
 * the higher the score.
 * 
 * Then, it "averages" each sentence by adding the scores of its words and dividing 
 * the sum by the number of words in the sentence--the higher the average, the 
 * higher the rank of the sentence. "ArAutoSummarize" class can summarize texts to 
 * specific number of sentences or percentage of the original copy.
 * 
 * We use statistical approach, with some attention apparently paid to:
 * 
 * - Location: leading sentences of paragraph, title, introduction, and conclusion.
 * - Fixed phrases: in-text summaries.
 * - Frequencies of words, phrases, proper names
 * - Contextual material: query, title, headline, initial paragraph
 * 
 * The motivation for this class is the range of applications for key phrases:
 * 
 * - Mini-summary: Automatic key phrase extraction can provide a quick mini-summary 
 *   for a long document. For example, it could be a feature in a web sites; just 
 *   click the summarize button when browsing a long web page.
 * 
 * - Highlights: It can highlight key phrases in a long document, to facilitate 
 *   skimming the document.
 * 
 * - Author Assistance: Automatic key phrase extraction can help an author or editor 
 *   who wants to supply a list of key phrases for a document. For example, the 
 *   administrator of a web site might want to have a key phrase list at the top of 
 *   each web page. The automatically extracted phrases can be a starting point for 
 *   further manual refinement by the author or editor.
 * 
 * - Text Compression: On a device with limited display capacity or limited 
 *   bandwidth, key phrases can be a substitute for the full text. For example, an 
 *   email message could be reduced to a set of key phrases for display on a pager; 
 *   a web page could be reduced for display on a portable wireless web browser.
 * 
 * This list is not intended to be exhaustive, and there may be some overlap in 
 * the items.
 *
 * Example:
 * <code>
 * include('./I18N/Arabic.php');
 * $obj = new I18N_Arabic('AutoSummarize');
 * 
 * $file = 'Examples/Articles/Ajax.txt';
 * $r = 20;
 * 
 * // get contents of a file into a string
 * $fhandle = fopen($file, "r");
 * $c = fread($fhandle, filesize($file));
 * fclose($fhandle);
 * 
 * $k = $obj->getMetaKeywords($c, $r);
 * echo '<b><font color=#FFFF00>';
 * echo 'Keywords:</font></b>';
 * echo '<p dir="rtl" align="justify">';
 * echo $k . '</p>';
 * 
 * $s = $obj->doRateSummarize($c, $r);
 * echo '<b><font color=#FFFF00>';
 * echo 'Summary:</font></b>';
 * echo '<p dir="rtl" align="justify">';
 * echo $s . '</p>';
 * 
 * echo '<b><font color=#FFFF00>';
 * echo 'Full Text:</font></b>';
 * echo '<p><a class=ar_link target=_blank ';
 * echo 'href='.$file.'>Source File</a></p>';
 * </code>
 *             
 * @category  I18N 
 * @package   I18N_Arabic
 * @author    Khaled Al-Sham'aa <khaled@ar-php.org>
 * @copyright 2006-2016 Khaled Al-Sham'aa
 *    
 * @license   LGPL <http://www.gnu.org/licenses/lgpl.txt>
 * @link      http://www.ar-php.org 
 */

/**
 * This PHP class do automatic keyphrase extraction to provide a quick 
 * mini-summary for a long Arabic document
 *  
 * @category  I18N 
 * @package   I18N_Arabic
 * @author    Khaled Al-Sham'aa <khaled@ar-php.org>
 * @copyright 2006-2016 Khaled Al-Sham'aa
 *    
 * @license   LGPL <http://www.gnu.org/licenses/lgpl.txt>
 * @link      http://www.ar-php.org 
 */ 
class I18N_Arabic_AutoSummarize
{
    private $_normalizeAlef       = array('أ','إ','آ');
    private $_normalizeDiacritics = array('َ','ً','ُ','ٌ','ِ','ٍ','ْ','ّ');

    private $_commonChars = array('ة','ه','ي','ن','و','ت','ل','ا','س','م',
                                   'e', 't', 'a', 'o', 'i', 'n', 's');

    private $_separators = array('.',"\n",'،','؛','(','[','{',')',']','}',',',';');

    private $_commonWords    = array();
    private $_importantWords = array();

    /**
     * Loads initialize values
     *
     * @ignore
     */         
    public function __construct()
    {
        // This common words used in cleanCommon method
        $words    = file(dirname(__FILE__).'/data/ar-stopwords.txt');
        $en_words = file(dirname(__FILE__).'/data/en-stopwords.txt');

        $words = array_merge($words, $en_words);
        $words = array_map('trim', $words);
        
        $this->_commonWords = $words;
        
        // This important words used in rankSentences method
        $words = file(dirname(__FILE__).'/data/important-words.txt');
        $words = array_map('trim', $words);

        $this->_importantWords = $words;
    }
    
    /**
     * Load enhanced Arabic stop words list 
     * 
     * @return void          
     */         
    public function loadExtra()
    {
        $extra_words = file(dirname(__FILE__).'/data/ar-extra-stopwords.txt');
        $extra_words = array_map('trim', $extra_words);

        $this->_commonWords = array_merge($this->_commonWords, $extra_words);
    }

    /**
     * Core summarize function that implement required steps in the algorithm
     *                        
     * @param string  $str      Input Arabic document as a string
     * @param string  $keywords List of keywords higlited by search process
     * @param integer $int      Sentences value (see $mode effect also)
     * @param string  $mode     Mode of sentences count [number|rate]
     * @param string  $output   Output mode [summary|highlight]
     * @param string  $style    Name of the CSS class you would like to apply
     *                    
     * @return string Output summary requested
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    protected function summarize($str, $keywords, $int, $mode, $output, $style=null)
    {
        preg_match_all(
            "/[^\.\n\،\؛\,\;](.+?)[\.\n\،\؛\,\;]/u", 
            $str, 
            $sentences
        );
        $_sentences = $sentences[0];

        if ($mode == 'rate') {
            $str            = preg_replace("/\s{2,}/u", ' ', $str);
            $totalChars     = mb_strlen($str);
            $totalSentences = count($_sentences);

            $maxChars = round($int * $totalChars / 100);
            $int      = round($int * $totalSentences / 100);
        } else {
            $maxChars = 99999;
        }
        
        $summary = '';

        $str           = strip_tags($str);
        $normalizedStr = $this->doNormalize($str);
        $cleanedStr    = $this->cleanCommon($normalizedStr);
        $stemStr       = $this->draftStem($cleanedStr);
        
        preg_match_all(
            "/[^\.\n\،\؛\,\;](.+?)[\.\n\،\؛\,\;]/u", 
            $stemStr, 
            $sentences
        );
        $_stemmedSentences = $sentences[0];

        $wordRanks = $this->rankWords($stemStr);
        
        if ($keywords) {
            $keywords = $this->doNormalize($keywords);
            $keywords = $this->draftStem($keywords);
            $words    = explode(' ', $keywords);
            
            foreach ($words as $word) {
                $wordRanks[$word] = 1000;
            }
        }
        
        $sentencesRanks = $this->rankSentences(
            $_sentences, 
            $_stemmedSentences, 
            $wordRanks
        );
        
        list($sentences, $ranks) = $sentencesRanks;

        $minRank = $this->minAcceptedRank($sentences, $ranks, $int, $maxChars);

        $totalSentences = count($ranks);
        
        for ($i = 0; $i < $totalSentences; $i++) {
            if ($sentencesRanks[1][$i] >= $minRank) {
                if ($output == 'summary') {
                    $summary .= ' '.$sentencesRanks[0][$i];
                } else {
                    $summary .= '<span class="' . $style .'">' . 
                                $sentencesRanks[0][$i] . '</span>';
                }
            } else {
                if ($output == 'highlight') {
                    $summary .= $sentencesRanks[0][$i];
                }
            }
        }
        
        if ($output == 'highlight') {
            $summary = str_replace("\n", '<br />', $summary);
        }
        
        return $summary;
    }
          
    /**
     * Summarize input Arabic string (document content) into specific number of 
     * sentences in the output
     *                        
     * @param string  $str      Input Arabic document as a string
     * @param integer $int      Number of sentences required in output summary
     * @param string  $keywords List of keywords higlited by search process
     *                    
     * @return string Output summary requested
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    public function doSummarize($str, $int, $keywords)
    {
        $summary = $this->summarize(
            $str, $keywords, $int, 'number', 'summary', $style
        );
        
        return $summary;
    }
    
    /**
     * Summarize percentage of the input Arabic string (document content) into output
     *      
     * @param string  $str      Input Arabic document as a string
     * @param integer $rate     Rate of output summary sentence number as 
     *                          percentage of the input Arabic string 
     *                          (document content)
     * @param string  $keywords List of keywords higlited by search process
     *                    
     * @return string Output summary requested
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    public function doRateSummarize($str, $rate, $keywords)
    {
        $summary = $this->summarize(
            $str, $keywords, $rate, 'rate', 'summary', $style
        );
        
        return $summary;
    }
    
    /**
     * Highlight key sentences (summary) of the input string (document content) 
     * using CSS and send the result back as an output
     *                             
     * @param string  $str      Input Arabic document as a string
     * @param integer $int      Number of key sentences required to be 
     *                          highlighted in the input string 
     *                          (document content)
     * @param string  $keywords List of keywords higlited by search process
     * @param string  $style    Name of the CSS class you would like to apply
     *                    
     * @return string Output highlighted key sentences summary (using CSS)
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    public function highlightSummary($str, $int, $keywords, $style)
    {
        $summary = $this->summarize(
            $str, $keywords, $int, 'number', 'highlight', $style
        );
        
        return $summary;
    }
    
    /**
     * Highlight key sentences (summary) as percentage of the input string 
     * (document content) using CSS and send the result back as an output.
     *                    
     * @param string  $str      Input Arabic document as a string
     * @param integer $rate     Rate of highlighted key sentences summary 
     *                          number as percentage of the input Arabic 
     *                          string (document content)
     * @param string  $keywords List of keywords higlited by search process
     * @param string  $style    Name of the CSS class you would like to apply
     *                    
     * @return string Output highlighted key sentences summary (using CSS)
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    public function highlightRateSummary($str, $rate, $keywords, $style)
    {
        $summary = $this->summarize(
            $str, $keywords, $rate, 'rate', 'highlight', $style
        );
        
        return $summary;
    }
    
    /**
     * Extract keywords from a given Arabic string (document content)
     *      
     * @param string  $str Input Arabic document as a string
     * @param integer $int Number of keywords required to be extracting 
     *                     from input string (document content)
     *                    
     * @return string List of the keywords extracting from input Arabic string
     *               (document content)
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    public function getMetaKeywords($str, $int)
    {
        $patterns     = array();
        $replacements = array();
        $metaKeywords = '';
        
        array_push($patterns, '/\.|\n|\،|\؛|\(|\[|\{|\)|\]|\}|\,|\;/u');
        array_push($replacements, ' ');
        $str = preg_replace($patterns, $replacements, $str);
        
        $normalizedStr = $this->doNormalize($str);
        $cleanedStr    = $this->cleanCommon($normalizedStr);
        
        $str = preg_replace('/(\W)ال(\w{3,})/u', '\\1\\2', $cleanedStr);
        $str = preg_replace('/(\W)وال(\w{3,})/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})هما(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})كما(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})تين(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})هم(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})هن(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})ها(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})نا(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})ني(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})كم(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})تم(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})كن(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})ات(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})ين(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})تن(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})ون(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})ان(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})تا(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})وا(\W)/u', '\\1\\2', $str);
        $str = preg_replace('/(\w{3,})ة(\W)/u', '\\1\\2', $str);

        $stemStr = preg_replace('/(\W)\w{1,3}(\W)/u', '\\2', $str);
        
        $wordRanks = $this->rankWords($stemStr);
        
        arsort($wordRanks, SORT_NUMERIC);
        
        $i = 1;
        foreach ($wordRanks as $key => $value) {
            if ($this->acceptedWord($key)) {
                $metaKeywords .= $key . '، ';
                $i++;
            }
            if ($i > $int) {
                break;
            }
        }
        
        $metaKeywords = mb_substr($metaKeywords, 0, -2);
        
        return $metaKeywords;
    }
    
    /**
     * Normalized Arabic document
     *      
     * @param string $str Input Arabic document as a string
     *      
     * @return string Normalized Arabic document
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    protected function doNormalize($str)
    {
        $str = str_replace($this->_normalizeAlef, 'ا', $str);
        $str = str_replace($this->_normalizeDiacritics, '', $str);
        $str = strtr(
            $str, 
            'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 
            'abcdefghijklmnopqrstuvwxyz'
        );

        return $str;
    }
    
    /**
     * Extracting common Arabic words (roughly) 
     * from input Arabic string (document content)
     *                        
     * @param string $str Input normalized Arabic document as a string
     *      
     * @return string Arabic document as a string free of common words (roughly)
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    public function cleanCommon($str)
    {
        $str = str_replace($this->_commonWords, ' ', $str);
        
        return $str;
    }
    
    /**
     * Remove less significant Arabic letter from given string (document content). 
     * Please note that output will not be human readable.
     *                      
     * @param string $str Input Arabic document as a string
     *      
     * @return string Output string after removing less significant Arabic letter
     *                (not human readable output)
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    protected function draftStem($str)
    {
        $str = str_replace($this->_commonChars, '', $str);
        return $str;
    }
    
    /**
     * Ranks words in a given Arabic string (document content). That rank refers 
     * to the frequency of that word appears in that given document.
     *                      
     * @param string $str Input Arabic document as a string
     *      
     * @return hash Associated array where document words referred by index and
     *              those words ranks referred by values of those array items.
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    protected function rankWords($str)
    {
        $wordsRanks = array();
        
        $str   = str_replace($this->_separators, ' ', $str);
        $words = preg_split("/[\s,]+/u", $str);
        
        foreach ($words as $word) {
            if (isset($wordsRanks[$word])) {
                $wordsRanks[$word]++;
            } else {
                $wordsRanks[$word] = 1;
            }
        }

        foreach ($wordsRanks as $wordRank => $total) {
            if (mb_substr($wordRank, 0, 1) == 'و') {
                $subWordRank = mb_substr($wordRank, 1, mb_strlen($wordRank) - 1);
                if (isset($wordsRanks[$subWordRank])) {
                    unset($wordsRanks[$wordRank]);
                    $wordsRanks[$subWordRank] += $total;
                }
            }
        }

        return $wordsRanks;
    }
    
    /**
     * Ranks sentences in a given Arabic string (document content).
     *      
     * @param array $sentences        Sentences of the input Arabic document 
     *                                as an array
     * @param array $stemmedSentences Stemmed sentences of the input Arabic 
     *                                document as an array
     * @param array $arr              Words ranks array (word as an index and 
     *                                value refer to the word frequency)
     *                         
     * @return array Two dimension array, first item is an array of document
     *               sentences, second item is an array of ranks of document
     *               sentences.
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    protected function rankSentences($sentences, $stemmedSentences, $arr)
    {
        $sentenceArr = array();
        $rankArr     = array();
        
        $max = count($sentences);
        
        for ($i = 0; $i < $max; $i++) {
            $sentence = $sentences[$i];

            $w     = 0;
            $first = mb_substr($sentence, 0, 1);
            $last  = mb_substr($sentence, -1, 1);
                    
            if ($first == "\n") {
                $w += 3;
            } elseif (in_array($first, $this->_separators)) {
                $w += 2;
            } else {
                $w += 1;
            }
                    
            if ($last == "\n") {
                $w += 3;
            } elseif (in_array($last, $this->_separators)) {
                $w += 2;
            } else {
                $w += 1;
            }

            foreach ($this->_importantWords as $word) {
                if ($word != '') {
                    $w += mb_substr_count($sentence, $word);
                }
            }
            
            $sentence = mb_substr(mb_substr($sentence, 0, -1), 1);
            if (!in_array($first, $this->_separators)) {
                $sentence = $first . $sentence;
            }
            
            $stemStr = $stemmedSentences[$i];
            $stemStr = mb_substr($stemStr, 0, -1);
            
            $words = preg_split("/[\s,]+/u", $stemStr);
            
            $totalWords = count($words);
            if ($totalWords > 4) {
                $totalWordsRank = 0;
                
                foreach ($words as $word) {
                    if (isset($arr[$word])) {
                        $totalWordsRank += $arr[$word];
                    }
                }
                
                $wordsRank     = $totalWordsRank / $totalWords;
                $sentenceRanks = $w * $wordsRank;
                
                array_push($sentenceArr, $sentence . $last);
                array_push($rankArr, $sentenceRanks);
            }
        }
        
        $sentencesRanks = array($sentenceArr, $rankArr);
        
        return $sentencesRanks;
    }
    
    /**
     * Calculate minimum rank for sentences which will be including in the summary
     *      
     * @param array   $str Document sentences
     * @param array   $arr Sentences ranks
     * @param integer $int Number of sentences you need to include in your summary
     * @param integer $max Maximum number of characters accepted in your summary
     *      
     * @return integer Minimum accepted sentence rank (sentences with rank more
     *                 than this will be listed in the document summary)
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    protected function minAcceptedRank($str, $arr, $int, $max)
    {
        $len = array();
        
        foreach ($str as $line) {
            $len[] = mb_strlen($line);
        }

        rsort($arr, SORT_NUMERIC);

        $totalChars = 0;
        
        for ($i=0; $i<=$int; $i++) {

            if (!isset($arr[$i])) {
                $minRank = 0;
                break;
            }

            $totalChars += $len[$i];

            if ($totalChars >= $max) {
                $minRank = $arr[$i];
                break;
            }

            $minRank = $arr[$i];
        }

        return $minRank;
    }
    
    /**
     * Check some conditions to know if a given string is a formal valid word or not
     *      
     * @param string $word String to be checked if it is a valid word or not
     *      
     * @return boolean True if passed string is accepted as a valid word else 
     *                 it will return False
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    protected function acceptedWord($word)
    {
        $accept = true;
        
        if (mb_strlen($word) < 3) {
            $accept = false;
        }
        
        return $accept;
    }
}


Youez - 2016 - github.com/yon3zu
LinuXploit