<?php
use \ForceUTF8\Encoding;
/**
 * Collects on-page HTML metrics for a single document: heading and emphasis
 * tag texts, size / text-ratio figures, an HTML5 heuristic and link lists.
 *
 * The injected parser only needs to expose str_get_html(); the returned
 * document and its nodes are used through find(), ->plaintext, ->href and
 * ->rel (this looks like the simple_html_dom API -- confirm against the
 * caller that wires the parser in).
 */
class HtmlService
{

    private $htmlSource;
    private $site;
    private $parser;
    private $htmlData = [];

    /** Parsed document, filled lazily by getDom(). */
    private $dom;

    /** Whether getDom() has already attempted to parse the source. */
    private $domLoaded = false;

    /**
     * @param mixed $site   Identifier of the analysed site; compared against
     *                      link hosts (with "www." removed) and hashed for
     *                      the dump file names.
     * @param mixed $source Raw HTML of the page.
     * @param mixed $parser Object exposing str_get_html($html).
     */
    public function __construct($site, $source, $parser)
    {
        $this->site = $site;
        $this->htmlSource = $source;
        $this->parser = $parser;
    }

    /**
     * Runs every collector and returns the accumulated data.
     *
     * @return array Keys: h1_tags..h6_tags, strong_tags, b_tags, em_tags,
     *               i_tags, u_tags, cite_tags, html_size, code_size,
     *               text_size, code_text_ratio, html5_support, total_links,
     *               internal_links, external_links, nofollow_links,
     *               title_links.
     */
    public function get()
    {
        $this->getTitleTags();
        $this->getStyleTags();
        $this->getHtmlSizeInfo();
        $this->getLinks();

        return $this->htmlData;
    }

    /**
     * Stores the cleaned texts of all h1..h6 elements under "<tag>_tags".
     */
    private function getTitleTags()
    {
        $this->collectTagTexts(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']);
    }

    /**
     * Stores the cleaned texts of the emphasis-style elements under
     * "<tag>_tags".
     */
    private function getStyleTags()
    {
        $this->collectTagTexts(['strong', 'b', 'em', 'i', 'u', 'cite']);
    }

    /**
     * Merges a "<tag>_tags" => texts entry into the result for every tag.
     *
     * @param array $tagNames Tag names to look up, in output order.
     */
    private function collectTagTexts($tagNames)
    {
        $collected = [];

        foreach ($tagNames as $tagName) {
            $collected[$tagName . '_tags'] = $this->getTextBetweenTags($tagName);
        }

        $this->htmlData = array_merge($this->htmlData, $collected);
    }

    /**
     * Computes byte sizes of the full markup and of its visible text, the
     * text share in percent and the HTML5 heuristic.
     *
     * Both strings are still dumped to public_path()/tmp as before, but the
     * sizes are taken from the in-memory strings so a failed or stale write
     * cannot skew the figures.
     */
    private function getHtmlSizeInfo()
    {
        $source = (string) $this->htmlSource;
        $text = $this->extractVisibleText($source);

        $this->writeTmpDump('site_', $source);
        $this->writeTmpDump('text_', $text);

        $mainSize = strlen($source);
        $textSize = strlen($text);
        $codeSize = $mainSize - $textSize;

        if ($codeSize > 0 && $textSize > 0) {
            // Text size * 100 / total HTML size
            $percentage = ($textSize * 100) / $mainSize;
        } else {
            $percentage = 0;
        }

        $htmlInfo = [
            'html_size'       => $mainSize,
            'code_size'       => $codeSize,
            'text_size'       => $textSize,
            'code_text_ratio' => $percentage,
            'html5_support'   => $this->supportsHtml5()
        ];

        $this->htmlData = array_merge($this->htmlData, $htmlInfo);
    }

    /**
     * Removes script/style blocks, all tags and line breaks/tabs from the
     * markup and collapses runs of spaces.
     *
     * @param string $source Raw HTML.
     * @return string Remaining text.
     */
    private function extractVisibleText($source)
    {
        // "i" so that <SCRIPT>/<STYLE> bodies are dropped too; "s" lets "."
        // span line breaks inside those blocks.
        $text = preg_replace(
            '/(<script.*?>.*?<\/script>|<style.*?>.*?<\/style>|<.*?>|\r|\n|\t)/is',
            '',
            $source
        );

        if ($text === null) {
            // PCRE gave up (e.g. backtrack limit); strip_tags() below still
            // removes the markup.
            $text = $source;
        }

        $collapsed = preg_replace('/ +/', ' ', $text);
        if ($collapsed !== null) {
            $text = $collapsed;
        }

        return strip_tags($text);
    }

    /**
     * Writes a dump file named <prefix><md5(site)>.txt below
     * public_path()/tmp. Best effort: the result is not used by this class.
     *
     * @param string $prefix   File name prefix ("site_" or "text_").
     * @param string $contents Data to write.
     */
    private function writeTmpDump($prefix, $contents)
    {
        $path = public_path() . '/tmp/' . $prefix . md5($this->site) . '.txt';

        file_put_contents($path, $contents);
    }

    /**
     * Heuristic: the page counts as HTML5 when it contains the HTML5
     * doctype plus a <header and a <footer tag.
     *
     * @return int 1 when all three markers are present, 0 otherwise.
     */
    private function supportsHtml5()
    {
        $source = (string) $this->htmlSource;

        // Compare against false strictly: the doctype normally sits at
        // offset 0, which a loose comparison treats as "not found".
        foreach (['<!DOCTYPE html>', '<header', '<footer'] as $marker) {
            if (stripos($source, $marker) === false) {
                return 0;
            }
        }

        return 1;
    }

    /**
     * Parses the HTML source once and reuses the document afterwards.
     *
     * @return mixed Whatever the parser's str_get_html() returned; callers
     *               treat a falsy value as "could not parse".
     */
    private function getDom()
    {
        if (!$this->domLoaded) {
            $this->dom = $this->parser->str_get_html($this->htmlSource);
            $this->domLoaded = true;
        }

        return $this->dom;
    }

    /**
     * Returns the cleaned, UTF-8 converted text of every element with the
     * given tag name; elements whose text is empty are skipped.
     *
     * @param string $tagName Selector passed to find().
     * @return array List of strings.
     */
    private function getTextBetweenTags($tagName)
    {
        $texts = [];

        $html = $this->getDom();
        if (!$html) {
            return $texts;
        }

        $elements = $html->find($tagName);
        if (!$elements) {
            return $texts;
        }

        foreach ($elements as $item) {
            $words = $this->cleanText($item->plaintext);

            if ($words !== '') {
                $texts[] = Encoding::toUTF8($words);
            }
        }

        return $texts;
    }

    /**
     * Collects every anchor, splits them into internal / external /
     * nofollow lists and stores them together with the total count and the
     * heading links.
     */
    private function getLinks()
    {
        $html = $this->getDom();

        $totalLinks = 0;
        $innerLinks = [];
        $externalLinks = [];
        $noFollowLinks = [];

        if ($html) {
            foreach ($html->find('a') as $item) {
                $linkDetails = $this->describeAnchor($item);

                if ($item->find('img')) {
                    $linkDetails['type'] = 'image';
                } elseif ($linkDetails['text'] === '') {
                    $linkDetails['type'] = 'empty';
                } else {
                    $linkDetails['type'] = 'text';
                }

                $totalLinks++;

                if ($this->isInternalHref($linkDetails['href'])) {
                    $innerLinks[] = $linkDetails;
                } else {
                    $externalLinks[] = $linkDetails;
                }

                // rel is a space separated token list ("nofollow noopener").
                $relTokens = preg_split('/\s+/', $linkDetails['rel'], -1, PREG_SPLIT_NO_EMPTY);
                if (is_array($relTokens) && in_array('nofollow', $relTokens, true)) {
                    $noFollowLinks[] = $linkDetails;
                }
            }
        }

        $links = [
            'total_links'    => $totalLinks,
            'internal_links' => $innerLinks,
            'external_links' => $externalLinks,
            'nofollow_links' => $noFollowLinks,
            'title_links'    => $this->getTitleLinkData()
        ];

        $this->htmlData = array_merge($this->htmlData, $links);
    }

    /**
     * Extracts the normalised text, href and rel of an anchor node.
     *
     * @param mixed $anchor Node exposing ->plaintext, ->href and ->rel.
     * @return array Keys: text, href, rel.
     */
    private function describeAnchor($anchor)
    {
        return [
            'text' => Encoding::toUTF8($this->cleanText($anchor->plaintext)),
            'href' => strtolower_tr(strip_tags($anchor->href)),
            'rel'  => strtolower_tr(strip_tags($anchor->rel))
        ];
    }

    /**
     * Decides whether a link target belongs to the analysed site.
     *
     * A URL that names a host is internal only when that host (with "www."
     * removed) equals the site; this also covers protocol-relative and
     * non-http URLs. Without a host, anything not starting with "http" is
     * treated as a relative, internal reference.
     *
     * @param string $href Link target.
     * @return bool True for internal, false for external.
     */
    private function isInternalHref($href)
    {
        $href = (string) $href;
        $host = parse_url($href, PHP_URL_HOST);

        if (is_string($host) && $host !== '') {
            return str_replace('www.', '', $host) === (string) $this->site;
        }

        return strncmp($href, 'http', 4) !== 0;
    }

    /**
     * Collects, per heading level, the first link found inside each
     * h1..h6 element.
     *
     * @return array Map of heading tag name => list of arrays with the keys
     *               text, href and rel; levels without links are omitted.
     */
    private function getTitleLinkData()
    {
        $titleLinks = [];

        $html = $this->getDom();
        if (!$html) {
            return $titleLinks;
        }

        foreach (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] as $tagName) {
            $headings = $html->find($tagName);
            if (!$headings) {
                continue;
            }

            // Work on the heading nodes themselves: their plain text no
            // longer contains the <a> element we are looking for.
            foreach ($headings as $heading) {
                $anchor = $heading->find('a', 0);

                if ($anchor) {
                    $titleLinks[$tagName][] = $this->describeAnchor($anchor);
                }
            }
        }

        return $titleLinks;
    }

    /**
     * Strips tags, tabs, line breaks and "&nbsp;" entities from the text and
     * trims whitespace and non-breaking spaces from both ends.
     *
     * @param string $words Raw text.
     * @return string Cleaned text.
     */
    private function cleanText($words)
    {
        $words = strip_tags((string) $words);
        $words = str_replace(["\t", "\n", "\r"], '', $words);
        $words = str_replace('&nbsp;', '', $words);

        // Strip NBSP as a unit. Trimming the bytes 0xC2 / 0xA0 separately
        // would cut multibyte characters such as "à" (C3 A0) or "©" (C2 A9)
        // in half. For input that is not valid UTF-8 the single byte 0xA0
        // is taken to be a legacy-encoded NBSP.
        $nbsp = preg_match('//u', $words) === 1 ? '\xC2\xA0' : '\xA0';
        $edge = '(?:' . $nbsp . '|[\x00\x09-\x0D\x20])+';

        $trimmed = preg_replace('/^' . $edge . '|' . $edge . '$/D', '', $words);
        if ($trimmed !== null) {
            $words = $trimmed;
        }

        return trim($words);
    }


}