#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-
# Variant that changes the text color (key phrases highlighted)
import csv
import warnings
warnings.filterwarnings("ignore")
import time
import re
import spacy
import pytextrank
import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

from nltk.stem.wordnet import WordNetLemmatizer as WNL
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from operator import itemgetter
from flask import Flask, render_template, request, g
app = Flask(__name__)
from elasticsearch import Elasticsearch

@app.route('/')
def form():
    return render_template('form.html')

@app.route('/elasticsearch', methods = ['POST', 'GET'])
def search():
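    # Query Elasticsearch for Mirai-related reports, cache them to psr.csv,
    # run TextRank over the concatenated texts, and render the top-ranked sentences.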
    es = Elasticsearch(host = '153.120.135.103', port = 9200, http_auth = ('elastic', 'gui3DhVRfd9F18n30o34'))
    res = es.search(index = 'securityreports-2020-0930-1220', body = {
        '_source':['id','title','text'],
        'size':1000,
        'query':{'match':{'text':'mirai Mirai'}}})
    hit_num = res['hits']['total']['value']
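    # Dump each hit's _source fields to psr.csv, writing the header row only once.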
    with open('psr.csv', 'w', encoding='CP932', errors='ignore', newline='') as f:
        header_present = False
        for doc in res['hits']['hits']:
            my_dict = doc['_source']
            if not header_present:
                w = csv.DictWriter(f, my_dict.keys())
                w.writeheader()
                header_present = True
            w.writerow(my_dict)
    # Read the cached CSV back in; the first row is the header.
    with open('psr.csv', encoding='CP932', errors='ignore') as csvfile:
        a_list = []
        b_list = []
        for row in csv.reader(csvfile):
            a_list.append(row[1])
            b_list.append(row[2])
    del a_list[0]
    del b_list[0]
    reports = ' '.join(a_list)
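    # Build a spaCy pipeline with the pytextrank component so key phrases are
    # exposed on doc._.phrases.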
    nlp = spacy.load('en_core_web_sm', disable = ['ner'])
    tr = pytextrank.TextRank()
    nlp.add_pipe(tr.PipelineComponent, name="textrank", last=True)
    doc = nlp(reports)
    limit_phrases = 15
    phrase_id = 0
    wnl = WNL()
    unit_vector = []
    phrases = []
    phrase = []
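    # Collect the top limit_phrases phrases: their ranks, raw text, and a lemmatized form.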
    for p in doc._.phrases:
        unit_vector.append(p.rank)
        phrases.append(p.text)
        word_tokenize_list = word_tokenize(p.text.lower())
        a = ''
        for lemma in word_tokenize_list:
            a += ' '
            a += wnl.lemmatize(lemma)
        phrase.append(a)
        # phrase is the list of lemmatized top-ranked phrases
        phrase_id += 1
        if phrase_id == limit_phrases:
            break
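    # Normalize the phrase ranks so they sum to 1 and can be used as weights.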
    sum_ranks = sum(unit_vector)
    unit_vector = [ rank/sum_ranks for rank in unit_vector ]
    sent_list = []
    sent_rank = {}
    sent_id = 0
    sum_vec = 0.0
    sent_box = ''
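    # Score each sentence by counting occurrences of the lemmatized phrases,
    # weighted by the normalized phrase ranks.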
    for sent in doc.sents:
        a = str(sent)
        text_tokenize_list = word_tokenize(a.lower())
        text_box = ''
        pnum = []
        score = 0
        for text in text_tokenize_list:
            text_box += ' '
            text_box += wnl.lemmatize(text)
        for p in phrase:
            pnum.append(text_box.count(p))
            score += (text_box.count(p))*(unit_vector[phrase.index(p)])
        sent_rank[sent_id] = score
        sent_id += 1
        sent_list.append(pnum)
        # sent_list holds each sentence's bag-of-words (phrase count) vector
    sent_id = 0
    sent_text = {}
    sentence = []
    limit_sentences = 10
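    # Map sentence ids to their text, then emit the limit_sentences highest-scoring ones.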
    for sent in doc.sents:
        sent_text[sent_id] = sent.text
        sent_id += 1
    num_sent = 0
    for sent_id, rank in sorted(sent_rank.items(), key=itemgetter(1), reverse = True):
        sentence.append(sent_text[sent_id])
        num_sent += 1
        if num_sent == limit_sentences:
            break
    return render_template("search.html", sentence = sentence, hits = hit_num, phrase_list = phrases)
        
@app.route('/nonchange', methods = ['POST', 'GET'])
def ten_phrases():
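    # Re-run TextRank over the cached reports, extract key phrases from the
    # sentences the user selected on the form, and highlight them per report.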
    if request.method == 'POST':
        # Read the cached CSV back in; the first row is the header.
        with open('psr.csv', encoding='CP932', errors='ignore') as csvfile:
            a_list = []
            b_list = []
            for row in csv.reader(csvfile):
                a_list.append(row[1])
                b_list.append(row[2])
        del a_list[0]
        del b_list[0]
        texts = []
        reports = ' '.join(a_list)
        texts.extend(a_list)
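        # Same TextRank pass as in search(): extract top phrases and rank sentences
        # over the concatenated reports.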
        nlp = spacy.load('en_core_web_sm', disable = ['ner'])
        tr = pytextrank.TextRank()
        nlp.add_pipe(tr.PipelineComponent, name="textrank", last=True)
        doc = nlp(reports)
        wnl = WNL()
        limit_p = 15
        phrase_id = 0
        unit_vector = []
        phrases = []
        phrase = []
        for p in doc._.phrases:
            unit_vector.append(p.rank)
            phrases.append(p.text)
            word_tokenize_list = word_tokenize(p.text.lower())
            a = ''
            for lemma in word_tokenize_list:
                a += ' '
                a += wnl.lemmatize(lemma)
            phrase.append(a)
            # phrase is the list of lemmatized top-ranked phrases
            phrase_id += 1
            if phrase_id == limit_p:
                break
        sum_ranks = sum(unit_vector)
        unit_vector = [ rank/sum_ranks for rank in unit_vector ]
        sent_list = []
        sent_rank = {}
        sent_id = 0
        sum_vec = 0.0
        sent_box = ''
        for sent in doc.sents:
            a = str(sent)
            text_tokenize_list = word_tokenize(a.lower())
            text_box = ''
            pnum = []
            score = 0
            for text in text_tokenize_list:
                text_box += ' '
                text_box += wnl.lemmatize(text)
            for p in phrase:
                pnum.append(text_box.count(p))
                score += (text_box.count(p))*(unit_vector[phrase.index(p)])
            sent_rank[sent_id] = score
            sent_id += 1
            sent_list.append(pnum)
            # sent_list holds each sentence's bag-of-words (phrase count) vector
        sent_id = 0
        sent_text = {}
        sentence = []
        limit_sentences = 10
        for sent in doc.sents:
            sent_text[sent_id] = sent.text
            sent_id += 1
        num_sent = 0
        for sent_id, rank in sorted(sent_rank.items(), key=itemgetter(1), reverse = True):
            sentence.append(sent_text[sent_id])
            num_sent += 1
            if num_sent == limit_sentences:
                break
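        # The form posts the indices of the sentences the user selected.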
        result = request.form.getlist('sentences')
        result = [int(s) for s in result]
        c_list = []
        for i in result:
            c_list.append(sentence[i])
        raw_text = ' '.join(c_list)
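        # Run TextRank again on just the selected sentences to obtain the final key phrases.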
        doc = nlp(raw_text)
        sent_num = []
        num = 1
        limit_phrases = 10
        phrase_id = 0
        wnl = WNL()
        unit_vector = []
        phrases = []
        phrase = []
        text_rank = {}
        htmltext = []
        title_list = []
        for p in doc._.phrases:
            unit_vector.append(p.rank)
            phrases.append(p.text)
            word_tokenize_list = word_tokenize(p.text.lower())
            a = ''
            for lemma in word_tokenize_list:
                a += ' '
                a += wnl.lemmatize(lemma)
            phrase.append(a)
            phrase_id += 1
            if phrase_id == limit_phrases:
                break
        sum_ranks = sum(unit_vector)
        unit_vector = [ rank/sum_ranks for rank in unit_vector ]
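        # Score every report against the final phrase weights and build an HTML copy
        # of each report with its sentences and key phrases highlighted.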
        for i in range(len(texts)):
            doc = nlp(texts[i])
            sent_list = []
            sent_rank = {}
            sent_id = 0
            sum_vec = 0.0
            sent_box = ''
            for sent in doc.sents:
                a = str(sent)
                text_tokenize_list = word_tokenize(a.lower())
                text_box = ''
                pnum = []
                score = 0
                for text in text_tokenize_list:
                    text_box += ' '
                    text_box += wnl.lemmatize(text)
                for p in phrase:
                    pnum.append(text_box.count(p))
                sent_id += 1
                sent_list.append(pnum)
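                # Highlight the sentence more strongly the more key phrases it contains.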
                if sum(pnum) == 0:
                    sent_box += str(sent) + ' '
                elif sum(pnum) == 1:
                    sent_box += '<span style="background: linear-gradient(transparent 90%, #ff9100 0%);">' + str(sent) + '</span>' + ' '
                elif sum(pnum) == 2:
                    sent_box += '<span style="background: linear-gradient(transparent 60%, #ff9100 0%);">' + str(sent) + '</span>' + ' '
                else:
                    sent_box += '<span style="background: linear-gradient(transparent 0%, #ff9100 0%);">' + str(sent) + '</span>' + ' '
                # Escape each phrase so regex metacharacters in it are matched literally,
                # then wrap every occurrence in a red, bold span.
                for p in phrase:
                    senty = re.compile(re.escape(p), re.IGNORECASE)
                    sent_box = re.sub(senty, '<span style="font-weight: 600; color:red;">' + p + '</span>', sent_box)
                for p in phrases:
                    senty = re.compile(re.escape(p), re.IGNORECASE)
                    sent_box = re.sub(senty, '<span style="font-weight: 600; color:red;">' + p + '</span>', sent_box)
            htmltext.append(sent_box)
            for s in sent_list:
                score = 0
                for phrase_id in range(len(unit_vector)):
                    score += s[phrase_id]*unit_vector[phrase_id]
                sent_rank[sent_id] = score
                sum_vec += score
                sent_id += 1
            text_rank[i] = sum_vec/len(sent_list)
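        # Rank the reports by their average sentence score.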
        rank_list = sorted(text_rank.items(), key=itemgetter(1), reverse = True)
        print(rank_list)
        text_list =[]
        # Keep only the ten highest-ranked documents.
        for l in rank_list[:10]:
            text_list.append(htmltext[l[0]])
            title_list.append(b_list[l[0]])
        return render_template("nonchange.html", phrase_list = phrases, text_list = text_list, title = title_list)


if __name__ == "__main__":
    app.run(host='153.120.135.103', port = 80, debug=True)