package fetcher

import (
	"bytes"
	"encoding/json"
	"encoding/xml"
	"fmt"
	"path/filepath"
	"strings"
	"time"

	"github.com/cheggaaa/pb"
	"github.com/gocarina/gocsv"
	"github.com/inconshreveable/log15"
	"github.com/mozqnet/go-exploitdb/extractor"
	"github.com/mozqnet/go-exploitdb/models"
	"github.com/mozqnet/go-exploitdb/util"
	"github.com/pkg/errors"
	"golang.org/x/net/html/charset"
)

// FetchExploitDB builds the list of Offensive Security (Exploit-DB) exploits.
//
// It loads four lookup tables keyed by Exploit-DB ID (CVE IDs, shellcodes,
// papers and exploit documents) and merges them:
//
//   - an ID that has an entry in the CVE map yields one models.Exploit per
//     CVE ID in that entry;
//   - an ID without an entry in the CVE map yields a single models.Exploit
//     with an empty CveID.
//
// IDs for which no description can be found in the paper, shellcode or
// document tables are skipped. The deep flag is forwarded unchanged to
// FetchExploitCvesMap. The first error from any of the four fetchers is
// returned as-is together with a nil slice.
//
// The IDs are visited by ranging over a map, so the order of the returned
// slice is not stable between calls.
func FetchExploitDB(deep bool) (exploits []models.Exploit, err error) {
	var exploitCvesMap map[string][]string
	if exploitCvesMap, err = FetchExploitCvesMap(deep); err != nil {
		return nil, err
	}
	var exploitShellCodeMap map[string]*models.ShellCode
	if exploitShellCodeMap, err = FetchExploitShellCodeMap(); err != nil {
		return nil, err
	}
	var exploitPaperMap map[string]*models.Paper
	if exploitPaperMap, err = FetchExploitPaperMap(); err != nil {
		return nil, err
	}
	var exploitDocMap map[string]*models.Document
	if exploitDocMap, err = FetchExploitDocumentMap(); err != nil {
		return nil, err
	}

	// Collect every Exploit-DB ID that appears in at least one source.
	uniqExploitDBIDs := map[string]struct{}{}
	for id := range exploitCvesMap {
		uniqExploitDBIDs[id] = struct{}{}
	}
	for id := range exploitShellCodeMap {
		uniqExploitDBIDs[id] = struct{}{}
	}
	for id := range exploitPaperMap {
		uniqExploitDBIDs[id] = struct{}{}
	}
	for id := range exploitDocMap {
		uniqExploitDBIDs[id] = struct{}{}
	}

	// newExploit is the single place where an exploit record is assembled, so
	// the CVE and no-CVE paths cannot drift apart (previously the no-CVE path
	// left OffensiveSecurity.ExploitUniqueID unset).
	newExploit := func(id, cveID, description string) models.Exploit {
		return models.Exploit{
			ExploitUniqueID: id,
			ExploitType:     models.OffensiveSecurityType,
			URL:             "https://www.exploit-db.com/exploits/" + id,
			CveID:           cveID,
			Description:     description,
			OffensiveSecurity: &models.OffensiveSecurity{
				ExploitUniqueID: id,
				Document:        exploitDocMap[id],
				ShellCode:       exploitShellCodeMap[id],
				Paper:           exploitPaperMap[id],
			},
		}
	}

	for id := range uniqExploitDBIDs {
		// The description only depends on the ID. When several sources know
		// the ID the last assignment wins: paper < shellcode < document.
		var description string
		if e, ok := exploitPaperMap[id]; ok {
			description = e.Description
		}
		if e, ok := exploitShellCodeMap[id]; ok {
			description = e.Description
		}
		if e, ok := exploitDocMap[id]; ok {
			description = e.Description
		}
		if len(description) == 0 {
			continue
		}

		cveIDs, ok := exploitCvesMap[id]
		if !ok {
			// No CveID
			exploits = append(exploits, newExploit(id, "", description))
			continue
		}
		// NOTE(review): an ID that is present in the CVE map with an empty
		// list produces no exploit at all; this mirrors the long-standing
		// behaviour — confirm whether such IDs should fall back to the
		// no-CVE record instead.
		for _, cveID := range cveIDs {
			exploits = append(exploits, newExploit(id, cveID, description))
		}
	}
	return exploits, nil
}

// FetchExploitCvesMap returns a map from Exploit-DB ID to the CVE IDs that
// reference it.
//
// For every year from 1999 up to the current year the MITRE CVRF XML feed is
// downloaded and decoded. Each vulnerability reference whose description has
// the exact form "EXPLOIT-DB:<id>" adds the vulnerability's CVE to the entry
// for <id>.
//
// When deep is true the map is additionally enriched through
// SearchGithubCodeWithExt: once per year for the frequently used file
// extensions, and once without a year filter for the rarer ones.
//
// Any download or decode failure of a MITRE feed, and any error reported by
// SearchGithubCodeWithExt, aborts the whole operation with a nil map.
func FetchExploitCvesMap(deep bool) (exploitCveMap map[string][]string, err error) {
	exploitCveMap = map[string][]string{}

	for year := 1999; year <= time.Now().Year(); year++ {
		feedURL := fmt.Sprintf("http://cve.mitre.org/data/downloads/allitems-cvrf-year-%d.xml", year)
		log15.Info("Fetching", "URL", feedURL)

		body, fetchErr := util.FetchURL(feedURL)
		if fetchErr != nil {
			return nil, errors.Wrapf(fetchErr, "Failed to fetch cve data from Mitre. targetURL: %s", feedURL)
		}

		// The decoder needs a charset reader so that feeds declaring a
		// non-UTF-8 encoding can be read, see
		// https://stackoverflow.com/questions/6002619/unmarshal-an-iso-8859-1-xml-input-in-go
		dec := xml.NewDecoder(bytes.NewReader(body))
		dec.CharsetReader = charset.NewReaderLabel

		var feed models.MitreXML
		if decodeErr := dec.Decode(&feed); decodeErr != nil {
			return nil, errors.Wrap(decodeErr, "Failed to Unmarshal XML")
		}

		for _, vuln := range feed.Vulnerability {
			for _, ref := range vuln.References {
				// Reference sources are listed at
				// https://cve.mitre.org/data/refs/index.html
				// Only "<source>:<id>" descriptions from EXPLOIT-DB count.
				parts := strings.Split(ref.Description, ":")
				if len(parts) == 2 && parts[0] == "EXPLOIT-DB" {
					id := parts[1]
					exploitCveMap[id] = append(exploitCveMap[id], vuln.CVE)
				}
			}
		}
	}

	if deep {
		// Extension popularity taken from
		// https://github.com/offensive-security/exploitdb/search?q=CVE&unscoped_q=CVE
		// Extensions with more than 500 hits are searched year by year.
		frequentExts := []string{"txt", "rb", "py"}
		// Extensions with fewer than 500 hits are searched in one go.
		rareExts := []string{
			"c", "html", "pl", "pm", "sh", "md", "php",
			"cpp", "java", "go", "cs", "nse", "asm", "sql",
		}

		for year := 1999; year <= time.Now().Year(); year++ {
			log15.Info("Fetching Github Sources", "year", year)
			exploitCveMap, err = SearchGithubCodeWithExt(frequentExts, year, exploitCveMap)
			if err != nil {
				return nil, err
			}
		}
		exploitCveMap, err = SearchGithubCodeWithExt(rareExts, 0, exploitCveMap)
		if err != nil {
			return nil, err
		}
	}

	return exploitCveMap, nil
}

// SearchGithubCodeWithExt searches the offensive-security/exploitdb repository
// through the GitHub code-search API for files that mention "CVE" and have one
// of the given extensions, then downloads each hit and records the CVE IDs
// extracted from it.
//
// Parameters:
//   - exts: file extensions to search, one query series per extension.
//   - year: when non-zero the query becomes "CVE <year>"; zero searches for
//     "CVE" alone.
//   - exploitCveMap: map from Exploit-DB ID to CVE IDs to extend. It is
//     modified in place; a nil map is replaced by a fresh one. IDs already
//     present are left untouched and their files are not downloaded.
//
// The Exploit-DB ID of a hit is its file name without the extension.
//
// A failing search request is only logged and ends paging for the current
// extension. A search response that cannot be decoded, or a hit whose file
// cannot be downloaded, aborts the call with a nil map and the error.
func SearchGithubCodeWithExt(exts []string, year int, exploitCveMap map[string][]string) (newExploitCveMap map[string][]string, err error) {
	if exploitCveMap == nil {
		exploitCveMap = map[string][]string{}
	}
	for _, ext := range exts {
		// The search API serves at most 1000 results: 10 pages of 100.
		maxPage := 10
		for page := 1; page <= maxPage; page++ {
			// https://developer.github.com/v3/search/#search-code
			var url string
			if year == 0 {
				url = fmt.Sprintf("https://api.github.com/search/code?q=CVE+extension:%s+repo:offensive-security/exploitdb&page=%d&per_page=100&sort=indexed&order=desc", ext, page)
			} else {
				spaceCode := "%20"
				url = fmt.Sprintf("https://api.github.com/search/code?q=CVE%s%d+extension:%s+repo:offensive-security/exploitdb&page=%d&per_page=100&sort=indexed&order=desc", spaceCode, year, ext, page)
			}
			log15.Info("Fetching", "URL", url)
			githubJSON, fetchErr := util.FetchURL(url)
			if fetchErr != nil {
				// Deliberately best effort: give up on this extension but
				// keep what was collected so far and try the next one.
				log15.Warn("Failed to fetch cve data from Github.", "url", url, "err", fetchErr)
				break
			}
			var github models.GithubJSON
			if err = json.Unmarshal(githubJSON, &github); err != nil {
				return nil, err
			}

			if page == 1 {
				// A page never holds more than per_page (100) items, so the
				// overall hit count has to come from TotalCount.
				if 1000 < github.TotalCount {
					log15.Warn("More than 1000 data can not be acquired due to rate limit of github")
				}
				// Ceiling division: an exact multiple of 100 must not
				// schedule an extra, empty page.
				totalPageSize := (github.TotalCount + 99) / 100
				if totalPageSize < maxPage {
					maxPage = totalPageSize
				}
			}

			// for github rate limit
			// NOTE(review): presumably pages with many hits are throttled
			// naturally by the per-file downloads below, so only small pages
			// need an explicit pause — confirm.
			if len(github.Items) < 30 {
				log15.Info("Sleep 10 seconds for github rate limit")
				time.Sleep(10 * time.Second)
			}

			bar := pb.StartNew(len(github.Items))
			for _, item := range github.Items {
				rawURL := "https://raw.githubusercontent.com/offensive-security/exploitdb/master/" + item.Path
				exploitDBID := strings.TrimSuffix(filepath.Base(rawURL), filepath.Ext(rawURL))
				if _, ok := exploitCveMap[exploitDBID]; !ok {
					doc, docErr := util.FetchURL(rawURL)
					if docErr != nil {
						// Close the progress bar before bailing out so the
						// terminal is left in a clean state.
						bar.Finish()
						return nil, docErr
					}
					exploitCveMap[exploitDBID] = extractor.ExtractCveID(doc)
				}
				bar.Increment()
			}
			bar.Finish()
		}
	}
	return exploitCveMap, nil
}

// FetchExploitShellCodeMap downloads files_shellcodes.csv from the
// offensive-security/exploitdb repository, decodes its rows into
// models.ShellCode values and returns them keyed by ExploitUniqueID.
//
// Each row's ShellCodeURL is prefixed with the repository URL
// "https://github.com/offensive-security/exploitdb/". When several rows share
// an ExploitUniqueID the last one wins.
//
// A download or CSV decoding failure is returned unchanged together with a
// nil map.
func FetchExploitShellCodeMap() (exploitShellCodeMap map[string]*models.ShellCode, err error) {
	url := "https://raw.githubusercontent.com/offensive-security/exploitdb/master/files_shellcodes.csv"
	log15.Info("Fetching", "URL", url)
	csvBody, err := util.FetchURL(url)
	if err != nil {
		// Do not hand the body of a failed download to the CSV decoder.
		return nil, err
	}

	shellCodes := []*models.ShellCode{}
	if err = gocsv.UnmarshalBytes(csvBody, &shellCodes); err != nil {
		return nil, err
	}

	exploitShellCodeMap = make(map[string]*models.ShellCode, len(shellCodes))
	for _, shellCode := range shellCodes {
		shellCode.ShellCodeURL = "https://github.com/offensive-security/exploitdb/" + shellCode.ShellCodeURL
		exploitShellCodeMap[shellCode.ExploitUniqueID] = shellCode
	}
	return exploitShellCodeMap, nil
}

// FetchExploitPaperMap downloads files_papers.csv from the
// offensive-security/exploitdb-papers repository, decodes its rows into
// models.Paper values and returns them keyed by ExploitUniqueID.
//
// Each row's PaperURL is prefixed with the repository URL
// "https://github.com/offensive-security/exploitdb-papers/". When several rows
// share an ExploitUniqueID the last one wins.
//
// A download or CSV decoding failure is returned unchanged together with a
// nil map.
func FetchExploitPaperMap() (exploitPaperMap map[string]*models.Paper, err error) {
	url := "https://raw.githubusercontent.com/offensive-security/exploitdb-papers/master/files_papers.csv"
	log15.Info("Fetching", "URL", url)
	csvBody, err := util.FetchURL(url)
	if err != nil {
		// Do not hand the body of a failed download to the CSV decoder.
		return nil, err
	}

	papers := []*models.Paper{}
	if err = gocsv.UnmarshalBytes(csvBody, &papers); err != nil {
		return nil, err
	}

	exploitPaperMap = make(map[string]*models.Paper, len(papers))
	for _, paper := range papers {
		paper.PaperURL = "https://github.com/offensive-security/exploitdb-papers/" + paper.PaperURL
		exploitPaperMap[paper.ExploitUniqueID] = paper
	}
	return exploitPaperMap, nil
}

// FetchExploitDocumentMap downloads files_exploits.csv from the
// offensive-security/exploitdb repository, decodes its rows into
// models.Document values and returns them keyed by ExploitUniqueID.
//
// Each row's DocumentURL is prefixed with the repository URL
// "https://github.com/offensive-security/exploitdb/". When several rows share
// an ExploitUniqueID the last one wins.
//
// A download or CSV decoding failure is returned unchanged together with a
// nil map.
func FetchExploitDocumentMap() (exploitDocMap map[string]*models.Document, err error) {
	url := "https://raw.githubusercontent.com/offensive-security/exploitdb/master/files_exploits.csv"
	log15.Info("Fetching", "URL", url)
	csvBody, err := util.FetchURL(url)
	if err != nil {
		// Do not hand the body of a failed download to the CSV decoder.
		return nil, err
	}

	docs := []*models.Document{}
	if err = gocsv.UnmarshalBytes(csvBody, &docs); err != nil {
		return nil, err
	}

	exploitDocMap = make(map[string]*models.Document, len(docs))
	for _, doc := range docs {
		doc.DocumentURL = "https://github.com/offensive-security/exploitdb/" + doc.DocumentURL
		exploitDocMap[doc.ExploitUniqueID] = doc
	}
	return exploitDocMap, nil
}
