Browse Source

fix: 爬虫成功率、准确率提升; 本地镜像构建加速

master 1.1.0
OhYee 2 years ago
parent
commit
26ca21247f
Signed by: OhYee
GPG Key ID: 5A9E1F63ED274FBB
  1. 25
      Dockerfile
  2. 21
      api/pkg/friends/main.go
  3. 101
      cron/spider.go
  4. 2
      cron/spider/const.go
  5. 96
      cron/spider/html.go
  6. 116
      cron/spider/request.go
  7. 65
      cron/spider/rss.go
  8. 114
      cron/spider/time.go
  9. 89
      cron/spider/time_test.go
  10. 2
      generate.bash
  11. 6
      go.mod
  12. 5
      go.sum
  13. 6
      main.go
  14. 2
      register/context_http.go
  15. 2
      register/context_websocket.go
  16. 2
      register/register.go

25
Dockerfile

@ -1,11 +1,20 @@ @@ -1,11 +1,20 @@
FROM golang:1.16.3 AS builder
# syntax=docker/dockerfile:experimental
COPY ./ /data/blotter
FROM golang:1.17.0 AS builder
WORKDIR /data/blotter
RUN go get
RUN go generate
# deps cache
COPY ./go.mod ./go.sum /data/blotter/
RUN go mod download -x
RUN go build all
# build code
# build with cache: https://github.com/golang/go/issues/27719
COPY ./ /data/blotter
RUN --mount=type=cache,target=/go/pkg/mod \
--mount=type=cache,target=/root/.cache/go-build \
go generate
# FROM golang:1.16.3 AS prod
FROM ubuntu AS prod
@ -20,7 +29,6 @@ RUN apt update && \ @@ -20,7 +29,6 @@ RUN apt update && \
python3 -m pip install matplotlib && \
rm -rf /var/lib/apt/lists/*
# Headless chrome from https://hub.docker.com/r/justinribeiro/chrome-headless/dockerfile/
RUN apt update && \
apt install -y \
@ -44,19 +52,18 @@ RUN apt update && \ @@ -44,19 +52,18 @@ RUN apt update && \
--no-install-recommends \
&& rm -rf /var/lib/apt/lists/*
ENV mongoURI="mongodb:27017"
COPY --from=builder /data/blotter/blotter /data/blotter/blotter
# # gojieba 字典文件
# gojieba 字典文件
COPY --from=builder /go/pkg/mod/github.com/ttys3/gojieba@v1.1.3/dict/hmm_model.utf8 /go/pkg/mod/github.com/ttys3/gojieba@v1.1.3/dict/hmm_model.utf8
COPY --from=builder /go/pkg/mod/github.com/ttys3/gojieba@v1.1.3/dict/idf.utf8 /go/pkg/mod/github.com/ttys3/gojieba@v1.1.3/dict/idf.utf8
COPY --from=builder /go/pkg/mod/github.com/ttys3/gojieba@v1.1.3/dict/jieba.dict.utf8 /go/pkg/mod/github.com/ttys3/gojieba@v1.1.3/dict/jieba.dict.utf8
COPY --from=builder /go/pkg/mod/github.com/ttys3/gojieba@v1.1.3/dict/stop_words.utf8 /go/pkg/mod/github.com/ttys3/gojieba@v1.1.3/dict/stop_words.utf8
COPY --from=builder /go/pkg/mod/github.com/ttys3/gojieba@v1.1.3/dict/user.dict.utf8 /go/pkg/mod/github.com/ttys3/gojieba@v1.1.3/dict/user.dict.utf8
WORKDIR /data/blotter
ENV mongoURI="mongodb:27017"
ENTRYPOINT [ "./blotter", "-address", "0.0.0.0:50000" ]

21
api/pkg/friends/main.go

@ -41,18 +41,27 @@ func SetFriends(fs []Friend) (err error) { @@ -41,18 +41,27 @@ func SetFriends(fs []Friend) (err error) {
}
func SetFriendPosts(url string, posts []FriendPost) (err error) {
op := bson.M{
"$set": bson.M{
"posts": posts,
"error": false,
},
}
if len(posts) == 0 {
op = bson.M{
"$set": bson.M{
"error": true,
},
}
}
_, err = mongo.Update(
"blotter",
"friends",
bson.M{
"link": url,
},
bson.M{
"$set": bson.M{
"posts": posts,
"error": len(posts) == 0,
},
},
op,
nil,
)
return

101
cron/spider.go

@ -9,50 +9,75 @@ import ( @@ -9,50 +9,75 @@ import (
"github.com/OhYee/blotter/api/pkg/friends"
"github.com/OhYee/blotter/cron/spider"
"github.com/OhYee/blotter/output"
"github.com/OhYee/blotter/register"
)
// spiderSite crawls the recent posts of a single friend site and stores the
// result through friends.SetFriendPosts, keyed by f.Link.
//
// The fetch is attempted up to 5 times and stops at the first attempt that
// yields at least one post; the attempt index is forwarded to the reader.
// f.RSS is read with spider.ReadRSS when it is empty or contains "rss",
// "atom", "feed" or "xml", and with spider.ReadHTML otherwise.
//
// When wg is non-nil, wg.Done is called on return so the function can be run
// as a goroutine tracked by the caller's WaitGroup.
func spiderSite(f friends.Friend, wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	friendName := f.Name
	friendURL := f.RSS

	// The URL never changes between attempts, so decide the reader once.
	isFeed := friendURL == "" ||
		strings.Contains(friendURL, "rss") ||
		strings.Contains(friendURL, "atom") ||
		strings.Contains(friendURL, "feed") ||
		strings.Contains(friendURL, "xml")

	var posts []friends.FriendPost
	retry := 0
	for retry = 0; retry < 5; retry++ {
		output.LogOutput.Println(time.Now().Format("2006-01-02 15:04:05"), "Spider", friendName, friendURL, "retry", retry)
		if isFeed {
			posts = spider.ReadRSS(friendURL, retry)
		} else {
			posts = spider.ReadHTML(friendURL, retry)
		}
		if len(posts) != 0 {
			break
		}
	}

	// A failed write would otherwise go unnoticed, so report it.
	if err := friends.SetFriendPosts(f.Link, posts); err != nil {
		output.ErrOutput.Println(friendName, friendURL, err)
	}

	output.DebugOutput.Println(posts)
	// retry is 5 here when every attempt came back empty.
	output.LogOutput.Println(time.Now().Format("2006-01-02 15:04:05"), "Spider", friendName, friendURL, "Finished", retry)
}
func Spider() {
output.LogOutput.Println(time.Now().Format("2006-01-02 15:04:05"), "Spider")
defer output.LogOutput.Println(time.Now().Format("2006-01-02 15:04:05"), "Spider", "Finished")
wg := &sync.WaitGroup{}
fs, _ := friends.GetFriends()
for _, f := range fs {
wg.Add(1)
go func(f friends.Friend) {
defer wg.Done()
friendName := f.Name
friendURL := f.RSS
var posts []friends.FriendPost
retry := 0
for retry = 0; retry < 5; retry++ {
output.LogOutput.Println(time.Now().Format("2006-01-02 15:04:05"), "Spider", friendName, friendURL, "retry", retry)
if friendURL == "" ||
strings.Index(friendURL, "rss") != -1 ||
strings.Index(friendURL, "atom") != -1 ||
strings.Index(friendURL, "feed") != -1 ||
strings.Index(friendURL, "xml") != -1 {
posts = spider.ReadRSS(friendURL)
} else {
posts = spider.ReadHTML(friendURL)
}
if len(posts) != 0 {
break
}
}
friends.SetFriendPosts(
f.Link,
posts,
)
output.LogOutput.Println(time.Now().Format("2006-01-02 15:04:05"), "Spider", friendName, friendURL, "Finished", retry)
}(f)
spiderURLContext, ok := register.GetContext("spiderURL")
spiderURL := ""
if ok {
switch spiderURLContext.(type) {
case string:
spiderURL = spiderURLContext.(string)
}
}
if spiderURL == "" {
wg := &sync.WaitGroup{}
fs, _ := friends.GetFriends()
for _, f := range fs {
if f.RSS == "" {
continue
}
wg.Add(1)
go spiderSite(f, wg)
}
wg.Wait()
} else {
spiderSite(friends.Friend{
Simple: friends.Simple{
Name: "Test",
},
RSS: spiderURL,
}, nil)
}
wg.Wait()
}

2
cron/spider/const.go

@ -4,5 +4,5 @@ import "time" @@ -4,5 +4,5 @@ import "time"
const (
UserAgent = "OhYee-Spider"
Timeout = 30 * time.Second
Timeout = 120 * time.Second
)

96
cron/spider/html.go

@ -2,15 +2,10 @@ package spider @@ -2,15 +2,10 @@ package spider
import (
"bytes"
"context"
"fmt"
"os"
"strings"
"time"
"github.com/OhYee/blotter/api/pkg/friends"
"github.com/OhYee/blotter/output"
"github.com/chromedp/chromedp"
"golang.org/x/net/html"
"net/url"
@ -109,69 +104,6 @@ func CheckPost(v []*html.Node) bool { @@ -109,69 +104,6 @@ func CheckPost(v []*html.Node) bool {
return true
}
// getChromePath returns the value of the CHROME_PATH environment variable.
// The variable name is matched case-insensitively. An empty string is
// returned when no such variable is set.
func getChromePath() string {
	for _, entry := range os.Environ() {
		// Split on the first "=" only: the value itself may contain "=",
		// and it must be returned unchanged.
		pair := strings.SplitN(entry, "=", 2)
		if len(pair) != 2 {
			continue
		}
		if strings.ToUpper(pair[0]) == "CHROME_PATH" {
			return pair[1]
		}
	}
	return ""
}
// GetHTML opens u in a headless Chrome instance driven by chromedp and
// returns the outer HTML of the page's <html> element.
//
// Chrome is started with image loading disabled, certificate errors ignored
// and the spider's UserAgent. When getChromePath reports a directory, the
// "chrome" binary inside that directory is used as the executable. The whole
// navigation is bounded by Timeout. Errors are logged to output.ErrOutput and
// whatever was captured so far (an empty string when nothing was) is returned.
func GetHTML(u string) string {
	allocatorOpts := []func(*chromedp.ExecAllocator){
		chromedp.Flag("headless", true),
		chromedp.Flag("blink-settings", "imagesEnabled=false"),
		chromedp.UserAgent(UserAgent),
		chromedp.Flag("ignore-certificate-errors", true),
	}

	if chromeDir := getChromePath(); len(chromeDir) > 0 {
		binary := fmt.Sprintf("%s/%s", strings.TrimRight(chromeDir, "/"), "chrome")
		allocatorOpts = append(allocatorOpts, chromedp.ExecPath(binary))
	}

	// Contexts are layered: allocator -> browser tab -> timeout.
	allocCtx, stopAlloc := chromedp.NewExecAllocator(context.Background(), allocatorOpts...)
	defer stopAlloc()

	tabCtx, stopTab := chromedp.NewContext(allocCtx)
	defer stopTab()

	runCtx, stopRun := context.WithTimeout(tabCtx, Timeout)
	defer stopRun()

	var page string
	if err := chromedp.Run(
		runCtx,
		chromedp.Navigate(u),
		chromedp.OuterHTML("html", &page, chromedp.ByQuery),
	); err != nil {
		output.ErrOutput.Println(u, err)
	}
	return page
}
func elementFindTime(node *html.Node) *time.Time {
t := node
parentCount := 0
@ -190,11 +122,18 @@ func elementFindTime(node *html.Node) *time.Time { @@ -190,11 +122,18 @@ func elementFindTime(node *html.Node) *time.Time {
return nil
}
func ReadHTML(u string) []friends.FriendPost {
func ReadHTML(u string, retry int) []friends.FriendPost {
hostURL, _ := url.Parse(u)
c := GetHTML(u)
doc, _ := html.Parse(bytes.NewBufferString(c))
content := ""
if retry%2 == 0 {
content = getHTML(u)
} else {
content = getHTMLWithJS(u)
}
// output.DebugOutput.Println(c)
doc, _ := html.Parse(bytes.NewBufferString(content))
m := make(map[string][]*html.Node)
dfs(doc, []string{}, m)
@ -217,11 +156,24 @@ func ReadHTML(u string) []friends.FriendPost { @@ -217,11 +156,24 @@ func ReadHTML(u string) []friends.FriendPost {
}
titles := elementInnterText(item)
// output.DebugOutput.Println(titles)
if len(titles) <= 0 {
continue
}
title := ""
for _, t := range titles {
if parseTime(t) == nil {
title = t
}
}
if title == "" {
title = titles[0]
}
posts = append(posts, friends.FriendPost{
Title: titles[0],
Title: title,
Link: u.String(),
Time: toUnix(elementFindTime(item)),
})

116
cron/spider/request.go

@ -0,0 +1,116 @@ @@ -0,0 +1,116 @@
package spider
import (
"context"
"crypto/tls"
"fmt"
"io/ioutil"
"net/http"
"os"
"strings"
"github.com/OhYee/blotter/output"
"github.com/chromedp/chromedp"
)
// client is the shared HTTP client that getHTML uses for plain page fetches
// (no JavaScript execution). Every request is bounded by Timeout.
// NOTE(review): InsecureSkipVerify turns TLS certificate verification off, so
// responses can be spoofed by a man-in-the-middle; presumably this is a
// deliberate trade-off to reach sites with broken certificates - confirm.
var client = &http.Client{
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
// false is the zero value: transport-level compression handling stays enabled.
DisableCompression: false,
},
Timeout: Timeout,
}
// getHTML downloads u with a plain HTTP GET (no JavaScript execution) and
// returns the response body as a string.
//
// The request carries the spider's UserAgent. An empty string is returned,
// and the problem is logged to output.ErrOutput, when the request cannot be
// built or sent, when the server answers with a non-2xx status, or when the
// body cannot be read.
func getHTML(u string) (content string) {
	req, err := http.NewRequest("GET", u, nil)
	if err != nil {
		output.ErrOutput.Println(u, err)
		return ""
	}

	req.Header.Set("User-Agent", UserAgent)
	// Ask the server for an uncompressed body.
	req.Header.Add("Accept-Encoding", "identity")
	// Do not keep the connection alive after this request; see
	// https://dpjeep.com/2019/06/10/golangzhi-http-eofxiang-jie/
	req.Close = true

	resp, err := client.Do(req)
	if err != nil {
		output.ErrOutput.Println(u, err)
		return ""
	}
	defer resp.Body.Close()

	// An error page must not be handed to the parsers as if it were the
	// site's content; report it and let the caller retry instead.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		output.ErrOutput.Println(u, resp.Status)
		return ""
	}

	b, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		output.ErrOutput.Println(u, err)
		return ""
	}
	return string(b)
}
// getChromePath looks up the CHROME_PATH environment variable, matching the
// variable name case-insensitively, and returns its value. It returns "" when
// the variable is not present.
func getChromePath() string {
	for _, kv := range os.Environ() {
		// Only the first "=" separates name from value; joining the remaining
		// pieces without it would strip "=" characters out of the path.
		parts := strings.SplitN(kv, "=", 2)
		if len(parts) == 2 && strings.ToUpper(parts[0]) == "CHROME_PATH" {
			return parts[1]
		}
	}
	return ""
}
// getHTMLWithJS loads u in headless Chrome through chromedp, so that pages
// rendered by JavaScript are captured, and returns the outer HTML of the
// <html> element.
//
// Chrome runs without sandbox and GPU, with image loading disabled and
// certificate errors ignored, and identifies itself with the spider's
// UserAgent. When getChromePath reports a directory, the "chrome" binary in
// that directory is used. The whole run is bounded by Timeout. On failure the
// error is logged to output.ErrOutput and whatever was captured (an empty
// string when nothing was) is returned.
func getHTMLWithJS(u string) string {
	opts := []func(*chromedp.ExecAllocator){
		chromedp.Flag("headless", true),
		chromedp.Flag("blink-settings", "imagesEnabled=false"),
		chromedp.Flag("ignore-certificate-errors", true),
		chromedp.Flag("no-sandbox", true),
		chromedp.Flag("disable-gpu", true),
		// The switch Chrome understands is "no-default-browser-check"; the
		// previous name had a trailing space and no "no-" prefix, so it was
		// passed to Chrome as an unknown flag.
		chromedp.Flag("no-default-browser-check", true),
		chromedp.UserAgent(UserAgent),
	}

	if chromePath := getChromePath(); len(chromePath) > 0 {
		opts = append(
			opts,
			chromedp.ExecPath(
				fmt.Sprintf(
					"%s/%s",
					strings.TrimRight(chromePath, "/"),
					"chrome",
				),
			),
		)
	}

	// Contexts are layered: process allocator -> browser tab -> timeout.
	allocCtx, cancelAlloc := chromedp.NewExecAllocator(
		context.Background(),
		opts...,
	)
	defer cancelAlloc()

	tabCtx, cancelTab := chromedp.NewContext(allocCtx)
	defer cancelTab()

	runCtx, cancelRun := context.WithTimeout(tabCtx, Timeout)
	defer cancelRun()

	var res string
	err := chromedp.Run(
		runCtx,
		chromedp.Navigate(u),
		chromedp.OuterHTML("html", &res, chromedp.ByQuery),
	)
	if err != nil {
		output.ErrOutput.Println(u, err)
	}
	return res
}

65
cron/spider/rss.go

@ -1,49 +1,62 @@ @@ -1,49 +1,62 @@
package spider
import (
"crypto/tls"
"io/ioutil"
"net/http"
"bytes"
"github.com/OhYee/blotter/api/pkg/friends"
"github.com/OhYee/blotter/output"
"github.com/mmcdole/gofeed"
"golang.org/x/net/html"
)
func ReadRSS(u string) (posts []friends.FriendPost) {
// output.DebugOutput.Println(u)
posts = make([]friends.FriendPost, 0, 5)
client := &http.Client{
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
},
Timeout: Timeout,
func findTitle(n *html.Node, arr []*html.Node) {
t := n
for t != nil {
if t.Type == html.ElementNode && t.Data == "title" {
arr = append(arr, t)
}
findTitle(t.FirstChild, arr)
t = t.NextSibling
}
req, err := http.NewRequest("GET", u, nil)
}
func htmlParser(content string) (feed *gofeed.Feed, err error) {
root, err := html.Parse(bytes.NewReader([]byte(content)))
if err != nil {
output.ErrOutput.Println(u, err)
return
}
titles := make([]*html.Node, 0)
findTitle(root, titles)
req.Header.Set("User-Agent", UserAgent)
resp, err := client.Do(req)
if err != nil {
output.ErrOutput.Println(u, err)
return
feed = &gofeed.Feed{}
feed.Items = make([]*gofeed.Item, len(titles))
for i, title := range titles {
feed.Items[i] = &gofeed.Item{
Title: title.Data,
Link: "",
Updated: "",
Published: "",
}
}
return
}
b, err := ioutil.ReadAll(resp.Body)
if err != nil {
output.ErrOutput.Println(u, err)
func ReadRSS(u string, retry int) (posts []friends.FriendPost) {
// output.DebugOutput.Println(u)
posts = make([]friends.FriendPost, 0, 5)
content := ""
if retry%2 == 0 {
content = getHTML(u)
} else {
content = getHTMLWithJS(u)
}
if len(content) == 0 {
return
}
// output.DebugOutput.Println(string(b))
fp := gofeed.NewParser()
feed, err := fp.ParseString(string(b))
feed, err := fp.ParseString(content)
if err != nil {
output.ErrOutput.Println(u, err)
return

114
cron/spider/time.go

@ -2,52 +2,35 @@ package spider @@ -2,52 +2,35 @@ package spider
import (
"regexp"
"strconv"
"strings"
"time"
utime "github.com/OhYee/goutils/time"
)
type timeFinder struct {
Regexp *regexp.Regexp
TimeFormat string
Regexp *regexp.Regexp
TimeFormat string
TimeFormatFunc func(string) *time.Time
}
var timeFinders = []timeFinder{
{
Regexp: regexp.MustCompile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}"),
TimeFormat: "2006-01-02 15:04:05",
Regexp: regexp.MustCompile("\\d{2,4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}"),
TimeFormatFunc: func(s string) *time.Time { return splitDateTime(s, "-", ":", " ") },
},
{
Regexp: regexp.MustCompile("\\d{4}-\\d{1,2}-\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}"),
TimeFormat: "2006-1-2 15:4:5",
Regexp: regexp.MustCompile("\\d{2,4}/\\d{1,2}/\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}"),
TimeFormatFunc: func(s string) *time.Time { return splitDateTime(s, "/", ":", " ") },
},
{
Regexp: regexp.MustCompile("\\d{4}/\\d{2}/\\d{2} \\d{2}:\\d{2}:\\d{2}"),
TimeFormat: "2006/01/02 15:04:05",
Regexp: regexp.MustCompile("\\d{2,4}-\\d{1,2}-\\d{1,2}"),
TimeFormatFunc: func(s string) *time.Time { return splitDate(s, "-") },
},
{
Regexp: regexp.MustCompile("\\d{4}/\\d{1,2}/\\d{1,2} \\d{1,2}:\\d{1,2}:\\d{1,2}"),
TimeFormat: "2006/1/2 15:4:5",
},
{
Regexp: regexp.MustCompile("\\d{4}-\\d{2}-\\d{2}"),
TimeFormat: "2006-01-02",
},
{
Regexp: regexp.MustCompile("\\d{4}-\\d{1,2}-\\d{1,2}"),
TimeFormat: "2006-1-2",
},
{
Regexp: regexp.MustCompile("\\d{4}/\\d{2}/\\d{2}"),
TimeFormat: "2006/01/02",
},
{
Regexp: regexp.MustCompile("\\d{4}/\\d{1,2}/\\d{1,2}"),
TimeFormat: "2006/1/2",
},
{
Regexp: regexp.MustCompile("\\d{2}/\\d{1,2}/\\d{1,2}"),
TimeFormat: "06/1/2",
Regexp: regexp.MustCompile("\\d{2,4}/\\d{1,2}/\\d{1,2}"),
TimeFormatFunc: func(s string) *time.Time { return splitDate(s, "/") },
},
{
Regexp: regexp.MustCompile("[a-zA-Z]{2,4} \\d{2}, \\d{4}"),
@ -63,10 +46,14 @@ func parseTime(s string) *time.Time { @@ -63,10 +46,14 @@ func parseTime(s string) *time.Time {
for _, r := range timeFinders {
result := r.Regexp.FindAllString(s, -1)
for _, timeString := range result {
t, err := time.ParseInLocation(r.TimeFormat, timeString, utime.ChinaTimeZone)
// output.DebugOutput.Println(timeString, err)
if err == nil {
return &t
if r.TimeFormatFunc == nil {
if t, err := time.ParseInLocation(r.TimeFormat, timeString, utime.ChinaTimeZone); err == nil {
return &t
}
} else {
if t := r.TimeFormatFunc(timeString); t != nil {
return t
}
}
}
}
@ -79,3 +66,62 @@ func toUnix(t *time.Time) int64 { @@ -79,3 +66,62 @@ func toUnix(t *time.Time) int64 {
}
return t.Unix()
}
// toInt parses s as a base-10 integer and falls back to 0 when s is not a
// valid number.
func toInt(s string) int {
	if value, err := strconv.Atoi(s); err == nil {
		return value
	}
	return 0
}
// splitInts cuts s at every occurrence of char and converts the pieces to
// integers with toInt. It returns 0, 0, 0 unless s consists of exactly three
// pieces; a piece that is not a number becomes 0.
func splitInts(s string, char string) (int, int, int) {
	parts := strings.Split(s, char)
	if len(parts) == 3 {
		return toInt(parts[0]), toInt(parts[1]), toInt(parts[2])
	}
	return 0, 0, 0
}
// splitDate parses a "year<char>month<char>day" string into midnight of that
// day in utime.ChinaTimeZone.
//
// Years below 100 are interpreted as 20xx. nil is returned when s does not
// have exactly three components, when a component is out of range, or when
// the day does not exist in the given month (for example 2021-02-31).
func splitDate(s string, char string) *time.Time {
	// splitInts yields 0, 0, 0 for anything but exactly three components,
	// which the year check below rejects.
	year, month, day := splitInts(s, char)
	if year <= 0 || month < 1 || month > 12 || day < 1 || day > 31 {
		return nil
	}
	if year < 100 {
		year += 2000
	}

	t := time.Date(year, time.Month(month), day, 0, 0, 0, 0, utime.ChinaTimeZone)
	// time.Date normalizes out-of-range days into the following month; a
	// shifted month or day means the input was not a real calendar date.
	if t.Month() != time.Month(month) || t.Day() != day {
		return nil
	}
	return &t
}
// splitDateTime parses "<date><char3><time>" into a time in
// utime.ChinaTimeZone, where the date is "year<char>month<char>day" and the
// time is "hour<char2>minute<char2>second".
//
// Years below 100 are interpreted as 20xx. nil is returned when the string
// does not have the expected shape, when a component is out of range, or when
// the day does not exist in the given month (for example 2021-02-31).
func splitDateTime(s string, char string, char2 string, char3 string) *time.Time {
	// The date separator must cut the whole string into exactly three parts.
	if len(strings.Split(s, char)) != 3 {
		return nil
	}
	halves := strings.Split(s, char3)
	if len(halves) != 2 {
		return nil
	}

	year, month, day := splitInts(halves[0], char)
	hour, minute, second := splitInts(halves[1], char2)
	if year <= 0 || month < 1 || month > 12 || day < 1 || day > 31 ||
		hour < 0 || hour > 23 || minute < 0 || minute > 59 || second < 0 || second > 59 {
		return nil
	}
	if year < 100 {
		year += 2000
	}

	t := time.Date(year, time.Month(month), day, hour, minute, second, 0, utime.ChinaTimeZone)
	// time.Date normalizes out-of-range days into the following month; a
	// shifted month or day means the input was not a real calendar date.
	if t.Month() != time.Month(month) || t.Day() != day {
		return nil
	}
	return &t
}

89
cron/spider/time_test.go

@ -0,0 +1,89 @@ @@ -0,0 +1,89 @@
package spider
import (
"testing"
)
// Test_parseTime checks parseTime against a table of date strings. Each
// expected value is the Unix timestamp of the parsed time, with 0 standing
// for "no time found". The non-zero expectations correspond to the written
// wall-clock time at UTC+8.
func Test_parseTime(t *testing.T) {
	cases := []struct {
		s    string
		want int64
	}{
		{"2021-09-05", 1630771200},
		{"2021-9-5", 1630771200},
		{"2021-09-5", 1630771200},
		{"2021-9-05", 1630771200},
		{"2021/09/05", 1630771200},
		{"2021/9/5", 1630771200},
		{"2021/09/5", 1630771200},
		{"2021/9/05", 1630771200},
		{"2021-09-05 17:30:25", 1630834225},
		{"21-09-05 17:30:25", 1630834225},
		{"2021-09-05 25:30:25", 1630771200}, // only the date part is parsed
		{"2021-13-05 25:30:25", 0},
		{"21-09-05 17:30:25", 1630834225},
		{"time is: 21-09-05", 1630771200},
		{"0000-00-00", 0},
		{"21|09|05", 0},
	}

	for _, c := range cases {
		c := c
		t.Run(c.s, func(t *testing.T) {
			var ts int64
			if parsed := parseTime(c.s); parsed != nil {
				ts = parsed.Unix()
			}
			if ts != c.want {
				t.Errorf("parseTime() = %v, want %v", ts, c.want)
			}
		})
	}
}

2
generate.bash

@ -9,6 +9,6 @@ _blotter_version="${_branch}${_version} (${_date})" @@ -9,6 +9,6 @@ _blotter_version="${_branch}${_version} (${_date})"
echo ${_blotter_version}
CGO_ENABLED=1 go build -ldflags "-X 'main._version=${_blotter_version}' -extldflags '-static -s -w -fpic'"
CGO_ENABLED=1 go build -v -ldflags "-X 'main._version=${_blotter_version}' -extldflags '-static -s -w -fpic'"
unset _date _branch _version _blotter_version

6
go.mod

@ -16,7 +16,7 @@ require ( @@ -16,7 +16,7 @@ require (
github.com/OhYee/rainbow v1.0.6
github.com/alecthomas/chroma v0.8.2 // indirect
github.com/aws/aws-sdk-go v1.36.1 // indirect
github.com/chromedp/chromedp v0.7.4 // indirect
github.com/chromedp/chromedp v0.7.4
github.com/dlclark/regexp2 v1.4.0 // indirect
github.com/golang/protobuf v1.4.3 // indirect
github.com/golang/snappy v0.0.2 // indirect
@ -25,7 +25,7 @@ require ( @@ -25,7 +25,7 @@ require (
github.com/graemephi/goldmark-qjs-katex v0.3.0
github.com/klauspost/compress v1.11.3 // indirect
github.com/mitchellh/mapstructure v1.4.0
github.com/mmcdole/gofeed v1.1.3 // indirect
github.com/mmcdole/gofeed v1.1.3
github.com/qiniu/api.v7/v7 v7.7.0
github.com/refraction-networking/utls v0.0.0-20201112193908-f7e7360167ed // indirect
github.com/robfig/cron v1.2.0
@ -36,7 +36,7 @@ require ( @@ -36,7 +36,7 @@ require (
go.mongodb.org/mongo-driver v1.4.4
go.starlark.net v0.0.0-20201202183632-a7839910dcbe // indirect
golang.org/x/crypto v0.0.0-20201203163018-be400aefbc4c // indirect
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb // indirect
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 // indirect
golang.org/x/text v0.3.4 // indirect
google.golang.org/genproto v0.0.0-20201203001206-6486ece9c497 // indirect

5
go.sum

@ -57,11 +57,9 @@ github.com/aws/aws-sdk-go v1.34.28/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/ @@ -57,11 +57,9 @@ github.com/aws/aws-sdk-go v1.34.28/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/
github.com/aws/aws-sdk-go v1.36.1 h1:rDgSL20giXXu48Ycx6Qa4vWaNTVTltUl6vA73ObCSVk=
github.com/aws/aws-sdk-go v1.36.1/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/chromedp/cdproto v0.0.0-20210323015217-0942afbea50e h1:UimnzLuARNkGi2XsNznUoOLFP/noktdUMrr7fcb3D4U=
github.com/chromedp/cdproto v0.0.0-20210323015217-0942afbea50e/go.mod h1:At5TxYYdxkbQL0TSefRjhLE3Q0lgvqKKMSFUglJ7i1U=
github.com/chromedp/cdproto v0.0.0-20210713064928-7d28b402946a h1:B6EyBXuMsFyrUoBrNXdt+Vf3vQNpN4DU/Xv96R4BdFg=
github.com/chromedp/cdproto v0.0.0-20210713064928-7d28b402946a/go.mod h1:At5TxYYdxkbQL0TSefRjhLE3Q0lgvqKKMSFUglJ7i1U=
github.com/chromedp/chromedp v0.6.10 h1:Yd4X6ngkWbn6A+hv6mUzV9kVHrPn7L4+vf2uyNbze2s=
github.com/chromedp/chromedp v0.6.10/go.mod h1:Q8L2uDLH9YFYbThK5fqPpyWa3CT4y9dqHLxaQr+Yhl8=
github.com/chromedp/chromedp v0.7.4 h1:U+0d3WbB/Oj4mDuBOI0P7S3PJEued5UZIl5AJ3QulwU=
github.com/chromedp/chromedp v0.7.4/go.mod h1:dBj+SXuQHznp6ZPwZeDDEBZKwclUwDLbZ0hjMialMYs=
@ -119,7 +117,6 @@ github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU @@ -119,7 +117,6 @@ github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU
github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=
github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
github.com/gobwas/ws v1.0.4 h1:5eXU1CZhpQdq5kXbKb+sECH5Ia5KiO6CYzIzdlVx6Bs=
github.com/gobwas/ws v1.0.4/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM=
github.com/gobwas/ws v1.1.0 h1:7RFti/xnNkMJnrK7D1yQ/iCIB5OrrY/54/H930kIbHA=
github.com/gobwas/ws v1.1.0/go.mod h1:nzvNcVha5eUziGrbxFCo6qFIojQHjJV5cLYIbezhfL0=
@ -209,6 +206,7 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLD @@ -209,6 +206,7 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLD
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
github.com/nkovacs/streamquote v0.0.0-20170412213628-49af9bddb229/go.mod h1:0aYXnNPJ8l7uZxf45rWW1a/uME32OF0rhiYGNQ2oF2E=
github.com/orisano/pixelmatch v0.0.0-20210112091706-4fa4c7ba91d5 h1:1SoBaSPudixRecmlHXb/GxmaD3fLMtHIDN13QujwQuc=
github.com/orisano/pixelmatch v0.0.0-20210112091706-4fa4c7ba91d5/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
github.com/pelletier/go-toml v1.7.0/go.mod h1:vwGMzjaWMwyfHwgIBhI2YUM4fB6nL6lVAvS1LBMMhTE=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@ -320,7 +318,6 @@ golang.org/x/sys v0.0.0-20200413165638-669c56c373c4/go.mod h1:h1NjWce9XRLGQEsW7w @@ -320,7 +318,6 @@ golang.org/x/sys v0.0.0-20200413165638-669c56c373c4/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201207223542-d4d67f95c62d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4 h1:EZ2mChiOa8udjfp6rRmswTbtZN/QzUQp4ptM4rnjHvc=
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210525143221-35b2ab0089ea h1:+WiDlPBBaO+h9vPNZi8uJ3k4BkKQB7Iow3aqwHVA5hI=
golang.org/x/sys v0.0.0-20210525143221-35b2ab0089ea/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=

6
main.go

@ -17,6 +17,7 @@ var ( @@ -17,6 +17,7 @@ var (
addr = "127.0.0.1:50000"
prefix = "/api/"
tool = ""
url = ""
)
var (
@ -28,6 +29,7 @@ var ( @@ -28,6 +29,7 @@ var (
func parseFlags() {
flag.StringVar(&addr, "address", "127.0.0.1:50000", "listen address")
flag.StringVar(&prefix, "prefix", "/api/", "api url prefix")
flag.StringVar(&url, "url", "", "spider url")
keys := make([]string, len(cron.CronMap))
pos := 0
@ -42,6 +44,9 @@ func parseFlags() { @@ -42,6 +44,9 @@ func parseFlags() {
func main() {
parseFlags()
register.SetContext("version", _version)
register.SetContext("spiderURL", url)
if tool != "" {
f, e := cron.CronMap[tool]
if !e {
@ -52,7 +57,6 @@ func main() { @@ -52,7 +57,6 @@ func main() {
os.Exit(0)
}
register.SetContext("version", _version)
cron.Start()
defer cron.Stop()

2
register/context_http.go

@ -203,5 +203,5 @@ func (context *HTTPContext) GetUser() *user.TypeDB { @@ -203,5 +203,5 @@ func (context *HTTPContext) GetUser() *user.TypeDB {
// GetContext get global context
func (context *HTTPContext) GetContext(key string) (value interface{}, ok bool) {
return getContext(key)
return GetContext(key)
}

2
register/context_websocket.go

@ -122,5 +122,5 @@ func (context *WebSocketContext) GetUser() *user.TypeDB { @@ -122,5 +122,5 @@ func (context *WebSocketContext) GetUser() *user.TypeDB {
// GetContext get global context
func (context *WebSocketContext) GetContext(key string) (value interface{}, ok bool) {
return getContext(key)
return GetContext(key)
}

2
register/register.go

@ -14,7 +14,7 @@ var ( @@ -14,7 +14,7 @@ var (
ctxMutex = new(sync.RWMutex)
)
func getContext(key string) (value interface{}, ok bool) {
func GetContext(key string) (value interface{}, ok bool) {
ctxMutex.RLock()
defer ctxMutex.RUnlock()
value, ok = ctx[key]

Loading…
Cancel
Save