Browse Source

spider for summerfire.cn

master
OhYee 2 years ago
parent
commit
9a28548fb5
Signed by: OhYee
GPG Key ID: 5A9E1F63ED274FBB
  1. 2
      tools/spider/sites/__init__.py
  2. 38
      tools/spider/sites/summerfire.py

2
tools/spider/sites/__init__.py

@@ -17,6 +17,7 @@ from .lylblog import Lylblog
from .icskkk import ICSKKK
from .wanglingyue import Wanglingyue
from .yvonnecheung import Yvonnecheung
from .summerfire import SummerFire
sites = [
Taifua(),
@@ -36,6 +37,7 @@ sites = [
ICSKKK(),
Wanglingyue(),
Yvonnecheung(),
SummerFire(),
Lylblog(),
RSS(),

38
tools/spider/sites/summerfire.py

@@ -0,0 +1,38 @@
from bs4 import BeautifulSoup
import datetime
if __name__ == "__main__":
from utils import *
else:
from .utils import *
class SummerFire(Site):
    """Spider for pages served from summerfire.cn.

    ``matcher`` reports whether a URL belongs to this site and ``solver``
    scrapes the article summaries listed on the page at that URL.
    """

    def __init__(self):
        # The previous ``super(Site, self)`` merely created a proxy object
        # and never called any initializer, so the base class was left
        # uninitialised.
        # NOTE(review): assumes ``Site.__init__`` takes no required
        # arguments -- confirm against ``utils.Site``.
        super().__init__()

    def matcher(self, url: str):
        """Return True when ``url`` contains the text ``summerfire.cn``."""
        return "summerfire.cn" in url

    def solver(self, url: str):
        """Download ``url`` and build one ``Post`` per article summary.

        Each ``article.summary`` element is expected to contain an
        ``h1.single-title`` wrapping a link, and a ``time`` element whose
        text has the form ``year-month-day``.

        Args:
            url: Address of the listing page to scrape.

        Returns:
            A list of ``Post`` objects, each constructed positionally from
            the title text, the article link and the publication timestamp.

        Raises:
            AttributeError: If a summary lacks the title, link or ``time``
                element.
            ValueError: If the date text is not three ``-``-separated
                integers forming a valid date.
        """
        res = get(url)
        soup = BeautifulSoup(res, features="lxml")

        # Loop-invariant: the page URL without surrounding slashes is the
        # prefix for every article link.
        base = url.strip("/")

        posts = []
        for item in soup.select("article.summary"):
            title = item.select_one("h1.single-title")
            date_text = item.select_one("time").get_text()
            y, m, d = map(int, date_text.split("-"))
            href = title.select_one("a").get("href")
            posts.append(
                Post(
                    title.get_text(),
                    "%s/%s" % (base, href.strip("/")),
                    # Naive datetime: timestamp() interprets it in the
                    # local timezone of the machine running the spider.
                    datetime.datetime(y, m, d).timestamp(),
                )
            )
        return posts
if __name__ == "__main__":
    # Manual smoke test against the live site (requires network access):
    # print the matcher verdict first, then the scraped posts.
    home_url = "https://www.summerfire.cn/"
    spider = SummerFire()
    print(spider.matcher(home_url))
    print(spider.solver(home_url))
Loading…
Cancel
Save