66 lines
2.1 KiB
Python
66 lines
2.1 KiB
Python
#!/usr/bin/env python3
|
|
import email.utils
|
|
import json
|
|
import os
|
|
import urllib.parse
|
|
import urllib.request
|
|
import xml.etree.ElementTree as ET
|
|
|
|
|
|
def normalize_pubdate(value: str) -> str:
    """Convert an RFC 2822 style date string to ``YYYY-MM-DD``.

    Args:
        value: Date text such as an RSS ``pubDate``
            (for example ``"Mon, 01 Jan 2024 10:00:00 GMT"``).

    Returns:
        ``''`` when *value* is empty or ``None``; the ``YYYY-MM-DD`` form of
        the parsed date when parsing succeeds; otherwise *value* unchanged.
    """
    if not value:
        return ''
    try:
        dt = email.utils.parsedate_to_datetime(value)
        return dt.strftime('%Y-%m-%d')
    except (TypeError, ValueError, IndexError, OverflowError):
        # Best effort: unparseable text is handed back as-is instead of
        # failing. parsedate_to_datetime signals bad input with ValueError
        # (TypeError on older Python versions).
        return value
|
|
|
|
|
|
def infer_provider(title: str, link: str) -> str:
    """Guess which AI provider a search result refers to.

    The title and link are joined, lower-cased, and scanned for known
    keywords. Keywords are tried in table order and the first one found as a
    substring decides the result.

    Args:
        title: Result headline; ``None`` is treated as an empty string.
        link: Result URL; ``None`` is treated as an empty string.

    Returns:
        The provider display name, or ``''`` when no keyword matches.
    """
    keyword_table = (
        ('openai', 'OpenAI'),
        ('anthropic', 'Anthropic'),
        ('claude', 'Anthropic'),
        ('gemini', 'Google'),
        ('google', 'Google'),
        ('deepseek', 'DeepSeek'),
        ('qwen', 'Qwen'),
        ('dashscope', 'DashScope'),
        ('zhipu', '智谱'),
        ('baidu', '百度'),
        ('tencent', '腾讯'),
        ('minimax', 'MiniMax'),
        ('x.ai', 'xAI'),
        ('xai', 'xAI'),
    )
    # `or ''` keeps a missing title/link from raising during concatenation.
    haystack = f"{title or ''} {link or ''}".lower()
    for keyword, provider in keyword_table:
        if keyword in haystack:
            return provider
    return ''
|
|
|
|
|
|
def _origin(link: str) -> str:
    """Return ``scheme://netloc`` for *link*, or ``''`` if it has no usable origin."""
    if not link:
        return ''
    try:
        parts = urllib.parse.urlparse(link)
    except ValueError:
        # urlparse rejects some malformed hosts (e.g. unbalanced IPv6
        # brackets); one bad link should not abort the whole run.
        return ''
    if parts.scheme and parts.netloc:
        return f"{parts.scheme}://{parts.netloc}"
    return ''


# The search terms come from the environment. With nothing to search for,
# emit an empty JSON array and exit successfully.
query = os.environ.get("INTRADAY_DISCOVERY_QUERY", "").strip()
if not query:
    print("[]")
    raise SystemExit(0)

# Ask Bing for the result page in RSS form (format=rss).
url = "https://www.bing.com/search?format=rss&q=" + urllib.parse.quote(query)
req = urllib.request.Request(url, headers={
    "User-Agent": "Mozilla/5.0",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
})
# Network and HTTP errors are not caught here; they propagate to the caller.
with urllib.request.urlopen(req, timeout=20) as resp:
    # Bytes that are not valid UTF-8 are dropped rather than raising.
    body = resp.read().decode("utf-8", errors="ignore")

# NOTE(review): the response is remote data and xml.etree.ElementTree is not
# hardened against maliciously constructed XML; confirm that is acceptable.
root = ET.fromstring(body)

items = []
for item in root.findall('./channel/item'):
    title = (item.findtext('title') or '').strip()
    link = (item.findtext('link') or '').strip()
    desc = (item.findtext('description') or '').strip()
    pub = (item.findtext('pubDate') or '').strip()
    items.append({
        "title": title,
        "summary": desc,
        "url": link,
        "provider": infer_provider(title, link),
        "provider_url": _origin(link),
        "published_at": normalize_pubdate(pub),
    })

# ensure_ascii=False keeps non-ASCII text readable in the output.
print(json.dumps(items, ensure_ascii=False))
|