保存时间:2026/4/3 19:39:19
from playwright.sync_api import sync_playwright
USER_DATA_DIR = "./my_chrome_profile"
# Open doubao.com in a Chromium instance backed by the on-disk profile in
# USER_DATA_DIR, then print the page title and the chat container's HTML.
with sync_playwright() as p:
    # NOTE: launch_persistent_context returns a BrowserContext (not a Browser);
    # the name `browser` is kept for compatibility with any later code.
    browser = p.chromium.launch_persistent_context(
        user_data_dir=USER_DATA_DIR,
        headless=False,
    )
    try:
        page = browser.new_page()
        page.goto("https://www.doubao.com/")
        # Only the fixed extraction steps are performed below:
        title = page.title()
        # NOTE(review): ".chat-container" is an assumed selector; inner_html()
        # waits for a match and raises on timeout if the element never appears.
        chat_content = page.locator(".chat-container").inner_html()
        # ... extract only the fixed structure
        print({"title": title, "chat": chat_content})
    finally:
        # Close the context even when navigation or extraction raises, so the
        # headed browser window is not left open.
        browser.close()
/**
 * Export the conversation rendered on the current page as a JSON download.
 *
 * Each matched element becomes `{ role, content }`, where `role` is 'user'
 * when the text contains '你:' or the element sits inside a node whose class
 * contains "user", and 'assistant' otherwise. Elements with no visible text
 * are skipped. Runs immediately; nothing is awaited, so it is a plain
 * (non-async) function.
 */
(function exportDoubaoHistory() {
  // 1. Collect every conversation block (tune the selector to the real DOM).
  // NOTE(review): '[class*="message"]' also matches nested nodes such as a
  // message's inner wrappers, which can duplicate content — verify on the page.
  const messages = Array.from(
    document.querySelectorAll('.message-item, .chat-message, [class*="message"]')
  );

  // 2. Structured extraction: who sent it + the content.
  const history = messages
    .map(el => {
      const text = el.innerText.trim();
      const isUser = text.includes('你:') || el.closest('[class*="user"]') !== null;
      return {
        role: isUser ? 'user' : 'assistant',
        content: text
      };
    })
    // Drop elements that render no text so the export has no blank messages.
    .filter(entry => entry.content);

  if (history.length === 0) {
    alert('未找到对话内容,请确认当前在对话页面');
    return;
  }

  // 3. Serialize to JSON.
  const json = JSON.stringify(history, null, 2);

  // 4. Download as a file.
  const blob = new Blob([json], { type: 'application/json' });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  // toISOString() is UTC, so the date in the file name is the UTC date.
  a.download = `doubao-history-${new Date().toISOString().slice(0,10)}.json`;
  a.click();
  // Release the blob URL once the download has had time to start; without
  // this the blob stays alive until the page is unloaded.
  setTimeout(() => URL.revokeObjectURL(url), 1000);

  console.log('导出完成,共', history.length, '条消息');
  console.log(history);
})();
/**
 * Export the visible text of every element whose class contains "message"
 * as a plain-text download named doubao.txt. Non-empty texts are joined with
 * a "---" separator line between them.
 */
(function exportPlainText() {
  const texts = Array.from(document.querySelectorAll('[class*="message"]'))
    .map(el => el.innerText.trim())
    .filter(Boolean)
    .join('\n\n---\n\n');

  // Nothing matched (or everything was blank): tell the user instead of
  // downloading an empty file.
  if (!texts) {
    alert('未找到对话内容,请确认当前在对话页面');
    return;
  }

  const blob = new Blob([texts], { type: 'text/plain' });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = 'doubao.txt';
  a.click();
  // Release the blob URL once the download has had time to start; without
  // this the blob stays alive until the page is unloaded.
  setTimeout(() => URL.revokeObjectURL(url), 1000);
})();
<!-- Sample message markup: both elements carry a class containing "message";
     only the first also carries a class containing "user".
     NOTE(review): presumably illustrates the DOM shape the export snippets'
     selectors target — confirm against the live page. -->
<div class="message user-message">...</div>
<div class="message assistant-message">...</div>