Compare commits
33 Commits
8cc25b7c2e
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 498d5110e9 | |||
| 851d536b59 | |||
| adc9c76864 | |||
| 624e158be9 | |||
| 5bc40abbc1 | |||
| bd2c457f54 | |||
| 179bfa327b | |||
| c2357ffb67 | |||
| 0d287e7c1f | |||
| 674ee1e1e2 | |||
| 0cf231f9f7 | |||
| f82da3bab1 | |||
| 22a50ad5c6 | |||
| 0d9e427a34 | |||
| ec68b83827 | |||
| 130bbfb090 | |||
| 6e83136dc6 | |||
| f6f4da7d07 | |||
| a2be43d42a | |||
| a4c106fa5a | |||
| f24ca9aa29 | |||
| a537d3825b | |||
| e67931c3ca | |||
| b7cd03434d | |||
| a9d6c4699d | |||
| 3984b81f86 | |||
| d62cd2fcca | |||
| d44a294bf7 | |||
| 57e0029eb1 | |||
| a2ecc7f451 | |||
| 6ae10c9d36 | |||
| 20b2f46533 | |||
| 43ec564daa |
68
.gitignore
vendored
Normal file
68
.gitignore
vendored
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
*.log
|
||||||
|
integrated_product_system.log
|
||||||
|
|
||||||
|
# Databases
|
||||||
|
*.db
|
||||||
|
*.sqlite
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.trae/
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Test files
|
||||||
|
*test*.py
|
||||||
|
*Test*.py
|
||||||
|
pytest_cache/
|
||||||
|
.tox/
|
||||||
|
.coverage
|
||||||
|
coverage.xml
|
||||||
|
|
||||||
|
# Temporary files
|
||||||
|
*.tmp
|
||||||
|
*.temp
|
||||||
|
temp*.txt
|
||||||
|
*.bak
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
temp_*.txt
|
||||||
|
|
||||||
|
# Bug and debug files
|
||||||
|
*debug*.png
|
||||||
|
*bug*.txt
|
||||||
|
|
||||||
|
# Batch files
|
||||||
|
*.bat
|
||||||
|
|
||||||
|
# Output files
|
||||||
|
*.out
|
||||||
|
*.output
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
.env.*.local
|
||||||
|
|
||||||
|
# Documentation build
|
||||||
|
_build/
|
||||||
|
build/
|
||||||
|
dist/
|
||||||
|
*.egg-info/
|
||||||
|
|
||||||
|
# Other
|
||||||
|
2025年12月*.txt
|
||||||
|
*.png
|
||||||
5850
2025年12月15日18107.txt
5850
2025年12月15日18107.txt
File diff suppressed because it is too large
Load Diff
5910
2025年12月16日20359.txt
5910
2025年12月16日20359.txt
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
5790
2026年1月15日1991.txt
Normal file
5790
2026年1月15日1991.txt
Normal file
File diff suppressed because it is too large
Load Diff
5820
2026年1月17日16419.txt
Normal file
5820
2026年1月17日16419.txt
Normal file
File diff suppressed because it is too large
Load Diff
5800
2026年1月18日9249.txt
Normal file
5800
2026年1月18日9249.txt
Normal file
File diff suppressed because it is too large
Load Diff
5840
2026年1月21日19238.txt
Normal file
5840
2026年1月21日19238.txt
Normal file
File diff suppressed because it is too large
Load Diff
5795
2026年1月22日18556.txt
Normal file
5795
2026年1月22日18556.txt
Normal file
File diff suppressed because it is too large
Load Diff
5855
2026年1月29日20470.txt
Normal file
5855
2026年1月29日20470.txt
Normal file
File diff suppressed because it is too large
Load Diff
5795
2026年1月31日91239.txt
Normal file
5795
2026年1月31日91239.txt
Normal file
File diff suppressed because it is too large
Load Diff
5800
2026年3月10日183431.txt
Normal file
5800
2026年3月10日183431.txt
Normal file
File diff suppressed because it is too large
Load Diff
5810
2026年3月8日18119.txt
Normal file
5810
2026年3月8日18119.txt
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
Before Width: | Height: | Size: 257 KiB After Width: | Height: | Size: 526 KiB |
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
Before Width: | Height: | Size: 261 KiB After Width: | Height: | Size: 231 KiB |
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 437 KiB After Width: | Height: | Size: 717 KiB |
@@ -1,13 +1,11 @@
|
|||||||
=== Product Hunt 产品信息 ===
|
=== Product Hunt 产品信息 ===
|
||||||
|
|
||||||
产品名称: Croct
|
产品名称: Greta
|
||||||
|
|
||||||
产品简介: Croct is a conversion optimization platform that includes AI-powered audience segmentation, content personalization, AB testing, feature flag, real-time website analytics, and a component-based CMS for modern frameworks like Next.js and React.
|
产品简介: 未获取
|
||||||
|
|
||||||
It helps product and growth teams scale UI and website optimization, enabling faster growth without over-relying on developers.
|
制作人发言: This is first first proposed project. If you want to support Santiago getting his project built, here are the details.https://onemillionlines.com/proj...
|
||||||
|
|
||||||
制作人发言: AI-powered website segmentation
|
用户数: 664 followers
|
||||||
|
|
||||||
用户数: 619 followers
|
提取时间: 2026-03-08 20:40:13
|
||||||
|
|
||||||
提取时间: 2025-12-17 18:36:11
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -205,9 +205,13 @@ def process_temp_files():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# 处理每篇文章
|
# 处理每篇文章
|
||||||
for article in tqdm(articles, desc=f"处理 {file_path}"):
|
for i, article in tqdm(enumerate(articles), desc=f"处理 {file_path}", total=len(articles)):
|
||||||
total_processed += 1
|
total_processed += 1
|
||||||
|
|
||||||
|
# 每处理10篇文章记录一次进度
|
||||||
|
if i % 10 == 0 and i > 0:
|
||||||
|
logger.info(f"已处理 {i}/{len(articles)} 篇文章,完成 {i/len(articles)*100:.1f}%")
|
||||||
|
|
||||||
# 检查重复
|
# 检查重复
|
||||||
if check_duplicate(article['title'], source_date):
|
if check_duplicate(article['title'], source_date):
|
||||||
logger.info(f"跳过重复文章(最近三天已存在): {article['title']}")
|
logger.info(f"跳过重复文章(最近三天已存在): {article['title']}")
|
||||||
|
|||||||
BIN
tophub_data.db
BIN
tophub_data.db
Binary file not shown.
49504
tophub_scraper.log
49504
tophub_scraper.log
File diff suppressed because it is too large
Load Diff
@@ -262,7 +262,7 @@ class TopHubScraper:
|
|||||||
|
|
||||||
# 实时读取输出以避免编码问题
|
# 实时读取输出以避免编码问题
|
||||||
try:
|
try:
|
||||||
stdout, stderr = process.communicate(timeout=300) # 5分钟超时
|
stdout, stderr = process.communicate(timeout=3600) # 1小时超时
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
process.kill()
|
process.kill()
|
||||||
logger.error("tophub_add_data_to_db.py执行超时")
|
logger.error("tophub_add_data_to_db.py执行超时")
|
||||||
@@ -287,6 +287,8 @@ class TopHubScraper:
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
scraper = TopHubScraper()
|
scraper = TopHubScraper()
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 抓取数据
|
# 抓取数据
|
||||||
scraped_data = scraper.scrape_by_node_ids()
|
scraped_data = scraper.scrape_by_node_ids()
|
||||||
|
|||||||
Reference in New Issue
Block a user