You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

37 lines
1.1 KiB

<?php
namespace Tests\Unit;
use App\Services\Crawl\NewsContentHtml;
use PHPUnit\Framework\TestCase;
/**
 * Unit tests for the static HTML helpers on NewsContentHtml.
 *
 * Each case passes a small inline HTML fixture to a helper and checks the
 * returned markup for expected substrings only; the full output is not pinned.
 */
class NewsContentHtmlTest extends TestCase
{
    /**
     * extractBody() must yield a non-null result that still carries the first
     * paragraph's text and the image path found inside the
     * "Article_content" container of the fixture.
     */
    public function test_extracts_sjtu_article_content(): void
    {
        // The nowdoc is kept flush-left so the fixture text is exactly what is written here.
        $fixture = <<<'HTML'
<div class="Article_content">
<p>正文段落一</p>
<p><img src="/resource/upload/202604/a.png" alt="图"/></p>
<p>正文段落二,需要足够长的文字才能通过提取阈值校验。</p>
</div>
<div class="Article-source">来源</div>
HTML;

        $extracted = NewsContentHtml::extractBody($fixture);

        // Guard against a null result before the substring checks below.
        self::assertNotNull($extracted);
        self::assertStringContainsString('正文段落一', $extracted);
        self::assertStringContainsString('/resource/upload/', $extracted);
    }

    /**
     * normalize() must turn a root-relative image src into an absolute URL on
     * the host of the URL passed as its second argument.
     */
    public function test_resolves_relative_image_src(): void
    {
        $markup = '<img src="/resource/upload/a.png">';
        // Second argument to normalize(); the expected value implies it acts as the base for resolution.
        $articleUrl = 'https://news.sjtu.edu.cn/jdyw/20260408/221279.html';
        $expectedSrc = 'https://news.sjtu.edu.cn/resource/upload/a.png';

        $result = NewsContentHtml::normalize($markup, $articleUrl);

        self::assertStringContainsString($expectedSrc, $result);
    }
}