电子信息与电气工程学院
HTML;
$adapter = new FacultyListHtmlAdapter;
$method = new \ReflectionMethod($adapter, 'extractFromHtml');
$method->setAccessible(true);
$items = $method->invoke(
$adapter,
$html,
[],
'https://faculty.sjtu.edu.cn/xyjs_list.jsp?urltype=tsites.CollegeTeacherList&wbtreeid=1001&st=0&id=1701&lang=zh_CN',
);
$this->assertCount(2, $items);
$this->assertSame('沈备军', $items[0]->title);
$this->assertSame('http://faculty.sjtu.edu.cn/bjshen/zh_CN/index.htm', $items[0]->canonicalUrl);
$this->assertSame('上海交通大学', $items[0]->schoolName);
$this->assertSame('faculty_html_tsites', $items[0]->extra['platform']);
$this->assertSame('副教授', $items[0]->extra['academic_title']);
$this->assertSame('电子信息与电气工程学院', $items[0]->extra['college_name']);
$this->assertSame('副教授', $items[0]->extra['lead_author']['academic_title']);
$this->assertSame('夏斌', $items[1]->title);
}
/**
 * Runs extractFromHtml() on a list-page fixture and expects exactly one
 * teacher entry carrying a name, a profile URL, an academic title and a
 * college name. Per the test name, the scenario is a teacher whose profile
 * href is empty.
 *
 * NOTE(review): the nowdoc fixture below is empty as shown here, although one
 * item is expected — confirm the markup has not been lost from this test.
 */
public function test_extracts_teacher_when_profile_href_is_empty(): void
{
    $listPageUrl = 'https://faculty.sjtu.edu.cn/xyjs_list.jsp?id=1701';
    $html = <<<'HTML'
        HTML;

    $subject = new FacultyListHtmlAdapter;

    // The extractor is reached through reflection (presumably because it is
    // not public). setAccessible() matters on PHP < 8.1 and is a no-op later.
    $extract = new \ReflectionMethod($subject, 'extractFromHtml');
    $extract->setAccessible(true);

    $items = $extract->invokeArgs($subject, [$html, [], $listPageUrl]);

    $this->assertCount(1, $items);

    $teacher = $items[0];
    $this->assertSame('邵海滨', $teacher->title);
    $this->assertStringContainsString('shaohaibin', (string) $teacher->canonicalUrl);
    $this->assertSame('副研究员', $teacher->extra['academic_title']);
    $this->assertSame('电子信息与电气工程学院', $teacher->extra['college_name']);
}
/**
 * Checks that extractEmailFromProfileHtml() returns the e-mail address
 * contained in a profile snippet that carries a labelled address.
 */
public function test_extracts_email_from_teacher_profile_html(): void
{
    $profileHtml = '电子邮箱:bjshen@sjtu.edu.cn';

    $subject = new FacultyListHtmlAdapter;

    // Reached through reflection (presumably because the method is not
    // public). setAccessible() matters on PHP < 8.1 and is a no-op later.
    $extractEmail = new \ReflectionMethod($subject, 'extractEmailFromProfileHtml');
    $extractEmail->setAccessible(true);

    $this->assertSame(
        'bjshen@sjtu.edu.cn',
        $extractEmail->invoke($subject, $profileHtml),
    );
}
/**
 * Passes a crawl item whose lead author has a null e-mail to
 * applyEmailToItem() and expects the returned item to expose the supplied
 * address under extra['lead_author']['email'].
 */
public function test_apply_email_to_item_updates_lead_author(): void
{
    // Lead author starts out without an e-mail address.
    $leadAuthor = [
        'name' => '沈备军',
        'email' => null,
        'university_name' => '上海交通大学',
    ];
    $itemWithoutEmail = new \App\Services\Crawl\CrawlItemDto(
        externalId: 'faculty:test',
        title: '沈备军',
        canonicalUrl: 'http://faculty.sjtu.edu.cn/bjshen/zh_CN/index.htm',
        extra: ['lead_author' => $leadAuthor],
    );

    $subject = new FacultyListHtmlAdapter;

    // Reached through reflection (presumably because the method is not
    // public). setAccessible() matters on PHP < 8.1 and is a no-op later.
    $applyEmail = new \ReflectionMethod($subject, 'applyEmailToItem');
    $applyEmail->setAccessible(true);

    $updated = $applyEmail->invoke($subject, $itemWithoutEmail, 'bjshen@sjtu.edu.cn');

    $this->assertSame('bjshen@sjtu.edu.cn', $updated->extra['lead_author']['email']);
}
/**
 * Covers the two pagination helpers: detectTotalPages() must report 20 pages
 * for the fixture, and buildPageUrl() for page 3 must yield a URL containing
 * both "PAGENUM=3" and "totalpage=20".
 *
 * NOTE(review): as shown here the fixture is only the pager label text, so
 * the expected 20 pages presumably comes from surrounding pager markup —
 * confirm the fixture is intact.
 */
public function test_detects_total_pages_and_builds_pagenum_url(): void
{
    $pagerHtml = '下页';
    $firstPageUrl = 'https://faculty.sjtu.edu.cn/xyjs_list.jsp?urltype=tsites.CollegeTeacherList&wbtreeid=1001&st=0&id=1701&lang=zh_CN';

    $subject = new FacultyListHtmlAdapter;

    // Both helpers are reached through reflection (presumably because they
    // are not public). setAccessible() matters on PHP < 8.1 only.
    $detectTotalPages = new \ReflectionMethod($subject, 'detectTotalPages');
    $detectTotalPages->setAccessible(true);
    $this->assertSame(20, $detectTotalPages->invoke($subject, $pagerHtml));

    $buildPageUrl = new \ReflectionMethod($subject, 'buildPageUrl');
    $buildPageUrl->setAccessible(true);
    $thirdPageUrl = $buildPageUrl->invokeArgs($subject, [$firstPageUrl, 3, $pagerHtml]);

    foreach (['PAGENUM=3', 'totalpage=20'] as $expectedFragment) {
        $this->assertStringContainsString($expectedFragment, $thirdPageUrl);
    }
}
}