diff --git a/app/Services/Crawl/CrawlSourceResolver.php b/app/Services/Crawl/CrawlSourceResolver.php index d1fcce6..a9bda36 100644 --- a/app/Services/Crawl/CrawlSourceResolver.php +++ b/app/Services/Crawl/CrawlSourceResolver.php @@ -19,6 +19,11 @@ class CrawlSourceResolver ->orderBy('sort') ->get(); + /* Run the URL-hint lookup before the per-source scan below and return its result right away when it is truthy. */ $hint = $this->resolveByUrlHint($requestUrl, $targetType, $sources); + if ($hint) { + return $hint; + } + + $wildcard = null; foreach ($sources as $source) { @@ -35,11 +40,6 @@ class CrawlSourceResolver } } - $hint = $this->resolveByUrlHint($requestUrl, $targetType, $sources); - if ($hint) { - return $hint; - } - - return $wildcard; } diff --git a/database/migrations/2026_06_23_000001_add_ai_sjtu_research_center_crawl_source.php b/database/migrations/2026_06_23_000001_add_ai_sjtu_research_center_crawl_source.php new file mode 100644 index 0000000..392fc38 --- /dev/null +++ b/database/migrations/2026_06_23_000001_add_ai_sjtu_research_center_crawl_source.php @@ -0,0 +1,74 @@ + 'keyword', + 'type' => 'textarea', + 'label' => '搜索关键词(选填)', + 'required' => false, + 'placeholder' => '多个关键词用空格、逗号或换行分隔', + ], + [ + 'key' => 'max_results', + 'type' => 'number', + 'label' => '条数上限', + 'default' => 200, + 'min' => 1, + 'max' => 500, + ], + ]; + + $now = now(); + /* Check whether a crawl_sources row with this adapter_code and target_type 'teacher' is already present. */ $exists = DB::table('crawl_sources') + ->where('adapter_code', 'ai_sjtu_research_center_api') + ->where('target_type', 'teacher') + ->exists(); + + /* Row already present: overwrite its fields in place (created_at is not touched) and stop here. */ if ($exists) { + DB::table('crawl_sources') + ->where('adapter_code', 'ai_sjtu_research_center_api') + ->where('target_type', 'teacher') + ->update([ + 'name' => '交大人工智能研究院研究中心', + 'entry_url' => 'https://ai.sjtu.edu.cn/center', + 'match_domains' => json_encode(['ai.sjtu.edu.cn'], JSON_UNESCAPED_UNICODE), + 'config' => json_encode(['api_base' => 'https://ai.sjtu.edu.cn/api'], JSON_UNESCAPED_UNICODE), + 'param_schema' => json_encode($teacherSchema, JSON_UNESCAPED_UNICODE), + 'status' => 1, + 'sort' => 25, + 'updated_at' => $now, + ]); + + return; + } + + 
/* No matching row yet: insert it, using the same timestamp for created_at and updated_at. */ DB::table('crawl_sources')->insert([ + 'name' => '交大人工智能研究院研究中心', + 'target_type' => 'teacher', + 'adapter_code' => 'ai_sjtu_research_center_api', + 'entry_url' => 'https://ai.sjtu.edu.cn/center', + 'match_domains' => json_encode(['ai.sjtu.edu.cn'], JSON_UNESCAPED_UNICODE), + 'config' => json_encode(['api_base' => 'https://ai.sjtu.edu.cn/api'], JSON_UNESCAPED_UNICODE), + 'param_schema' => json_encode($teacherSchema, JSON_UNESCAPED_UNICODE), + 'status' => 1, + 'sort' => 25, + 'created_at' => $now, + 'updated_at' => $now, + ]); + } + + /** Reverse the migration by deleting the crawl_sources row(s) with this adapter_code and target_type 'teacher'. */ public function down(): void + { + DB::table('crawl_sources') + ->where('adapter_code', 'ai_sjtu_research_center_api') + ->where('target_type', 'teacher') + ->delete(); + } +}; diff --git a/tests/Unit/CrawlSourceResolverTest.php b/tests/Unit/CrawlSourceResolverTest.php new file mode 100644 index 0000000..4554230 --- /dev/null +++ b/tests/Unit/CrawlSourceResolverTest.php @@ -0,0 +1,42 @@ +create([ + 'name' => '师资列表页(通用 HTML)', + 'target_type' => 'teacher', + 'adapter_code' => 'faculty_list_html', + 'entry_url' => 'https://', + 'match_domains' => ['*'], + 'status' => 1, + 'sort' => 30, + ]); + + /* Second fixture: a source whose match_domains names ai.sjtu.edu.cn explicitly, with sort 25 (the '*' source above uses sort 30). */ CrawlSource::query()->create([ + 'name' => '交大人工智能研究院研究中心', + 'target_type' => 'teacher', + 'adapter_code' => 'ai_sjtu_research_center_api', + 'entry_url' => 'https://ai.sjtu.edu.cn/center', + 'match_domains' => ['ai.sjtu.edu.cn'], + 'status' => 1, + 'sort' => 25, + ]); + + /* Resolve a URL on ai.sjtu.edu.cn for target type 'teacher'; the assertions below require the domain-specific adapter to be returned. */ $resolver = new CrawlSourceResolver; + $source = $resolver->resolve('https://ai.sjtu.edu.cn/center', 'teacher'); + + $this->assertNotNull($source); + $this->assertSame('ai_sjtu_research_center_api', $source->adapter_code); + } +}