master
lion 3 days ago
parent fc3e050c76
commit 4d93e3e041

@ -19,6 +19,11 @@ class CrawlSourceResolver
->orderBy('sort')
->get();
$hint = $this->resolveByUrlHint($requestUrl, $targetType, $sources);
if ($hint) {
return $hint;
}
$wildcard = null;
foreach ($sources as $source) {
@ -35,11 +40,6 @@ class CrawlSourceResolver
}
}
$hint = $this->resolveByUrlHint($requestUrl, $targetType, $sources);
if ($hint) {
return $hint;
}
return $wildcard;
}

@ -0,0 +1,74 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Support\Facades\DB;
return new class extends Migration
{
    /** Table holding the crawl source rows touched by this migration. */
    private const TABLE = 'crawl_sources';

    /** Adapter code of the single source row this migration manages. */
    private const ADAPTER_CODE = 'ai_sjtu_research_center_api';

    /** Target type of the single source row this migration manages. */
    private const TARGET_TYPE = 'teacher';

    /**
     * Insert the ai.sjtu.edu.cn research-center source, or overwrite its
     * settings when a row with the same adapter code and target type exists.
     */
    public function up(): void
    {
        $timestamp = now();

        // Columns written identically by the update path and the insert path.
        $columns = [
            'name' => '交大人工智能研究院研究中心',
            'entry_url' => 'https://ai.sjtu.edu.cn/center',
            'match_domains' => json_encode(['ai.sjtu.edu.cn'], JSON_UNESCAPED_UNICODE),
            'config' => json_encode(['api_base' => 'https://ai.sjtu.edu.cn/api'], JSON_UNESCAPED_UNICODE),
            'param_schema' => json_encode($this->teacherParamSchema(), JSON_UNESCAPED_UNICODE),
            'status' => 1,
            'sort' => 25,
            'updated_at' => $timestamp,
        ];

        if (! $this->sourceRows()->exists()) {
            // New row: add the identifying columns and the creation timestamp.
            DB::table(self::TABLE)->insert($columns + [
                'target_type' => self::TARGET_TYPE,
                'adapter_code' => self::ADAPTER_CODE,
                'created_at' => $timestamp,
            ]);

            return;
        }

        // Existing row(s): refresh the settings, leaving created_at untouched.
        $this->sourceRows()->update($columns);
    }

    /**
     * Delete every row matching this migration's adapter code and target type.
     */
    public function down(): void
    {
        $this->sourceRows()->delete();
    }

    /**
     * Fresh query scoped to the rows identified by adapter code + target type.
     */
    private function sourceRows(): \Illuminate\Database\Query\Builder
    {
        return DB::table(self::TABLE)
            ->where('adapter_code', self::ADAPTER_CODE)
            ->where('target_type', self::TARGET_TYPE);
    }

    /**
     * Parameter schema stored (JSON-encoded) in the `param_schema` column:
     * an optional `keyword` textarea and a `max_results` number field
     * (default 200, bounds 1 to 500).
     *
     * @return list<array<string, mixed>>
     */
    private function teacherParamSchema(): array
    {
        $keywordField = [
            'key' => 'keyword',
            'type' => 'textarea',
            'label' => '搜索关键词(选填)',
            'required' => false,
            'placeholder' => '多个关键词用空格、逗号或换行分隔',
        ];

        $maxResultsField = [
            'key' => 'max_results',
            'type' => 'number',
            'label' => '条数上限',
            'default' => 200,
            'min' => 1,
            'max' => 500,
        ];

        return [$keywordField, $maxResultsField];
    }
};

@ -0,0 +1,42 @@
<?php
namespace Tests\Unit;
use App\Models\CrawlSource;
use App\Services\Crawl\CrawlSourceResolver;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Tests\TestCase;
class CrawlSourceResolverTest extends TestCase
{
    use RefreshDatabase;

    /**
     * With both a source whose match_domains is ['*'] and a source bound to
     * ai.sjtu.edu.cn present, resolving the research-center URL must return
     * the source whose adapter code is `ai_sjtu_research_center_api`.
     */
    public function test_resolves_ai_sjtu_center_to_research_center_api(): void
    {
        // Created in this order: the '*' source first, then the domain-specific one.
        $fixtures = [
            [
                'name' => '师资列表页(通用 HTML',
                'target_type' => 'teacher',
                'adapter_code' => 'faculty_list_html',
                'entry_url' => 'https://',
                'match_domains' => ['*'],
                'status' => 1,
                'sort' => 30,
            ],
            [
                'name' => '交大人工智能研究院研究中心',
                'target_type' => 'teacher',
                'adapter_code' => 'ai_sjtu_research_center_api',
                'entry_url' => 'https://ai.sjtu.edu.cn/center',
                'match_domains' => ['ai.sjtu.edu.cn'],
                'status' => 1,
                'sort' => 25,
            ],
        ];

        foreach ($fixtures as $attributes) {
            CrawlSource::query()->create($attributes);
        }

        $resolved = (new CrawlSourceResolver())->resolve('https://ai.sjtu.edu.cn/center', 'teacher');

        self::assertNotNull($resolved);
        self::assertSame('ai_sjtu_research_center_api', $resolved->adapter_code);
    }
}
Loading…
Cancel
Save