You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
98 lines
2.6 KiB
98 lines
2.6 KiB
<?php
|
|
|
|
namespace App\Services\Crawl;
|
|
|
|
use App\Models\DictItem;
|
|
use App\Models\DictType;
|
|
use Illuminate\Support\Str;
|
|
|
|
/**
 * Maps a crawled news article onto an item of the "news_category" dictionary.
 *
 * The dictionary items are read from the database once and memoised in a static
 * property, so every instance in the same PHP process shares one list. Call
 * {@see self::forgetCachedItems()} to force a reload (long-lived workers, tests).
 */
class NewsCategoryMatcher
{
    /**
     * Process-wide memo of the category items; null until the first lookup.
     *
     * @var list<array{id:int, label:string, value:string}>|null
     */
    protected static ?array $cachedItems = null;

    /**
     * Pick the news-category dictionary item that matches an article.
     *
     * Items are tried in their dictionary sort order. An item matches when its
     * label occurs in the title/summary text, or when its label and any
     * non-blank keyword contain one another. When no item matches, the default
     * category is returned (see {@see self::defaultCategoryId()}).
     *
     * @param string       $title    Article title.
     * @param string|null  $summary  Article summary; null is treated as an empty string.
     * @param list<string> $keywords Keywords extracted for the article; blank entries are ignored.
     *
     * @return int|null Matched (or default) dictionary item id; null when no items exist.
     */
    public function resolveCategoryId(string $title, ?string $summary, array $keywords): ?int
    {
        $haystack = $title.($summary ?? '');

        foreach ($this->categoryItems() as $item) {
            $label = $item['label'];

            // The text check does not depend on the keyword, so evaluate it once
            // per item instead of once per keyword. A hit here wins for this item
            // whether or not any keywords were supplied.
            if (Str::contains($haystack, $label)) {
                return $item['id'];
            }

            foreach ($keywords as $keyword) {
                // Ignore surrounding whitespace so a blank or padded keyword
                // cannot produce accidental matches (or misses).
                $keyword = trim($keyword);
                if ($keyword === '') {
                    continue;
                }

                if (Str::contains($label, $keyword) || Str::contains($keyword, $label)) {
                    return $item['id'];
                }
            }
        }

        return $this->defaultCategoryId();
    }

    /**
     * Look up the display label of a category item.
     *
     * @param int|null $id Dictionary item id; null and 0 are treated as "no category".
     *
     * @return string|null The item's label, or null when the id is empty or unknown.
     */
    public function labelForId(?int $id): ?string
    {
        if (! $id) {
            return null;
        }

        foreach ($this->categoryItems() as $item) {
            if ($item['id'] === $id) {
                return $item['label'];
            }
        }

        return null;
    }

    /**
     * Drop the memoised item list so the next lookup reads the database again.
     */
    public static function forgetCachedItems(): void
    {
        self::$cachedItems = null;
    }

    /**
     * Id of the fallback category used when nothing matches.
     *
     * Prefers the item whose value is "industry"; otherwise falls back to the
     * first item in sort order.
     *
     * @return int|null Null when there are no category items at all.
     */
    protected function defaultCategoryId(): ?int
    {
        $items = $this->categoryItems();

        foreach ($items as $item) {
            if ($item['value'] === 'industry') {
                return $item['id'];
            }
        }

        return $items[0]['id'] ?? null;
    }

    /**
     * Load (and memoise) the items of the "news_category" dictionary type.
     *
     * Only a type and items with status = 1 are considered; items are ordered
     * by their "sort" column. An empty list is memoised when the type is missing.
     *
     * @return list<array{id:int, label:string, value:string}>
     */
    protected function categoryItems(): array
    {
        if (self::$cachedItems !== null) {
            return self::$cachedItems;
        }

        $typeId = DictType::query()
            ->where('code', 'news_category')
            ->where('status', 1)
            ->value('id');

        if (! $typeId) {
            // Remember the miss as well, so the type lookup is not repeated.
            self::$cachedItems = [];

            return self::$cachedItems;
        }

        self::$cachedItems = DictItem::query()
            ->where('dict_type_id', $typeId)
            ->where('status', 1)
            ->orderBy('sort')
            ->get(['id', 'label', 'value'])
            // Normalise model attributes to plain scalars so strict comparisons
            // (e.g. in labelForId) behave predictably.
            ->map(static fn (DictItem $item): array => [
                'id' => (int) $item->id,
                'label' => (string) $item->label,
                'value' => (string) $item->value,
            ])
            ->all();

        return self::$cachedItems;
    }
}
|