You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

98 lines
2.6 KiB

<?php
namespace App\Services\Crawl;
use App\Models\DictItem;
use App\Models\DictType;
use Illuminate\Support\Str;
/**
 * Maps crawled news articles onto items of the `news_category` dictionary.
 *
 * The active dictionary items are read from the database once per PHP process
 * and memoised in a static property. In long-running processes, call
 * {@see NewsCategoryMatcher::flushCache()} after the dictionary has been edited
 * so that the next lookup reloads it.
 */
class NewsCategoryMatcher
{
    /** @var list<array{id:int, label:string, value:string}>|null */
    protected static ?array $cachedItems = null;

    /**
     * Pick the news category whose label matches the article.
     *
     * Categories are tried in dictionary sort order. A category matches when
     * its label occurs in the title + summary text, or when the label and any
     * keyword contain one another. When nothing matches, the default category
     * is returned (see {@see NewsCategoryMatcher::defaultCategoryId()}).
     *
     * @param  string  $title  Article title.
     * @param  string|null  $summary  Article summary; null is treated as an empty string.
     * @param  list<string>  $keywords  Article keywords; blank entries are ignored.
     * @return int|null Matched (or default) dictionary item id, or null when the dictionary has no items.
     */
    public function resolveCategoryId(string $title, ?string $summary, array $keywords): ?int
    {
        $haystack = $title.($summary ?? '');

        // Clean the keywords once instead of re-validating them for every category.
        $needles = $this->normalizeKeywords($keywords);

        foreach ($this->categoryItems() as $item) {
            $label = $item['label'];

            // This check does not depend on the keyword, so it runs once per
            // category rather than once per keyword.
            if (Str::contains($haystack, $label)) {
                return $item['id'];
            }

            foreach ($needles as $needle) {
                if (Str::contains($label, $needle) || Str::contains($needle, $label)) {
                    return $item['id'];
                }
            }
        }

        return $this->defaultCategoryId();
    }

    /**
     * Look up the display label of a category item.
     *
     * @param  int|null  $id  Dictionary item id; null and 0 are treated as "no category".
     * @return string|null The label, or null when the id is empty or not among the cached items.
     */
    public function labelForId(?int $id): ?string
    {
        if (! $id) {
            return null;
        }

        foreach ($this->categoryItems() as $item) {
            if ($item['id'] === $id) {
                return $item['label'];
            }
        }

        return null;
    }

    /**
     * Drop the memoised dictionary items so the next lookup reloads them.
     *
     * Useful in queue workers or other long-lived processes after the
     * dictionary has changed, and for isolating tests from each other.
     */
    public static function flushCache(): void
    {
        self::$cachedItems = null;
    }

    /**
     * Id of the fallback category.
     *
     * Prefers the item whose `value` is `industry`; if there is none, falls
     * back to the first item in sort order.
     *
     * @return int|null Null when the dictionary has no items.
     */
    protected function defaultCategoryId(): ?int
    {
        $items = $this->categoryItems();

        foreach ($items as $item) {
            if ($item['value'] === 'industry') {
                return $item['id'];
            }
        }

        return $items[0]['id'] ?? null;
    }

    /**
     * Reduce raw keywords to a list of trimmed, non-empty strings.
     *
     * Non-scalar entries (null, arrays, objects) are dropped; scalar entries
     * are cast to string so numeric keywords still take part in matching.
     *
     * @param  array<array-key, mixed>  $keywords
     * @return list<string>
     */
    protected function normalizeKeywords(array $keywords): array
    {
        $clean = [];

        foreach ($keywords as $keyword) {
            if (! is_scalar($keyword)) {
                continue;
            }

            $keyword = trim((string) $keyword);

            if ($keyword !== '') {
                $clean[] = $keyword;
            }
        }

        return $clean;
    }

    /**
     * Active items of the `news_category` dictionary, ordered by `sort`.
     *
     * The result is memoised in a static property, including the empty result
     * produced when no active `news_category` dictionary type exists.
     *
     * @return list<array{id:int, label:string, value:string}>
     */
    protected function categoryItems(): array
    {
        if (self::$cachedItems !== null) {
            return self::$cachedItems;
        }

        $typeId = DictType::query()
            ->where('code', 'news_category')
            ->where('status', 1)
            ->value('id');

        if (! $typeId) {
            // Cache the miss as well so the type lookup is not repeated on every call.
            self::$cachedItems = [];

            return self::$cachedItems;
        }

        self::$cachedItems = DictItem::query()
            ->where('dict_type_id', $typeId)
            ->where('status', 1)
            ->orderBy('sort')
            ->get(['id', 'label', 'value'])
            ->map(static fn (DictItem $item): array => [
                'id' => (int) $item->id,
                'label' => (string) $item->label,
                'value' => (string) $item->value,
            ])
            ->all();

        return self::$cachedItems;
    }
}