*/
    public static function parse(?string $raw): array
    {
        if ($raw === null || trim($raw) === '') {
            return [];
        }

        // Split on any whitespace plus ASCII and full-width (U+FF0C / U+FF1B) commas and
        // semicolons. The full-width forms are written as code-point escapes so the pattern
        // does not depend on the encoding of this source file.
        // preg_split() returns false on a PCRE error (e.g. invalid UTF-8 under /u); that is
        // treated as "no keywords".
        $parts = preg_split('/[\s,;\x{FF0C}\x{FF1B}]+/u', trim($raw), -1, PREG_SPLIT_NO_EMPTY) ?: [];

        return self::uniqueTrimmed($parts);
    }

    /**
     * Tells whether the title or the summary contains at least one of the keywords.
     *
     * Matching is a case-insensitive, multibyte-safe substring search. An empty keyword
     * list means "no filter" and therefore always matches; empty-string keywords are ignored.
     *
     * @param string        $title    Title to search in.
     * @param string|null   $summary  Optional summary to search in.
     * @param list<string>  $keywords Keywords to look for.
     *
     * @return bool True when the keyword list is empty or any keyword is found.
     */
    public static function matchesAny(string $title, ?string $summary, array $keywords): bool
    {
        if ($keywords === []) {
            return true;
        }

        // Join with a newline rather than concatenating directly, so a keyword cannot
        // match a span made of the end of the title glued to the start of the summary.
        $haystack = $title."\n".($summary ?? '');

        foreach ($keywords as $keyword) {
            if ($keyword !== '' && mb_stripos($haystack, $keyword) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Splits the raw input into search phrases on commas, semicolons and line breaks
     * (ASCII and full-width punctuation alike).
     *
     * When the input contains no delimiter the whole trimmed input is a single phrase.
     * Whitespace inside a phrase is preserved; buildArxivSearchQuery() later combines the
     * whitespace-separated terms of a phrase with AND.
     *
     * @param string|null $raw Raw user input.
     *
     * @return list<string> Unique, trimmed, non-empty phrases in first-seen order.
     */
    public static function parsePhrases(?string $raw): array
    {
        $raw = trim((string) $raw);
        if ($raw === '') {
            return [];
        }

        // With no delimiter present preg_split() already yields [$raw], so no separate
        // preg_match() probe is needed. Input made only of delimiters yields [].
        $parts = preg_split('/[,;\x{FF0C}\x{FF1B}\n\r]+/u', $raw, -1, PREG_SPLIT_NO_EMPTY);
        if ($parts === false) {
            // PCRE failure (e.g. invalid UTF-8): fall back to treating the input as one phrase.
            $parts = [$raw];
        }

        return self::uniqueTrimmed($parts);
    }

    /**
     * Builds an arXiv `search_query` string: terms inside one phrase are combined with AND,
     * separate phrases are combined with OR.
     *
     * Every term is reduced to letters, digits, `-` and `_` and prefixed with `all:`.
     * Terms that contain no letter or digit after that reduction are dropped.
     *
     * @param string|null $raw Raw user input, split into phrases by parsePhrases().
     *
     * @return string The query, or `all:*` when no usable term remains.
     */
    public static function buildArxivSearchQuery(?string $raw): string
    {
        $clauses = [];

        foreach (self::parsePhrases($raw) as $phrase) {
            $terms = preg_split('/\s+/u', $phrase, -1, PREG_SPLIT_NO_EMPTY) ?: [];

            $ands = [];
            foreach ($terms as $term) {
                // Terms come from a whitespace split, so only punctuation has to be stripped.
                $clean = preg_replace('/[^\p{L}\p{N}\-_]/u', '', $term);

                // preg_replace() yields null on a PCRE error. A term without any letter or
                // digit (e.g. a lone "-") would only produce a meaningless `all:-` clause.
                if ($clean === null || preg_match('/[\p{L}\p{N}]/u', $clean) !== 1) {
                    continue;
                }

                $ands[] = 'all:'.$clean;
            }

            if ($ands === []) {
                continue;
            }

            $clauses[] = count($ands) === 1 ? $ands[0] : '('.implode(' AND ', $ands).')';
        }

        if ($clauses === []) {
            return 'all:*';
        }

        return count($clauses) === 1 ? $clauses[0] : '('.implode(' OR ', $clauses).')';
    }

    /**
     * Trims every fragment, drops the empty ones and removes exact (case-sensitive)
     * duplicates while keeping first-seen order.
     *
     * @param array<int, string> $parts Fragments produced by a split.
     *
     * @return list<string>
     */
    private static function uniqueTrimmed(array $parts): array
    {
        $seen = [];
        $unique = [];

        foreach ($parts as $part) {
            $item = trim($part);
            if ($item === '' || isset($seen[$item])) {
                continue;
            }

            // The hash set is only used for membership; the original string is what is returned.
            $seen[$item] = true;
            $unique[] = $item;
        }

        return $unique;
    }
}