You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

84 lines
2.3 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

<?php
namespace App\Services\Crawl;
/**
 * Cross-process mutex plus a minimum request interval, intended to satisfy arXiv's
 * "at most one request every 3 seconds, single connection" guideline.
 *
 * The lock is held only while a caller is queueing for its turn; the HTTP request itself
 * runs outside the lock so that other processes are not blocked for the whole request.
 *
 * State lives in two files under storage_path('framework/cache'):
 *  - LOCK_BASENAME:  the file that flock() is taken on;
 *  - STATE_BASENAME: the UNIX timestamp (float, as text) of the most recent request slot.
 */
class ArxivRequestGate
{
    private const LOCK_BASENAME = 'arxiv_request.lock';
    private const STATE_BASENAME = 'arxiv_last_request_at.txt';

    /**
     * @param float $minIntervalSeconds Minimum number of seconds between two request slots.
     */
    public function __construct(
        protected float $minIntervalSeconds = 4.0,
    ) {}

    /**
     * Build a gate whose interval comes from the `crawl.arxiv.min_interval_seconds`
     * config key (falls back to 4 when the key is absent).
     */
    public static function fromConfig(): self
    {
        return new self((float) config('crawl.arxiv.min_interval_seconds', 4));
    }

    /**
     * Block until it is this process's turn to issue an arXiv request.
     *
     * The exclusive lock is held while reading the last timestamp, sleeping out the
     * remaining interval, and reserving the slot. It is released before returning.
     *
     * @throws \RuntimeException When the lock file cannot be opened or locked.
     */
    public function waitTurn(): void
    {
        $dir = storage_path('framework/cache');
        if (! is_dir($dir)) {
            mkdir($dir, 0755, true);
        }

        $lockPath = $dir.'/'.self::LOCK_BASENAME;
        $statePath = $dir.'/'.self::STATE_BASENAME;

        $handle = fopen($lockPath, 'c+');
        if ($handle === false) {
            throw new \RuntimeException('无法创建 arXiv 请求锁文件');
        }

        try {
            if (! flock($handle, LOCK_EX)) {
                throw new \RuntimeException('无法获取 arXiv 请求锁');
            }

            $last = is_readable($statePath) ? (float) trim((string) file_get_contents($statePath)) : 0.0;
            $remain = $this->minIntervalSeconds - (microtime(true) - $last);

            // A timestamp from the future (clock stepped backwards, corrupted state file)
            // would otherwise make us sleep arbitrarily long while holding the lock.
            // Waiting one full interval is always sufficient.
            $remain = min($remain, $this->minIntervalSeconds);

            if ($remain > 0) {
                usleep((int) ceil($remain * 1_000_000));
            }

            // Reserve the slot while still holding the lock. Without this, a second process
            // acquiring the lock while our request is in flight would read the stale
            // timestamp (markSent() only runs after the request) and fire immediately.
            $this->writeTimestamp($statePath);
        } finally {
            flock($handle, LOCK_UN);
            fclose($handle);
        }
    }

    /**
     * Record that an arXiv request has just been made; the next waitTurn() measures
     * its interval from this moment.
     */
    public function markSent(): void
    {
        $dir = storage_path('framework/cache');
        $statePath = $dir.'/'.self::STATE_BASENAME;
        $this->writeTimestamp($statePath);
    }

    /**
     * Wait for a slot, invoke the callback, then record the request time — even when
     * the callback throws.
     *
     * @template T
     *
     * @param callable(): T $callback
     * @return T
     */
    public function run(callable $callback): mixed
    {
        $this->waitTurn();
        try {
            return $callback();
        } finally {
            $this->markSent();
        }
    }

    /**
     * Store the current time in the state file. LOCK_EX keeps concurrent writers
     * (a reservation in one process, markSent() in another) from interleaving.
     */
    private function writeTimestamp(string $statePath): void
    {
        file_put_contents($statePath, (string) microtime(true), LOCK_EX);
    }
}