|
|
<?php
|
|
|
|
|
|
namespace App\Services\Crawl;
|
|
|
|
|
|
/**
 * Cross-process mutex plus minimum request spacing, intended to satisfy
 * arXiv's "at most one request every 3 seconds, single connection" rule.
 *
 * The lock is held only while a caller queues for its turn and records its
 * reservation; the HTTP request itself runs outside the lock so other
 * processes are not blocked for the duration of a slow response.
 *
 * Limitation: because the request runs outside the lock, a request that takes
 * longer than the configured interval can still overlap with the next one.
 */
class ArxivRequestGate
{
    /** File used purely as the flock() target that serialises waiting callers. */
    private const LOCK_BASENAME = 'arxiv_request.lock';

    /** File holding the Unix timestamp (seconds, with fraction) of the latest request. */
    private const STATE_BASENAME = 'arxiv_last_request_at.txt';

    /**
     * @param float $minIntervalSeconds Minimum spacing between two requests, in seconds.
     */
    public function __construct(
        protected float $minIntervalSeconds = 4.0,
    ) {}

    /**
     * Build a gate from the `crawl.arxiv.min_interval_seconds` config entry
     * (falls back to 4 seconds when the entry is missing).
     */
    public static function fromConfig(): self
    {
        return new self((float) config('crawl.arxiv.min_interval_seconds', 4));
    }

    /**
     * Block until it is this caller's turn to issue an arXiv request.
     *
     * While holding the exclusive lock this sleeps for whatever remains of the
     * minimum interval and then records the current time as the latest request
     * time. Recording it before the lock is released is what keeps callers that
     * arrive together from all reading the same stale timestamp and firing at
     * once.
     *
     * @throws \RuntimeException When the lock file cannot be opened or locked,
     *                           the state directory cannot be created, or the
     *                           timestamp cannot be written.
     */
    public function waitTurn(): void
    {
        $dir = $this->ensureStateDirectory();
        $lockPath = $dir.'/'.self::LOCK_BASENAME;
        $statePath = $dir.'/'.self::STATE_BASENAME;

        $handle = fopen($lockPath, 'c+');
        if ($handle === false) {
            throw new \RuntimeException('无法创建 arXiv 请求锁文件');
        }

        $locked = false;

        try {
            $locked = flock($handle, LOCK_EX);
            if (! $locked) {
                throw new \RuntimeException('无法获取 arXiv 请求锁');
            }

            $elapsed = microtime(true) - $this->readLastRequestAt($statePath);

            // A stored timestamp that lies in the future (clock step, corrupt
            // file) would otherwise stall every caller for an unbounded time,
            // so never wait longer than one full interval.
            $remain = min($this->minIntervalSeconds, $this->minIntervalSeconds - $elapsed);
            if ($remain > 0) {
                $this->pause($remain);
            }

            // Reserve the slot before unlocking so the next waiter measures
            // its interval from this request's start.
            $this->writeLastRequestAt($statePath, microtime(true));
        } finally {
            // Only unlock what was actually locked.
            if ($locked) {
                flock($handle, LOCK_UN);
            }

            fclose($handle);
        }
    }

    /**
     * Record that an arXiv request has just been sent or completed; the next
     * interval calculation is based on this timestamp.
     *
     * @throws \RuntimeException When the state directory cannot be created or
     *                           the timestamp cannot be written.
     */
    public function markSent(): void
    {
        $dir = $this->ensureStateDirectory();

        $this->writeLastRequestAt($dir.'/'.self::STATE_BASENAME, microtime(true));
    }

    /**
     * Wait for a turn, run the callback, and always record the request time
     * afterwards, even when the callback throws.
     *
     * @template T
     *
     * @param  callable(): T  $callback
     * @return T
     */
    public function run(callable $callback): mixed
    {
        $this->waitTurn();

        try {
            return $callback();
        } finally {
            $this->markSent();
        }
    }

    /**
     * Return the directory holding the lock and state files, creating it if
     * it does not exist yet.
     *
     * @throws \RuntimeException When the directory is missing and cannot be created.
     */
    private function ensureStateDirectory(): string
    {
        $dir = storage_path('framework/cache');

        // Another process may create the directory between is_dir() and
        // mkdir(); the warning is suppressed and the final is_dir() re-check
        // treats that race as success.
        if (! is_dir($dir) && ! @mkdir($dir, 0755, true) && ! is_dir($dir)) {
            throw new \RuntimeException('无法创建 arXiv 请求状态目录');
        }

        return $dir;
    }

    /**
     * Read the stored request timestamp, or 0.0 when the file is missing,
     * unreadable, or does not contain a number.
     */
    private function readLastRequestAt(string $statePath): float
    {
        if (! is_readable($statePath)) {
            return 0.0;
        }

        $handle = fopen($statePath, 'r');
        if ($handle === false) {
            return 0.0;
        }

        try {
            // The shared lock pairs with the LOCK_EX write so a half-written
            // (truncated) file is never parsed as "no previous request".
            flock($handle, LOCK_SH);
            $raw = stream_get_contents($handle);
        } finally {
            flock($handle, LOCK_UN);
            fclose($handle);
        }

        $value = is_string($raw) ? trim($raw) : '';

        return is_numeric($value) ? (float) $value : 0.0;
    }

    /**
     * Persist a request timestamp with microsecond precision.
     *
     * @throws \RuntimeException When the state file cannot be written.
     */
    private function writeLastRequestAt(string $statePath, float $timestamp): void
    {
        // %F is locale-independent; six decimals keep full microtime() precision.
        if (file_put_contents($statePath, sprintf('%.6F', $timestamp), LOCK_EX) === false) {
            throw new \RuntimeException('无法写入 arXiv 请求时间戳');
        }
    }

    /**
     * Sleep for the given number of seconds, rounded up to the next microsecond.
     *
     * Whole seconds go through sleep() because usleep() with values above one
     * second may not be supported on every operating system.
     */
    private function pause(float $seconds): void
    {
        $micros = (int) ceil($seconds * 1_000_000);

        $whole = intdiv($micros, 1_000_000);
        if ($whole > 0) {
            sleep($whole);
        }

        $fraction = $micros % 1_000_000;
        if ($fraction > 0) {
            usleep($fraction);
        }
    }
}
|