feat(middleware): add crawler detection middleware for suspicious requests
Introduce `CrawlerDetectionMiddleware` to identify and log potential crawlers or bots: when a request ends in a 404 response, it is logged if its path looks suspicious or its user agent looks bot-like. The middleware is registered in `AppModule` and applied globally to all routes.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
import { CacheModule } from "@nestjs/cache-manager";
|
||||
import { Module } from "@nestjs/common";
|
||||
import { MiddlewareConsumer, Module, NestModule } from "@nestjs/common";
|
||||
import { ConfigModule, ConfigService } from "@nestjs/config";
|
||||
import { ScheduleModule } from "@nestjs/schedule";
|
||||
import { ThrottlerModule } from "@nestjs/throttler";
|
||||
@@ -10,6 +10,7 @@ import { AppService } from "./app.service";
|
||||
import { AuthModule } from "./auth/auth.module";
|
||||
import { CategoriesModule } from "./categories/categories.module";
|
||||
import { CommonModule } from "./common/common.module";
|
||||
import { CrawlerDetectionMiddleware } from "./common/middlewares/crawler-detection.middleware";
|
||||
import { validateEnv } from "./config/env.schema";
|
||||
import { ContentsModule } from "./contents/contents.module";
|
||||
import { CryptoModule } from "./crypto/crypto.module";
|
||||
@@ -71,4 +72,8 @@ import { UsersModule } from "./users/users.module";
|
||||
controllers: [AppController, HealthController],
|
||||
providers: [AppService],
|
||||
})
|
||||
export class AppModule {}
|
||||
/**
 * Root application module.
 *
 * Implements `NestModule` so that middleware can be registered through
 * `configure`.
 */
export class AppModule implements NestModule {
  /**
   * Registers `CrawlerDetectionMiddleware` for the `"*"` route pattern.
   *
   * @param consumer Middleware consumer on which the middleware is registered.
   */
  configure(consumer: MiddlewareConsumer) {
    const crawlerDetection = consumer.apply(CrawlerDetectionMiddleware);
    crawlerDetection.forRoutes("*");
  }
}
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
import { Injectable, Logger, NestMiddleware } from "@nestjs/common";
|
||||
import type { NextFunction, Request, Response } from "express";
|
||||
|
||||
/**
 * Logs requests that look like automated scanning.
 *
 * Once a response has finished with HTTP 404, the request is reported through
 * the logger when its URL matches a known probe path or its User-Agent matches
 * a known bot/tool signature. The middleware only observes: it always calls
 * `next()` and never blocks or alters the request.
 */
@Injectable()
export class CrawlerDetectionMiddleware implements NestMiddleware {
  private readonly logger = new Logger("CrawlerDetection");

  /** Upper bound on the length of any client-supplied value written to the log. */
  private readonly MAX_LOGGED_LENGTH = 512;

  /**
   * Path fragments commonly probed by vulnerability scanners.
   * Case-insensitive so that variants such as `/phpMyAdmin/` or `/WP-ADMIN`
   * are caught as well.
   */
  private readonly SUSPICIOUS_PATTERNS: readonly RegExp[] = [
    /\.env/i,
    /wp-admin/i,
    /wp-login/i,
    /\.git/i,
    /\.php$/i,
    /xmlrpc/i,
    /config/i,
    /setup/i,
    /wp-config/i,
    /_next/i,
    /install/i,
    /admin/i,
    /phpmyadmin/i,
    /sql/i,
    /backup/i,
    /db\./i,
    /backup\./i,
    /cgi-bin/i,
    /\.well-known\/security\.txt/i, // Legitimate, but frequently scanned
  ];

  /** User-Agent fragments of crawlers and common scanning/scripting tools. */
  private readonly BOT_USER_AGENTS: readonly RegExp[] = [
    /bot/i,
    /crawler/i,
    /spider/i,
    /python/i,
    /curl/i,
    /wget/i,
    /nmap/i,
    /nikto/i,
    /zgrab/i,
    /masscan/i,
  ];

  /**
   * Registers a `finish` listener that logs the request if it ended in a 404
   * and looks like crawler traffic, then hands control to the next handler.
   *
   * @param req Incoming request; method, IP, URL and User-Agent are read from it.
   * @param res Outgoing response; its final status code is inspected on `finish`.
   * @param next Callback invoked immediately to continue the middleware chain.
   */
  use(req: Request, res: Response, next: NextFunction) {
    const { method, ip } = req;
    // `originalUrl` keeps the URL as received; `url` may be rewritten by
    // mounted routers before the response finishes.
    const url = req.originalUrl || req.url;
    const userAgent = req.get("user-agent") || "unknown";

    res.on("finish", () => {
      if (res.statusCode !== 404) {
        return;
      }

      if (!this.isSuspiciousUrl(url) && !this.isBotUserAgent(userAgent)) {
        return;
      }

      this.logger.warn(
        `Potential crawler detected: [${ip ?? "unknown"}] ${method} ${this.sanitizeForLog(url)} - User-Agent: ${this.sanitizeForLog(userAgent)}`,
      );
      // A temporary IP ban (e.g. backed by Redis) could be added here.
    });

    next();
  }

  /**
   * Tells whether the URL matches a suspicious pattern. Each pattern is tried
   * against the path without its query string (so that end-anchored patterns
   * like `\.php$` match `/index.php?x=1`) and against the full URL (so that
   * probes carried in the query string are still detected).
   */
  private isSuspiciousUrl(url: string): boolean {
    const queryStart = url.indexOf("?");
    const path = queryStart === -1 ? url : url.slice(0, queryStart);
    return this.SUSPICIOUS_PATTERNS.some(
      (pattern) => pattern.test(path) || pattern.test(url),
    );
  }

  /** Tells whether the User-Agent matches a known bot/tool signature. */
  private isBotUserAgent(userAgent: string): boolean {
    return this.BOT_USER_AGENTS.some((pattern) => pattern.test(userAgent));
  }

  /**
   * Makes a client-controlled value safe to embed in a log line: control
   * characters are replaced with `?` and the value is capped at
   * `MAX_LOGGED_LENGTH` characters.
   */
  private sanitizeForLog(value: string): string {
    const printable = value.replace(/[\u0000-\u001f\u007f]/g, "?");
    return printable.length > this.MAX_LOGGED_LENGTH
      ? `${printable.slice(0, this.MAX_LOGGED_LENGTH)}...`
      : printable;
  }
}
|
||||
Reference in New Issue
Block a user