[email protected]   15826058953
B2B外贸网站建设与运营,WEB服务器运维,始于2016。

python统计服务器日志的IP以数量排序并导出TXT文件

2021-12-29     重庆LiSEO工作室    

1.python版本为3.6.4;

2.日志文件为nginx服务器日志文件;

3.python源码如下:

import os
import sys

# Count the requests per client IP in a server access log, sort the IPs by
# request count, and export the busiest ones to a TXT file.
#
# Usage: python <this script> access_2017-10-10.log
# (sys.argv[1] is the first command-line argument: the log file to read.)


def count_ips(lines):
    """Count how often each client IP occurs in an iterable of log lines.

    The IP is assumed to be the first whitespace-separated field of a line.
    Blank or whitespace-only lines carry no field and are skipped instead of
    raising IndexError.

    Returns a dict mapping IP -> number of lines, in first-seen order.
    """
    counts = {}
    for line in lines:
        fields = line.split(None, 1)
        if not fields:
            continue
        ip = fields[0]
        counts[ip] = counts.get(ip, 0) + 1
    return counts


def export_top_ips(log_path, limit=100):
    """Write the `limit` most frequent IPs of `log_path` to a TXT report.

    The report is named after the log file (extension replaced by ``.txt``)
    and is created in the current working directory, overwriting any report
    left over from a previous run.

    Returns the path of the report that was written.
    """
    # errors="replace": access logs routinely contain bytes that are invalid
    # in the platform encoding (user agents, URLs); one bad byte should not
    # abort the whole count.
    with open(log_path, "r", errors="replace") as log_file:
        counts = count_ips(log_file)

    # sorted() is stable, so IPs with equal counts keep first-seen order.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)

    base_name = os.path.splitext(os.path.basename(log_path))[0]
    report_path = base_name + '.txt'
    # Mode 'w' truncates an existing report, so no explicit remove is needed.
    with open(report_path, 'w', errors="replace") as report:
        for ip, count in ranked[:limit]:
            report.write('[ip: {}      times: {}]\n'.format(ip, count))
    return report_path


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit('usage: python {} <access_log>'.format(sys.argv[0]))
    export_top_ips(sys.argv[1])
    print('生成成功')

20211229更新新版日志分析工具,源码如下:

#!/usr/bin/env python
import re      #导入正则表达式模块
import sys
import os

# Count the requests per client IP in an access log and export the result,
# sorted by request count in descending order, to "<log file>.txt".
#
# Usage: python <this script> access.log
# (sys.argv[1] is the first command-line argument: the log file to read.)

# One IPv4 octet: 0-255 without leading zeros, delimited by word boundaries.
# 0 must be accepted, otherwise addresses such as 10.0.0.1 never match.
_OCTET = r'\b(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])\b'
# Dotted-quad IPv4 address; compiled once instead of once per log line.
LEADING_IP_RE = re.compile(r'(?:' + _OCTET + r'\.){3}' + _OCTET)


def count_leading_ips(lines):
    """Count the lines of `lines` that begin with each IPv4 address.

    Only lines that start with a valid dotted-quad address are counted;
    anything else (blank lines, garbage, continuation lines) is ignored
    rather than being credited to the previously seen address.

    Returns a dict mapping IP -> number of lines, in first-seen order.
    """
    counts = {}
    for line in lines:
        found = LEADING_IP_RE.match(line)
        if found is None:
            continue
        ip = found.group()
        counts[ip] = counts.get(ip, 0) + 1
    return counts


def export_ip_counts(log_path):
    """Write every IP found in `log_path` with its count to `log_path + '.txt'`.

    Entries are ordered by count, highest first; each entry is also echoed
    to stdout. An existing report is overwritten.

    Returns the path of the report that was written.
    """
    # errors="replace": a stray undecodable byte in the log must not abort
    # the whole run; the leading IP field itself is plain ASCII.
    with open(log_path, "r", errors="replace") as log_file:
        counts = count_leading_ips(log_file)

    # sorted() is stable, so IPs with equal counts keep first-seen order.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)

    report_path = log_path + '.txt'
    # Mode 'w' truncates an existing report, so no explicit remove is needed.
    with open(report_path, mode='w') as report:
        for ip, count in ranked:
            entry = ip + "--->" + str(count)
            print(entry)
            report.write(entry + '\n')
    return report_path


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit('usage: python {} <access_log>'.format(sys.argv[0]))
    export_ip_counts(sys.argv[1])

4.运行效果图如下:

python统计服务器日志的IP以数量排序并导出TXT文件效果图

5.运行导出的日志如下:

[ip: 207.46.13.212      times: 3022]
[ip: 207.46.13.109      times: 2952]
[ip: 5.255.250.152      times: 2844]
[ip: 157.55.39.44      times: 2582]
[ip: 207.46.13.134      times: 1212]
[ip: 157.55.39.89      times: 1106]
[ip: 207.46.13.25      times: 1076]
[ip: 179.187.95.65      times: 890]
[ip: 207.46.13.7      times: 880]
[ip: 157.55.39.82      times: 845]
[ip: 157.55.39.135      times: 719]
[ip: 93.158.161.85      times: 607]
[ip: 207.46.13.132      times: 374]
[ip: 46.229.168.67      times: 359]
[ip: 46.229.168.75      times: 334]
[ip: 207.46.13.147      times: 321]
[ip: 46.229.168.71      times: 320]
[ip: 46.229.168.65      times: 319]
[ip: 46.229.168.68      times: 313]
[ip: 46.229.168.70      times: 308]