I have written a script that reads about 4 million points and 800,000 plots. For each plot the script clips out the points that fall inside it and saves them to a new text file.

After a while my PC's memory fills up. I have tried digging into the script, but in each pass of the loop for i in xrange(len(sr)): every object is replaced, and the clipped points are saved to a new txt file.

What strategy can I use in this situation to improve memory usage without losing performance (the script is already quite slow)? I am a beginner in Python, so I apologize if the question is a simple one.

Thanks in advance,
Gianni
inFile ="C://04-las_clip_inside_area//prova//Ku_115_class_Notground_normalize.las"
poly ="C://04-las_clip_inside_area//prova//ku_115_plot_clip.shp"
chunkSize = None
MinPoints = 1
sf = shapefile.Reader(poly) #open shpfile
sr = sf.shapeRecords()
poly_filename, ext = path.splitext(poly)
inFile_filename = os.path.splitext(os.path.basename(inFile))[0]
pbar = ProgressBar(len(sr)) # set progressbar
if chunkSize == None:
points = [(p.x,p.y) for p in lasfile.File(inFile,None,'r')]
for i in xrange(len(sr)):
pbar.update(i+1) # progressbar
verts = np.array(sr[i].shape.points,float)
record = sr[i].record[0]
index = nonzero(points_inside_poly(points, verts))[0]
if len(index) >= MinPoints:
file_out = open("{0}_{1}_{2}.txt".format(poly_filename, inFile_filename, record), "w")
inside_points = [lasfile.File(inFile,None,'r')[l] for l in index]
for p in inside_points:
file_out.write("%s %s %s %s %s %s %s %s %s %s %s" % (p.x, p.y, p.z, p.intensity,p.return_number,p.number_of_returns,p.scan_direction,p.flightline_edge,p.classification,p.scan_angle,record)+ "\n")
file_out.close()
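Two things in this version are expensive: the points list materializes roughly 4 million coordinate tuples at once, and lasfile.File(inFile, None, 'r') is opened again inside the loop for every plot that passes the MinPoints test. As a minimal sketch (my own illustration, not code from the question), the file handle can be opened once and reused across plots; this assumes, as the snippet above already does, that the liblas file object supports indexing:

f = lasfile.File(inFile, None, 'r')
try:
    for i in xrange(len(sr)):
        verts = np.array(sr[i].shape.points, float)
        index = nonzero(points_inside_poly(points, verts))[0]
        # reuse the single handle instead of opening a new one per plot
        inside_points = [f[l] for l in index]
        # ... write inside_points exactly as above ...
finally:
    f.close()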
Here is the original function:
def LAS2TXTClipSplitbyChunk(inFile, poly, chunkSize=1, MinPoints=1):
    sf = shapefile.Reader(poly)  # open shpfile
    sr = sf.shapeRecords()
    poly_filename, ext = path.splitext(poly)
    inFile_filename = os.path.splitext(os.path.basename(inFile))[0]
    pbar = ProgressBar(len(sr))  # set progressbar
    if chunkSize == None:
        points = [(p.x, p.y) for p in lasfile.File(inFile, None, 'r')]
        for i in xrange(len(sr)):
            pbar.update(i + 1)  # progressbar
            verts = np.array(sr[i].shape.points, float)
            record = sr[i].record[0]
            index = nonzero(points_inside_poly(points, verts))[0]
            if len(index) >= MinPoints:
                file_out = open("{0}_{1}_{2}.txt".format(poly_filename, inFile_filename, record), "w")
                inside_points = [lasfile.File(inFile, None, 'r')[l] for l in index]
                for p in inside_points:
                    file_out.write("%s %s %s %s %s %s %s %s %s %s %s\n" % (p.x, p.y, p.z, p.intensity,
                        p.return_number, p.number_of_returns, p.scan_direction, p.flightline_edge,
                        p.classification, p.scan_angle, record))
                file_out.close()
    else:
        for i in xrange(len(sr)):
            pbar.update(i + 1)  # progressbar
            verts = np.array(sr[i].shape.points, float)
            record = sr[i].record[0]
            f = lasfile.File(inFile, None, 'r')
            file_out = open("{0}_{1}_{2}.txt".format(poly_filename, inFile_filename, record), "w")
            TotPoints = 0
            while True:
                chunk = list(islice(f, chunkSize))
                if not chunk:
                    break
                points = [(p.x, p.y) for p in chunk]
                index = nonzero(points_inside_poly(points, verts))[0]
                TotPoints += len(index)  # add to the count of points inside the plot
                chunk = [chunk[l] for l in index]
                for p in chunk:
                    file_out.write("%s %s %s %s %s %s %s %s %s %s %s\n" % (p.x, p.y, p.z, p.intensity,
                        p.return_number, p.number_of_returns, p.scan_direction, p.flightline_edge,
                        p.classification, p.scan_angle, record))
            if TotPoints >= MinPoints:
                file_out.close()
            else:
                file_out.close()
                os.remove("{0}_{1}_{2}.txt".format(poly_filename, inFile_filename, record))
            f.close()
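Both branches rely on the same chunking idiom: islice(f, chunkSize) pulls at most chunkSize points from the file iterator, and an empty chunk signals the end of the file. unutbu's version further down compresses the while loop into the two-argument sentinel form of iter. A self-contained illustration of that idiom:

from itertools import islice

def chunks(iterable, size):
    """Yield successive lists of at most `size` items from `iterable`."""
    it = iter(iterable)
    # iter(callable, sentinel) calls the lambda repeatedly until it
    # returns the sentinel value, which here is the empty list.
    return iter(lambda: list(islice(it, size)), [])

for chunk in chunks(range(10), 4):
    print(chunk)
# [0, 1, 2, 3]
# [4, 5, 6, 7]
# [8, 9]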
The script unutbu suggested is:
import shapefile
import os
import glob
from os import path
import numpy as np
from numpy import nonzero
from matplotlib.nxutils import points_inside_poly
from itertools import islice
from liblas import file as lasfile
from shapely.geometry import Polygon
from progressbar import ProgressBar
import multiprocessing as mp

inFile = "C://04-las_clip_inside_area//prova//Ku_115_class_Notground_normalize.las"
poly = "C://04-las_clip_inside_area//prova//ku_115_plot_clip.shp"
chunkSize = None
MinPoints = 1

def pointinside(record):
    verts = np.array(record.shape.points, float)
    record = record.record[0]
    index = nonzero(points_inside_poly(points, verts))[0]
    if len(index) >= MinPoints:
        outfile = "{0}_{1}_{2}.txt".format(poly_filename, inFile_filename, record)
        with open(outfile, "w") as file_out:
            inside_points = [lasfile.File(inFile, None, 'r')[l] for l in index]
            for p in inside_points:
                fields = (p.x, p.y, p.z, p.intensity, p.return_number,
                          p.number_of_returns, p.scan_direction, p.flightline_edge,
                          p.classification, p.scan_angle, record)
                file_out.write(' '.join(map(str, fields)) + "\n")

sf = shapefile.Reader(poly)  # open shpfile
sr = sf.shapeRecords()
poly_filename, ext = path.splitext(poly)
inFile_filename = os.path.splitext(os.path.basename(inFile))[0]
pbar = ProgressBar(len(sr))  # set progressbar
if chunkSize == None:
    points = [(p.x, p.y) for p in lasfile.File(inFile, None, 'r')]
    for i in xrange(len(sr)):
        pbar.update(i + 1)  # progressbar
        proc = mp.Process(target=pointinside, args=(sr[i],))
        proc.start()
        proc.join()
Solution:

The only reliable way to free the memory used for a temporary calculation is to run that calculation in a subprocess. When the subprocess ends, the memory is released.
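As a toy demonstration of this principle (the work function here is hypothetical, not part of the answer), the large temporary list below lives and dies inside the child process, so the parent's memory footprint stays flat however many tasks are run:

import multiprocessing as mp

def work(n):
    # This large temporary allocation belongs to the child process and
    # is returned to the operating system as soon as the child exits.
    big = [float(i) for i in xrange(n)]
    return sum(big)

if __name__ == '__main__':
    for _ in range(3):
        proc = mp.Process(target=work, args=(10**7,))
        proc.start()
        proc.join()  # the parent's memory use stays flat across iterations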
If you move the code in the outer loop into a function (the code below calls it pointinside), you can run that work in subprocesses using the multiprocessing module:
import sys
import os
import time
import itertools
import multiprocessing as mp
import numpy as np
import matplotlib.nxutils as nx
import liblas
import shapefile

clock = time.clock if sys.platform == 'win32' else time.time

def LAS2TXTClipSplitbyChunk(inFile, poly, chunkSize=1, MinPoints=1):
    sf = shapefile.Reader(poly)  # open shpfile
    sr = sf.shapeRecords()
    poly_filename, ext = os.path.splitext(poly)
    for record in sr:
        inFile_filename = os.path.splitext(os.path.basename(inFile))[0]
        record_num = record.record[0]
        out_filename = '{0}_{1}_{2}.txt'.format(
            poly_filename, inFile_filename, record_num)
        pool.apply_async(pointinside,
                         args=(record, out_filename, inFile, chunkSize, MinPoints),
                         callback=update)

def pointinside(record, out_filename, inFile, chunkSize, MinPoints):
    start = clock()
    record_num = record.record[0]
    verts = np.array(record.shape.points, float)
    f = iter(liblas.file.File(inFile, None, 'rb'))
    result = []
    worth_writing = False
    for chunk in iter(lambda: list(itertools.islice(f, chunkSize)), []):
        points = [(p.x, p.y) for p in chunk]
        index = nx.points_inside_poly(points, verts)
        chunk = [p for inside, p in itertools.izip(index, chunk) if inside]
        for p in chunk:
            fields = (p.x, p.y, p.z, p.intensity, p.return_number,
                      p.number_of_returns, p.scan_direction, p.flightline_edge,
                      p.classification, p.scan_angle, record_num)
            result.append(' '.join(map(str, fields)))
        if len(result) >= bufferSize:
            # Writing to disk is slow. Doing it once for every iteration is
            # inefficient. So instead build up bufferSize number of lines
            # before writing them all to disk.
            worth_writing = True
            with open(out_filename, 'a') as file_out:
                file_out.write('\n'.join(result) + '\n')
            result = []
    # In case there were some results (fewer than bufferSize lines), we
    # dump them to disk here.
    if (len(result) >= MinPoints) or worth_writing:
        with open(out_filename, 'a') as file_out:
            file_out.write('\n'.join(result) + '\n')
    f.close()
    end = clock()
    return end - start

def update(result):
    with open(debug_filename, 'a') as f:
        f.write('{r}\n'.format(r=result))

if __name__ == '__main__':
    workdir = 'C://04-las_clip_inside_area//prova//'
    # workdir = os.path.expanduser('~/tmp/tmp')
    os.chdir(workdir)
    inFile = 'Ku_115_class_Notground_normalize.las'
    poly = 'ku_115_plot_clip.shp'
    debug_filename = 'debug.dat'
    chunkSize = None
    MinPoints = 1
    bufferSize = max(MinPoints, 100)

    pool = mp.Pool()
    LAS2TXTClipSplitbyChunk(inFile, poly, chunkSize, MinPoints)
    pool.close()
    pool.join()
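For reference, here is an isolated sketch of the Pool.apply_async pattern the answer is built on (the square and collect functions are hypothetical). Each task is queued to a worker process, and the callback runs in the parent as results arrive, which is why update above can safely append timings to debug.dat:

import multiprocessing as mp

def square(x):
    return x * x

def collect(result):
    # Callbacks execute in the parent process, one call per finished
    # task, so this is a safe place to log or accumulate results.
    results.append(result)

if __name__ == '__main__':
    results = []
    pool = mp.Pool()
    for x in range(5):
        pool.apply_async(square, args=(x,), callback=collect)
    pool.close()
    pool.join()
    print(sorted(results))  # [0, 1, 4, 9, 16]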
Here is a plot of the time each task took to complete:
In [129]: import matplotlib.pyplot as plt
In [130]: import numpy as np
In [131]: x = np.genfromtxt('debug.dat')
In [132]: plt.plot(x)
Out[132]: [<matplotlib.lines.Line2D object at 0xe309b4c>]
In [133]: plt.show()
I do not see any progressive slowdown. Perhaps give this code a try.