Я пытаюсь создать базу данных LMDB для моего проекта машинного обучения Caffe. Но LMDB выдает ошибку при первой попытке вставить точку данных, говоря, что размер карты среды заполнен.
Вот код, который пытается заполнить базу данных:
import numpy as np
from PIL import Image
import os
import lmdb
import random
# my data structure for holding image/label pairs
from serialization import DataPoint
class LoadImages(object):
def __init__(self, image_data_path):
self.image_data_path = image_data_path
self.dirlist = os.listdir(image_data_path)
# find the number of images that are to be read from disk
# in this case there are 370 images.
num = len(self.dirlist)
# shuffle the list of image files so that they are read in a random order
random.shuffle(self.dirlist)
map_size = num*10
j=0
# load images from disk
for image_filename in os.listdir(image_data_path):
# check that every image belongs to either category _D_ or _P_
assert (image_filename[:3] == '_D_' or image_filename[:3] == '_P_'), "ERROR: unknown category"
# set up the LMDB datbase object
env = lmdb.open('image_lmdb', map_size=map_size)
with env.begin(write=True) as txn:
# iterate over (shuffled) list of image files
for image_filename in self.dirlist:
print "Loading " + str(j) + "th image from disk - percentage complete: " + str((float(j)/num) * 100) + " %"
# open the image
with open(str(image_data_path + "/" + image_filename), 'rb') as f:
image = Image.open(f)
npimage = np.asarray(image, dtype=np.float64)
# discard alpha channel, if necessary
if npimage.shape[2] == 4:
npimage = npimage[:,:,:3]
print image_filename + " had its alpha channel removed."
# get category
if image_filename[:3] == '_D_':
category = 0
elif image_filename[:3] == '_P_':
category = 1
# wrap image data and label into a serializable data structure
datapoint = DataPoint(npimage, category)
serialized_datapoint = datapoint.serialize()
# a database key
str_id = '{:08}'.format(j)
# put the data point in the LMDB
txn.put(str_id.encode('ascii'), serialized_datapoint)
j+=1
Я также создал небольшую структуру данных для хранения изображений и меток и их сериализации, которая использовалась выше:
import numpy as np
class DataPoint(object):
def __init__(self, image=None, label=None, dtype=np.float64):
self.image = image
if self.image is not None:
self.image = self.image.astype(dtype)
self.label = label
def serialize(self):
image_string = self.image.tobytes()
label_string = chr(self.label)
datum_string = label_string + image_string
return datum_string
def deserialize(self, string):
image_string = string[1:]
label_string = string[:1]
image = np.fromstring(image_string, dtype=np.float64)
label = ord(label_string)
return DataPoint(image, label)
Вот ошибка:
/usr/bin/python2.7 /home/hal9000/PycharmProjects/Caffe_Experiments_0.6/gather_images.py
Loading 0th image from disk - percentage complete: 0.0 %
Traceback (most recent call last):
File "/home/hal9000/PycharmProjects/Caffe_Experiments_0.6/gather_images.py", line 69, in <module>
g = LoadImages(path)
File "/home/hal9000/PycharmProjects/Caffe_Experiments_0.6/gather_images.py", line 62, in __init__
txn.put(str_id.encode('ascii'), serialized_datapoint)
lmdb.MapFullError: mdb_put: MDB_MAP_FULL: Environment mapsize limit reached