933 lines
33 KiB
Python
933 lines
33 KiB
Python
# -*- coding: utf-8 -*-
|
|
# imageio is distributed under the terms of the (new) BSD License.
|
|
|
|
""" Plugin for reading DICOM files.
|
|
"""
|
|
|
|
# todo: Use pydicom:
|
|
# * Note: is not py3k ready yet
|
|
# * Allow reading the full meta info
|
|
# I think we can more or less replace the SimpleDicomReader with a
|
|
# pydicom.Dataset For series, only ned to read the full info from one
|
|
# file: speed still high
|
|
# * Perhaps allow writing?
|
|
|
|
import sys
|
|
import os
|
|
import struct
|
|
import logging
|
|
|
|
import numpy as np
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Determine endianity of system
|
|
sys_is_little_endian = sys.byteorder == "little"
|
|
|
|
# Define a dictionary that contains the tags that we would like to know
|
|
MINIDICT = {
|
|
(0x7FE0, 0x0010): ("PixelData", "OB"),
|
|
# Date and time
|
|
(0x0008, 0x0020): ("StudyDate", "DA"),
|
|
(0x0008, 0x0021): ("SeriesDate", "DA"),
|
|
(0x0008, 0x0022): ("AcquisitionDate", "DA"),
|
|
(0x0008, 0x0023): ("ContentDate", "DA"),
|
|
(0x0008, 0x0030): ("StudyTime", "TM"),
|
|
(0x0008, 0x0031): ("SeriesTime", "TM"),
|
|
(0x0008, 0x0032): ("AcquisitionTime", "TM"),
|
|
(0x0008, 0x0033): ("ContentTime", "TM"),
|
|
# With what, where, by whom?
|
|
(0x0008, 0x0060): ("Modality", "CS"),
|
|
(0x0008, 0x0070): ("Manufacturer", "LO"),
|
|
(0x0008, 0x0080): ("InstitutionName", "LO"),
|
|
# Descriptions
|
|
(0x0008, 0x1030): ("StudyDescription", "LO"),
|
|
(0x0008, 0x103E): ("SeriesDescription", "LO"),
|
|
# UID's
|
|
(0x0008, 0x0016): ("SOPClassUID", "UI"),
|
|
(0x0008, 0x0018): ("SOPInstanceUID", "UI"),
|
|
(0x0020, 0x000D): ("StudyInstanceUID", "UI"),
|
|
(0x0020, 0x000E): ("SeriesInstanceUID", "UI"),
|
|
(0x0008, 0x0117): ("ContextUID", "UI"),
|
|
# Numbers
|
|
(0x0020, 0x0011): ("SeriesNumber", "IS"),
|
|
(0x0020, 0x0012): ("AcquisitionNumber", "IS"),
|
|
(0x0020, 0x0013): ("InstanceNumber", "IS"),
|
|
(0x0020, 0x0014): ("IsotopeNumber", "IS"),
|
|
(0x0020, 0x0015): ("PhaseNumber", "IS"),
|
|
(0x0020, 0x0016): ("IntervalNumber", "IS"),
|
|
(0x0020, 0x0017): ("TimeSlotNumber", "IS"),
|
|
(0x0020, 0x0018): ("AngleNumber", "IS"),
|
|
(0x0020, 0x0019): ("ItemNumber", "IS"),
|
|
(0x0020, 0x0020): ("PatientOrientation", "CS"),
|
|
(0x0020, 0x0030): ("ImagePosition", "CS"),
|
|
(0x0020, 0x0032): ("ImagePositionPatient", "CS"),
|
|
(0x0020, 0x0035): ("ImageOrientation", "CS"),
|
|
(0x0020, 0x0037): ("ImageOrientationPatient", "CS"),
|
|
# Patient information
|
|
(0x0010, 0x0010): ("PatientName", "PN"),
|
|
(0x0010, 0x0020): ("PatientID", "LO"),
|
|
(0x0010, 0x0030): ("PatientBirthDate", "DA"),
|
|
(0x0010, 0x0040): ("PatientSex", "CS"),
|
|
(0x0010, 0x1010): ("PatientAge", "AS"),
|
|
(0x0010, 0x1020): ("PatientSize", "DS"),
|
|
(0x0010, 0x1030): ("PatientWeight", "DS"),
|
|
# Image specific (required to construct numpy array)
|
|
(0x0028, 0x0002): ("SamplesPerPixel", "US"),
|
|
(0x0028, 0x0008): ("NumberOfFrames", "IS"),
|
|
(0x0028, 0x0100): ("BitsAllocated", "US"),
|
|
(0x0028, 0x0101): ("BitsStored", "US"),
|
|
(0x0028, 0x0102): ("HighBit", "US"),
|
|
(0x0028, 0x0103): ("PixelRepresentation", "US"),
|
|
(0x0028, 0x0010): ("Rows", "US"),
|
|
(0x0028, 0x0011): ("Columns", "US"),
|
|
(0x0028, 0x1052): ("RescaleIntercept", "DS"),
|
|
(0x0028, 0x1053): ("RescaleSlope", "DS"),
|
|
# Image specific (for the user)
|
|
(0x0028, 0x0030): ("PixelSpacing", "DS"),
|
|
(0x0018, 0x0088): ("SliceSpacing", "DS"),
|
|
}
|
|
|
|
# Define some special tags:
|
|
# See PS 3.5-2008 section 7.5 (p.40)
|
|
ItemTag = (0xFFFE, 0xE000) # start of Sequence Item
|
|
ItemDelimiterTag = (0xFFFE, 0xE00D) # end of Sequence Item
|
|
SequenceDelimiterTag = (0xFFFE, 0xE0DD) # end of Sequence of undefined length
|
|
|
|
# Define set of groups that we're interested in (so we can quickly skip others)
|
|
GROUPS = set([key[0] for key in MINIDICT.keys()])
|
|
VRS = set([val[1] for val in MINIDICT.values()])
|
|
|
|
|
|
class NotADicomFile(Exception):
|
|
pass
|
|
|
|
|
|
class CompressedDicom(RuntimeError):
|
|
pass
|
|
|
|
|
|
class SimpleDicomReader(object):
|
|
"""
|
|
This class provides reading of pixel data from DICOM files. It is
|
|
focussed on getting the pixel data, not the meta info.
|
|
|
|
To use, first create an instance of this class (giving it
|
|
a file object or filename). Next use the info attribute to
|
|
get a dict of the meta data. The loading of pixel data is
|
|
deferred until get_numpy_array() is called.
|
|
|
|
Comparison with Pydicom
|
|
-----------------------
|
|
|
|
This code focusses on getting the pixel data out, which allows some
|
|
shortcuts, resulting in the code being much smaller.
|
|
|
|
Since the processing of data elements is much cheaper (it skips a lot
|
|
of tags), this code is about 3x faster than pydicom (except for the
|
|
deflated DICOM files).
|
|
|
|
This class does borrow some code (and ideas) from the pydicom
|
|
project, and (to the best of our knowledge) has the same limitations
|
|
as pydicom with regard to the type of files that it can handle.
|
|
|
|
Limitations
|
|
-----------
|
|
|
|
For more advanced DICOM processing, please check out pydicom.
|
|
|
|
* Only a predefined subset of data elements (meta information) is read.
|
|
* This is a reader; it can not write DICOM files.
|
|
* (just like pydicom) it can handle none of the compressed DICOM
|
|
formats except for "Deflated Explicit VR Little Endian"
|
|
(1.2.840.10008.1.2.1.99).
|
|
|
|
"""
|
|
|
|
def __init__(self, file):
|
|
# Open file if filename given
|
|
if isinstance(file, str):
|
|
self._filename = file
|
|
self._file = open(file, "rb")
|
|
else:
|
|
self._filename = "<unknown file>"
|
|
self._file = file
|
|
# Init variable to store position and size of pixel data
|
|
self._pixel_data_loc = None
|
|
# The meta header is always explicit and little endian
|
|
self.is_implicit_VR = False
|
|
self.is_little_endian = True
|
|
self._unpackPrefix = "<"
|
|
# Dict to store data elements of interest in
|
|
self._info = {}
|
|
# VR Conversion
|
|
self._converters = {
|
|
# Numbers
|
|
"US": lambda x: self._unpack("H", x),
|
|
"UL": lambda x: self._unpack("L", x),
|
|
# Numbers encoded as strings
|
|
"DS": lambda x: self._splitValues(x, float, "\\"),
|
|
"IS": lambda x: self._splitValues(x, int, "\\"),
|
|
# strings
|
|
"AS": lambda x: x.decode("ascii", "ignore").strip("\x00"),
|
|
"DA": lambda x: x.decode("ascii", "ignore").strip("\x00"),
|
|
"TM": lambda x: x.decode("ascii", "ignore").strip("\x00"),
|
|
"UI": lambda x: x.decode("ascii", "ignore").strip("\x00"),
|
|
"LO": lambda x: x.decode("utf-8", "ignore").strip("\x00").rstrip(),
|
|
"CS": lambda x: self._splitValues(x, float, "\\"),
|
|
"PN": lambda x: x.decode("utf-8", "ignore").strip("\x00").rstrip(),
|
|
}
|
|
|
|
# Initiate reading
|
|
self._read()
|
|
|
|
@property
|
|
def info(self):
|
|
return self._info
|
|
|
|
def _splitValues(self, x, type, splitter):
|
|
s = x.decode("ascii").strip("\x00")
|
|
try:
|
|
if splitter in s:
|
|
return tuple([type(v) for v in s.split(splitter) if v.strip()])
|
|
else:
|
|
return type(s)
|
|
except ValueError:
|
|
return s
|
|
|
|
def _unpack(self, fmt, value):
|
|
return struct.unpack(self._unpackPrefix + fmt, value)[0]
|
|
|
|
# Really only so we need minimal changes to _pixel_data_numpy
|
|
def __iter__(self):
|
|
return iter(self._info.keys())
|
|
|
|
def __getattr__(self, key):
|
|
info = object.__getattribute__(self, "_info")
|
|
if key in info:
|
|
return info[key]
|
|
return object.__getattribute__(self, key) # pragma: no cover
|
|
|
|
def _read(self):
|
|
f = self._file
|
|
# Check prefix after peamble
|
|
f.seek(128)
|
|
if f.read(4) != b"DICM":
|
|
raise NotADicomFile("Not a valid DICOM file.")
|
|
# Read
|
|
self._read_header()
|
|
self._read_data_elements()
|
|
self._get_shape_and_sampling()
|
|
# Close if done, reopen if necessary to read pixel data
|
|
if os.path.isfile(self._filename):
|
|
self._file.close()
|
|
self._file = None
|
|
|
|
def _readDataElement(self):
|
|
f = self._file
|
|
# Get group and element
|
|
group = self._unpack("H", f.read(2))
|
|
element = self._unpack("H", f.read(2))
|
|
# Get value length
|
|
if self.is_implicit_VR:
|
|
vl = self._unpack("I", f.read(4))
|
|
else:
|
|
vr = f.read(2)
|
|
if vr in (b"OB", b"OW", b"SQ", b"UN"):
|
|
reserved = f.read(2) # noqa
|
|
vl = self._unpack("I", f.read(4))
|
|
else:
|
|
vl = self._unpack("H", f.read(2))
|
|
# Get value
|
|
if group == 0x7FE0 and element == 0x0010:
|
|
here = f.tell()
|
|
self._pixel_data_loc = here, vl
|
|
f.seek(here + vl)
|
|
return group, element, b"Deferred loading of pixel data"
|
|
else:
|
|
if vl == 0xFFFFFFFF:
|
|
value = self._read_undefined_length_value()
|
|
else:
|
|
value = f.read(vl)
|
|
return group, element, value
|
|
|
|
def _read_undefined_length_value(self, read_size=128):
|
|
"""Copied (in compacted form) from PyDicom
|
|
Copyright Darcy Mason.
|
|
"""
|
|
fp = self._file
|
|
# data_start = fp.tell()
|
|
search_rewind = 3
|
|
bytes_to_find = struct.pack(
|
|
self._unpackPrefix + "HH", SequenceDelimiterTag[0], SequenceDelimiterTag[1]
|
|
)
|
|
|
|
found = False
|
|
value_chunks = []
|
|
while not found:
|
|
chunk_start = fp.tell()
|
|
bytes_read = fp.read(read_size)
|
|
if len(bytes_read) < read_size:
|
|
# try again,
|
|
# if still don't get required amount, this is last block
|
|
new_bytes = fp.read(read_size - len(bytes_read))
|
|
bytes_read += new_bytes
|
|
if len(bytes_read) < read_size:
|
|
raise EOFError(
|
|
"End of file reached before sequence " "delimiter found."
|
|
)
|
|
index = bytes_read.find(bytes_to_find)
|
|
if index != -1:
|
|
found = True
|
|
value_chunks.append(bytes_read[:index])
|
|
fp.seek(chunk_start + index + 4) # rewind to end of delimiter
|
|
length = fp.read(4)
|
|
if length != b"\0\0\0\0":
|
|
logger.warning(
|
|
"Expected 4 zero bytes after undefined length " "delimiter"
|
|
)
|
|
else:
|
|
fp.seek(fp.tell() - search_rewind) # rewind a bit
|
|
# accumulate the bytes read (not including the rewind)
|
|
value_chunks.append(bytes_read[:-search_rewind])
|
|
|
|
# if get here then have found the byte string
|
|
return b"".join(value_chunks)
|
|
|
|
def _read_header(self):
|
|
f = self._file
|
|
TransferSyntaxUID = None
|
|
|
|
# Read all elements, store transferSyntax when we encounter it
|
|
try:
|
|
while True:
|
|
fp_save = f.tell()
|
|
# Get element
|
|
group, element, value = self._readDataElement()
|
|
if group == 0x02:
|
|
if group == 0x02 and element == 0x10:
|
|
TransferSyntaxUID = value.decode("ascii").strip("\x00")
|
|
else:
|
|
# No more group 2: rewind and break
|
|
# (don't trust group length)
|
|
f.seek(fp_save)
|
|
break
|
|
except (EOFError, struct.error): # pragma: no cover
|
|
raise RuntimeError("End of file reached while still in header.")
|
|
|
|
# Handle transfer syntax
|
|
self._info["TransferSyntaxUID"] = TransferSyntaxUID
|
|
#
|
|
if TransferSyntaxUID is None:
|
|
# Assume ExplicitVRLittleEndian
|
|
is_implicit_VR, is_little_endian = False, True
|
|
elif TransferSyntaxUID == "1.2.840.10008.1.2.1":
|
|
# ExplicitVRLittleEndian
|
|
is_implicit_VR, is_little_endian = False, True
|
|
elif TransferSyntaxUID == "1.2.840.10008.1.2.2":
|
|
# ExplicitVRBigEndian
|
|
is_implicit_VR, is_little_endian = False, False
|
|
elif TransferSyntaxUID == "1.2.840.10008.1.2":
|
|
# implicit VR little endian
|
|
is_implicit_VR, is_little_endian = True, True
|
|
elif TransferSyntaxUID == "1.2.840.10008.1.2.1.99":
|
|
# DeflatedExplicitVRLittleEndian:
|
|
is_implicit_VR, is_little_endian = False, True
|
|
self._inflate()
|
|
else:
|
|
# http://www.dicomlibrary.com/dicom/transfer-syntax/
|
|
t, extra_info = TransferSyntaxUID, ""
|
|
if "1.2.840.10008.1.2.4.50" <= t < "1.2.840.10008.1.2.4.99":
|
|
extra_info = " (JPEG)"
|
|
if "1.2.840.10008.1.2.4.90" <= t < "1.2.840.10008.1.2.4.99":
|
|
extra_info = " (JPEG 2000)"
|
|
if t == "1.2.840.10008.1.2.5":
|
|
extra_info = " (RLE)"
|
|
if t == "1.2.840.10008.1.2.6.1":
|
|
extra_info = " (RFC 2557)"
|
|
raise CompressedDicom(
|
|
"The dicom reader can only read files with "
|
|
"uncompressed image data - not %r%s. You "
|
|
"can try using dcmtk or gdcm to convert the "
|
|
"image." % (t, extra_info)
|
|
)
|
|
|
|
# From hereon, use implicit/explicit big/little endian
|
|
self.is_implicit_VR = is_implicit_VR
|
|
self.is_little_endian = is_little_endian
|
|
self._unpackPrefix = "><"[is_little_endian]
|
|
|
|
def _read_data_elements(self):
|
|
info = self._info
|
|
try:
|
|
while True:
|
|
# Get element
|
|
group, element, value = self._readDataElement()
|
|
# Is it a group we are interested in?
|
|
if group in GROUPS:
|
|
key = (group, element)
|
|
name, vr = MINIDICT.get(key, (None, None))
|
|
# Is it an element we are interested in?
|
|
if name:
|
|
# Store value
|
|
converter = self._converters.get(vr, lambda x: x)
|
|
info[name] = converter(value)
|
|
except (EOFError, struct.error):
|
|
pass # end of file ...
|
|
|
|
def get_numpy_array(self):
|
|
"""Get numpy arra for this DICOM file, with the correct shape,
|
|
and pixel values scaled appropriately.
|
|
"""
|
|
# Is there pixel data at all?
|
|
if "PixelData" not in self:
|
|
raise TypeError("No pixel data found in this dataset.")
|
|
|
|
# Load it now if it was not already loaded
|
|
if self._pixel_data_loc and len(self.PixelData) < 100:
|
|
# Reopen file?
|
|
close_file = False
|
|
if self._file is None:
|
|
close_file = True
|
|
self._file = open(self._filename, "rb")
|
|
# Read data
|
|
self._file.seek(self._pixel_data_loc[0])
|
|
if self._pixel_data_loc[1] == 0xFFFFFFFF:
|
|
value = self._read_undefined_length_value()
|
|
else:
|
|
value = self._file.read(self._pixel_data_loc[1])
|
|
# Close file
|
|
if close_file:
|
|
self._file.close()
|
|
self._file = None
|
|
# Overwrite
|
|
self._info["PixelData"] = value
|
|
|
|
# Get data
|
|
data = self._pixel_data_numpy()
|
|
data = self._apply_slope_and_offset(data)
|
|
|
|
# Remove data again to preserve memory
|
|
# Note that the data for the original file is loaded twice ...
|
|
self._info["PixelData"] = (
|
|
b"Data converted to numpy array, " + b"raw data removed to preserve memory"
|
|
)
|
|
return data
|
|
|
|
def _get_shape_and_sampling(self):
|
|
"""Get shape and sampling without actuall using the pixel data.
|
|
In this way, the user can get an idea what's inside without having
|
|
to load it.
|
|
"""
|
|
# Get shape (in the same way that pydicom does)
|
|
if "NumberOfFrames" in self and self.NumberOfFrames > 1:
|
|
if self.SamplesPerPixel > 1:
|
|
shape = (
|
|
self.SamplesPerPixel,
|
|
self.NumberOfFrames,
|
|
self.Rows,
|
|
self.Columns,
|
|
)
|
|
else:
|
|
shape = self.NumberOfFrames, self.Rows, self.Columns
|
|
elif "SamplesPerPixel" in self:
|
|
if self.SamplesPerPixel > 1:
|
|
if self.BitsAllocated == 8:
|
|
shape = self.SamplesPerPixel, self.Rows, self.Columns
|
|
else:
|
|
raise NotImplementedError(
|
|
"DICOM plugin only handles "
|
|
"SamplesPerPixel > 1 if Bits "
|
|
"Allocated = 8"
|
|
)
|
|
else:
|
|
shape = self.Rows, self.Columns
|
|
else:
|
|
raise RuntimeError(
|
|
"DICOM file has no SamplesPerPixel " "(perhaps this is a report?)"
|
|
)
|
|
|
|
# Try getting sampling between pixels
|
|
if "PixelSpacing" in self:
|
|
sampling = float(self.PixelSpacing[0]), float(self.PixelSpacing[1])
|
|
else:
|
|
sampling = 1.0, 1.0
|
|
if "SliceSpacing" in self:
|
|
sampling = (abs(self.SliceSpacing),) + sampling
|
|
|
|
# Ensure that sampling has as many elements as shape
|
|
sampling = (1.0,) * (len(shape) - len(sampling)) + sampling[-len(shape) :]
|
|
|
|
# Set shape and sampling
|
|
self._info["shape"] = shape
|
|
self._info["sampling"] = sampling
|
|
|
|
def _pixel_data_numpy(self):
|
|
"""Return a NumPy array of the pixel data."""
|
|
# Taken from pydicom
|
|
# Copyright (c) 2008-2012 Darcy Mason
|
|
|
|
if "PixelData" not in self:
|
|
raise TypeError("No pixel data found in this dataset.")
|
|
|
|
# determine the type used for the array
|
|
need_byteswap = self.is_little_endian != sys_is_little_endian
|
|
|
|
# Make NumPy format code, e.g. "uint16", "int32" etc
|
|
# from two pieces of info:
|
|
# self.PixelRepresentation -- 0 for unsigned, 1 for signed;
|
|
# self.BitsAllocated -- 8, 16, or 32
|
|
format_str = "%sint%d" % (
|
|
("u", "")[self.PixelRepresentation],
|
|
self.BitsAllocated,
|
|
)
|
|
try:
|
|
numpy_format = np.dtype(format_str)
|
|
except TypeError: # pragma: no cover
|
|
raise TypeError(
|
|
"Data type not understood by NumPy: format='%s', "
|
|
" PixelRepresentation=%d, BitsAllocated=%d"
|
|
% (numpy_format, self.PixelRepresentation, self.BitsAllocated)
|
|
)
|
|
|
|
# Have correct Numpy format, so create the NumPy array
|
|
arr = np.frombuffer(self.PixelData, numpy_format).copy()
|
|
|
|
# XXX byte swap - may later handle this in read_file!!?
|
|
if need_byteswap:
|
|
arr.byteswap(True) # True means swap in-place, don't make new copy
|
|
|
|
# Note the following reshape operations return a new *view* onto arr,
|
|
# but don't copy the data
|
|
arr = arr.reshape(*self._info["shape"])
|
|
return arr
|
|
|
|
def _apply_slope_and_offset(self, data):
|
|
"""
|
|
If RescaleSlope and RescaleIntercept are present in the data,
|
|
apply them. The data type of the data is changed if necessary.
|
|
"""
|
|
# Obtain slope and offset
|
|
slope, offset = 1, 0
|
|
needFloats, needApplySlopeOffset = False, False
|
|
if "RescaleSlope" in self:
|
|
needApplySlopeOffset = True
|
|
slope = self.RescaleSlope
|
|
if "RescaleIntercept" in self:
|
|
needApplySlopeOffset = True
|
|
offset = self.RescaleIntercept
|
|
if int(slope) != slope or int(offset) != offset:
|
|
needFloats = True
|
|
if not needFloats:
|
|
slope, offset = int(slope), int(offset)
|
|
|
|
# Apply slope and offset
|
|
if needApplySlopeOffset:
|
|
# Maybe we need to change the datatype?
|
|
if data.dtype in [np.float32, np.float64]:
|
|
pass
|
|
elif needFloats:
|
|
data = data.astype(np.float32)
|
|
else:
|
|
# Determine required range
|
|
minReq, maxReq = data.min(), data.max()
|
|
minReq = min([minReq, minReq * slope + offset, maxReq * slope + offset])
|
|
maxReq = max([maxReq, minReq * slope + offset, maxReq * slope + offset])
|
|
|
|
# Determine required datatype from that
|
|
dtype = None
|
|
if minReq < 0:
|
|
# Signed integer type
|
|
maxReq = max([-minReq, maxReq])
|
|
if maxReq < 2**7:
|
|
dtype = np.int8
|
|
elif maxReq < 2**15:
|
|
dtype = np.int16
|
|
elif maxReq < 2**31:
|
|
dtype = np.int32
|
|
else:
|
|
dtype = np.float32
|
|
else:
|
|
# Unsigned integer type
|
|
if maxReq < 2**8:
|
|
dtype = np.int8
|
|
elif maxReq < 2**16:
|
|
dtype = np.int16
|
|
elif maxReq < 2**32:
|
|
dtype = np.int32
|
|
else:
|
|
dtype = np.float32
|
|
# Change datatype
|
|
if dtype != data.dtype:
|
|
data = data.astype(dtype)
|
|
|
|
# Apply slope and offset
|
|
data *= slope
|
|
data += offset
|
|
|
|
# Done
|
|
return data
|
|
|
|
def _inflate(self):
|
|
# Taken from pydicom
|
|
# Copyright (c) 2008-2012 Darcy Mason
|
|
import zlib
|
|
from io import BytesIO
|
|
|
|
# See PS3.6-2008 A.5 (p 71) -- when written, the entire dataset
|
|
# following the file metadata was prepared the normal way,
|
|
# then "deflate" compression applied.
|
|
# All that is needed here is to decompress and then
|
|
# use as normal in a file-like object
|
|
zipped = self._file.read()
|
|
# -MAX_WBITS part is from comp.lang.python answer:
|
|
# groups.google.com/group/comp.lang.python/msg/e95b3b38a71e6799
|
|
unzipped = zlib.decompress(zipped, -zlib.MAX_WBITS)
|
|
self._file = BytesIO(unzipped) # a file-like object
|
|
|
|
|
|
class DicomSeries(object):
|
|
"""DicomSeries
|
|
This class represents a serie of dicom files (SimpleDicomReader
|
|
objects) that belong together. If these are multiple files, they
|
|
represent the slices of a volume (like for CT or MRI).
|
|
"""
|
|
|
|
def __init__(self, suid, progressIndicator):
|
|
# Init dataset list and the callback
|
|
self._entries = []
|
|
|
|
# Init props
|
|
self._suid = suid
|
|
self._info = {}
|
|
self._progressIndicator = progressIndicator
|
|
|
|
def __len__(self):
|
|
return len(self._entries)
|
|
|
|
def __iter__(self):
|
|
return iter(self._entries)
|
|
|
|
def __getitem__(self, index):
|
|
return self._entries[index]
|
|
|
|
@property
|
|
def suid(self):
|
|
return self._suid
|
|
|
|
@property
|
|
def shape(self):
|
|
"""The shape of the data (nz, ny, nx)."""
|
|
return self._info["shape"]
|
|
|
|
@property
|
|
def sampling(self):
|
|
"""The sampling (voxel distances) of the data (dz, dy, dx)."""
|
|
return self._info["sampling"]
|
|
|
|
@property
|
|
def info(self):
|
|
"""A dictionary containing the information as present in the
|
|
first dicomfile of this serie. None if there are no entries."""
|
|
return self._info
|
|
|
|
@property
|
|
def description(self):
|
|
"""A description of the dicom series. Used fields are
|
|
PatientName, shape of the data, SeriesDescription, and
|
|
ImageComments.
|
|
"""
|
|
info = self.info
|
|
|
|
# If no info available, return simple description
|
|
if not info: # pragma: no cover
|
|
return "DicomSeries containing %i images" % len(self)
|
|
|
|
fields = []
|
|
# Give patient name
|
|
if "PatientName" in info:
|
|
fields.append("" + info["PatientName"])
|
|
# Also add dimensions
|
|
if self.shape:
|
|
tmp = [str(d) for d in self.shape]
|
|
fields.append("x".join(tmp))
|
|
# Try adding more fields
|
|
if "SeriesDescription" in info:
|
|
fields.append("'" + info["SeriesDescription"] + "'")
|
|
if "ImageComments" in info:
|
|
fields.append("'" + info["ImageComments"] + "'")
|
|
|
|
# Combine
|
|
return " ".join(fields)
|
|
|
|
def __repr__(self):
|
|
adr = hex(id(self)).upper()
|
|
return "<DicomSeries with %i images at %s>" % (len(self), adr)
|
|
|
|
def get_numpy_array(self):
|
|
"""Get (load) the data that this DicomSeries represents, and return
|
|
it as a numpy array. If this serie contains multiple images, the
|
|
resulting array is 3D, otherwise it's 2D.
|
|
"""
|
|
|
|
# It's easy if no file or if just a single file
|
|
if len(self) == 0:
|
|
raise ValueError("Serie does not contain any files.")
|
|
elif len(self) == 1:
|
|
return self[0].get_numpy_array()
|
|
|
|
# Check info
|
|
if self.info is None:
|
|
raise RuntimeError("Cannot return volume if series not finished.")
|
|
|
|
# Init data (using what the dicom packaged produces as a reference)
|
|
slice = self[0].get_numpy_array()
|
|
vol = np.zeros(self.shape, dtype=slice.dtype)
|
|
vol[0] = slice
|
|
|
|
# Fill volume
|
|
self._progressIndicator.start("loading data", "", len(self))
|
|
for z in range(1, len(self)):
|
|
vol[z] = self[z].get_numpy_array()
|
|
self._progressIndicator.set_progress(z + 1)
|
|
self._progressIndicator.finish()
|
|
|
|
# Done
|
|
import gc
|
|
|
|
gc.collect()
|
|
return vol
|
|
|
|
def _append(self, dcm):
|
|
self._entries.append(dcm)
|
|
|
|
def _sort(self):
|
|
self._entries.sort(
|
|
key=lambda k: (
|
|
k.InstanceNumber,
|
|
(
|
|
k.ImagePositionPatient[2]
|
|
if hasattr(k, "ImagePositionPatient")
|
|
else None
|
|
),
|
|
)
|
|
)
|
|
|
|
def _finish(self):
|
|
"""
|
|
Evaluate the series of dicom files. Together they should make up
|
|
a volumetric dataset. This means the files should meet certain
|
|
conditions. Also some additional information has to be calculated,
|
|
such as the distance between the slices. This method sets the
|
|
attributes for "shape", "sampling" and "info".
|
|
|
|
This method checks:
|
|
* that there are no missing files
|
|
* that the dimensions of all images match
|
|
* that the pixel spacing of all images match
|
|
"""
|
|
|
|
# The datasets list should be sorted by instance number
|
|
L = self._entries
|
|
if len(L) == 0:
|
|
return
|
|
elif len(L) == 1:
|
|
self._info = L[0].info
|
|
return
|
|
|
|
# Get previous
|
|
ds1 = L[0]
|
|
# Init measures to calculate average of
|
|
distance_sum = 0.0
|
|
# Init measures to check (these are in 2D)
|
|
dimensions = ds1.Rows, ds1.Columns
|
|
# sampling = float(ds1.PixelSpacing[0]), float(ds1.PixelSpacing[1])
|
|
sampling = ds1.info["sampling"][:2] # row, column
|
|
|
|
for index in range(len(L)):
|
|
# The first round ds1 and ds2 will be the same, for the
|
|
# distance calculation this does not matter
|
|
# Get current
|
|
ds2 = L[index]
|
|
# Get positions
|
|
pos1 = float(ds1.ImagePositionPatient[2])
|
|
pos2 = float(ds2.ImagePositionPatient[2])
|
|
# Update distance_sum to calculate distance later
|
|
distance_sum += abs(pos1 - pos2)
|
|
# Test measures
|
|
dimensions2 = ds2.Rows, ds2.Columns
|
|
# sampling2 = float(ds2.PixelSpacing[0]), float(ds2.PixelSpacing[1])
|
|
sampling2 = ds2.info["sampling"][:2] # row, column
|
|
if dimensions != dimensions2:
|
|
# We cannot produce a volume if the dimensions match
|
|
raise ValueError("Dimensions of slices does not match.")
|
|
if sampling != sampling2:
|
|
# We can still produce a volume, but we should notify the user
|
|
self._progressIndicator.write("Warn: sampling does not match.")
|
|
# Store previous
|
|
ds1 = ds2
|
|
|
|
# Finish calculating average distance
|
|
# (Note that there are len(L)-1 distances)
|
|
distance_mean = distance_sum / (len(L) - 1)
|
|
|
|
# Set info dict
|
|
self._info = L[0].info.copy()
|
|
|
|
# Store information that is specific for the serie
|
|
self._info["shape"] = (len(L),) + ds2.info["shape"]
|
|
self._info["sampling"] = (distance_mean,) + ds2.info["sampling"]
|
|
|
|
|
|
def list_files(files, path):
|
|
"""List all files in the directory, recursively."""
|
|
for item in os.listdir(path):
|
|
item = os.path.join(path, item)
|
|
if os.path.isdir(item):
|
|
list_files(files, item)
|
|
elif os.path.isfile(item):
|
|
files.append(item)
|
|
|
|
|
|
def process_directory(request, progressIndicator, readPixelData=False):
|
|
"""
|
|
Reads dicom files and returns a list of DicomSeries objects, which
|
|
contain information about the data, and can be used to load the
|
|
image or volume data.
|
|
|
|
if readPixelData is True, the pixel data of all series is read. By
|
|
default the loading of pixeldata is deferred until it is requested
|
|
using the DicomSeries.get_pixel_array() method. In general, both
|
|
methods should be equally fast.
|
|
"""
|
|
# Get directory to examine
|
|
if os.path.isdir(request.filename):
|
|
path = request.filename
|
|
elif os.path.isfile(request.filename):
|
|
path = os.path.dirname(request.filename)
|
|
else: # pragma: no cover - tested earlier
|
|
raise ValueError("Dicom plugin needs a valid filename to examine the directory")
|
|
|
|
# Check files
|
|
files = []
|
|
list_files(files, path) # Find files recursively
|
|
|
|
# Gather file data and put in DicomSeries
|
|
series = {}
|
|
count = 0
|
|
progressIndicator.start("examining files", "files", len(files))
|
|
for filename in files:
|
|
# Show progress (note that we always start with a 0.0)
|
|
count += 1
|
|
progressIndicator.set_progress(count)
|
|
# Skip DICOMDIR files
|
|
if filename.count("DICOMDIR"): # pragma: no cover
|
|
continue
|
|
# Try loading dicom ...
|
|
try:
|
|
dcm = SimpleDicomReader(filename)
|
|
except NotADicomFile:
|
|
continue # skip non-dicom file
|
|
except Exception as why: # pragma: no cover
|
|
progressIndicator.write(str(why))
|
|
continue
|
|
# Get SUID and register the file with an existing or new series object
|
|
try:
|
|
suid = dcm.SeriesInstanceUID
|
|
except AttributeError: # pragma: no cover
|
|
continue # some other kind of dicom file
|
|
if suid not in series:
|
|
series[suid] = DicomSeries(suid, progressIndicator)
|
|
series[suid]._append(dcm)
|
|
|
|
# Finish progress
|
|
# progressIndicator.finish('Found %i series.' % len(series))
|
|
|
|
# Make a list and sort, so that the order is deterministic
|
|
series = list(series.values())
|
|
series.sort(key=lambda x: x.suid)
|
|
|
|
# Split series if necessary
|
|
for serie in reversed([serie for serie in series]):
|
|
splitSerieIfRequired(serie, series, progressIndicator)
|
|
|
|
# Finish all series
|
|
# progressIndicator.start('analyse series', '', len(series))
|
|
series_ = []
|
|
for i in range(len(series)):
|
|
try:
|
|
series[i]._finish()
|
|
series_.append(series[i])
|
|
except Exception as err: # pragma: no cover
|
|
progressIndicator.write(str(err))
|
|
pass # Skip serie (probably report-like file without pixels)
|
|
# progressIndicator.set_progress(i+1)
|
|
progressIndicator.finish("Found %i correct series." % len(series_))
|
|
|
|
# Done
|
|
return series_
|
|
|
|
|
|
def splitSerieIfRequired(serie, series, progressIndicator):
|
|
"""
|
|
Split the serie in multiple series if this is required. The choice
|
|
is based on examing the image position relative to the previous
|
|
image. If it differs too much, it is assumed that there is a new
|
|
dataset. This can happen for example in unspitted gated CT data.
|
|
"""
|
|
|
|
# Sort the original list and get local name
|
|
serie._sort()
|
|
L = serie._entries
|
|
# Init previous slice
|
|
ds1 = L[0]
|
|
# Check whether we can do this
|
|
if "ImagePositionPatient" not in ds1:
|
|
return
|
|
# Initialize a list of new lists
|
|
L2 = [[ds1]]
|
|
# Init slice distance estimate
|
|
distance = 0
|
|
|
|
for index in range(1, len(L)):
|
|
# Get current slice
|
|
ds2 = L[index]
|
|
# Get positions
|
|
pos1 = float(ds1.ImagePositionPatient[2])
|
|
pos2 = float(ds2.ImagePositionPatient[2])
|
|
# Get distances
|
|
newDist = abs(pos1 - pos2)
|
|
# deltaDist = abs(firstPos-pos2)
|
|
# If the distance deviates more than 2x from what we've seen,
|
|
# we can agree it's a new dataset.
|
|
if distance and newDist > 2.1 * distance:
|
|
L2.append([])
|
|
distance = 0
|
|
else:
|
|
# Test missing file
|
|
if distance and newDist > 1.5 * distance:
|
|
progressIndicator.write(
|
|
"Warning: missing file after %r" % ds1._filename
|
|
)
|
|
distance = newDist
|
|
# Add to last list
|
|
L2[-1].append(ds2)
|
|
# Store previous
|
|
ds1 = ds2
|
|
|
|
# Split if we should
|
|
if len(L2) > 1:
|
|
# At what position are we now?
|
|
i = series.index(serie)
|
|
# Create new series
|
|
series2insert = []
|
|
for L in L2:
|
|
newSerie = DicomSeries(serie.suid, progressIndicator)
|
|
newSerie._entries = L
|
|
series2insert.append(newSerie)
|
|
# Insert series and remove self
|
|
for newSerie in reversed(series2insert):
|
|
series.insert(i, newSerie)
|
|
series.remove(serie)
|