ai-content-maker/.venv/Lib/site-packages/imageio/plugins/_dicom.py

933 lines
33 KiB
Python

# -*- coding: utf-8 -*-
# imageio is distributed under the terms of the (new) BSD License.
""" Plugin for reading DICOM files.
"""
# todo: Use pydicom:
# * Note: is not py3k ready yet
# * Allow reading the full meta info
# I think we can more or less replace the SimpleDicomReader with a
# pydicom.Dataset For series, only ned to read the full info from one
# file: speed still high
# * Perhaps allow writing?
import sys
import os
import struct
import logging
import numpy as np
logger = logging.getLogger(__name__)
# Determine endianity of system
sys_is_little_endian = sys.byteorder == "little"
# Define a dictionary that contains the tags that we would like to know
MINIDICT = {
(0x7FE0, 0x0010): ("PixelData", "OB"),
# Date and time
(0x0008, 0x0020): ("StudyDate", "DA"),
(0x0008, 0x0021): ("SeriesDate", "DA"),
(0x0008, 0x0022): ("AcquisitionDate", "DA"),
(0x0008, 0x0023): ("ContentDate", "DA"),
(0x0008, 0x0030): ("StudyTime", "TM"),
(0x0008, 0x0031): ("SeriesTime", "TM"),
(0x0008, 0x0032): ("AcquisitionTime", "TM"),
(0x0008, 0x0033): ("ContentTime", "TM"),
# With what, where, by whom?
(0x0008, 0x0060): ("Modality", "CS"),
(0x0008, 0x0070): ("Manufacturer", "LO"),
(0x0008, 0x0080): ("InstitutionName", "LO"),
# Descriptions
(0x0008, 0x1030): ("StudyDescription", "LO"),
(0x0008, 0x103E): ("SeriesDescription", "LO"),
# UID's
(0x0008, 0x0016): ("SOPClassUID", "UI"),
(0x0008, 0x0018): ("SOPInstanceUID", "UI"),
(0x0020, 0x000D): ("StudyInstanceUID", "UI"),
(0x0020, 0x000E): ("SeriesInstanceUID", "UI"),
(0x0008, 0x0117): ("ContextUID", "UI"),
# Numbers
(0x0020, 0x0011): ("SeriesNumber", "IS"),
(0x0020, 0x0012): ("AcquisitionNumber", "IS"),
(0x0020, 0x0013): ("InstanceNumber", "IS"),
(0x0020, 0x0014): ("IsotopeNumber", "IS"),
(0x0020, 0x0015): ("PhaseNumber", "IS"),
(0x0020, 0x0016): ("IntervalNumber", "IS"),
(0x0020, 0x0017): ("TimeSlotNumber", "IS"),
(0x0020, 0x0018): ("AngleNumber", "IS"),
(0x0020, 0x0019): ("ItemNumber", "IS"),
(0x0020, 0x0020): ("PatientOrientation", "CS"),
(0x0020, 0x0030): ("ImagePosition", "CS"),
(0x0020, 0x0032): ("ImagePositionPatient", "CS"),
(0x0020, 0x0035): ("ImageOrientation", "CS"),
(0x0020, 0x0037): ("ImageOrientationPatient", "CS"),
# Patient information
(0x0010, 0x0010): ("PatientName", "PN"),
(0x0010, 0x0020): ("PatientID", "LO"),
(0x0010, 0x0030): ("PatientBirthDate", "DA"),
(0x0010, 0x0040): ("PatientSex", "CS"),
(0x0010, 0x1010): ("PatientAge", "AS"),
(0x0010, 0x1020): ("PatientSize", "DS"),
(0x0010, 0x1030): ("PatientWeight", "DS"),
# Image specific (required to construct numpy array)
(0x0028, 0x0002): ("SamplesPerPixel", "US"),
(0x0028, 0x0008): ("NumberOfFrames", "IS"),
(0x0028, 0x0100): ("BitsAllocated", "US"),
(0x0028, 0x0101): ("BitsStored", "US"),
(0x0028, 0x0102): ("HighBit", "US"),
(0x0028, 0x0103): ("PixelRepresentation", "US"),
(0x0028, 0x0010): ("Rows", "US"),
(0x0028, 0x0011): ("Columns", "US"),
(0x0028, 0x1052): ("RescaleIntercept", "DS"),
(0x0028, 0x1053): ("RescaleSlope", "DS"),
# Image specific (for the user)
(0x0028, 0x0030): ("PixelSpacing", "DS"),
(0x0018, 0x0088): ("SliceSpacing", "DS"),
}
# Define some special tags:
# See PS 3.5-2008 section 7.5 (p.40)
ItemTag = (0xFFFE, 0xE000) # start of Sequence Item
ItemDelimiterTag = (0xFFFE, 0xE00D) # end of Sequence Item
SequenceDelimiterTag = (0xFFFE, 0xE0DD) # end of Sequence of undefined length
# Define set of groups that we're interested in (so we can quickly skip others)
GROUPS = set([key[0] for key in MINIDICT.keys()])
VRS = set([val[1] for val in MINIDICT.values()])
class NotADicomFile(Exception):
pass
class CompressedDicom(RuntimeError):
pass
class SimpleDicomReader(object):
"""
This class provides reading of pixel data from DICOM files. It is
focussed on getting the pixel data, not the meta info.
To use, first create an instance of this class (giving it
a file object or filename). Next use the info attribute to
get a dict of the meta data. The loading of pixel data is
deferred until get_numpy_array() is called.
Comparison with Pydicom
-----------------------
This code focusses on getting the pixel data out, which allows some
shortcuts, resulting in the code being much smaller.
Since the processing of data elements is much cheaper (it skips a lot
of tags), this code is about 3x faster than pydicom (except for the
deflated DICOM files).
This class does borrow some code (and ideas) from the pydicom
project, and (to the best of our knowledge) has the same limitations
as pydicom with regard to the type of files that it can handle.
Limitations
-----------
For more advanced DICOM processing, please check out pydicom.
* Only a predefined subset of data elements (meta information) is read.
* This is a reader; it can not write DICOM files.
* (just like pydicom) it can handle none of the compressed DICOM
formats except for "Deflated Explicit VR Little Endian"
(1.2.840.10008.1.2.1.99).
"""
def __init__(self, file):
# Open file if filename given
if isinstance(file, str):
self._filename = file
self._file = open(file, "rb")
else:
self._filename = "<unknown file>"
self._file = file
# Init variable to store position and size of pixel data
self._pixel_data_loc = None
# The meta header is always explicit and little endian
self.is_implicit_VR = False
self.is_little_endian = True
self._unpackPrefix = "<"
# Dict to store data elements of interest in
self._info = {}
# VR Conversion
self._converters = {
# Numbers
"US": lambda x: self._unpack("H", x),
"UL": lambda x: self._unpack("L", x),
# Numbers encoded as strings
"DS": lambda x: self._splitValues(x, float, "\\"),
"IS": lambda x: self._splitValues(x, int, "\\"),
# strings
"AS": lambda x: x.decode("ascii", "ignore").strip("\x00"),
"DA": lambda x: x.decode("ascii", "ignore").strip("\x00"),
"TM": lambda x: x.decode("ascii", "ignore").strip("\x00"),
"UI": lambda x: x.decode("ascii", "ignore").strip("\x00"),
"LO": lambda x: x.decode("utf-8", "ignore").strip("\x00").rstrip(),
"CS": lambda x: self._splitValues(x, float, "\\"),
"PN": lambda x: x.decode("utf-8", "ignore").strip("\x00").rstrip(),
}
# Initiate reading
self._read()
@property
def info(self):
return self._info
def _splitValues(self, x, type, splitter):
s = x.decode("ascii").strip("\x00")
try:
if splitter in s:
return tuple([type(v) for v in s.split(splitter) if v.strip()])
else:
return type(s)
except ValueError:
return s
def _unpack(self, fmt, value):
return struct.unpack(self._unpackPrefix + fmt, value)[0]
# Really only so we need minimal changes to _pixel_data_numpy
def __iter__(self):
return iter(self._info.keys())
def __getattr__(self, key):
info = object.__getattribute__(self, "_info")
if key in info:
return info[key]
return object.__getattribute__(self, key) # pragma: no cover
def _read(self):
f = self._file
# Check prefix after peamble
f.seek(128)
if f.read(4) != b"DICM":
raise NotADicomFile("Not a valid DICOM file.")
# Read
self._read_header()
self._read_data_elements()
self._get_shape_and_sampling()
# Close if done, reopen if necessary to read pixel data
if os.path.isfile(self._filename):
self._file.close()
self._file = None
def _readDataElement(self):
f = self._file
# Get group and element
group = self._unpack("H", f.read(2))
element = self._unpack("H", f.read(2))
# Get value length
if self.is_implicit_VR:
vl = self._unpack("I", f.read(4))
else:
vr = f.read(2)
if vr in (b"OB", b"OW", b"SQ", b"UN"):
reserved = f.read(2) # noqa
vl = self._unpack("I", f.read(4))
else:
vl = self._unpack("H", f.read(2))
# Get value
if group == 0x7FE0 and element == 0x0010:
here = f.tell()
self._pixel_data_loc = here, vl
f.seek(here + vl)
return group, element, b"Deferred loading of pixel data"
else:
if vl == 0xFFFFFFFF:
value = self._read_undefined_length_value()
else:
value = f.read(vl)
return group, element, value
def _read_undefined_length_value(self, read_size=128):
"""Copied (in compacted form) from PyDicom
Copyright Darcy Mason.
"""
fp = self._file
# data_start = fp.tell()
search_rewind = 3
bytes_to_find = struct.pack(
self._unpackPrefix + "HH", SequenceDelimiterTag[0], SequenceDelimiterTag[1]
)
found = False
value_chunks = []
while not found:
chunk_start = fp.tell()
bytes_read = fp.read(read_size)
if len(bytes_read) < read_size:
# try again,
# if still don't get required amount, this is last block
new_bytes = fp.read(read_size - len(bytes_read))
bytes_read += new_bytes
if len(bytes_read) < read_size:
raise EOFError(
"End of file reached before sequence " "delimiter found."
)
index = bytes_read.find(bytes_to_find)
if index != -1:
found = True
value_chunks.append(bytes_read[:index])
fp.seek(chunk_start + index + 4) # rewind to end of delimiter
length = fp.read(4)
if length != b"\0\0\0\0":
logger.warning(
"Expected 4 zero bytes after undefined length " "delimiter"
)
else:
fp.seek(fp.tell() - search_rewind) # rewind a bit
# accumulate the bytes read (not including the rewind)
value_chunks.append(bytes_read[:-search_rewind])
# if get here then have found the byte string
return b"".join(value_chunks)
def _read_header(self):
f = self._file
TransferSyntaxUID = None
# Read all elements, store transferSyntax when we encounter it
try:
while True:
fp_save = f.tell()
# Get element
group, element, value = self._readDataElement()
if group == 0x02:
if group == 0x02 and element == 0x10:
TransferSyntaxUID = value.decode("ascii").strip("\x00")
else:
# No more group 2: rewind and break
# (don't trust group length)
f.seek(fp_save)
break
except (EOFError, struct.error): # pragma: no cover
raise RuntimeError("End of file reached while still in header.")
# Handle transfer syntax
self._info["TransferSyntaxUID"] = TransferSyntaxUID
#
if TransferSyntaxUID is None:
# Assume ExplicitVRLittleEndian
is_implicit_VR, is_little_endian = False, True
elif TransferSyntaxUID == "1.2.840.10008.1.2.1":
# ExplicitVRLittleEndian
is_implicit_VR, is_little_endian = False, True
elif TransferSyntaxUID == "1.2.840.10008.1.2.2":
# ExplicitVRBigEndian
is_implicit_VR, is_little_endian = False, False
elif TransferSyntaxUID == "1.2.840.10008.1.2":
# implicit VR little endian
is_implicit_VR, is_little_endian = True, True
elif TransferSyntaxUID == "1.2.840.10008.1.2.1.99":
# DeflatedExplicitVRLittleEndian:
is_implicit_VR, is_little_endian = False, True
self._inflate()
else:
# http://www.dicomlibrary.com/dicom/transfer-syntax/
t, extra_info = TransferSyntaxUID, ""
if "1.2.840.10008.1.2.4.50" <= t < "1.2.840.10008.1.2.4.99":
extra_info = " (JPEG)"
if "1.2.840.10008.1.2.4.90" <= t < "1.2.840.10008.1.2.4.99":
extra_info = " (JPEG 2000)"
if t == "1.2.840.10008.1.2.5":
extra_info = " (RLE)"
if t == "1.2.840.10008.1.2.6.1":
extra_info = " (RFC 2557)"
raise CompressedDicom(
"The dicom reader can only read files with "
"uncompressed image data - not %r%s. You "
"can try using dcmtk or gdcm to convert the "
"image." % (t, extra_info)
)
# From hereon, use implicit/explicit big/little endian
self.is_implicit_VR = is_implicit_VR
self.is_little_endian = is_little_endian
self._unpackPrefix = "><"[is_little_endian]
def _read_data_elements(self):
info = self._info
try:
while True:
# Get element
group, element, value = self._readDataElement()
# Is it a group we are interested in?
if group in GROUPS:
key = (group, element)
name, vr = MINIDICT.get(key, (None, None))
# Is it an element we are interested in?
if name:
# Store value
converter = self._converters.get(vr, lambda x: x)
info[name] = converter(value)
except (EOFError, struct.error):
pass # end of file ...
def get_numpy_array(self):
"""Get numpy arra for this DICOM file, with the correct shape,
and pixel values scaled appropriately.
"""
# Is there pixel data at all?
if "PixelData" not in self:
raise TypeError("No pixel data found in this dataset.")
# Load it now if it was not already loaded
if self._pixel_data_loc and len(self.PixelData) < 100:
# Reopen file?
close_file = False
if self._file is None:
close_file = True
self._file = open(self._filename, "rb")
# Read data
self._file.seek(self._pixel_data_loc[0])
if self._pixel_data_loc[1] == 0xFFFFFFFF:
value = self._read_undefined_length_value()
else:
value = self._file.read(self._pixel_data_loc[1])
# Close file
if close_file:
self._file.close()
self._file = None
# Overwrite
self._info["PixelData"] = value
# Get data
data = self._pixel_data_numpy()
data = self._apply_slope_and_offset(data)
# Remove data again to preserve memory
# Note that the data for the original file is loaded twice ...
self._info["PixelData"] = (
b"Data converted to numpy array, " + b"raw data removed to preserve memory"
)
return data
def _get_shape_and_sampling(self):
"""Get shape and sampling without actuall using the pixel data.
In this way, the user can get an idea what's inside without having
to load it.
"""
# Get shape (in the same way that pydicom does)
if "NumberOfFrames" in self and self.NumberOfFrames > 1:
if self.SamplesPerPixel > 1:
shape = (
self.SamplesPerPixel,
self.NumberOfFrames,
self.Rows,
self.Columns,
)
else:
shape = self.NumberOfFrames, self.Rows, self.Columns
elif "SamplesPerPixel" in self:
if self.SamplesPerPixel > 1:
if self.BitsAllocated == 8:
shape = self.SamplesPerPixel, self.Rows, self.Columns
else:
raise NotImplementedError(
"DICOM plugin only handles "
"SamplesPerPixel > 1 if Bits "
"Allocated = 8"
)
else:
shape = self.Rows, self.Columns
else:
raise RuntimeError(
"DICOM file has no SamplesPerPixel " "(perhaps this is a report?)"
)
# Try getting sampling between pixels
if "PixelSpacing" in self:
sampling = float(self.PixelSpacing[0]), float(self.PixelSpacing[1])
else:
sampling = 1.0, 1.0
if "SliceSpacing" in self:
sampling = (abs(self.SliceSpacing),) + sampling
# Ensure that sampling has as many elements as shape
sampling = (1.0,) * (len(shape) - len(sampling)) + sampling[-len(shape) :]
# Set shape and sampling
self._info["shape"] = shape
self._info["sampling"] = sampling
def _pixel_data_numpy(self):
"""Return a NumPy array of the pixel data."""
# Taken from pydicom
# Copyright (c) 2008-2012 Darcy Mason
if "PixelData" not in self:
raise TypeError("No pixel data found in this dataset.")
# determine the type used for the array
need_byteswap = self.is_little_endian != sys_is_little_endian
# Make NumPy format code, e.g. "uint16", "int32" etc
# from two pieces of info:
# self.PixelRepresentation -- 0 for unsigned, 1 for signed;
# self.BitsAllocated -- 8, 16, or 32
format_str = "%sint%d" % (
("u", "")[self.PixelRepresentation],
self.BitsAllocated,
)
try:
numpy_format = np.dtype(format_str)
except TypeError: # pragma: no cover
raise TypeError(
"Data type not understood by NumPy: format='%s', "
" PixelRepresentation=%d, BitsAllocated=%d"
% (numpy_format, self.PixelRepresentation, self.BitsAllocated)
)
# Have correct Numpy format, so create the NumPy array
arr = np.frombuffer(self.PixelData, numpy_format).copy()
# XXX byte swap - may later handle this in read_file!!?
if need_byteswap:
arr.byteswap(True) # True means swap in-place, don't make new copy
# Note the following reshape operations return a new *view* onto arr,
# but don't copy the data
arr = arr.reshape(*self._info["shape"])
return arr
def _apply_slope_and_offset(self, data):
"""
If RescaleSlope and RescaleIntercept are present in the data,
apply them. The data type of the data is changed if necessary.
"""
# Obtain slope and offset
slope, offset = 1, 0
needFloats, needApplySlopeOffset = False, False
if "RescaleSlope" in self:
needApplySlopeOffset = True
slope = self.RescaleSlope
if "RescaleIntercept" in self:
needApplySlopeOffset = True
offset = self.RescaleIntercept
if int(slope) != slope or int(offset) != offset:
needFloats = True
if not needFloats:
slope, offset = int(slope), int(offset)
# Apply slope and offset
if needApplySlopeOffset:
# Maybe we need to change the datatype?
if data.dtype in [np.float32, np.float64]:
pass
elif needFloats:
data = data.astype(np.float32)
else:
# Determine required range
minReq, maxReq = data.min(), data.max()
minReq = min([minReq, minReq * slope + offset, maxReq * slope + offset])
maxReq = max([maxReq, minReq * slope + offset, maxReq * slope + offset])
# Determine required datatype from that
dtype = None
if minReq < 0:
# Signed integer type
maxReq = max([-minReq, maxReq])
if maxReq < 2**7:
dtype = np.int8
elif maxReq < 2**15:
dtype = np.int16
elif maxReq < 2**31:
dtype = np.int32
else:
dtype = np.float32
else:
# Unsigned integer type
if maxReq < 2**8:
dtype = np.int8
elif maxReq < 2**16:
dtype = np.int16
elif maxReq < 2**32:
dtype = np.int32
else:
dtype = np.float32
# Change datatype
if dtype != data.dtype:
data = data.astype(dtype)
# Apply slope and offset
data *= slope
data += offset
# Done
return data
def _inflate(self):
# Taken from pydicom
# Copyright (c) 2008-2012 Darcy Mason
import zlib
from io import BytesIO
# See PS3.6-2008 A.5 (p 71) -- when written, the entire dataset
# following the file metadata was prepared the normal way,
# then "deflate" compression applied.
# All that is needed here is to decompress and then
# use as normal in a file-like object
zipped = self._file.read()
# -MAX_WBITS part is from comp.lang.python answer:
# groups.google.com/group/comp.lang.python/msg/e95b3b38a71e6799
unzipped = zlib.decompress(zipped, -zlib.MAX_WBITS)
self._file = BytesIO(unzipped) # a file-like object
class DicomSeries(object):
"""DicomSeries
This class represents a serie of dicom files (SimpleDicomReader
objects) that belong together. If these are multiple files, they
represent the slices of a volume (like for CT or MRI).
"""
def __init__(self, suid, progressIndicator):
# Init dataset list and the callback
self._entries = []
# Init props
self._suid = suid
self._info = {}
self._progressIndicator = progressIndicator
def __len__(self):
return len(self._entries)
def __iter__(self):
return iter(self._entries)
def __getitem__(self, index):
return self._entries[index]
@property
def suid(self):
return self._suid
@property
def shape(self):
"""The shape of the data (nz, ny, nx)."""
return self._info["shape"]
@property
def sampling(self):
"""The sampling (voxel distances) of the data (dz, dy, dx)."""
return self._info["sampling"]
@property
def info(self):
"""A dictionary containing the information as present in the
first dicomfile of this serie. None if there are no entries."""
return self._info
@property
def description(self):
"""A description of the dicom series. Used fields are
PatientName, shape of the data, SeriesDescription, and
ImageComments.
"""
info = self.info
# If no info available, return simple description
if not info: # pragma: no cover
return "DicomSeries containing %i images" % len(self)
fields = []
# Give patient name
if "PatientName" in info:
fields.append("" + info["PatientName"])
# Also add dimensions
if self.shape:
tmp = [str(d) for d in self.shape]
fields.append("x".join(tmp))
# Try adding more fields
if "SeriesDescription" in info:
fields.append("'" + info["SeriesDescription"] + "'")
if "ImageComments" in info:
fields.append("'" + info["ImageComments"] + "'")
# Combine
return " ".join(fields)
def __repr__(self):
adr = hex(id(self)).upper()
return "<DicomSeries with %i images at %s>" % (len(self), adr)
def get_numpy_array(self):
"""Get (load) the data that this DicomSeries represents, and return
it as a numpy array. If this serie contains multiple images, the
resulting array is 3D, otherwise it's 2D.
"""
# It's easy if no file or if just a single file
if len(self) == 0:
raise ValueError("Serie does not contain any files.")
elif len(self) == 1:
return self[0].get_numpy_array()
# Check info
if self.info is None:
raise RuntimeError("Cannot return volume if series not finished.")
# Init data (using what the dicom packaged produces as a reference)
slice = self[0].get_numpy_array()
vol = np.zeros(self.shape, dtype=slice.dtype)
vol[0] = slice
# Fill volume
self._progressIndicator.start("loading data", "", len(self))
for z in range(1, len(self)):
vol[z] = self[z].get_numpy_array()
self._progressIndicator.set_progress(z + 1)
self._progressIndicator.finish()
# Done
import gc
gc.collect()
return vol
def _append(self, dcm):
self._entries.append(dcm)
def _sort(self):
self._entries.sort(
key=lambda k: (
k.InstanceNumber,
(
k.ImagePositionPatient[2]
if hasattr(k, "ImagePositionPatient")
else None
),
)
)
def _finish(self):
"""
Evaluate the series of dicom files. Together they should make up
a volumetric dataset. This means the files should meet certain
conditions. Also some additional information has to be calculated,
such as the distance between the slices. This method sets the
attributes for "shape", "sampling" and "info".
This method checks:
* that there are no missing files
* that the dimensions of all images match
* that the pixel spacing of all images match
"""
# The datasets list should be sorted by instance number
L = self._entries
if len(L) == 0:
return
elif len(L) == 1:
self._info = L[0].info
return
# Get previous
ds1 = L[0]
# Init measures to calculate average of
distance_sum = 0.0
# Init measures to check (these are in 2D)
dimensions = ds1.Rows, ds1.Columns
# sampling = float(ds1.PixelSpacing[0]), float(ds1.PixelSpacing[1])
sampling = ds1.info["sampling"][:2] # row, column
for index in range(len(L)):
# The first round ds1 and ds2 will be the same, for the
# distance calculation this does not matter
# Get current
ds2 = L[index]
# Get positions
pos1 = float(ds1.ImagePositionPatient[2])
pos2 = float(ds2.ImagePositionPatient[2])
# Update distance_sum to calculate distance later
distance_sum += abs(pos1 - pos2)
# Test measures
dimensions2 = ds2.Rows, ds2.Columns
# sampling2 = float(ds2.PixelSpacing[0]), float(ds2.PixelSpacing[1])
sampling2 = ds2.info["sampling"][:2] # row, column
if dimensions != dimensions2:
# We cannot produce a volume if the dimensions match
raise ValueError("Dimensions of slices does not match.")
if sampling != sampling2:
# We can still produce a volume, but we should notify the user
self._progressIndicator.write("Warn: sampling does not match.")
# Store previous
ds1 = ds2
# Finish calculating average distance
# (Note that there are len(L)-1 distances)
distance_mean = distance_sum / (len(L) - 1)
# Set info dict
self._info = L[0].info.copy()
# Store information that is specific for the serie
self._info["shape"] = (len(L),) + ds2.info["shape"]
self._info["sampling"] = (distance_mean,) + ds2.info["sampling"]
def list_files(files, path):
"""List all files in the directory, recursively."""
for item in os.listdir(path):
item = os.path.join(path, item)
if os.path.isdir(item):
list_files(files, item)
elif os.path.isfile(item):
files.append(item)
def process_directory(request, progressIndicator, readPixelData=False):
"""
Reads dicom files and returns a list of DicomSeries objects, which
contain information about the data, and can be used to load the
image or volume data.
if readPixelData is True, the pixel data of all series is read. By
default the loading of pixeldata is deferred until it is requested
using the DicomSeries.get_pixel_array() method. In general, both
methods should be equally fast.
"""
# Get directory to examine
if os.path.isdir(request.filename):
path = request.filename
elif os.path.isfile(request.filename):
path = os.path.dirname(request.filename)
else: # pragma: no cover - tested earlier
raise ValueError("Dicom plugin needs a valid filename to examine the directory")
# Check files
files = []
list_files(files, path) # Find files recursively
# Gather file data and put in DicomSeries
series = {}
count = 0
progressIndicator.start("examining files", "files", len(files))
for filename in files:
# Show progress (note that we always start with a 0.0)
count += 1
progressIndicator.set_progress(count)
# Skip DICOMDIR files
if filename.count("DICOMDIR"): # pragma: no cover
continue
# Try loading dicom ...
try:
dcm = SimpleDicomReader(filename)
except NotADicomFile:
continue # skip non-dicom file
except Exception as why: # pragma: no cover
progressIndicator.write(str(why))
continue
# Get SUID and register the file with an existing or new series object
try:
suid = dcm.SeriesInstanceUID
except AttributeError: # pragma: no cover
continue # some other kind of dicom file
if suid not in series:
series[suid] = DicomSeries(suid, progressIndicator)
series[suid]._append(dcm)
# Finish progress
# progressIndicator.finish('Found %i series.' % len(series))
# Make a list and sort, so that the order is deterministic
series = list(series.values())
series.sort(key=lambda x: x.suid)
# Split series if necessary
for serie in reversed([serie for serie in series]):
splitSerieIfRequired(serie, series, progressIndicator)
# Finish all series
# progressIndicator.start('analyse series', '', len(series))
series_ = []
for i in range(len(series)):
try:
series[i]._finish()
series_.append(series[i])
except Exception as err: # pragma: no cover
progressIndicator.write(str(err))
pass # Skip serie (probably report-like file without pixels)
# progressIndicator.set_progress(i+1)
progressIndicator.finish("Found %i correct series." % len(series_))
# Done
return series_
def splitSerieIfRequired(serie, series, progressIndicator):
"""
Split the serie in multiple series if this is required. The choice
is based on examing the image position relative to the previous
image. If it differs too much, it is assumed that there is a new
dataset. This can happen for example in unspitted gated CT data.
"""
# Sort the original list and get local name
serie._sort()
L = serie._entries
# Init previous slice
ds1 = L[0]
# Check whether we can do this
if "ImagePositionPatient" not in ds1:
return
# Initialize a list of new lists
L2 = [[ds1]]
# Init slice distance estimate
distance = 0
for index in range(1, len(L)):
# Get current slice
ds2 = L[index]
# Get positions
pos1 = float(ds1.ImagePositionPatient[2])
pos2 = float(ds2.ImagePositionPatient[2])
# Get distances
newDist = abs(pos1 - pos2)
# deltaDist = abs(firstPos-pos2)
# If the distance deviates more than 2x from what we've seen,
# we can agree it's a new dataset.
if distance and newDist > 2.1 * distance:
L2.append([])
distance = 0
else:
# Test missing file
if distance and newDist > 1.5 * distance:
progressIndicator.write(
"Warning: missing file after %r" % ds1._filename
)
distance = newDist
# Add to last list
L2[-1].append(ds2)
# Store previous
ds1 = ds2
# Split if we should
if len(L2) > 1:
# At what position are we now?
i = series.index(serie)
# Create new series
series2insert = []
for L in L2:
newSerie = DicomSeries(serie.suid, progressIndicator)
newSerie._entries = L
series2insert.append(newSerie)
# Insert series and remove self
for newSerie in reversed(series2insert):
series.insert(i, newSerie)
series.remove(serie)