I wrote something to load PNG files from a custom C++ IStream via GDI+. It worked great until I ran it on Vista machines. Crashes every time.
When compiled on VS 2008, I found that inserting code into the IStream::AddRef method, such as a cout, made the problem go away. When compiling with VS 2010, it still crashes regardless of that.
I stripped the program down to its basics. I copied a FileStream straight from Microsoft's documentation. It can load PNGs when using Bitmap::FromFile. It can load JPEGs, GIFs, and BMPs via FromFile or FromStream.
So in short: on Vista, PNG files loaded via Bitmap::FromStream crash.
#pragma comment(lib, "gdiplus.lib")
#include <iostream>
#include <objidl.h>
#include <gdiplus.h>
class FileStream : public IStream
{
public: 
    FileStream(HANDLE hFile) 
    {
        _refcount = 1;
        _hFile = hFile;
    }
    ~FileStream()
    {
        if (_hFile != INVALID_HANDLE_VALUE)
        {
            ::CloseHandle(_hFile);
        }
    }
public:
    HRESULT static OpenFile(LPCWSTR pName, IStream ** ppStream, bool fWrite)
    {
        HANDLE hFile = ::CreateFileW(pName, fWrite ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ,
            NULL, fWrite ? CREATE_ALWAYS : OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
        if (hFile == INVALID_HANDLE_VALUE)
            return HRESULT_FROM_WIN32(GetLastError());
        *ppStream = new FileStream(hFile);
        if(*ppStream == NULL)
            CloseHandle(hFile);
        return S_OK;
    }
    virtual HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, void ** ppvObject)
    { 
        if (iid == __uuidof(IUnknown)
            || iid == __uuidof(IStream)
            || iid == __uuidof(ISequentialStream))
        {
            *ppvObject = static_cast<IStream*>(this);
            AddRef();
            return S_OK;
        } else
            return E_NOINTERFACE; 
    }
    virtual ULONG STDMETHODCALLTYPE AddRef(void) 
    { 
        return (ULONG)InterlockedIncrement(&_refcount); 
    }
    virtual ULONG STDMETHODCALLTYPE Release(void) 
    {
        ULONG res = (ULONG) InterlockedDecrement(&_refcount);
        if (res == 0) 
            delete this;
        return res;
    }
    // ISequentialStream Interface
public:
    virtual HRESULT STDMETHODCALLTYPE Read(void* pv, ULONG cb, ULONG* pcbRead)
    {
  ULONG local_pcbRead;  
        BOOL rc = ReadFile(_hFile, pv, cb, &local_pcbRead, NULL);
  if (pcbRead) *pcbRead = local_pcbRead;
        return (rc) ? S_OK : HRESULT_FROM_WIN32(GetLastError());
    }
    virtual HRESULT STDMETHODCALLTYPE Write(void const* pv, ULONG cb, ULONG* pcbWritten)
    {
        BOOL rc = WriteFile(_hFile, pv, cb, pcbWritten, NULL);
        return rc ? S_OK : HRESULT_FROM_WIN32(GetLastError());
    }
    // IStream Interface
public:
    virtual HRESULT STDMETHODCALLTYPE SetSize(ULARGE_INTEGER)
    { 
        return E_NOTIMPL;   
    }
    virtual HRESULT STDMETHODCALLTYPE CopyTo(IStream*, ULARGE_INTEGER, ULARGE_INTEGER*,
        ULARGE_INTEGER*) 
    { 
        return E_NOTIMPL;   
    }
    virtual HRESULT STDMETHODCALLTYPE Commit(DWORD)                                      
    { 
        return E_NOTIMPL;   
    }
    virtual HRESULT STDMETHODCALLTYPE Revert(void)                                       
    { 
        return E_NOTIMPL;   
    }
    virtual HRESULT STDMETHODCALLTYPE LockRegion(ULARGE_INTEGER, ULARGE_INTEGER, DWORD)              
    { 
        return E_NOTIMPL;   
    }
    virtual HRESULT STDMETHODCALLTYPE UnlockRegion(ULARGE_INTEGER, ULARGE_INTEGER, DWORD)            
    { 
        return E_NOTIMPL;   
    }
    virtual HRESULT STDMETHODCALLTYPE Clone(IStream **)                                  
    { 
        return E_NOTIMPL;   
    }
    virtual HRESULT STDMETHODCALLTYPE Seek(LARGE_INTEGER liDistanceToMove, DWORD dwOrigin,
        ULARGE_INTEGER* lpNewFilePointer)
    { 
        DWORD dwMoveMethod;
        switch(dwOrigin)
        {
        case STREAM_SEEK_SET:
            dwMoveMethod = FILE_BEGIN;
            break;
        case STREAM_SEEK_CUR:
            dwMoveMethod = FILE_CURRENT;
            break;
        case STREAM_SEEK_END:
            dwMoveMethod = FILE_END;
            break;
        default:   
            return STG_E_INVALIDFUNCTION;
            break;
        }
        if (SetFilePointerEx(_hFile, liDistanceToMove, (PLARGE_INTEGER) lpNewFilePointer,
                             dwMoveMethod) == 0)
            return HRESULT_FROM_WIN32(GetLastError());
        return S_OK;
    }
    virtual HRESULT STDMETHODCALLTYPE Stat(STATSTG* pStatstg, DWORD grfStatFlag) 
    {
        if (GetFileSizeEx(_hFile, (PLARGE_INTEGER) &pStatstg->cbSize) == 0)
            return HRESULT_FROM_WIN32(GetLastError());
        return S_OK;
    }
private:
    volatile HANDLE _hFile;
    volatile LONG _refcount;
};
#define USE_STREAM
int main()
{
 Gdiplus::GdiplusStartupInput gdiplusStartupInput;
 ULONG_PTR gdiplusToken;
 Gdiplus::GdiplusStartup(&gdiplusToken, &gdiplusStartupInput, NULL);
 Gdiplus::Bitmap *bmp;
#ifndef USE_STREAM
 bmp = Gdiplus::Bitmap::FromFile(L"test.png", false);
 if (!bmp)
 {
  std::cerr << " Unable to open image file." << std::endl;
  return 1;
 }
#else
 IStream *s;
 if (FileStream::OpenFile(L"test.png", &s, false) != S_OK)
 {
  std::cerr << "Unable to open image file." << std::endl;
  return 1;
 }
 bmp = Gdiplus::Bitmap::FromStream(s, false);
#endif
 std::cout << "Image is " << bmp->GetWidth() << " by " << bmp->GetHeight() << std::endl;
 Gdiplus::GdiplusShutdown(gdiplusToken);
#ifdef USE_STREAM
 s->Release();
#endif
 return 0;
}
Tracing and debugging, shows that it does make some calls to the IStream class. It crashes inside of lastResult = DllExports::GdipCreateBitmapFromStream(stream, &bitmap); from GdiPlusBitmap.h, which is a static inline wrapper over the flat API.
Other than the reference counting, the only IStream methods it calls is stat (for file size), read, and seek.
Call stack looks like:
ntdll.dll!_DbgBreakPoint@0()  + 0x1 bytes 
ntdll.dll!_RtlpBreakPointHeap@4()  + 0x28 bytes 
ntdll.dll!_RtlpValidateHeapEntry@12()  + 0x70a3c bytes 
ntdll.dll!_RtlDebugFreeHeap@12()  + 0x9a bytes 
ntdll.dll!@RtlpFreeHeap@16()  + 0x13cdd bytes 
ntdll.dll!_RtlFreeHeap@12()  + 0x2e49 bytes 
kernel32.dll!_HeapFree@12()  + 0x14 bytes 
ole32.dll!CRetailMalloc_Free()  + 0x1c bytes 
ole32.dll!_CoTaskMemFree@4()  + 0x13 bytes 
GdiPlus.dll!GpPngDecoder::GetImageInfo()  + 0x68 bytes 
GdiPlus.dll!GpDecodedImage::InternalGetImageInfo()  + 0x3c bytes 
GdiPlus.dll!GpDecodedImage::GetImageInfo()  + 0x18 bytes 
GdiPlus.dll!CopyOnWriteBitmap::CopyOnWriteBitmap()  + 0x49 bytes 
GdiPlus.dll!CopyOnWriteBitmap::Create()  + 0x1d bytes 
GdiPlus.dll!GpBitmap::GpBitmap()  + 0x2c bytes 
I was unable to find anybody else with the same problem, so I assume there's something wrong with my implementation...