@@ -400,16 +400,16 @@ def get_msdataset_files_by_type(alldsfiles, nrrawfiles=False):
400400 dsfiles = {}
401401 last_filetype = False
402402 dsmzfiles = alldsfiles .filter (mzmlfile__isnull = False , mzmlfile__refined = False ).select_related ('mzmlfile__pwiz' )
403- for pwiz_mzs in dsmzfiles .values ('mzmlfile__pwiz_id' , 'mzmlfile__pwiz__version_description' ).annotate (pwcount = Count ('pk' )):
403+ for pwiz_mzs in dsmzfiles .values ('mzmlfile__pwiz_id' , 'mzmlfile__pwiz__version_description' ).annotate (pwcount = Count ('pk' , distinct = True )):
404404 ftname = f'mzML (pwiz { pwiz_mzs ["mzmlfile__pwiz__version_description" ]} )'
405- dsfiles [ftname ] = dsmzfiles .filter (mzmlfile__pwiz_id = pwiz_mzs ['mzmlfile__pwiz_id' ])
405+ dsfiles [ftname ] = dsmzfiles .filter (mzmlfile__pwiz_id = pwiz_mzs ['mzmlfile__pwiz_id' ]). distinct ( 'pk' )
406406 if nrrawfiles and pwiz_mzs ['pwcount' ] == nrrawfiles :
407407 last_filetype = ftname
408408     # Also deliver refined mzML
409409 dsrefinedfiles = alldsfiles .filter (mzmlfile__isnull = False , mzmlfile__refined = True ).select_related ('mzmlfile__pwiz' )
410- for pwiz_mzs in dsrefinedfiles .values ('mzmlfile__pwiz_id' , 'mzmlfile__pwiz__version_description' ).annotate (pwcount = Count ('pk' )):
410+ for pwiz_mzs in dsrefinedfiles .values ('mzmlfile__pwiz_id' , 'mzmlfile__pwiz__version_description' ).annotate (pwcount = Count ('pk' , distinct = True )):
411411 ftname = f'refined mzML (pwiz { pwiz_mzs ["mzmlfile__pwiz__version_description" ]} )'
412- dsfiles [ftname ] = dsrefinedfiles .filter (mzmlfile__pwiz_id = pwiz_mzs ['mzmlfile__pwiz_id' ])
412+ dsfiles [ftname ] = dsrefinedfiles .filter (mzmlfile__pwiz_id = pwiz_mzs ['mzmlfile__pwiz_id' ]). distinct ( 'pk' )
413413 if nrrawfiles and pwiz_mzs ['pwcount' ] == nrrawfiles :
414414 last_filetype = ftname
415415 return dsfiles , last_filetype
@@ -604,6 +604,8 @@ def get_datasets(request, wfversion_id):
604604 # Files with samples (non-MS, IP, non-isobaric, etc)
605605 if anid and is_msdata :
606606 allfilessamesample = all ((x ['fields' ]['__sample' ] == '' for x in resp_files .values ()))
607+ [x ['fields' ].update ({'__sample' : x ['dsetsample' ]}) for x in resp_files .values ()
608+ if not x ['fields' ]['__sample' ]]
607609
608610 elif not is_msdata :
609611 # sequencing data etcetera, always have sample-per-file since we dont
@@ -843,9 +845,15 @@ def store_analysis(request):
843845 if not hasattr (dset , 'quantdataset' ):
844846 response_errors .append (f'File(s) or channels in dataset { dsname } do not have '
845847 'sample annotations, please edit the dataset first' )
848+
849+ # Dssfiles need not be checked, new mzMLs being produced can also be selected, but
850+ # they need to have a storedfileloc__active=True. NB:
851+ # storedfileloc join causes duplicates, so make sure distinct is used later (cannot
852+ # put distinct('pk') here since it clashes with annotate in get_msdataset_files_by_type
853+         # and gives NotImplementedError)
846854 dssfiles = rm .StoredFile .objects .filter (rawfile__datasetrawfile__dataset_id = dsid ,
847- deleted = False , checked = True )
848- dsrawfiles = dssfiles .filter (mzmlfile__isnull = True )
855+ deleted = False , storedfileloc__active = True )
856+ dsrawfiles = dssfiles .filter (mzmlfile__isnull = True ). distinct ( 'pk' )
849857 nrrawfiles = dsrawfiles .count ()
850858 if nrrawfiles < rm .RawFile .objects .filter (datasetrawfile__dataset_id = dsid ).count ():
851859 response_errors .append (f'Dataset { dsname } contains registered files that dont '
0 commit comments