;+
; NAME:
;   make_lookup_2mass
;
; PURPOSE: (one line)
;   Makes lookup tables for the entire 2MASS catalog.
;
; DESCRIPTION:
;   Reads in entire uncompressed 2MASS catalog (92 files: psc_aaa..psc_bbi) .
;   Creates lookup table, one entry per star, so that catalog line can
;   be read directly from 2MASS catalog w/o any more searching, using
;   oc_getline_2mass.pro.
;
;   Also sorts catalog to fix occasional sorting errors in 2MASS dvd files.
;   (See HBT e-mails to/from 2MASS central May-06 regarding sorting errors.)
;
;   Output is 1800 files, binned in 0.1-degree dec bins, and sorted
;   by RA.  The files are properly sorted, unlike the 2MASS distribution.  A
;   sample name is 2mass_88.6_88.7.bin .
;
;   Output can be binary or text.  Binary format is 14 bytes per line:
;     Long: First half of 2MASS ID
;     Long: Second half of 2MASS ID
;     Int:  File number 0 .. 91 that 2MASS ID is found in (0 = psc_aaa, etc.)
;     Long: Starting byte position within the psc_* file.  (No length is
;           necessary, since all entries in psc_* files are CR-terminated.)
;
;   At 14 bytes per line, the entire psc lookup tables are ~7 GB.
;   This code takes ~ 1 hour/file to run, so ~100 hrs for the catalog.
;
;   Text format is similar, and self-explanatory.  Text is used for debugging
;   only, not for real searching.  Text columns are: ID, RA, dec, file number,
;   byte position, line number, record length.
;
;   Stars are held in an in-memory queue.  A dec bin is written out only once
;   a star more than two bins beyond it has been read, so that mis-sorted
;   stars in the input still land in the right file.  When the input is
;   exhausted, every bin still holding queued stars is written out.
;
; CATEGORY:
;   Star catalogs
;
; CALLING SEQUENCE:
;   make_lookup_2mass [, FILE_START_IN=file] [, /BINARY | /TEXT]
;
; INPUTS:
;   None
;
; OPTIONAL INPUT PARAMETERS:
;   None
;
; KEYWORD INPUT PARAMETERS:
;   FILE_START_IN -- Input file number to start at, 0 .. 91.  Default is zero.
;            May be abbreviated (e.g., FILE=57).
;            Note that starting at a non-zero bin may output an incomplete
;            first file, since some stars in the starting bin may be in the
;            previous file.  Really used for debugging only.
;
;   /BINARY -- If set, force binary output.  Default.
;
;   /TEXT   -- If set, force text output (primarily for debugging).
;
; KEYWORD OUTPUT PARAMETERS:
;   None
;
; OUTPUTS:
;   None
;
; COMMON BLOCKS:
;   ml -- shared between make_lookup_2mass and make_lookup_2mass_empty_queue.
;
; SIDE EFFECTS:
;   Writes the lookup files into the 'lookup' directory derived from the
;   TMC_PATH environment variable.  Draws diagnostic RA / Dec plots in the
;   current graphics device and sets !p.multi.
;
; RESTRICTIONS:
;   Requires the environment variable TMC_PATH to point at the 2MASS 'dvd'
;   directory, with the uncompressed psc_* files in its 'uncompress'
;   subdirectory.
;
; EXAMPLE:
;   make_lookup_2mass, FILE=57, /BIN
;     Starts making binary-based lookup tables at dec=+0.0 .
;
; MODIFICATION HISTORY:
;   Written 30-May-2006 by Henry Throop, SwRI
;   Later revision: final bins are now flushed at end of input (previously
;     only 1799 of the 1800 files were written); record-length array is kept
;     in step with the queue; line numbers are stored as longs; empty bins
;     no longer produce a bogus record.
;
;-

;;;;;;;;;;
pro make_lookup_2mass_empty_queue, BINARY=binary, TEXT=text, FINAL=final
;;;;;;;;;;

; This routine is called by make_lookup_2mass.  It sorts and writes
; out a set of stars in an 0.1-deg bin that has been read in.
;
; The bin written is the one following last_bin_output.  The selected stars
; are sorted by RA, written to files_out[bin], removed from the queue, and
; the queue is then compacted (re-sorted by dec, so the emptied slots move to
; the end).  On return, last_bin_output has been advanced by one and q
; points at the first free queue slot.
;
; Keywords:
;   /BINARY -- Write 14-byte binary records.
;   /TEXT   -- Write formatted text records.
;   /FINAL  -- Consider every queued star.  Without it, the most recently
;              read star (queue slot q-1) is held back, as it is the star
;              that triggered the flush and belongs to a later bin.

  common ml, lun_in, lun_out, filepos_arr, filenum_arr, ra_arr, dec_arr, $
             id_arr, q, last_bin_output, files_out, numstars, $
             numstars_cum, num_lines_file, numfiles_in, i, j, num_lines_max, $
             bin_dec_min, bin_dec_max, empty, dfilepos_arr, $
             r2d, d2r, linenum_arr, bin

  print
  print, 'Now starting to output star ' + radify(numstars_cum,',') + $
         ', file ' + st(i) + ' / ' + st(numfiles_in) + $
         ', ' + st(j/num_lines_file * 100d) + ' % through file'
  if (q ge 1) then print, 'Last star read: dec = ' + st(dec_arr[q-1])

; Bin to output.  Kept in a local variable: 'bin' in the common block belongs
; to the caller and must not be overwritten here.

  ibin = long(last_bin_output) + 1L

; Grab the elements which we are going to output.
; Bins are (min, max].

  n_consider = keyword_set(FINAL) ? q : q-1
  numstars   = 0L

  if (n_consider ge 1) then begin
    elem = where((dec_arr[0:n_consider-1] gt bin_dec_min[ibin]) and $
                 (dec_arr[0:n_consider-1] le bin_dec_max[ibin]), numstars)
  end

  print, 'Found ' + st(numstars) + ' stars in range dec = ' + $
         st(bin_dec_min[ibin]) + ' .. ' + st(bin_dec_max[ibin])

; Open the file to write.  It is created even if the bin is empty, so that
; every bin has a file.

  openw, lun_out, files_out[ibin], /get_lun

  if (numstars gt 0) then begin

; Sort by RA.  Note that we sort by full RA, *not* by the truncated
; RA used as part of the ID.

    order        = elem[sort(ra_arr[elem])]

    id_out       = id_arr[order]
    ra_out       = ra_arr[order]
    dec_out      = dec_arr[order]
    filepos_out  = filepos_arr[order]
    dfilepos_out = dfilepos_arr[order]
    filenum_out  = filenum_arr[order]
    linenum_out  = linenum_arr[order]

; Diagnostic plots

    !p.multi=[0,1,2]
    plot, frange(0, 1, sizex(ra_out)), ra_out, xtitle = 'RA', $
          psym=3, yrange=[0,360], ysty=3
    plot, dec_out, xtitle = 'Dec', psym=3, title = 'Dec = ' + $
          st(bin_dec_min[ibin]) + ' ..' + st(bin_dec_max[ibin])

    if keyword_set(TEXT) then begin
      for k = 0L, numstars-1L do begin
        ; Line number is I8: line numbers run to several million.
        ll = string(id_out[k], ra_out[k], dec_out[k], filenum_out[k], $
                    filepos_out[k], linenum_out[k], dfilepos_out[k], $
                    format = '(A20, F15.9, F15.9, I4, I12, I8, I5)')
        printf, lun_out, ll
      end
    end

    if keyword_set(BINARY) then begin
      for k = 0L, numstars-1L do begin
        ; The ID is split into two 8-character halves, each stored as a long.
        id1 = long(strmid(id_out[k], 0, 8))
        id2 = long(strmid(id_out[k], 8, 8))
        writeu, lun_out, id1, id2, filenum_out[k], filepos_out[k]
      end
    end

  end

  free_lun, lun_out                   ; Closes the file as well

  last_bin_output = ibin

  if (numstars gt 0) then begin
    print, 'Wrote: ', files_out[last_bin_output], $
           ' with final RA out = ' + st(ra_out[numstars-1L])
  end else begin
    print, 'Wrote: ', files_out[last_bin_output], ' (no stars in bin)'
  end

  if (numstars gt 0) then begin

; Now remove these stars from the 'queue', by setting them to zero

    ra_arr[elem]       = 0
    dec_arr[elem]      = EMPTY
    filepos_arr[elem]  = 0
    dfilepos_arr[elem] = 0
    filenum_arr[elem]  = 0
    linenum_arr[elem]  = 0
    id_arr[elem]       = ''

; Move these empty entries to the end of the queue, resorting based on dec.
; Low decs will move.
; Note that we sort by *full* dec, not by the truncated dec that is part
; of the ID.
; All of the queue arrays, including dfilepos_arr, must be reordered
; together.  The range is clamped so it cannot run past the end of the array.

    last  = (q + 10L) < (n_elements(dec_arr) - 1L)
    order = sort(dec_arr[0:last])

    dec_arr[0:last]      = dec_arr[order]
    ra_arr[0:last]       = ra_arr[order]
    filepos_arr[0:last]  = filepos_arr[order]
    dfilepos_arr[0:last] = dfilepos_arr[order]
    filenum_arr[0:last]  = filenum_arr[order]
    linenum_arr[0:last]  = linenum_arr[order]
    id_arr[0:last]       = id_arr[order]

; New queue position: the first empty slot

    w_empty = where(dec_arr eq EMPTY, n_empty)
    q       = (n_empty gt 0) ? w_empty[0] : n_elements(dec_arr)

    numstars_cum += numstars

  end

end

;;;;;;;;;;
pro make_lookup_2mass, file_start_in=file_start_in, BINARY=binary, TEXT=text
;;;;;;;;;;

  common ml

; Program loops over all the psc_* files from 2MASS catalog

; Parse inputs.
; FILE_START_IN -- specify the number of the input file to start on (e.g., 0 .. 91)

  file_start  = 0
  start_given = keyword_exists(FILE_START_IN)
  if start_given then FILE_START = file_start_in
  file_start  = long(file_start)

  if (not(keyword_set(BINARY)) and not(keyword_set(TEXT))) then BINARY=1
                                        ; default to BINARY output, not TEXT

; Initialize things

  d2r   = 2*!dpi / 360d
  r2d   = 1/d2r
  EMPTY = 999d                          ; Dec value flagging an unused queue slot

  dir_2mass      = getenv('TMC_PATH')
  dir_lookup     = str_replace(dir_2mass, 'dvd', 'lookup')
  dir_uncompress = dir_2mass + '/uncompress'

  num_lines_file = 5100000d
  num_lines_max  = num_lines_file*2d    ; Keep two files in memory at a time

  numstars_cum   = 0L

  ra_arr       = dblarr(num_lines_max)
  dec_arr      = dblarr(num_lines_max) + EMPTY  ; Flag to be empty at start
  id_arr       = strarr(num_lines_max)
  filenum_arr  = intarr(num_lines_max)  ; Which file was line from?
  linenum_arr  = lonarr(num_lines_max)  ; Which line was this from?
                                        ; (long: exceeds 16-bit range)
  filepos_arr  = lonarr(num_lines_max)  ; Byte offset of start of record
  dfilepos_arr = intarr(num_lines_max)  ; Record length, not including CR

  files_in     = findfile(dir_uncompress + '/' + 'psc_???')
                                        ; Exclude things like 'psc_bbf.lookup'
  if (files_in[0] eq '') then $
    message, 'No psc_??? files found in ' + dir_uncompress

  bin_dec_min  = (frange(-90d, +90d, 1801))[0:1799]
  bin_dec_max  = (frange(-90d, +90d, 1801))[1:1800]

  if keyword_set(BINARY) then extension = '.bin'
  if keyword_set(TEXT)   then extension = '.txt'

  files_out    = strcompress( dir_lookup + '/' + '2mass_' + $
                   string(bin_dec_min,format='(F5.1)') + '_' + $
                   string(bin_dec_max,format='(F5.1)') + extension, /remove)

  numfiles_in  = n_elements(files_in)
  nbins        = n_elements(files_out)

  line         = ''                     ; Forces READF to read a string

; Line number of the current record, in the raw data file

  j = 0L

; Byte value of start of the current record, in the raw data file

  filepos = 0L

; Bin number of the last 0.1-deg bin that was output to disk

  last_bin_output = -1L

; Position in queue

  q = 0L

; When starting part-way through the catalog, last_bin_output is set from
; the first star read.  This is done once only.

  need_bin_reset = start_given

; Read in input files, one by one

  for i = file_start, numfiles_in-1 do begin

    openr, lun_in, files_in[i], /get_lun

    filepos = 0L                        ; Reset pointer to start of file
    j       = 0L                        ; Reset line number in current file

    while not(eof(lun_in)) do begin

      readf, lun_in, line
      items = ht_str_split(line, char='|')

      ra  = double(items[0])            ; ra, degrees
      dec = double(items[1])            ; dec, degrees
      id  = items[5]

; Now compute which bin we're in.  Star 14073572-8600000 is grouped in the
; '(-86.1 .. -86.0]' category.  I am guessing for positive values it works
; opposite, but I really don't know.
; dec = 0 exactly is handled by the first branch, which puts it in
; (-0.1 .. 0.0], matching the (min, max] selection used on output.

      if (dec le 0) then begin
        bin = max(where(dec gt bin_dec_min))
      endif else begin
        bin = max(where(dec ge bin_dec_min))
      endelse

; Reset this the very first time through

      if need_bin_reset then begin
        last_bin_output = bin-1
        need_bin_reset  = 0
      end

      if (q ge n_elements(dec_arr)) then $
        message, 'Star queue is full; increase num_lines_max.'

      ra_arr[q]       = ra
      dec_arr[q]      = dec
      id_arr[q]       = id
      filepos_arr[q]  = filepos         ; Byte position of start of record
      filenum_arr[q]  = i               ; File number we're in
      linenum_arr[q]  = j
      dfilepos_arr[q] = strlen(line)    ; Record length, not including CR

      filepos += strlen(line) + 1       ; Increase position, including CR at end
      j++
      q++

; Now test and see if we should do a sort and output.  We do this if we're
; a full 0.1-deg bin beyond a boundary.  (End of input is handled after the
; loop over files.)

      if ((bin - last_bin_output) gt 2) then begin     ; was ge 2
        make_lookup_2mass_empty_queue, BINARY=binary, TEXT=text
      end

    end                                 ; Read in all the lines from an input file

    close, lun_in & free_lun, lun_in

  end                                   ; Repeat over all input files

; End of input: write out every remaining bin that still has stars queued,
; this time including the last star read.  Stop once no queued star lies
; above the lower edge of the next bin.

  while (q gt 0) and (last_bin_output lt nbins-1) do begin
    if (max(dec_arr[0:q-1]) le bin_dec_min[last_bin_output+1]) then break
    make_lookup_2mass_empty_queue, BINARY=binary, TEXT=text, /FINAL
  end

  if (q gt 0) then $
    print, 'Warning: ' + st(q) + ' stars left in queue and not written' + $
           ' (dec outside the remaining bins).'

end

;;;;;;;;;;
pro other
;;;;;;;;;;

; Scratch routine: reads and prints one catalog line, given the byte offset
; stored in a lookup table.  The sample lookup entry is
;   23481120-8952103   357.04670  -89.869545   0   256369
; taken from /data/catalogs/2MASS/lookup/2mass_-89.9_-89.8.bin

  file = '/data/catalogs/2MASS/dvd/uncompress/psc_aaa'
  loc  = 256369

  line = ''                             ; Must be a string before READF

  openr, lun, file, /get_lun
  point_lun, lun, loc
  readf, lun, line
  free_lun, lun

  print, line

end
;;;