
sgnligo.sources.datasource

Datasource element utilities for LIGO pipelines.

DataSourceInfo dataclass

Wrapper around data source options

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| data_source | str | The data source; one of white, sin, impulse, white-realtime, frames, devshm, arrakis. | required |
| channel_name | list[str] | A list of channel names of the form "IFO=CHANNEL_NAME". For the fake sources (white, sin, impulse, white-realtime), the channel names are used to derive the ifos. | required |
| gps_start_time | Optional[float] | The GPS start time of the data to analyze, in seconds. | None |
| gps_end_time | Optional[float] | The GPS end time of the data to analyze, in seconds. | None |
| frame_cache | Optional[str] | The frame cache file to read gwf frame files from. Required when data_source is "frames". | None |
| frame_segments_file | Optional[str] | The LIGO light-weight XML file from which to load frame segments (optional, for data_source "frames"). | None |
| frame_segments_name | Optional[str] | The name of the segments to extract from the segment tables. Required if frame_segments_file is given. | None |
| noiseless_inj_frame_cache | Optional[str] | The LAL cache listing the noiseless LIGO-Virgo injection .gwf frame files to be added to the strain data from frame_cache. Optional; must be provided together with frame_cache. | None |
| noiseless_inj_channel_name | Optional[list[str]] | The noiseless injection channels to process, one per detector. Optional; must be provided together with channel_name. | None |
| state_channel_name | Optional[list[str]] | A list of state vector channel names, one per ifo, as "IFO=CHANNEL_NAME". | None |
| state_vector_on_bits | Optional[list[int]] | The bit masks for the state vector data, one per ifo. | None |
| shared_memory_dir | Optional[list[str]] | The shared memory directories to read low-latency data from, one per ifo. | None |
| discont_wait_time | float | The time to wait for the next file before dropping data when data_source is "devshm", in seconds. | 60 |
| source_queue_timeout | float | The time to wait for the next file from the queue before sending a heartbeat buffer when data_source is "devshm", in seconds. When data_source is "arrakis", used as the in_queue_timeout for ArrakisSource. | 1 |
| input_sample_rate | Optional[int] | The sample rate for the fake sources (white, sin, impulse, white-realtime). | None |
| impulse_position | int | The sample position at which to place the impulse data point. The default of -1 places the impulse at a random position. | -1 |
| real_time | bool | Whether to generate data in real time (used by the white-realtime and gwdata-noise sources). | False |
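
As a quick illustration, the sketch below constructs a DataSourceInfo directly for an offline "frames" analysis. The channel names, GPS times, and cache path are placeholders; validate() runs from __post_init__ and will raise unless the cache file actually exists and the GPS times are consistent.

```python
# Minimal sketch (placeholder channel names, GPS times, and cache path):
# build a DataSourceInfo for an offline "frames" analysis. validate() is
# invoked from __post_init__ and raises ValueError if frame_cache does not
# exist or if gps_start_time >= gps_end_time.
from sgnligo.sources.datasource import DataSourceInfo

info = DataSourceInfo(
    data_source="frames",
    channel_name=["H1=GDS-CALIB_STRAIN", "L1=GDS-CALIB_STRAIN"],
    gps_start_time=1187008582.0,
    gps_end_time=1187008882.0,
    frame_cache="frames.cache",  # must point to an existing LAL cache file
)
print(info.ifos)  # ['H1', 'L1'], derived from the channel_name entries
```
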
Source code in sgnligo/sources/datasource.py
@dataclass
class DataSourceInfo:
    """Wrapper around data source options

    Args:
        data_source:
            str, the data source, can be one of
            [white|sin|impulse|white-realtime|frames|devshm|arrakis]
        channel_name:
            list[str, ...], a list of channel names ["IFO=CHANNEL_NAME",...].
            For fake sources [white|sin|impulse|white-realtime], channel names are used
            to derive ifos.
        gps_start_time:
            float, the gps start time of the data to analyze, in seconds
        gps_end_time:
            float, the gps end time of the data to analyze, in seconds
        frame_cache:
            str, the frame cache file to read gwf frame files from. Must be provided
            when data_source is "frames"
        frame_segments_file:
            str, the name of the LIGO light-weight XML file from which to load
            frame segments. Optional iff data_source=frames
        frame_segments_name:
            str, the name of the segments to extract from the segment tables. Required
            iff frame_segments_file is given
        noiseless_inj_frame_cache:
            str, the name of the LAL cache listing the noiseless LIGO-Virgo injection
            .gwf frame files to be added to the strain data in frame_cache. (optional,
            must be provided with frame_cache)
        noiseless_inj_channel_name:
            list[str] or Dict[Detector, HostInfo], the name of the noiseless
            inj channels to process per detector (optional, must be provided with
            channel_name)
        state_channel_name:
            list, a list of state vector channel names
        state_vector_on_bits:
            int, the bit mask for the state vector data
        shared_memory_dir:
            str, the path to the shared memory directory to read low-latency data from
        discont_wait_time:
            float, the time to wait for next file before dropping data when data_source
            is "devshm", in seconds
        source_queue_timeout:
            float, the time to wait for next file from the queue before sending a
            heartbeat buffer when data_source is "devshm", in seconds.
            When data_source is "arrakis", used as the in_queue_timeout for
            ArrakisSource.
        input_sample_rate:
            int, the sample rate for fake sources [white|sin|impulse|white-realtime]
        impulse_position:
            int, the sample point position to place the impulse data point. Default -1,
            which will generate the impulse position randomly
        real_time:
            bool, whether to generate data in real time (used by the white-realtime and
            gwdata-noise sources)
    """

    data_source: str
    channel_name: list[str]
    gps_start_time: Optional[float] = None
    gps_end_time: Optional[float] = None
    frame_cache: Optional[str] = None
    frame_segments_file: Optional[str] = None
    frame_segments_name: Optional[str] = None
    noiseless_inj_frame_cache: Optional[str] = None
    noiseless_inj_channel_name: Optional[list[str]] = None
    state_channel_name: Optional[list[str]] = None
    state_vector_on_bits: Optional[list[int]] = None
    shared_memory_dir: Optional[list[str]] = None
    discont_wait_time: float = 60
    source_queue_timeout: float = 1
    input_sample_rate: Optional[int] = None
    impulse_position: int = -1
    real_time: bool = False

    def __post_init__(self):
        self.channel_dict = parse_list_to_dict(self.channel_name)
        print(self.channel_dict)
        self.ifos = sorted(self.channel_dict.keys())
        self.seg = None
        self.validate()
        self.all_analysis_ifos = None

    def validate(self):
        if self.data_source not in KNOWN_DATASOURCES:
            raise ValueError(
                "Unknown datasource {}, must be one of: {}".format(
                    self.data_source, ", ".join(KNOWN_DATASOURCES)
                )
            )

        if self.data_source == "devshm":
            if self.shared_memory_dir is None:
                raise ValueError(
                    "Must specify shared_memory_dir when data_source is 'devshm'"
                )
            else:
                self.shared_memory_dict = parse_list_to_dict(self.shared_memory_dir)
                if sorted(self.shared_memory_dict.keys()) != self.ifos:
                    raise ValueError(
                        "Must specify same number of shared_memory_dir as channel_name"
                    )
            if self.state_channel_name is None:
                raise ValueError(
                    "Must specify state_channel_name when data_source is 'devshm'"
                )
            else:
                self.state_channel_dict = parse_list_to_dict(self.state_channel_name)
                if sorted(self.state_channel_dict.keys()) != self.ifos:
                    raise ValueError(
                        "Must specify same number of state_channel_name as channel_name"
                    )
            if self.state_vector_on_bits is None:
                raise ValueError(
                    "Must specify state_vector_on_bits when data_source is 'devshm'"
                )
            else:
                self.state_vector_on_dict = parse_list_to_dict(
                    self.state_vector_on_bits
                )
                if sorted(self.state_vector_on_dict.keys()) != self.ifos:
                    raise ValueError(
                        "Must specify same number of state_vector_on_bits as"
                        " channel_name"
                    )

            if self.gps_start_time is not None or self.gps_end_time is not None:
                raise ValueError(
                    "Must not specify gps_start_time or gps_end_time when"
                    " data_source is 'devshm'"
                )
        elif self.data_source == "arrakis":
            # Arrakis source can have optional start_time and end_time
            # If both are provided, start_time must be less than end_time
            if self.gps_start_time is not None and self.gps_end_time is not None:
                if self.gps_start_time >= self.gps_end_time:
                    raise ValueError("Must specify gps_start_time < gps_end_time")
                else:
                    self.seg = segments.segment(
                        LIGOTimeGPS(self.gps_start_time), LIGOTimeGPS(self.gps_end_time)
                    )
        # Input sample rate is not required but will default to 16384 Hz if
        # not provided
        elif self.data_source == "white-realtime":
            if self.input_sample_rate is None:
                raise ValueError(
                    "Must specify input_sample_rate when data_source is one of"
                    f" {FAKE_DATASOURCES}"
                )
        else:
            # Special case for gwdata-noise with real_time=True
            if self.data_source == "gwdata-noise" and self.real_time:
                # For real-time gwdata-noise, gps_end_time can be None
                if self.gps_start_time is not None and self.gps_end_time is not None:
                    if self.gps_start_time >= self.gps_end_time:
                        raise ValueError("Must specify gps_start_time < gps_end_time")
                    else:
                        self.seg = segments.segment(
                            LIGOTimeGPS(self.gps_start_time),
                            LIGOTimeGPS(self.gps_end_time),
                        )
                # If gps_end_time is None, seg remains None (for GWDataNoiseSource)
            elif self.gps_start_time is None or self.gps_end_time is None:
                raise ValueError(
                    "Must specify gps_start_time and gps_end_time when "
                    f"data_source is one of {OFFLINE_DATASOURCES}"
                )
            elif self.gps_start_time >= self.gps_end_time:
                raise ValueError("Must specify gps_start_time < gps_end_time")
            else:
                self.seg = segments.segment(
                    LIGOTimeGPS(self.gps_start_time), LIGOTimeGPS(self.gps_end_time)
                )

            if self.frame_segments_file is not None:
                if self.frame_segments_name is None:
                    raise ValueError(
                        "Must specify frame_segmetns_name when frame_segments_file is"
                        " given."
                    )
                elif not os.path.exists(self.frame_segments_file):
                    raise ValueError("frame segments file does not exist")

            if self.data_source == "frames":
                if self.frame_cache is None:
                    raise ValueError(
                        "Must specify frame_cache when data_source='frames'"
                    )
                elif not os.path.exists(self.frame_cache):
                    raise ValueError("Frame cahce file does not exist")

                # Validate channel name for each noiseless injection channel name
                if self.noiseless_inj_channel_name is not None:
                    self.noiseless_inj_channel_dict = parse_list_to_dict(
                        self.noiseless_inj_channel_name
                    )
                    for ifo in self.noiseless_inj_channel_dict:
                        if ifo not in self.channel_dict:
                            raise ValueError(
                                "Must specify one hoft channel_name for each"
                                " noiseless_inj_channel_name as {Detector:name}"
                            )

                # Validate noiseless injection frame cache exists
                if self.noiseless_inj_frame_cache:
                    if not os.path.exists(self.noiseless_inj_frame_cache):
                        raise ValueError("Inj frame cahce file does not exist")

            elif self.data_source in FAKE_DATASOURCES:
                # gwdata-noise determines its own sample rate from PSD
                if (
                    self.data_source != "gwdata-noise"
                    and self.input_sample_rate is None
                ):
                    raise ValueError(
                        "Must specify input_sample_rate when data_source is one of"
                        f" {[ds for ds in FAKE_DATASOURCES if ds != 'gwdata-noise']}"
                    )

    @staticmethod
    def from_options(options):
        return DataSourceInfo(
            data_source=options.data_source,
            channel_name=options.channel_name,
            gps_start_time=options.gps_start_time,
            gps_end_time=options.gps_end_time,
            frame_cache=options.frame_cache,
            frame_segments_file=options.frame_segments_file,
            frame_segments_name=options.frame_segments_name,
            noiseless_inj_frame_cache=options.noiseless_inj_frame_cache,
            noiseless_inj_channel_name=options.noiseless_inj_channel_name,
            state_channel_name=options.state_channel_name,
            state_vector_on_bits=options.state_vector_on_bits,
            shared_memory_dir=options.shared_memory_dir,
            discont_wait_time=options.discont_wait_time,
            source_queue_timeout=options.source_queue_timeout,
            input_sample_rate=options.input_sample_rate,
            impulse_position=options.impulse_position,
            real_time=getattr(options, "real_time", False),
        )

    @staticmethod
    def append_options(parser):
        group = parser.add_argument_group("Data source", "Options for data source.")
        group.add_argument(
            "--data-source",
            action="store",
            required=True,
            help=f"The type of the input source. Supported: {KNOWN_DATASOURCES}",
        )
        group.add_argument(
            "--channel-name",
            metavar="ifo=channel-name",
            action="append",
            required=True,
            help="Name of the data channel to analyze. Can be given multiple times as "
            "--channel-name=IFO=CHANNEL-NAME. For fake sources, channel name is used"
            " to derive the ifo names",
        )
        group.add_argument(
            "--gps-start-time",
            metavar="seconds",
            type=int,
            help="Set the start time of the segment to analyze in GPS seconds. "
            "For frame cache data source",
        )
        group.add_argument(
            "--gps-end-time",
            metavar="seconds",
            type=int,
            help="Set the end time of the segment to analyze in GPS seconds. "
            "For frame cache data source",
        )
        group.add_argument(
            "--frame-cache",
            metavar="filename",
            help="Set the path to the frame cache file to analyze.",
        )
        group.add_argument(
            "--frame-segments-file",
            metavar="filename",
            help="Set the name of the LIGO light-weight XML file from which to load"
            " frame segments.",
        )
        group.add_argument(
            "--frame-segments-name",
            metavar="name",
            help="Set the name of the segments to extract from the segment tables."
            " Required iff --frame-segments-file is given",
        )
        group.add_argument(
            "--noiseless-inj-frame-cache",
            metavar="filename",
            help="Set the name of the LAL cache listing the noiseless LIGO-Virgo"
            " injection .gwf frame files (optional, must also provide --frame-cache).",
        )
        group.add_argument(
            "--noiseless-inj-channel-name",
            metavar="name",
            action="append",
            help="Set the name of the noiseless injection channels to process. Can be"
            " given multiple times as --channel-name=IFO=CHANNEL-NAME (optional, must"
            " also provide --channel-name per ifo)",
        )
        group.add_argument(
            "--state-channel-name",
            metavar="ifo=channel-name",
            action="append",
            help="Set the state vector channel name. "
            "Can be given multiple times as --state-channel-name=IFO=CHANNEL-NAME",
        )
        group.add_argument(
            "--state-vector-on-bits",
            metavar="ifo=number",
            action="append",
            help="Set the state vector on bits. "
            "Can be given multiple times as --state-vector-on-bits=IFO=NUMBER",
        )
        group.add_argument(
            "--shared-memory-dir",
            metavar="ifo=directory",
            action="append",
            help="Set the name of the shared memory directory. "
            "Can be given multiple times as --shared-memory-dir=IFO=DIR-NAME",
        )
        group.add_argument(
            "--discont-wait-time",
            metavar="seconds",
            type=float,
            default=60,
            help="Time to wait for new files in seconds before dropping data. "
            "Default wait time is 60 seconds.",
        )
        group.add_argument(
            "--source-queue-timeout",
            metavar="seconds",
            type=float,
            default=1,
            help="Time to wait for new files from the queue in seconds before sending "
            "a hearbeat buffer. In online mode, new files should always arrive every "
            "second, unless there are problems. Default timeout is 1 second.",
        )
        group.add_argument(
            "--input-sample-rate",
            metavar="Hz",
            type=int,
            help="Input sample rate. Required if data-source one of [white|sin| "
            "white-realtime]",
        )
        group.add_argument(
            "--impulse-position",
            type=int,
            action="store",
            help="The sample point to put the impulse at.",
        )
        group.add_argument(
            "--real-time",
            action="store_true",
            help="Generate data in real time (for white-realtime and "
            "gwdata-noise sources).",
        )
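
A hedged sketch of the command-line path follows: append_options() registers the data-source arguments on an argparse parser, and from_options() builds the DataSourceInfo from the parsed namespace. The channel name, GPS times, and sample rate below are illustrative.

```python
# Sketch of the CLI path, using illustrative argument values. argparse maps
# e.g. --data-source to options.data_source, which from_options() expects.
import argparse

from sgnligo.sources.datasource import DataSourceInfo

parser = argparse.ArgumentParser()
DataSourceInfo.append_options(parser)
options = parser.parse_args(
    [
        "--data-source", "white",
        "--channel-name", "H1=FAKE-STRAIN",
        "--gps-start-time", "1000000000",
        "--gps-end-time", "1000000016",
        "--input-sample-rate", "2048",
    ]
)
info = DataSourceInfo.from_options(options)
```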

datasource(pipeline, info, source_latency=False, verbose=False)

Wrapper around sgn source elements

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| pipeline | Pipeline | The sgn pipeline to insert the source elements into. | required |
| info | DataSourceInfo | The data source info object containing all the data source options. | required |
| source_latency | bool | Whether to attach a Latency element to each source output and return the per-ifo latency pad links. | False |
| verbose | bool | Whether to enable verbose output in the underlying source elements. | False |
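
The sketch below wires datasource() into a pipeline using a fake white-noise source. The Pipeline import path is an assumption here and may differ in your sgn installation; the DataSourceInfo values are placeholders.

```python
# Hedged sketch: insert the source elements for a fake white-noise source
# into a pipeline. The Pipeline import path below is assumed, not confirmed
# by this module; adjust it to match your sgn installation.
from sgn import Pipeline  # assumed import path

from sgnligo.sources.datasource import DataSourceInfo, datasource

info = DataSourceInfo(
    data_source="white",
    channel_name=["H1=FAKE-STRAIN"],
    gps_start_time=1000000000,
    gps_end_time=1000000016,
    input_sample_rate=2048,
)

pipeline = Pipeline()
source_out_links, source_latency_links = datasource(pipeline, info)
# source_out_links maps each ifo to an "Element:src:pad" string, e.g.
# {"H1": "H1_FakeSource:src:H1"}, which downstream elements reference in
# their link_map. source_latency_links is None unless source_latency=True.
```
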
Source code in sgnligo/sources/datasource.py
def datasource(
    pipeline: Pipeline,
    info: DataSourceInfo,
    source_latency: bool = False,
    verbose: bool = False,
):
    """Wrapper around sgn source elements

    Args:
        pipeline:
            Pipeline, the sgn pipeline
        info:
            DataSourceInfo, the data source info object containing all the data source
            options
        source_latency:
            bool, whether to attach a Latency element to each source output and return
            the per-ifo latency pad links
        verbose:
            bool, whether to enable verbose output in the underlying source elements
    """

    if info.frame_segments_file is not None:
        frame_segments = ligolw_segments.segmenttable_get_by_name(
            ligolw_utils.load_filename(
                info.frame_segments_file,
                contenthandler=ligolw_segments.LIGOLWContentHandler,
            ),
            info.frame_segments_name,
        ).coalesce()
        if info.seg is not None:
            # Clip frame segments to seek segment if it
            # exists (not required, just saves some
            # memory and I/O overhead)
            frame_segments = segments.segmentlistdict(
                (ifo, seglist & segments.segmentlist([info.seg]))
                for ifo, seglist in frame_segments.items()
            )
        for ifo, segs in frame_segments.items():
            frame_segments[ifo] = [segments.segment(s[0].ns(), s[1].ns()) for s in segs]

        # FIXME: find a better way to get the analysis ifos. In gstlal this is obtained
        # from the time-slide file
        info.all_analysis_ifos = list(frame_segments.keys())
    else:
        # if no frame segments provided, set them to an empty segment list dictionary
        frame_segments = segments.segmentlistdict((ifo, None) for ifo in info.ifos)
        info.all_analysis_ifos = info.ifos

    source_out_links = {}
    pad_names = {}
    if source_latency:
        source_latency_links: Optional[dict[Any, Any]] = {}
    else:
        source_latency_links = None

    if info.data_source == "devshm":
        source_name = "_Gate"
        channel_names = {}
        for ifo in info.ifos:
            pad_names[ifo] = ifo
            channel_name_ifo = f"{ifo}:{info.channel_dict[ifo]}"
            state_channel_name_ifo = f"{ifo}:{info.state_channel_dict[ifo]}"
            channel_names[ifo] = [channel_name_ifo, state_channel_name_ifo]
        devshm = DevShmSource(
            name="DevShm",
            channel_names=channel_names,
            shared_memory_dirs=info.shared_memory_dict,
            discont_wait_time=info.discont_wait_time,
            queue_timeout=info.source_queue_timeout,
            verbose=verbose,
        )
        pipeline.insert(devshm)
        for ifo in info.ifos:
            bit_mask = BitMask(
                name=ifo + "_Mask",
                sink_pad_names=(ifo,),
                source_pad_names=(ifo,),
                bit_mask=int(info.state_vector_on_dict[ifo]),
            )
            gate = Gate(
                name=ifo + source_name,
                sink_pad_names=("strain", "state_vector"),
                control="state_vector",
                source_pad_names=(ifo,),
            )
            info.input_sample_rate = devshm.rates[ifo][channel_names[ifo][0]]
            pipeline.insert(
                bit_mask,
                gate,
                link_map={
                    ifo + "_Gate:snk:strain": "DevShm:src:" + channel_names[ifo][0],
                    ifo + "_Mask:snk:" + ifo: "DevShm:src:" + channel_names[ifo][1],
                    ifo + "_Gate:snk:state_vector": ifo + "_Mask:src:" + ifo,
                },
            )
            source_out_links[ifo] = ifo + source_name + ":src:" + pad_names[ifo]

    elif info.data_source == "arrakis":
        # Prepare for ArrakisSource which handles all channels with a single source
        _channel_names = []

        # Create channel names list and set up pad_names in one loop
        for ifo in info.ifos:
            channel_name = f"{ifo}:{info.channel_dict[ifo]}"
            _channel_names.append(channel_name)
            pad_names[ifo] = channel_name
            source_out_links[ifo] = f"ArrakisSource:src:{channel_name}"

        # Create a single ArrakisSource for all channels
        arrakis_source = ArrakisSource(
            name="ArrakisSource",
            source_pad_names=_channel_names,
            start_time=info.gps_start_time,
            duration=(
                None
                if info.gps_end_time is None
                else (
                    info.gps_end_time - info.gps_start_time
                    if info.gps_start_time is not None
                    else None
                )
            ),
            in_queue_timeout=int(info.source_queue_timeout),
        )
        pipeline.insert(arrakis_source)

        # For Arrakis source, we need to set a default sample rate if not provided
        if info.input_sample_rate is None:
            info.input_sample_rate = 16384  # Default LIGO sample rate for h(t) data
    else:
        for ifo in info.ifos:
            if info.data_source == "frames":
                pad_name = ifo + ":" + info.channel_dict[ifo]
                pad_names[ifo] = pad_name
                source_name = "_FrameSource"
                frame_reader = FrameReader(
                    name=ifo + source_name,
                    framecache=info.frame_cache,
                    channel_names=[
                        ifo + ":" + info.channel_dict[ifo],
                    ],
                    instrument=ifo,
                    t0=info.gps_start_time,
                    end=info.gps_end_time,
                )
                info.input_sample_rate = next(iter(frame_reader.rates.values()))
                pipeline.insert(
                    frame_reader,
                )
                if info.noiseless_inj_frame_cache is not None:
                    print("Connecting noiseless injection frame source")
                    pipeline.insert(
                        FrameReader(
                            name=ifo + "_InjSource",
                            framecache=info.noiseless_inj_frame_cache,
                            channel_names=[
                                ifo + ":" + info.noiseless_inj_channel_dict[ifo]
                            ],
                            instrument=ifo,
                            t0=info.gps_start_time,
                            end=info.gps_end_time,
                        ),
                        Adder(
                            name=ifo + "_InjAdd",
                            sink_pad_names=("frame", "inj"),
                            source_pad_names=(ifo,),
                        ),
                        link_map={
                            ifo
                            + "_InjAdd:snk:frame": ifo
                            + "_FrameSource:src:"
                            + ifo
                            + ":"
                            + info.channel_dict[ifo],
                            ifo
                            + "_InjAdd:snk:inj": ifo
                            + "_InjSource:src:"
                            + ifo
                            + ":"
                            + info.noiseless_inj_channel_dict[ifo],
                        },
                    )
                    source_name = "_InjAdd"
                    pad_names[ifo] = ifo
            elif info.data_source == "gwdata-noise":
                # Handle GWDataNoiseSource differently as it creates all
                # channels at once
                break  # Exit the loop after setting up pad_names
            elif info.data_source == "white-realtime":
                pad_names[ifo] = ifo
                source_name = "_FakeSource"
                source_pad_names = (ifo,)
                pipeline.insert(
                    FakeSeriesSource(
                        name=ifo + "_FakeSource",
                        source_pad_names=source_pad_names,
                        rate=info.input_sample_rate,
                        real_time=True,
                    ),
                )
            else:
                pad_names[ifo] = ifo
                source_name = "_FakeSource"
                source_pad_names = (ifo,)
                pipeline.insert(
                    FakeSeriesSource(
                        name=ifo + "_FakeSource",
                        source_pad_names=source_pad_names,
                        rate=info.input_sample_rate,
                        signal_type=info.data_source,
                        impulse_position=info.impulse_position,
                        t0=info.gps_start_time,
                        end=info.gps_end_time,
                    ),
                )

            source_out_links[ifo] = ifo + source_name + ":src:" + pad_names[ifo]

            if info.frame_segments_file is not None:
                pipeline.insert(
                    SegmentSource(
                        name=ifo + "_SegmentSource",
                        source_pad_names=(ifo,),
                        rate=info.input_sample_rate,
                        t0=info.gps_start_time,
                        end=info.gps_end_time,
                        segments=frame_segments[ifo],
                    ),
                    Gate(
                        name=ifo + "_Gate",
                        sink_pad_names=("strain", "control"),
                        source_pad_names=(ifo,),
                        control="control",
                    ),
                    link_map={
                        ifo + "_Gate:snk:strain": source_out_links[ifo],  # type: ignore
                        ifo + "_Gate:snk:control": ifo + "_SegmentSource:src:" + ifo,
                    },
                )
                assert source_out_links is not None
                source_out_links[ifo] = ifo + "_Gate:src:" + ifo

        # Handle GWDataNoiseSource after the loop since it creates all channels at once
        if info.data_source == "gwdata-noise":
            # Prepare channel dict with full channel names for GWDataNoiseSource
            gwdata_channel_dict = {}
            for ifo in info.ifos:
                # If channel name doesn't start with IFO:, add it
                channel = info.channel_dict[ifo]
                if not channel.startswith(f"{ifo}:"):
                    channel = f"{ifo}:{channel}"
                gwdata_channel_dict[ifo] = channel

            # GWDataNoiseSource handles all channels in a single source
            gwdata_source = GWDataNoiseSource(
                name="GWDataNoiseSource",
                channel_dict=gwdata_channel_dict,
                t0=info.gps_start_time,
                end=info.gps_end_time,
                real_time=info.real_time,
                verbose=verbose,
            )
            pipeline.insert(gwdata_source)

            # Set up the output links for each channel
            for ifo in info.ifos:
                # Use the full channel name from gwdata_channel_dict
                channel_name = gwdata_channel_dict[ifo]
                pad_names[ifo] = channel_name
                source_out_links[ifo] = f"GWDataNoiseSource:src:{channel_name}"

            # Set the input sample rate from the source
            if info.input_sample_rate is None:
                # GWDataNoiseSource uses detector-specific rates, default to 16384 Hz
                info.input_sample_rate = 16384

    if source_latency:
        for ifo in info.ifos:
            pipeline.insert(
                Latency(
                    name=ifo + "_SourceLatency",
                    sink_pad_names=("data",),
                    source_pad_names=("latency",),
                    route=ifo + "_datasource_latency",
                    interval=1,
                ),
                link_map={
                    ifo
                    + "_SourceLatency:snk:data": source_out_links[ifo]  # type: ignore
                },
            )
            assert source_latency_links is not None
            source_latency_links[ifo] = ifo + "_SourceLatency:src:latency"

    return source_out_links, source_latency_links