I am new to Csound and to PureData. We have used PureData to implement a gesture-controlled sound generator - our emphasis is on minimizing latency between gesture (sent via OSC to Pd) and sound. I like Csound’s programmatic approach much more - but I have read here and there that PureData has more of a real-time emphasis and so may be “quicker” to respond. Is this correct? I would use Csound if I could… and I thought I would get some feedback here before/in addition to testing it myself.
There should be absolutely no difference in response time between Pd and Csound. If you prefer the programmatic approach of Csound, then I think you have your answer.
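In both cases the effective latency comes mostly from the audio buffer sizes you configure, not from the language itself. As a rough sketch (the values are only illustrative, not a recommendation from this thread), in Csound via the ctcsound Python bindings you could ask for small buffers like this:
# Hedged sketch: smaller software/hardware buffers generally mean lower output latency.
import ctcsound
cs = ctcsound.Csound()
cs.setOption('-odac')  # real-time audio output
cs.setOption('-b128')  # software buffer size (illustrative value)
cs.setOption('-B512')  # hardware buffer size (illustrative value)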
@suresh I don’t know which gestures you are detecting, but since Csound has very nice Python bindings (ctcsound.py), you could run and control Csound instruments directly from a Python script, and then use e.g. the MediaPipe library (GitHub - google-ai-edge/mediapipe: Cross-platform, customizable ML solutions for live and streaming media.) for tracking hands/fingers. Below is a script that tracks the index finger of the left hand and uses it to control amplitude (y axis) and frequency (x axis):
CAMERA_ID = 0 # if there are multiple cameras connected, with this you can select which one to use
# MediaPipe parameters
MAX_NUM_HANDS = 2
MODEL_COMPLEXITY = 0
MIN_DETECTION_CONFIDENCE = 0.5
MIN_TRACKING_CONFIDENCE = 0.5
# ------------------------------------------------------------
# Imports
import cv2
import mediapipe as mp
import ctcsound
# ------------------------------------------------------------
# MediaPipe setup
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands
# ------------------------------------------------------------
# ------------------------------------------------------------
# Csound setup
# ------------------------------------------------------------
orc = '''
ksmps = 128
nchnls = 2
0dbfs = 1
instr 1
; IO channels
kamp chnget "amp" ; reading value from amp input channel
kfreq chnget "freq" ; reading value from freq input channel
aout poscil kamp, kfreq ; audio synthesis
outs aout, aout ; sending audio to speakers
endin
'''
cs = ctcsound.Csound()
pt = None # csound performance thread
cs.setOption('-odac')
cs.setOption('-b1024') # SW buffer size
cs.setOption('-B1024') # HW buffer size
cs.compileOrc(orc)
cs.readScore('i 1 0 10000') # keep instrument 1 running for a long time (10000 seconds)
cs.start()
pt = ctcsound.CsoundPerformanceThread(cs.csound())
pt.play()
# Create control IO channels
def createChannel(channelName):
    chn, _ = cs.channelPtr(channelName,
        ctcsound.CSOUND_CONTROL_CHANNEL | ctcsound.CSOUND_INPUT_CHANNEL)
    return chn
ampChannel = createChannel("amp") # uses utility method to create a channel and get numpy array to write to
freqChannel = createChannel("freq")
# ------------------------------------------------------------
# ------------------------------------------------------------
# Processing part
pressed_button = False
cap = cv2.VideoCapture(CAMERA_ID)
with mp_hands.Hands(
        static_image_mode = False,
        max_num_hands = MAX_NUM_HANDS,
        model_complexity = MODEL_COMPLEXITY,
        min_detection_confidence = MIN_DETECTION_CONFIDENCE,
        min_tracking_confidence = MIN_TRACKING_CONFIDENCE) as hands:
    # Main loop
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            continue
        # To improve performance, optionally mark the image as not writeable to pass by reference.
        image.flags.writeable = False
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(image)
        image_height, image_width, _ = image.shape
        # Iterate through hands, draw hand landmarks on the image and write to Csound control channels
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        ampChannel[0] = 0.0 # if no hand is detected, set amp to 0 to turn off the sound
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    image,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())
                handIndex = results.multi_hand_landmarks.index(hand_landmarks)
                handLabel = results.multi_handedness[handIndex].classification[0].label
                # the image is mirrored, so we need to swap the left and right hand labels
                if handLabel == 'Left': handLabel = 'Right'
                elif handLabel == 'Right': handLabel = 'Left'
                # Write to the control channels
                if handLabel == 'Left':
                    freqChannel[0] = 400 - abs(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x)*300
                    ampChannel[0] = 1 - abs(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y)
        # Flip the image horizontally for a selfie-view display.
        cv2.imshow('MediaPipe Hands', cv2.flip(image, 1))
        if cv2.waitKey(5) & 0xFF == 27:
            break
cap.release()
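Note that the script doesn't stop Csound explicitly when you quit the preview window with Esc. A minimal, optional clean-up (using the pt and cs objects already created above) could look like this:
cv2.destroyAllWindows() # close the OpenCV preview window
pt.stop()               # ask the Csound performance thread to stop
pt.join()               # wait for the thread to finish
cs.cleanup()            # release Csound's resources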