Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 212 additions & 10 deletions devices/ios.go
Original file line number Diff line number Diff line change
Expand Up @@ -898,15 +898,57 @@ func (d *IOSDevice) StartScreenCapture(config ScreenCaptureConfig) error {
// handle avc format via DeviceKit
if config.Format == "avc" {
if config.OnProgress != nil {
config.OnProgress("Starting DeviceKit for H.264 streaming")
config.OnProgress("Checking DeviceKit status")
}

// start DeviceKit
// Note: passing nil registry since this is internal call from StartScreenCapture
// ScreenCapture callers should have already registered the device via StartAgent
deviceKitInfo, err := d.StartDeviceKit(nil)
if err != nil {
return fmt.Errorf("failed to start DeviceKit: %w", err)
var deviceKitInfo *DeviceKitInfo
var err error

// check if DeviceKit is already running
if d.isDeviceKitRunning() {
utils.Verbose("DeviceKit already running, reusing existing session")

// check if we need to create port forwarders
d.mu.Lock()
hasHTTPForwarder := d.portForwarderDeviceKit != nil && d.portForwarderDeviceKit.IsRunning()
hasStreamForwarder := d.portForwarderAvc != nil && d.portForwarderAvc.IsRunning()
d.mu.Unlock()

if hasHTTPForwarder && hasStreamForwarder {
// reuse existing forwarders
d.mu.Lock()
httpPort, _ := d.portForwarderDeviceKit.GetPorts()
streamPort, _ := d.portForwarderAvc.GetPorts()
d.mu.Unlock()

deviceKitInfo = &DeviceKitInfo{
HTTPPort: httpPort,
StreamPort: streamPort,
}
} else {
// DeviceKit running but we need to create forwarders
deviceKitInfo, err = d.ensureDeviceKitPortForwarders()
if err != nil {
return fmt.Errorf("failed to create port forwarders: %w", err)
}
}

if config.OnProgress != nil {
config.OnProgress("Using existing DeviceKit session")
}
} else {
// DeviceKit not running, start it normally
if config.OnProgress != nil {
config.OnProgress("Starting DeviceKit for H.264 streaming")
}

// start DeviceKit
// Note: passing nil registry since this is internal call from StartScreenCapture
// ScreenCapture callers should have already registered the device via StartAgent
deviceKitInfo, err = d.StartDeviceKit(nil)
if err != nil {
return fmt.Errorf("failed to start DeviceKit: %w", err)
}
}

if config.OnProgress != nil {
Expand Down Expand Up @@ -1224,6 +1266,134 @@ func filterButtons(elements []ScreenElement) []ScreenElement {
return buttons
}

// ensureDeviceKitPortForwarders makes sure a local port forwarder exists for
// each of the two DeviceKit device ports (deviceKitHTTPPort and
// deviceKitStreamPort) and returns the local ports they listen on.
//
// A forwarder that is already stored on the device and reports IsRunning() is
// reused; otherwise a free local port is picked from
// [portRangeStart, portRangeEnd] and a new forwarder is started.
//
// On failure while setting up the stream forwarder, an HTTP forwarder that was
// started by this same call is stopped again; a pre-existing HTTP forwarder is
// left untouched.
func (d *IOSDevice) ensureDeviceKitPortForwarders() (*DeviceKitInfo, error) {
	var httpPort, streamPort int

	// Check and read the port in ONE critical section so the field cannot be
	// replaced (or set to nil) between the IsRunning check and GetPorts.
	d.mu.Lock()
	hasHTTPForwarder := d.portForwarderDeviceKit != nil && d.portForwarderDeviceKit.IsRunning()
	if hasHTTPForwarder {
		httpPort, _ = d.portForwarderDeviceKit.GetPorts()
	}
	d.mu.Unlock()

	// undoHTTP stops the HTTP forwarder only if this call created it. It holds
	// its own reference, so it never dereferences the shared field unlocked.
	undoHTTP := func() {}

	if !hasHTTPForwarder {
		port, err := findAvailablePortInRange(portRangeStart, portRangeEnd)
		if err != nil {
			return nil, fmt.Errorf("failed to find available port for HTTP: %w", err)
		}

		forwarder := ios.NewPortForwarder(d.ID())
		if err := forwarder.Forward(port, deviceKitHTTPPort); err != nil {
			return nil, fmt.Errorf("failed to forward HTTP port: %w", err)
		}

		// publish the forwarder only once it is actually forwarding
		d.mu.Lock()
		d.portForwarderDeviceKit = forwarder
		d.mu.Unlock()

		undoHTTP = func() { _ = forwarder.Stop() }
		httpPort = port
		utils.Verbose("Port forwarding created: localhost:%d -> device:%d (HTTP)", httpPort, deviceKitHTTPPort)
	}

	// same check-and-read pattern for the H.264 stream forwarder
	d.mu.Lock()
	hasStreamForwarder := d.portForwarderAvc != nil && d.portForwarderAvc.IsRunning()
	if hasStreamForwarder {
		streamPort, _ = d.portForwarderAvc.GetPorts()
	}
	d.mu.Unlock()

	if !hasStreamForwarder {
		port, err := findAvailablePortInRange(portRangeStart, portRangeEnd)
		if err != nil {
			undoHTTP()
			return nil, fmt.Errorf("failed to find available port for stream: %w", err)
		}

		// Start the forwarder through a local reference and store it on the
		// device only after Forward succeeded, mirroring the HTTP branch. This
		// avoids using the shared field outside the lock and avoids leaving a
		// forwarder that never started installed on the device.
		forwarder := ios.NewPortForwarder(d.ID())
		if err := forwarder.Forward(port, deviceKitStreamPort); err != nil {
			undoHTTP()
			return nil, fmt.Errorf("failed to forward stream port: %w", err)
		}

		d.mu.Lock()
		d.portForwarderAvc = forwarder
		d.mu.Unlock()

		streamPort = port
		utils.Verbose("Port forwarding created: localhost:%d -> device:%d (H.264 stream)", streamPort, deviceKitStreamPort)
	}

	return &DeviceKitInfo{
		HTTPPort:   httpPort,
		StreamPort: streamPort,
	}, nil
}

// isDeviceKitRunning reports whether DeviceKit looks alive for this device.
//
// It returns true right away when both stored port forwarders (HTTP and
// stream) report that they are running. Otherwise it probes the device: a
// throwaway forwarder is opened to deviceKitStreamPort, a TCP connection is
// made through it, and a single byte must be readable within the read
// deadline. Any failure along the way is logged via utils.Verbose and treated
// as "not running".
func (d *IOSDevice) isDeviceKitRunning() bool {
	// fast path: both of our own forwarders are up
	d.mu.Lock()
	httpUp := d.portForwarderDeviceKit != nil && d.portForwarderDeviceKit.IsRunning()
	streamUp := d.portForwarderAvc != nil && d.portForwarderAvc.IsRunning()
	d.mu.Unlock()

	if httpUp && streamUp {
		utils.Verbose("DeviceKit port forwarders already running")
		return true
	}

	// slow path: probe the stream port on the device through a temporary forwarder
	probePort, err := findAvailablePortInRange(portRangeStart, portRangeEnd)
	if err != nil {
		utils.Verbose("Could not find available port for DeviceKit check: %v", err)
		return false
	}

	probe := ios.NewPortForwarder(d.ID())
	if err := probe.Forward(probePort, deviceKitStreamPort); err != nil {
		utils.Verbose("Could not create test port forwarder: %v", err)
		return false
	}
	// registered first, so it runs last: the connection is closed before the
	// forwarder carrying it is torn down
	defer func() {
		_ = probe.Stop()
	}()

	address := fmt.Sprintf("localhost:%d", probePort)
	conn, err := net.DialTimeout("tcp", address, 2*time.Second)
	if err != nil {
		utils.Verbose("DeviceKit not responding on port %d: %v", deviceKitStreamPort, err)
		return false
	}
	defer func() { _ = conn.Close() }()

	// a successful connect is not enough; require one byte of data within a second
	if err := conn.SetReadDeadline(time.Now().Add(time.Second)); err != nil {
		utils.Verbose("Could not set read deadline: %v", err)
		return false
	}

	var firstByte [1]byte
	if _, err := conn.Read(firstByte[:]); err != nil {
		utils.Verbose("DeviceKit not serving data on port %d: %v", deviceKitStreamPort, err)
		return false
	}

	utils.Verbose("DeviceKit is already running on device port %d", deviceKitStreamPort)
	return true
}

// StartDeviceKit starts the devicekit-ios XCUITest which provides:
// - An HTTP server for tap/dumpUI commands (port 12004)
// - A broadcast extension for H.264 screen streaming (port 12005)
Expand Down Expand Up @@ -1308,16 +1478,23 @@ func (d *IOSDevice) StartDeviceKit(hook *ShutdownHook) (*DeviceKitInfo, error) {
return nil, fmt.Errorf("failed to launch DeviceKit app: %w", err)
}

// Wait for the app to launch and show the broadcast picker
utils.Verbose("Waiting %v for DeviceKit app to launch...", deviceKitAppLaunchTimeout)
time.Sleep(deviceKitAppLaunchTimeout)
// wait for the app to be in foreground
utils.Verbose("Waiting for DeviceKit app to be in foreground...")
err = d.waitForAppInForeground(devicekitMainAppBundleId, deviceKitAppLaunchTimeout)
if err != nil {
// clean up port forwarders on failure
_ = d.portForwarderDeviceKit.Stop()
_ = d.portForwarderAvc.Stop()
return nil, fmt.Errorf("failed to wait for DeviceKit app: %w", err)
}

// Start WebDriverAgent to be able to tap on the screen
err = d.StartAgent(StartAgentConfig{
OnProgress: func(message string) {
utils.Verbose(message)
},
})

if err != nil {
// clean up port forwarders on failure
_ = d.portForwarderDeviceKit.Stop()
Expand Down Expand Up @@ -1350,6 +1527,31 @@ func (d *IOSDevice) StartDeviceKit(hook *ShutdownHook) (*DeviceKitInfo, error) {
}, nil
}

// waitForAppInForeground polls WDA every 200ms until the active app reported by
// GetActiveAppInfo has the given bundleID, or until timeout elapses.
//
// Errors from individual polls do not abort the wait; polling simply continues.
// If the wait times out and the most recent poll failed, that error is wrapped
// into the returned timeout error so the caller can see why the app was never
// observed (for example, WDA not being reachable).
func (d *IOSDevice) waitForAppInForeground(bundleID string, timeout time.Duration) error {
	// a stoppable timer, so it is released as soon as we return early
	deadline := time.NewTimer(timeout)
	defer deadline.Stop()

	ticker := time.NewTicker(200 * time.Millisecond)
	defer ticker.Stop()

	// lastErr is the error of the most recent poll, nil if that poll succeeded
	var lastErr error

	for {
		select {
		case <-deadline.C:
			if lastErr != nil {
				return fmt.Errorf("timeout waiting for app %s to be in foreground: %w", bundleID, lastErr)
			}
			return fmt.Errorf("timeout waiting for app %s to be in foreground", bundleID)
		case <-ticker.C:
			activeApp, err := d.wdaClient.GetActiveAppInfo()
			if err != nil {
				// keep trying, but remember why this attempt failed
				lastErr = err
				continue
			}
			lastErr = nil

			if activeApp.BundleID == bundleID {
				utils.Verbose("App %s is now in foreground", bundleID)
				return nil
			}
		}
	}
}

// findAvailablePortInRange finds an available port in the specified range
func findAvailablePortInRange(start, end int) (int, error) {
for port := start; port <= end; port++ {
Expand Down
59 changes: 57 additions & 2 deletions devices/wda/source.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package wda
import (
"encoding/json"
"fmt"
"strings"
"time"

"github.com/mobile-next/mobilecli/types"
Expand Down Expand Up @@ -92,7 +93,8 @@ func (c *WdaClient) GetSource() (map[string]interface{}, error) {

// GetSourceRaw gets the raw page source from WDA's /source endpoint
func (c *WdaClient) GetSourceRaw() (interface{}, error) {
endpoint := "source?format=json"
startTime := time.Now()
endpoint := "source?format=json&excluded_attributes="

result, err := c.getEndpointWithTimeout(endpoint, 60*time.Second)
if err != nil {
Expand All @@ -104,11 +106,64 @@ func (c *WdaClient) GetSourceRaw() (interface{}, error) {
return nil, fmt.Errorf("no 'value' field found in WDA response")
}

elapsed := time.Since(startTime)
utils.Verbose("GetSourceRaw took %.2f seconds", elapsed.Seconds())

return value, nil
}

// GetSourceRawWithAttributes gets the raw page source with only the specified attributes included.
//
// The request works by exclusion: every name in the known attribute list below
// that does not appear in attributes is sent in the excluded_attributes query
// parameter. Names in attributes that are not in the known list are ignored
// without an error. If every known attribute is requested, no
// excluded_attributes parameter is sent.
//
// It returns the "value" field of the JSON response, or an error if the request
// fails or the response has no "value" field.
func (c *WdaClient) GetSourceRawWithAttributes(attributes []string) (interface{}, error) {
// start of the timing that is logged after a successful fetch
startTime := time.Now()

// all possible attributes that can be excluded
allAttributes := []string{
"type", "value", "name", "label", "enabled", "visible", "accessible", "focused",
"x", "y", "width", "height", "index", "hittable", "bundleId", "processId",
"placeholderValue", "nativeFrame", "traits", "minValue", "maxValue", "customActions",
}

// build excluded list by removing requested attributes from all attributes
excludedAttrs := []string{}
for _, attr := range allAttributes {
include := false
for _, requestedAttr := range attributes {
if attr == requestedAttr {
include = true
break
}
}
if !include {
excludedAttrs = append(excludedAttrs, attr)
}
}

// only add the query parameter when there is something to exclude
excludedStr := ""
if len(excludedAttrs) > 0 {
excludedStr = fmt.Sprintf("&excluded_attributes=%s", strings.Join(excludedAttrs, ","))
}

endpoint := fmt.Sprintf("source?format=json%s", excludedStr)

// same 60 second timeout for the source request as passed here on every call
result, err := c.getEndpointWithTimeout(endpoint, 60*time.Second)
if err != nil {
return nil, fmt.Errorf("failed to get source: %w", err)
}

// the page source is carried in the "value" field of the response
value, ok := result["value"]
if !ok {
return nil, fmt.Errorf("no 'value' field found in WDA response")
}

elapsed := time.Since(startTime)
utils.Verbose("GetSourceRawWithAttributes took %.2f seconds (attributes: %v)", elapsed.Seconds(), attributes)

return value, nil
Comment on lines +115 to 161
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major

🧩 Analysis chain

🌐 Web query:

WDA WebDriverAgent /source endpoint excluded_attributes specification

💡 Result:

WebDriverAgent itself exposes the standard WebDriver endpoint:

  • GET /session/:sessionId/source → returns the XML page source (there is no formal, W3C-defined excluded_attributes query parameter).

In the Appium + WDA ecosystem, “exclude attributes from source” is specified via the Appium XCUITest driver (which then instructs WDA accordingly), using camelCase:

1) mobile: source (recommended; per-call control)

Execute Script:

  • script: mobile: source
  • args:
    • format: xml (default), description, or json
    • excludedAttributes: comma-separated string of attribute names to omit (only applies to xml) [1]

Example args:

{"format":"xml","excludedAttributes":"visible,accessible"}

[1]

2) Persistent setting: pageSourceExcludedAttributes

Set the XCUITest driver setting:

  • pageSourceExcludedAttributes: comma-separated string (defaults to empty). Example: "visible,accessible" [2]

Notes:

  • Appium specifically calls out excluding expensive attributes like visible / accessible as a way to speed up page source retrieval [3].
  • If you are using Appium’s JSON source mode, the doc lists excludable attributes (for that mode) as: frame, enabled, visible, accessible, focused [2].

Sources: [1] [2] [3]


🌐 Web query:

WDA source attributes list accessible visible x y width height

💡 Result:

In WebDriverAgent (WDA) page source as exposed by Appium’s XCUITest driver, the supported element attributes are: name, label, type, visible, focused, accessible, enabled, selected, index, rect, value (and hittable, but it’s not included in the XML source for performance reasons). [1]

For position/size, WDA exposes them via rect, which contains x, y, width, height (derived from the element’s frame). [1]

So if you’re looking for: accessible, visible, x, y, width, height → use accessible, visible, and rect.{x,y,width,height}. [1]

Sources: [1]


🏁 Script executed:

# Search the codebase for WDA source-related code
find . -type f -name "*.go" | xargs grep -l "source" | head -20

Repository: mobile-next/mobilecli

Length of output: 249


🏁 Script executed:

# Look for any documentation or comments about WDA attributes
rg -i "wda.*attribute|excluded.*attribute" --type go

Repository: mobile-next/mobilecli

Length of output: 464


🏁 Script executed:

# Check the specific file and surrounding context
fd "source.go" --type f

Repository: mobile-next/mobilecli

Length of output: 86


🏁 Script executed:

# Read the source.go file to understand the current implementation
cat -n ./devices/wda/source.go

Repository: mobile-next/mobilecli

Length of output: 6890


🏁 Script executed:

# Check if there are tests for this method
fd -e "test.go" -e "_test.go" | xargs grep -l "GetSourceRawWithAttributes" 2>/dev/null

Repository: mobile-next/mobilecli

Length of output: 49


🏁 Script executed:

# Look for any other source-related methods to understand the pattern
rg -A 20 "func.*Source" devices/wda/source.go --type go

Repository: mobile-next/mobilecli

Length of output: 3309


🏁 Script executed:

# Check how GetSourceRawWithAttributes is being called
rg "GetSourceRawWithAttributes" --type go

Repository: mobile-next/mobilecli

Length of output: 584


🏁 Script executed:

# Check if there are any other references to excluded_attributes in the codebase
rg "excluded_attributes|excludedAttributes" --type go

Repository: mobile-next/mobilecli

Length of output: 252


🏁 Script executed:

# Look for any WDA client implementation details
rg "getEndpointWithTimeout" devices/wda/ -A 5 --type go | head -50

Repository: mobile-next/mobilecli

Length of output: 1780


🏁 Script executed:

# Check the types file to understand the API better
find . -name "*.go" -path "*/types/*" | head -5

Repository: mobile-next/mobilecli

Length of output: 82


🏁 Script executed:

# Search for any WDA configuration or documentation
fd -e "README" -e "*.md" | xargs grep -l -i "wda\|webdriver" 2>/dev/null

Repository: mobile-next/mobilecli

Length of output: 49


🏁 Script executed:

# Check the requests.go file to understand how the endpoint is called
cat -n ./devices/wda/requests.go | head -100

Repository: mobile-next/mobilecli

Length of output: 3394


🏁 Script executed:

# Search for any usage patterns or tests that might show actual WDA behavior
rg "excluded_attributes" --type go -B 3 -A 3

Repository: mobile-next/mobilecli

Length of output: 871


🏁 Script executed:

# Check if there's any configuration or constants related to WDA attributes
rg "allAttributes\|attributes.*=" --type go

Repository: mobile-next/mobilecli

Length of output: 47


🏁 Script executed:

# Search for any comments or documentation about the attribute names
rg -i "attribute|excluded" devices/wda/ --type go

Repository: mobile-next/mobilecli

Length of output: 1483


🏁 Script executed:

# Check if there are integration tests or examples showing how GetSourceRawWithAttributes is used
find . -name "*.go" -type f | xargs grep -B 5 -A 5 "GetSourceRawWithAttributes\|excludedAttributes" 2>/dev/null | head -60

Repository: mobile-next/mobilecli

Length of output: 1898


🏁 Script executed:

# Look at the commented line in GetSourceElements to see the intended usage
sed -n '164,170p' ./devices/wda/source.go

Repository: mobile-next/mobilecli

Length of output: 384


Add validation for requested attribute names against the supported WDA attributes list.

The method silently accepts unknown or misspelled attribute names and then ignores them, so a typo can cause a required field to be excluded and partial or unexpected data to be returned. Implement validation that returns an error for any requested attribute that is not in the allAttributes list. Additionally, refactor the nested loop to use sets for better performance.

🧩 Suggested refactor
-	// build excluded list by removing requested attributes from all attributes
-	excludedAttrs := []string{}
-	for _, attr := range allAttributes {
-		include := false
-		for _, requestedAttr := range attributes {
-			if attr == requestedAttr {
-				include = true
-				break
-			}
-		}
-		if !include {
-			excludedAttrs = append(excludedAttrs, attr)
-		}
-	}
+	// build excluded list by removing requested attributes from all attributes
+	allAttrSet := map[string]struct{}{}
+	for _, attr := range allAttributes {
+		allAttrSet[attr] = struct{}{}
+	}
+
+	requested := map[string]struct{}{}
+	for _, attr := range attributes {
+		if _, ok := allAttrSet[attr]; !ok {
+			return nil, fmt.Errorf("unknown WDA source attribute: %s", attr)
+		}
+		requested[attr] = struct{}{}
+	}
+
+	excludedAttrs := make([]string, 0, len(allAttributes))
+	for _, attr := range allAttributes {
+		if _, ok := requested[attr]; !ok {
+			excludedAttrs = append(excludedAttrs, attr)
+		}
+	}
🤖 Prompt for AI Agents
In `@devices/wda/source.go` around lines 115 - 161, GetSourceRawWithAttributes
currently ignores invalid attribute names; change it to validate each entry in
the input attributes slice against the allAttributes list and return an error
listing any unknown attributes instead of silently continuing. Replace the
nested loop membership test by building a set/map from allAttributes (e.g.,
allowedAttrs map[string]struct{}) and checking requested attributes against that
map, then build excludedAttrs from allowedAttrs minus requested ones; keep
existing behavior for building excludedStr, calling getEndpointWithTimeout, and
logging.

}

func (c *WdaClient) GetSourceElements() ([]types.ScreenElement, error) {
value, err := c.GetSourceRaw()
// only fetch the attributes we actually use
value, err := c.GetSourceRawWithAttributes([]string{"type", "name", "label", "value", "visible", "x", "y", "width", "height"})
if err != nil {
return nil, err
}
Expand Down
Loading