VisionClaw/concatenate-pipeline.sh at main · DreamLab-AI/VisionClaw · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#!/bin/bash
# VisionFlow Pipeline Context Concatenation Tool
# Generated by Hive Mind Swarm - 2025-01-03
#
# Usage: ./concatenate-pipeline.sh
# Output: ./TotalContext.txt (relative to the directory the script is run from)

# Strict mode: stop on the first failing command, on any reference to an
# unset variable, and when any stage of a pipeline fails.
set -euo pipefail

# All paths hang off PROJECT_ROOT, which is the directory the script is
# invoked from. FILE_LIST names the files to concatenate; OUTPUT_FILE is the
# combined result.
PROJECT_ROOT="."
FILE_LIST="$PROJECT_ROOT/pipeline-files.txt"
OUTPUT_FILE="$PROJECT_ROOT/TotalContext.txt"

# Startup banner showing the resolved locations.
cat <<EOF
================================================================
VisionFlow Data Pipeline - Context Concatenation
================================================================

Project Root: $PROJECT_ROOT
File List:    $FILE_LIST
Output:       $OUTPUT_FILE

EOF

# Without the file list there is nothing to do. The diagnostic goes to stderr
# so it stays visible when stdout is piped or redirected.
if [[ ! -f "$FILE_LIST" ]]; then
    echo "ERROR: File list not found: $FILE_LIST" >&2
    exit 1
fi

# Start the output from scratch: a single truncating redirect creates or
# empties the file and writes the banner plus the static table of contents.
# The quoted 'EOF' delimiter keeps the text literal (no shell expansion).
cat > "$OUTPUT_FILE" <<'EOF'
################################################################################
#                                                                              #
#                    VISIONFLOW DATA PIPELINE - TOTAL CONTEXT                 #
#                                                                              #
#              From GitHub Sync → GPU Physics → Client Visualization          #
#                                                                              #
#              Generated: 2025-01-03 by Hive Mind Swarm Analysis              #
#                                                                              #
################################################################################

TABLE OF CONTENTS:
==================

PHASE 1: GitHub Synchronization & Data Ingestion
PHASE 2: Parsing & Extraction
PHASE 3: Ontology Enrichment & Classification
PHASE 4: Database Persistence
PHASE 5: Graph Loading & Actor Orchestration
PHASE 6: GPU Physics Computation
PHASE 7: WebSocket Streaming
PHASE 8: Client-Side Visualization
SUPPORTING: Infrastructure & Utilities

Total Files Processed: (will be calculated below)

================================================================================

EOF

# Initialize counters. They are plain globals because the statistics written
# after the loop read them.
total_files=0
found_files=0
missing_files=0

#######################################
# Append one existing file to the output, framed by a metadata banner and an
# end-of-file marker.
# Globals:   OUTPUT_FILE (read; the file is appended to)
# Arguments: $1 - path exactly as written in the file list (used in labels)
#            $2 - path used to open the file
#######################################
append_file_section() {
    local label=$1 full_path=$2
    local file_size line_count
    local rule='################################################################################'

    # Byte and newline counts for the banner; fall back to 0 when wc fails.
    file_size=$(wc -c < "$full_path" 2>/dev/null || echo "0")
    line_count=$(wc -l < "$full_path" 2>/dev/null || echo "0")

    # Separator and metadata
    {
        printf '\n%s\n' "$rule"
        printf '# FILE: %s\n' "$label"
        printf '# FULL PATH: %s\n' "$full_path"
        printf '# SIZE: %s bytes\n' "$file_size"
        printf '# LINES: %s\n' "$line_count"
        printf '%s\n\n' "$rule"
    } >> "$OUTPUT_FILE"

    # File contents; an unreadable file leaves an inline marker instead of
    # aborting the whole run.
    cat "$full_path" >> "$OUTPUT_FILE" 2>/dev/null || echo "ERROR: Could not read file" >> "$OUTPUT_FILE"

    # Footer separator
    {
        printf '\n'
        printf '# END OF FILE: %s\n' "$label"
        printf '\n'
    } >> "$OUTPUT_FILE"
}

#######################################
# Walk the file list and concatenate every listed file into the output.
# Blank lines and '#' comments are skipped, except that comment lines which
# start with '#' in column 0 and contain "PHASE" are copied to the output as
# section headers. Listed files that do not exist are reported on stdout and
# recorded in the output.
# Globals:   FILE_LIST, PROJECT_ROOT (read)
#            OUTPUT_FILE (read; the file is appended to)
#            total_files, found_files, missing_files (updated)
# Outputs:   progress line every 10 files and missing-file warnings on stdout
#######################################
process_file_list() {
    local line full_path

    # `|| [[ -n "$line" ]]` keeps a final entry that lacks a trailing newline;
    # read returns non-zero for it but still fills $line.
    while IFS= read -r line || [[ -n "$line" ]]; do
        # Drop the carriage return left by CRLF line endings; otherwise every
        # path would end in \r and be reported as missing.
        line=${line%$'\r'}

        # Skip empty and whitespace-only lines.
        if [[ "$line" =~ ^[[:space:]]*$ ]]; then
            continue
        fi

        # Comments are skipped; phase headers are echoed into the output.
        if [[ "$line" =~ ^[[:space:]]*# ]]; then
            if [[ "$line" =~ ^#.*PHASE.*$ ]]; then
                {
                    printf '\n'
                    printf '%s\n' "$line"
                    printf '\n'
                } >> "$OUTPUT_FILE"
            fi
            continue
        fi

        total_files=$((total_files + 1))

        # Entries are relative to the project root.
        full_path="$PROJECT_ROOT/$line"

        if [[ -f "$full_path" ]]; then
            found_files=$((found_files + 1))
            append_file_section "$line" "$full_path"

            # Progress indicator
            if (( found_files % 10 == 0 )); then
                echo "Processed $found_files files..."
            fi
        else
            missing_files=$((missing_files + 1))
            echo "WARNING: File not found: $line" | tee -a "$OUTPUT_FILE"
        fi
    done < "$FILE_LIST"
}

process_file_list

#######################################
# Render a byte count for display, e.g. 2048 -> 2.0KiB.
# Uses numfmt (GNU coreutils) when it is installed and succeeds; otherwise
# falls back to the raw count with a "B" suffix so the report still shows a
# size instead of an error and a blank.
# Arguments: $1 - byte count (surrounding whitespace is ignored)
# Outputs:   formatted size on stdout
#######################################
format_output_size() {
    # Some wc implementations pad their output with spaces; strip them.
    local bytes=${1//[[:space:]]/}
    local formatted

    if command -v numfmt >/dev/null 2>&1 \
        && formatted=$(numfmt --to=iec-i --suffix=B "$bytes" 2>/dev/null); then
        printf '%s\n' "$formatted"
    else
        printf '%sB\n' "$bytes"
    fi
}

# Add footer statistics. The size and line count are sampled while this
# heredoc is expanded, so they describe the output before the footer itself
# is appended.
cat >> "$OUTPUT_FILE" << EOF

################################################################################
#                              CONCATENATION COMPLETE                          #
################################################################################

Statistics:
-----------
Total Files Listed:     $total_files
Files Successfully Processed: $found_files
Files Not Found:        $missing_files

Output File: $OUTPUT_FILE
Output Size: $(format_output_size "$(wc -c < "$OUTPUT_FILE")")
Total Lines: $(wc -l < "$OUTPUT_FILE")

Generated: $(date '+%Y-%m-%d %H:%M:%S %Z')

################################################################################
EOF

# Final report on the console; these figures are sampled after the footer
# has been written.
echo ""
echo "================================================================"
echo "Concatenation Complete!"
echo "================================================================"
echo ""
echo "Statistics:"
echo "  Total files listed:     $total_files"
echo "  Files found & processed: $found_files"
echo "  Files not found:        $missing_files"
echo ""
echo "Output file: $OUTPUT_FILE"
echo "Output size: $(format_output_size "$(wc -c < "$OUTPUT_FILE")")"
echo "Total lines: $(wc -l < "$OUTPUT_FILE")"
echo ""

# Point the user at the output when any listed file could not be found.
if (( missing_files > 0 )); then
    echo "WARNING: $missing_files files were not found!"
    echo "Check the output file for details."
    echo ""
fi

echo "To view the output:"
echo "  less $OUTPUT_FILE"
echo ""
echo "To search within output:"
echo "  grep -n 'pattern' $OUTPUT_FILE"
echo ""