
    |jwI                       d Z ddlmZ ddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlmZ ddlmZ dZdZdZd	Zd
Zd
Zd	Zg dZg dZ e
j2                  dj5                  e            Z e
j2                  dj5                  e            ZddZddZd dZd!dZ d"dZ!d#dZ"d#dZ#d$dZ$d%dZ%d&dZ&d'dZ'd(dZ(d)dZ)d*dZ*d+dZ+e,dk(  r ejZ                   e+              yy),u  L1 + L2: 从 Claude Code session jsonl 抽取结构化 turn，按规则筛工作流信号点。

L1 抽取：jsonl → 结构化 turn (role, text, tool_calls, ts, turn_gap_seconds)
         去重 system-reminder / 裁 tool_result 内容 / 敏感词过滤
L2 信号筛：跑规则匹配，输出候选信号点（含 ±3 turn 上下文）

输出：JSON 给 run.py 喂 LLM；也可直接 print --human 看人话调试。
    )annotationsN)Path)Any   i  
         )u	   为什么u   不对u   重做u   还有别的u   还有其他u	   别这样u   不要这样u	   停一下u   等等u   打住u	   你看错u   你理解错u	   再想想)zsk-[a-zA-Z0-9]{32,}zghp_[a-zA-Z0-9]{36,}zghs_[a-zA-Z0-9]{36,}zffai_[A-Za-z0-9_-]{20,}z1eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+z/-----BEGIN (RSA |EC |OPENSSH |)PRIVATE KEY-----|c                    d| g g dS )u   空 signals payload — 当 jsonl 缺失 / opt-out / 抽取失败时占位。

    单源定义避免 run.py / run_all.py 多处重复字面量。改字段时只动这里。
    F	opted_outdatesessionssignals )date_strs    `/home/chentao/Code/ffworkspace-wt/.agent-pool/slot-3/scripts/ops/daily-report/extract_signals.pyempty_payloadr   8   s    
 bRPP    c                &    | j                  dd      S )u,   Claude Code 用 / → - 编码 cwd 路径。/-)replace)	repo_roots    r   encode_project_pathr   C   s    S#&&r   c                   | dz  dz  }|j                         sg S t        j                  j                  |      }g }|j	                  d      D ]u  }	 t        j
                  j                  |j                         j                        j                         }t        ||z
  j                        dk  se|j                  |       w t        |      S # t        $ r Y w xY w)u7   所有项目目录下当日有改动的 jsonl 文件。z.claudeprojectsz	*/*.jsonl   )is_dirdtr   fromisoformatglobdatetimefromtimestampstatst_mtimeOSErrorabsdaysappendsorted)homer   	proj_roottarget_datefilesjsonlmtimes          r   discover_jsonl_filesr2   H   s    y :-I	''''1KE,  	KK--ejjl.C.CDIIKE #))*a/LL  %=  		s   AC	CCc                   d}t        |t              r| dv rt        |j                  dd            }n| dv r!t        |j                  dd            dd }|}nq| dv rt        |j                  d	d            dd
 }nNt	        j
                  t        j                  |dt              j                               j                         dd }|  d| S )u_   为 tool 调用生成简短指纹：name + 关键参数 hash。Edit/Read/Write 用 file_path。 )EditReadWriteNotebookEdit	file_path)BashcommandNP   )Greppattern<   T)	sort_keysdefault   z::)

isinstancedictstrgethashlibmd5jsondumpsencode	hexdigest)	tool_name
tool_inputkey_argscmds       r   args_signaturerQ   Z   s    H*d#AA:>>+r:;H)#jnnY34Sb9CH)#:>>)R89#2>H{{

:sCJJLik"1H [8*%%r   c                L   t        | t              r| S t        | t              syg }| D ]\  }t        |t              s|j	                  d      dk(  s)|j	                  dd      }t        |t              sL|j                  |       ^ dj                  |      j                         S )u]   从 message.content (list of blocks) 抽 plain text，跳过 thinking/tool_use/tool_result。r4   typetext
)rC   rE   listrD   rF   r*   joinstrip)contentpartsblkts       r   extract_text_from_contentr]   l   s    '3gt$E  c4 SWWV_%>#A!S!Q	 
 99U!!##r   c                b   t        | t              sg S g }| D ]  }t        |t              s|j                  d      dk(  s)|j	                  |j                  d      |j                  dd      |j                  d      t        |j                  dd      |j                  d            d        |S )u4   从 assistant message.content 抽 tool_use blocks。rS   tool_useidnamer4   input)r`   ra   rb   sig)rC   rV   rD   rF   r*   rQ   )rY   outr[   s      r   extract_tool_callsre   {   s    gt$	C 	c4 SWWV_
%BJJ''$-GGFB/ WWW-)#''&"*=swww?OP		 Jr   c           
     B   t        | t              sg S g }| D ]  }t        |t              s|j                  d      dk(  s*|j                  d      }t        |t              rYd}|D ]Q  }t        |t              s|j                  d      dk(  s)||j                  dd      z  }t	        |      t
        kD  sQ n nt        |xs d      }t	        |      }|dt
         }|j                  |j                  d      t        |j                  dd	            ||d
        |S )u2   从 user message.content 抽 tool_result blocks。rS   tool_resultrY   r4   rT   Ntool_use_idis_errorF)rh   ri   result_headresult_total_len)	rC   rV   rD   rF   lenRESULT_HEAD_CHARSrE   r*   bool)rY   rd   r[   raw_cheadsub	total_len
head_truncs           r   extract_tool_resultsrt      s   gt$	C c4 SWWV_%EGGI&E%&  "C!#t,F1J 33t9'88!	" 5;B'D	I001JJJ#&77=#9 $SWWZ%? @#-(1	. Jr   c                >    t        t        j                  |             S )N)rn   	REDACT_REsearch)rT   s    r   is_redactedrx      s    	  &''r   c                   g }	 t        | ddd      5 }|D ]m  }|j                         }|s	 t        j                  |      }|j                  d      }|dvrC|j                  d      }|sW	 t        j                  j                  |j                  dd	            }|j                         j                         |k7  r|j                  d
      xs i }	|	j                  d      }
t        |
      }|rt        |      r|dk(  rt        |
      }g }ng }t!        |
      }|j#                  |j                         |||||j                  d      |j                  d      |j                  d      |j                  dd      d	       p 	 ddd       |S # t        j                  $ r Y w xY w# t        $ r Y w xY w# 1 sw Y   |S xY w# t$        $ r g cY S w xY w)u   读单个 jsonl → 仅当日 turn list (按时间序)。

    每 turn dict 含: ts / role / text / tool_calls / tool_results / session_id / git_branch / cwd
    rzutf-8r   )encodingerrorsrS   )user	assistant	timestampZz+00:00messagerY   r~   	sessionId	gitBranchcwdisSidechainF)	tsrolerT   
tool_callstool_results
session_id
git_branchr   is_sidechainN)openrX   rI   loadsJSONDecodeErrorrF   r    r#   r!   r   
ValueErrorr   	isoformatr]   rx   re   rt   r*   r'   )
jsonl_pathr.   turnsflineobjr\   ts_strr   msgrY   rT   r   r   s                 r   
load_turnsr      s   
 E0*cGIF -	! ,zz|**T*C GGFO11-226>>#x3PQB 779&&(K7ggi(.B''),09K-#!3G!<J#%L!#J#7#@L lln ! $&0(4&)ggk&:&)ggk&:"wwu~(+u(E
A,-	` LS ++  " #-	` L  	sv   G	 F<F*F<./F,C*F<G	 F)%F<(F))F<,	F95F<8F99F<<GG	 G	 	GGc                    d}| D ]N  }	 t         j                  j                  |d         }|d|d<   nt	        ||z
  j                               |d<   |}P y# t        $ r d|d<   Y bw xY w)uA   in-place 填 turn_gap_seconds（跟上一个 turn 的时差）。Nr   turn_gap_seconds)r    r#   r!   r   inttotal_seconds)r   prev_tsr\   curs       r   compute_turn_gapsr      s    "&G 
	++++AdG4C ?$(A !$'w(E(E(G$HA !
  	$(A !	s   "AA('A(c                t   g }g }t        |       D ]  \  }}|j                  dg       D ]  }|d   dv s|j                  d      xs i j                  dd      s3|j                  |f       |t        z
  t	        fd|D              }|t
        k\  sn|j                  d||d	d
         d}t        |       D ]R  \  }}t        d |j                  dg       D              }|r'|dz  }|t        k\  s9|j                  d|d|id
       Qd}T t        |       D ]:  \  }}|j                  d      }	|	s|	t        kD  s$|j                  d|d|	id
       < t        |       D ]S  \  }}|d   dk7  r|j                  dd      }
|
s$t        j                  |
      s:|j                  d|d|
dd id
       U t        |       D cg c]*  \  }}t        d |j                  dg       D              s)|, }}}t        t        |            D ]]  }|D cg c]  }d||   |z
  cxk  rdk  sn n| }}t        |      t        k\  s:|j                  d||   dt        |      id
        n i }|D ]&  }|j                  |d   g       j                  |       ( g }i }|j                  dg       D ](  }|d    d   |vs|d    d   |   d    d   kD  s$||<   * |j                  |j!                                t#        |j                  dg       d! "      }|rr|d   gg}|dd D ]=  }|d#   |d$   d$   d#   z
  dk  r|d$   j                  |       ,|j                  |g       ? |D ]!  }t%        |d% "      }|j                  |       # |j                  dg       }|r|j                  t%        |d& "             |j                  dg       }|j                  |dd'        |j                  dg       }|r|j                  t%        |d( "             |j'                  d) "       |S c c}}w c c}w )*ua   规则匹配出候选信号点。每个 signal 含 type / turn_index / detail / context_window。r   ra   )r5   r7   r8   rb   r9   r4   c              3  @   K   | ]  \  }}|k\  s|k(  sd   yw)r   Nr   ).0idxr   fp	window_los      r   	<genexpr>zl2_signals.<locals>.<genexpr>  s%     YaC9<LQRVXQX!Ys   edit_repeat)r9   count_in_window)rS   
turn_indexdetailr   c              3  >   K   | ]  }|j                  d         yw)ri   NrF   )r   rz   s     r   r   zl2_signals.<locals>.<genexpr>  s     KAaeeJ'Ks   r   r   tool_error_streakstreakr   long_gapgap_secondsr   r}   rT   frustration	text_headNx   c              3  ,   K   | ]  }|d    dk(    yw)ra   AskUserQuestionNr   )r   cs     r   r   zl2_signals.<locals>.<genexpr>B  s     1rUV!F)?P2P1rs      ask_clusterr   rS   r   c                    | d   S Nr   r   xs    r   <lambda>zl2_signals.<locals>.<lambda>d  s
    Q|_ r   keyr   c                    | d   d   S )Nr   r   r   r   s    r   r   zl2_signals.<locals>.<lambda>m  s    (H(= r   c                    | d   d   S )Nr   r   r   r   s    r   r   zl2_signals.<locals>.<lambda>s  s    1X;}+E r   r   c                    | d   d   S )Nr   r   r   r   s    r   r   zl2_signals.<locals>.<lambda>|  s    1X;7H+I r   c                    | d   S r   r   r   s    r   r   zl2_signals.<locals>.<lambda>~  s
    Q|_ r   )	enumeraterF   r*   EDIT_REPEAT_WINDOWsumEDIT_REPEAT_THRESHOLDanyTOOL_ERROR_STREAKTURN_GAP_LONG_SECONDSFRUSTRATION_RErw   rangerl   ASK_USER_QUESTION_THRESHOLD
setdefaultextendvaluesr+   maxsort)r   r   edit_historyir\   callcntr   had_errgaprT   asksjawindowby_typesdeduper_besttesgroupsgbestlgfracr   r   s                             @@r   
l2_signalsr      s   G +-L%  1EE,+ 	DF|@@hhw'-222;C##QG, 22	YLYY//NN$1*+463&O	( F%  1K~r1JKKaKF** 3&'#+V"4 F  %  	1ee&'3..NN&"#,c2	 %  1V9uuVR N))$/NN)"#*D#J7 $E*s$!Qc1rZ[Z_Z_`lnpZq1r.rAsDs3t9 
!;Q$q'A+%:%:!;;v;55NN)"&q'0#f+> 
$ &(G 41V9b)0034 E  "G[[+ x[%W(,= >XAVWhAi iGBK 
LL!" 0"5;T
UC
%(VH:QR 	#AB!==Br
!!!$qc"		#
  	Aq=>DLL	
 
Z	$B	S!EFG 
]B	'B	LLBQ 
]B	'B	S!IJK	JJ,J-L{ t;s   *P/8P/P51P5c           	     t   g }| D ]%  }|d   }t        d|t        z
        }t        t        |      |t        z   dz         }g }t	        ||      D ]  }||   }	||z
  |	d   |	d   r|	d   dd t        |	d         dkD  rdnd	z   nd	d
}
|	j                  d      r|	d   D cg c]  }|d   	 c}|
d<   |	j                  d      rA|	d   D cg c]/  }|j                  d      |j                  d      r|d   dd nd	d1 c}|
d<   |j                  |
        |j                  i |d|i       ( |S c c}w c c}w )u4   给每个信号点附 ±N turn 的精简上下文。r   r   r   r   rT   Ni,  u   …r4   )relr   rT   r   rc   toolsr   ri   rj   r<   )ri   rp   resultscontext)r   SIGNAL_CONTEXT_TURNSminrl   r   rF   r*   )r   r   rd   r   r   lohir   r   r\   entryr   rz   s                r   add_context_to_signalsr     st   C -lOA,,-UQ!559:r2 	!AaA1u&	WXY_W`64CS6^c5IErRfhE
 uu\"45lO!Dq!E(!Dguu^$ ~.$
  %&EE*$59:}9M- 0" 5SU$i  MM% #	!$ 	

+a+F+,/-0 J "E$s   D04D5c                   |r|j                         rd| g g dS t        ||       }i }|D ]X  }t        ||       }|s|D ]B  }|j                  d      xs |j                  }|j                  |g       j                  |       D Z g }	g }
|j                         D ]  \  }}|j                  d        t        |       t        |      }t        ||      }|	j                  |t        |      |d   d   |d   d   t        d	 |D        d       t        d
 |D        d       t        |      d       |D ]  }|
j                  i |d|i         d| t        |	d       |
dS )NTr   r   c                    | d   S )Nr   r   r   s    r   r   zrun.<locals>.<lambda>  s
    4 r   r   r   r   r   c              3  b   K   | ]'  }|j                  d       s|j                  d        ) yw)r   Nr   r   r\   s     r   r   zrun.<locals>.<genexpr>  s%     #^A!%%P\J]AEE,$7#^   //c              3  b   K   | ]'  }|j                  d       s|j                  d        ) yw)r   Nr   r   s     r   r   zrun.<locals>.<genexpr>  s"     IaAEE%LQUU5\Ir   )r   
turn_countfirst_tslast_tsr   r   signal_countFc                    | d   S )Nr   r   r   s    r   r   zrun.<locals>.<lambda>  s
    1Z= r   )existsr2   r   rF   stemr   r*   itemsr   r   r   r   rl   nextr+   )r   r,   opt_out_flagr/   all_turns_by_sessionr   r   r\   sidsession_payloadsall_signalssigssigs_with_ctxr   s                 r   runr    s   ++-!8PRSS x0E24 ?1h' 	?A%%%/C ++C4;;A>	?	? $& K*002 9
U

(
)% % .tU;!!%j!!HTN 9T?"#^#^`deI5I4P #M 2
	
  	9A7!7\378	9#9( +1HI	 r   c                   | j                  d      rt        d       y t        d| d    d       t        dt        | d          dt        | d	                 | d   D ]Q  }t        d
|d   d d  d|j                  d      xs d d|j                  d      xs dd d  d|d    d|d    
       S t        d       | d	   D ](  }t        d|d    d|d    d|d   d d  d|d           * y ) Nr   z((opted-out via ~/.claude-insight-optout)z
=== Date: r   z ===z
Sessions: r   z, total signals: r   z

[session r   rB   z	] branch=r   ?z cwd=r   r4   2   z turns=r   z	 signals=r   z
--- Signals ---z  [rS   z] turn#r   z	 session=z detail=r   )rF   printrl   )payloadr   rc   s      r   print_humanr    sH   {{;89	Jwv't
,-	Js7:./00A#giFXBYAZ
[\Z  
!L/"1-. /eeL)0S1e8JCR7P6Q R|_%Yq/@.AC	

 

y! 
#f+gc,&7%8	#lBSTVUVBWAX Y(m_&	

r   c                 t   t        j                  d      } | j                  dt        j                  j                         j                         d       | j                  dt        t        j                               d       | j                  dd	
       | j                  ddd       | j                         }t        |j                        }|dz  }t        |j                  ||      }|j                  rt        |       y|j                  rt        |j                        j                  t!        j"                  |dd             t%        d|j                   dt'        |d          dt'        |d          dt(        j*                         yt!        j,                  |t(        j.                  dd       y)Nz5Extract L1+L2 workflow signals from Claude Code jsonl)descriptionz--datezYYYY-MM-DD, default today)rA   helpz--homezuser home, default $HOMEz--outz)write JSON to this path (default: stdout))r  z--human
store_truez,print human-readable summary instead of JSON)actionr  z.claude-insight-optoutr   Fr	   )ensure_asciiindentzwrote z (r   z
 signals, r   z
 sessions))file)argparseArgumentParseradd_argumentr    r   todayr   rE   r   r,   
parse_argsr  humanr  rd   
write_textrI   rJ   r  rl   sysstderrdumpstdout)pargsr,   opt_outr  s        r   mainr)    sO   ,cdANN8RWW]]_%>%>%@GbNcNN8S%5<VNWNN7!LNMNN9\8fNg<<>D		?D--G$))T7+GzzGxxTXX!!$**W5QR"STtxxj3wy'9#:";:c'R\J]F^E__ijqtq{q{|  			'3::E!Dr   __main__)r   rE   returnrD   )r   rE   r+  rE   )r,   r   r   rE   r+  z
list[Path])rM   rE   rN   r   r+  rE   )rY   r   r+  rE   )rY   r   r+  
list[dict])rT   rE   r+  rn   )r   r   r.   rE   r+  r,  )r   r,  r+  None)r   r,  r+  r,  )r   r,  r   r,  r+  r,  )r   rE   r,   r   r  zPath | Noner+  rD   )r  rD   r+  r-  )r+  r   ).__doc__
__future__r   r  r#   r    r"   rG   rI   osrer"  pathlibr   typingr   rm   r   r   r   r   r   r   FRUSTRATION_PATTERNSREDACT_PATTERNScompilerW   r   rv   r   r   r2   rQ   r]   re   rt   rx   r   r   r   r   r  r  r)  __name__exitr   r   r   <module>r9     s   #      	 	 
            CHH%9:;BJJsxx01	Q'
$&$$$>(7t&BJB&R
(0 zCHHTV r   